Project import generated by Copybara.

GitOrigin-RevId: 72ff4ae24943c2ccf9905bc9e516042b0aa3dd86
This commit is contained in:
MediaPipe Team 2020-04-13 20:04:17 -04:00 committed by chuoling
parent 4c68eb4a70
commit 16e5d7242d
112 changed files with 4762 additions and 217 deletions

View File

@ -9,26 +9,32 @@
## ML Solutions in MediaPipe
* [Face Detection](mediapipe/docs/face_detection_mobile_gpu.md) [[Web Demo]](https://viz.mediapipe.dev/runner/demos/face_detection/face_detection.html)
* [Face Detection](mediapipe/docs/face_detection_mobile_gpu.md) [(web demo)](https://viz.mediapipe.dev/runner/demos/face_detection/face_detection.html)
* [Face Mesh](mediapipe/docs/face_mesh_mobile_gpu.md)
* [Hand Detection](mediapipe/docs/hand_detection_mobile_gpu.md)
* [Hand Tracking](mediapipe/docs/hand_tracking_mobile_gpu.md) [(web demo)](https://viz.mediapipe.dev/runner/demos/hand_tracking/hand_tracking.html)
* [Multi-hand Tracking](mediapipe/docs/multi_hand_tracking_mobile_gpu.md)
* [Hand Tracking](mediapipe/docs/hand_tracking_mobile_gpu.md) [[Web Demo]](https://viz.mediapipe.dev/runner/demos/hand_tracking/hand_tracking.html)
* [Hair Segmentation](mediapipe/docs/hair_segmentation_mobile_gpu.md) [[Web Demo]](https://viz.mediapipe.dev/runner/demos/hair_segmentation/hair_segmentation.html)
* [Hair Segmentation](mediapipe/docs/hair_segmentation_mobile_gpu.md) [(web demo)](https://viz.mediapipe.dev/runner/demos/hair_segmentation/hair_segmentation.html)
* [Object Detection](mediapipe/docs/object_detection_mobile_gpu.md)
* [Object Detection and Tracking](mediapipe/docs/object_tracking_mobile_gpu.md)
* [Objectron: 3D Object Detection and Tracking](mediapipe/docs/objectron_mobile_gpu.md)
* [AutoFlip](mediapipe/docs/autoflip.md)
* [AutoFlip: Intelligent Video Reframing](mediapipe/docs/autoflip.md)
![face_detection](mediapipe/docs/images/mobile/face_detection_android_gpu_small.gif)
![multi-hand_tracking](mediapipe/docs/images/mobile/multi_hand_tracking_android_gpu_small.gif)
![hand_tracking](mediapipe/docs/images/mobile/hand_tracking_3d_android_gpu_small.gif)
![face_mesh](mediapipe/docs/images/mobile/face_mesh_android_gpu_small.gif)
![hand_tracking](mediapipe/docs/images/mobile/hand_tracking_android_gpu_small.gif)
![multi-hand_tracking](mediapipe/docs/images/mobile/multi_hand_tracking_3d_android_gpu_small.gif)
![hair_segmentation](mediapipe/docs/images/mobile/hair_segmentation_android_gpu_small.gif)
![object_detection](mediapipe/docs/images/mobile/object_detection_android_gpu_small.gif)
![object_tracking](mediapipe/docs/images/mobile/object_tracking_android_gpu_small.gif)
![objectron_shoes](mediapipe/docs/images/mobile/objectron_shoe_android_gpu_small.gif)
![objectron_chair](mediapipe/docs/images/mobile/objectron_chair_android_gpu_small.gif)
## Installation
Follow these [instructions](mediapipe/docs/install.md).
## Getting started
See mobile, desktop and Google Coral [examples](mediapipe/docs/examples.md).
See mobile, desktop, web and Google Coral [examples](mediapipe/docs/examples.md).
Check out some web demos [[Edge detection]](https://viz.mediapipe.dev/runner/demos/edge_detection/edge_detection.html) [[Face detection]](https://viz.mediapipe.dev/runner/demos/face_detection/face_detection.html) [[Hand Tracking]](https://viz.mediapipe.dev/runner/demos/hand_tracking/hand_tracking.html)
@ -40,10 +46,14 @@ Check out the [Examples page](https://mediapipe.readthedocs.io/en/latest/example
## Visualizing MediaPipe graphs
A web-based visualizer is hosted on [viz.mediapipe.dev](https://viz.mediapipe.dev/). Please also see instructions [here](mediapipe/docs/visualizer.md).
## Google Open Source Code search
Search the MediaPipe GitHub repository using [Google Open Source code search](https://t.co/LSZnbMUUnT?amp=1).
## Videos
* [YouTube Channel](https://www.youtube.com/channel/UCObqmpuSMx-usADtL_qdMAw)
## Publications
* [Alfred Camera: Smart camera features using MediaPipe](https://developers.googleblog.com/2020/03/alfred-camera-smart-camera-features-using-mediapipe.html)
* [MediaPipe Objectron: Real-time 3D Object Detection on Mobile Devices](https://mediapipe.page.link/objectron-aiblog)
* [AutoFlip: An Open Source Framework for Intelligent Video Reframing](https://mediapipe.page.link/autoflip)
* [Google Developer Blog: MediaPipe on the Web](https://mediapipe.page.link/webdevblog)
@ -52,6 +62,7 @@ A web-based visualizer is hosted on [viz.mediapipe.dev](https://viz.mediapipe.de
* [MediaPipe: A Framework for Building Perception Pipelines](https://arxiv.org/abs/1906.08172)
## Events
* [MediaPipe Seattle Meetup, Google Building Waterside, 13 Feb 2020](https://mediapipe.page.link/seattle2020)
* [AI Nextcon 2020, 12-16 Feb 2020, Seattle](http://aisea20.xnextcon.com/)
* [MediaPipe Madrid Meetup, 16 Dec 2019](https://www.meetup.com/Madrid-AI-Developers-Group/events/266329088/)
* [MediaPipe London Meetup, Google 123 Building, 12 Dec 2019](https://www.meetup.com/London-AI-Tech-Talk/events/266329038)

View File

@ -184,23 +184,14 @@ const float SpectrogramCalculator::kLnPowerToDb = 4.342944819032518;
use_local_timestamp_ = spectrogram_options.use_local_timestamp();
if (spectrogram_options.frame_duration_seconds() <= 0.0) {
::mediapipe::InvalidArgumentErrorBuilder(MEDIAPIPE_LOC)
<< "Invalid or missing frame_duration_seconds.\n"
"frame_duration_seconds: "
<< spectrogram_options.frame_overlap_seconds();
// TODO: return an error.
}
if (spectrogram_options.frame_overlap_seconds() >=
spectrogram_options.frame_duration_seconds()) {
::mediapipe::InvalidArgumentErrorBuilder(MEDIAPIPE_LOC)
<< "Invalid frame_overlap_seconds.\nframe_overlap_seconds: "
<< spectrogram_options.frame_overlap_seconds()
<< "\nframe_duration_seconds: "
<< spectrogram_options.frame_duration_seconds();
// TODO: return an error.
}
if (spectrogram_options.frame_overlap_seconds() < 0.0) {
::mediapipe::InvalidArgumentErrorBuilder(MEDIAPIPE_LOC)
<< "Frame_overlap_seconds is < 0.0.\nframe_overlap_seconds: "
<< spectrogram_options.frame_overlap_seconds();
// TODO: return an error.
}
TimeSeriesHeader input_header;
@ -212,9 +203,7 @@ const float SpectrogramCalculator::kLnPowerToDb = 4.342944819032518;
if (!spectrogram_options.allow_multichannel_input() &&
num_input_channels_ != 1) {
::mediapipe::InvalidArgumentErrorBuilder(MEDIAPIPE_LOC)
<< "The current setting only supports single-channel input. Please set "
"allow_multichannel_input.\n";
// TODO: return an error.
}
frame_duration_samples_ =
@ -293,10 +282,7 @@ const float SpectrogramCalculator::kLnPowerToDb = 4.342944819032518;
const Matrix& input_stream = cc->Inputs().Index(0).Get<Matrix>();
if (input_stream.rows() != num_input_channels_) {
::mediapipe::InvalidArgumentErrorBuilder(MEDIAPIPE_LOC)
<< "Number of input channels do not correspond to the number of rows "
<< "in the input matrix: " << num_input_channels_ << "channels vs "
<< input_stream.rows() << " rows";
// TODO: return an error.
}
cumulative_input_samples_ += input_stream.cols();

View File

@ -815,6 +815,38 @@ cc_test(
],
)
cc_library(
name = "split_normalized_landmark_list_calculator",
srcs = ["split_normalized_landmark_list_calculator.cc"],
visibility = ["//visibility:public"],
deps = [
":split_vector_calculator_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/formats:landmark_cc_proto",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"//mediapipe/util:resource_util",
],
alwayslink = 1,
)
cc_test(
name = "split_normalized_landmark_list_calculator_test",
srcs = ["split_normalized_landmark_list_calculator_test.cc"],
deps = [
":split_normalized_landmark_list_calculator",
":split_vector_calculator_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework:calculator_runner",
"//mediapipe/framework/deps:file_path",
"//mediapipe/framework/formats:landmark_cc_proto",
"//mediapipe/framework/port:gtest_main",
"//mediapipe/framework/port:integral_types",
"//mediapipe/framework/port:parse_text_proto",
"//mediapipe/framework/tool:validate_type",
],
)
cc_library(
name = "dequantize_byte_array_calculator",
srcs = ["dequantize_byte_array_calculator.cc"],

View File

@ -51,8 +51,8 @@ namespace mediapipe {
class ConstantSidePacketCalculator : public CalculatorBase {
public:
static ::mediapipe::Status GetContract(CalculatorContract* cc) {
const auto& options = cc->Options().GetExtension(
::mediapipe::ConstantSidePacketCalculatorOptions::ext);
const auto& options =
cc->Options<::mediapipe::ConstantSidePacketCalculatorOptions>();
RET_CHECK_EQ(cc->OutputSidePackets().NumEntries(kPacketTag),
options.packet_size())
<< "Number of output side packets has to be same as number of packets "
@ -80,8 +80,8 @@ class ConstantSidePacketCalculator : public CalculatorBase {
}
::mediapipe::Status Open(CalculatorContext* cc) override {
const auto& options = cc->Options().GetExtension(
::mediapipe::ConstantSidePacketCalculatorOptions::ext);
const auto& options =
cc->Options<::mediapipe::ConstantSidePacketCalculatorOptions>();
int index = 0;
for (CollectionItemId id = cc->OutputSidePackets().BeginId(kPacketTag);
id != cc->OutputSidePackets().EndId(kPacketTag); ++id, ++index) {

View File

@ -0,0 +1,165 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MEDIAPIPE_CALCULATORS_CORE_SPLIT_NORMALIZED_LANDMARK_LIST_CALCULATOR_H_ // NOLINT
#define MEDIAPIPE_CALCULATORS_CORE_SPLIT_NORMALIZED_LANDMARK_LIST_CALCULATOR_H_ // NOLINT
#include "mediapipe/calculators/core/split_vector_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/port/canonical_errors.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/util/resource_util.h"
namespace mediapipe {
// Splits an input packet with NormalizedLandmarkList into
// multiple NormalizedLandmarkList output packets using the [begin, end) ranges
// specified in SplitVectorCalculatorOptions. If the option "element_only" is
// set to true, all ranges should be of size 1 and all outputs will be elements
// of type NormalizedLandmark. If "element_only" is false, ranges can be
// non-zero in size and all outputs will be of type NormalizedLandmarkList.
// If the option "combine_outputs" is set to true, only one output stream can be
// specified and all ranges of elements will be combined into one
// NormalizedLandmarkList.
class SplitNormalizedLandmarkListCalculator : public CalculatorBase {
public:
static ::mediapipe::Status GetContract(CalculatorContract* cc) {
RET_CHECK(cc->Inputs().NumEntries() == 1);
RET_CHECK(cc->Outputs().NumEntries() != 0);
cc->Inputs().Index(0).Set<NormalizedLandmarkList>();
const auto& options =
cc->Options<::mediapipe::SplitVectorCalculatorOptions>();
if (options.combine_outputs()) {
RET_CHECK_EQ(cc->Outputs().NumEntries(), 1);
cc->Outputs().Index(0).Set<NormalizedLandmarkList>();
for (int i = 0; i < options.ranges_size() - 1; ++i) {
for (int j = i + 1; j < options.ranges_size(); ++j) {
const auto& range_0 = options.ranges(i);
const auto& range_1 = options.ranges(j);
if ((range_0.begin() >= range_1.begin() &&
range_0.begin() < range_1.end()) ||
(range_1.begin() >= range_0.begin() &&
range_1.begin() < range_0.end())) {
return ::mediapipe::InvalidArgumentError(
"Ranges must be non-overlapping when using combine_outputs "
"option.");
}
}
}
} else {
if (cc->Outputs().NumEntries() != options.ranges_size()) {
return ::mediapipe::InvalidArgumentError(
"The number of output streams should match the number of ranges "
"specified in the CalculatorOptions.");
}
// Set the output types for each output stream.
for (int i = 0; i < cc->Outputs().NumEntries(); ++i) {
if (options.ranges(i).begin() < 0 || options.ranges(i).end() < 0 ||
options.ranges(i).begin() >= options.ranges(i).end()) {
return ::mediapipe::InvalidArgumentError(
"Indices should be non-negative and begin index should be less "
"than the end index.");
}
if (options.element_only()) {
if (options.ranges(i).end() - options.ranges(i).begin() != 1) {
return ::mediapipe::InvalidArgumentError(
"Since element_only is true, all ranges should be of size 1.");
}
cc->Outputs().Index(i).Set<NormalizedLandmark>();
} else {
cc->Outputs().Index(i).Set<NormalizedLandmarkList>();
}
}
}
return ::mediapipe::OkStatus();
}
::mediapipe::Status Open(CalculatorContext* cc) override {
cc->SetOffset(TimestampDiff(0));
const auto& options =
cc->Options<::mediapipe::SplitVectorCalculatorOptions>();
element_only_ = options.element_only();
combine_outputs_ = options.combine_outputs();
for (const auto& range : options.ranges()) {
ranges_.push_back({range.begin(), range.end()});
max_range_end_ = std::max(max_range_end_, range.end());
total_elements_ += range.end() - range.begin();
}
return ::mediapipe::OkStatus();
}
::mediapipe::Status Process(CalculatorContext* cc) override {
const NormalizedLandmarkList& input =
cc->Inputs().Index(0).Get<NormalizedLandmarkList>();
RET_CHECK_GE(input.landmark_size(), max_range_end_);
if (combine_outputs_) {
NormalizedLandmarkList output;
for (int i = 0; i < ranges_.size(); ++i) {
for (int j = ranges_[i].first; j < ranges_[i].second; ++j) {
const NormalizedLandmark& input_landmark = input.landmark(j);
*output.add_landmark() = input_landmark;
}
}
RET_CHECK_EQ(output.landmark_size(), total_elements_);
cc->Outputs().Index(0).AddPacket(
MakePacket<NormalizedLandmarkList>(output).At(cc->InputTimestamp()));
} else {
if (element_only_) {
for (int i = 0; i < ranges_.size(); ++i) {
cc->Outputs().Index(i).AddPacket(
MakePacket<NormalizedLandmark>(input.landmark(ranges_[i].first))
.At(cc->InputTimestamp()));
}
} else {
for (int i = 0; i < ranges_.size(); ++i) {
NormalizedLandmarkList output;
for (int j = ranges_[i].first; j < ranges_[i].second; ++j) {
const NormalizedLandmark& input_landmark = input.landmark(j);
*output.add_landmark() = input_landmark;
}
cc->Outputs().Index(i).AddPacket(
MakePacket<NormalizedLandmarkList>(output).At(
cc->InputTimestamp()));
}
}
}
return ::mediapipe::OkStatus();
}
private:
std::vector<std::pair<int32, int32>> ranges_;
int32 max_range_end_ = -1;
int32 total_elements_ = 0;
bool element_only_ = false;
bool combine_outputs_ = false;
};
REGISTER_CALCULATOR(SplitNormalizedLandmarkListCalculator);
} // namespace mediapipe
// NOLINTNEXTLINE
#endif // MEDIAPIPE_CALCULATORS_CORE_SPLIT_NORMALIZED_LANDMARK_LIST_CALCULATOR_H_
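
For reference, a minimal C++ sketch (not part of this commit; the helper name is hypothetical) of how the SplitVectorCalculatorOptions read by this calculator can be populated, mirroring the [begin, end) semantics described in the class comment:

```c++
// Hedged sketch: builds example options for SplitNormalizedLandmarkListCalculator.
#include "mediapipe/calculators/core/split_vector_calculator.pb.h"

mediapipe::SplitVectorCalculatorOptions MakeExampleSplitOptions() {
  mediapipe::SplitVectorCalculatorOptions options;
  // First output stream receives landmarks [0, 1).
  auto* range = options.add_ranges();
  range->set_begin(0);
  range->set_end(1);
  // Second output stream receives landmarks [1, 4).
  range = options.add_ranges();
  range->set_begin(1);
  range->set_end(4);
  // Leaving element_only and combine_outputs at their defaults (false) makes
  // each output a NormalizedLandmarkList; combine_outputs would instead merge
  // all ranges into a single list on one output stream.
  return options;
}
```

The test file below exercises the same semantics through a CalculatorGraphConfig in text-proto form.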

View File

@ -0,0 +1,404 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <memory>
#include <string>
#include <vector>
#include "mediapipe/calculators/core/split_vector_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_runner.h"
#include "mediapipe/framework/deps/file_path.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/port/gmock.h"
#include "mediapipe/framework/port/gtest.h"
#include "mediapipe/framework/port/integral_types.h"
#include "mediapipe/framework/port/parse_text_proto.h"
#include "mediapipe/framework/port/status_matchers.h" // NOLINT
#include "mediapipe/framework/tool/validate_type.h"
namespace mediapipe {
constexpr float kLocationVal = 3;
class SplitNormalizedLandmarkListCalculatorTest : public ::testing::Test {
protected:
void TearDown() { expected_landmarks_.reset(); }
void PrepareNormalizedLandmarkList(int list_size) {
// Prepare input landmark list.
input_landmarks_ = absl::make_unique<NormalizedLandmarkList>();
expected_landmarks_ = absl::make_unique<NormalizedLandmarkList>();
for (int i = 0; i < list_size; ++i) {
NormalizedLandmark* landmark = input_landmarks_->add_landmark();
landmark->set_x(i * kLocationVal);
landmark->set_y(i * kLocationVal);
landmark->set_z(i * kLocationVal);
// Save the landmarks for comparison after the graph runs.
*expected_landmarks_->add_landmark() = *landmark;
}
}
void ValidateListOutput(std::vector<Packet>& output_packets,
int expected_elements, int input_begin_index) {
ASSERT_EQ(1, output_packets.size());
const NormalizedLandmarkList& output_landmarks =
output_packets[0].Get<NormalizedLandmarkList>();
ASSERT_EQ(expected_elements, output_landmarks.landmark_size());
for (int i = 0; i < expected_elements; ++i) {
const NormalizedLandmark& expected_landmark =
expected_landmarks_->landmark(input_begin_index + i);
const NormalizedLandmark& result = output_landmarks.landmark(i);
EXPECT_FLOAT_EQ(expected_landmark.x(), result.x());
EXPECT_FLOAT_EQ(expected_landmark.y(), result.y());
EXPECT_FLOAT_EQ(expected_landmark.z(), result.z());
}
}
void ValidateCombinedListOutput(std::vector<Packet>& output_packets,
int expected_elements,
std::vector<int>& input_begin_indices,
std::vector<int>& input_end_indices) {
ASSERT_EQ(1, output_packets.size());
ASSERT_EQ(input_begin_indices.size(), input_end_indices.size());
const NormalizedLandmarkList& output_landmarks =
output_packets[0].Get<NormalizedLandmarkList>();
ASSERT_EQ(expected_elements, output_landmarks.landmark_size());
const int num_ranges = input_begin_indices.size();
int element_id = 0;
for (int range_id = 0; range_id < num_ranges; ++range_id) {
for (int i = input_begin_indices[range_id];
i < input_end_indices[range_id]; ++i) {
const NormalizedLandmark& expected_landmark =
expected_landmarks_->landmark(i);
const NormalizedLandmark& result =
output_landmarks.landmark(element_id);
EXPECT_FLOAT_EQ(expected_landmark.x(), result.x());
EXPECT_FLOAT_EQ(expected_landmark.y(), result.y());
EXPECT_FLOAT_EQ(expected_landmark.z(), result.z());
element_id++;
}
}
}
void ValidateElementOutput(std::vector<Packet>& output_packets,
int input_begin_index) {
ASSERT_EQ(1, output_packets.size());
const NormalizedLandmark& output_landmark =
output_packets[0].Get<NormalizedLandmark>();
ASSERT_TRUE(output_landmark.IsInitialized());
const NormalizedLandmark& expected_landmark =
expected_landmarks_->landmark(input_begin_index);
EXPECT_FLOAT_EQ(expected_landmark.x(), output_landmark.x());
EXPECT_FLOAT_EQ(expected_landmark.y(), output_landmark.y());
EXPECT_FLOAT_EQ(expected_landmark.z(), output_landmark.z());
}
std::unique_ptr<NormalizedLandmarkList> input_landmarks_ = nullptr;
std::unique_ptr<NormalizedLandmarkList> expected_landmarks_ = nullptr;
std::unique_ptr<CalculatorRunner> runner_ = nullptr;
};
TEST_F(SplitNormalizedLandmarkListCalculatorTest, SmokeTest) {
PrepareNormalizedLandmarkList(/*list_size=*/5);
ASSERT_NE(input_landmarks_, nullptr);
// Prepare a graph to use the SplitNormalizedLandmarkListCalculator.
CalculatorGraphConfig graph_config =
::mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(
R"(
input_stream: "landmarks_in"
node {
calculator: "SplitNormalizedLandmarkListCalculator"
input_stream: "landmarks_in"
output_stream: "range_0"
output_stream: "range_1"
output_stream: "range_2"
options {
[mediapipe.SplitVectorCalculatorOptions.ext] {
ranges: { begin: 0 end: 1 }
ranges: { begin: 1 end: 4 }
ranges: { begin: 4 end: 5 }
}
}
}
)");
std::vector<Packet> range_0_packets;
tool::AddVectorSink("range_0", &graph_config, &range_0_packets);
std::vector<Packet> range_1_packets;
tool::AddVectorSink("range_1", &graph_config, &range_1_packets);
std::vector<Packet> range_2_packets;
tool::AddVectorSink("range_2", &graph_config, &range_2_packets);
// Run the graph.
CalculatorGraph graph;
MP_ASSERT_OK(graph.Initialize(graph_config));
MP_ASSERT_OK(graph.StartRun({}));
MP_ASSERT_OK(graph.AddPacketToInputStream(
"landmarks_in", Adopt(input_landmarks_.release()).At(Timestamp(0))));
// Wait until the calculator finishes processing.
MP_ASSERT_OK(graph.WaitUntilIdle());
ValidateListOutput(range_0_packets, /*expected_elements=*/1,
/*input_begin_index=*/0);
ValidateListOutput(range_1_packets, /*expected_elements=*/3,
/*input_begin_index=*/1);
ValidateListOutput(range_2_packets, /*expected_elements=*/1,
/*input_begin_index=*/4);
// Fully close the graph at the end.
MP_ASSERT_OK(graph.CloseInputStream("landmarks_in"));
MP_ASSERT_OK(graph.WaitUntilDone());
}
TEST_F(SplitNormalizedLandmarkListCalculatorTest, InvalidRangeTest) {
// Prepare a graph to use the SplitNormalizedLandmarkListCalculator.
CalculatorGraphConfig graph_config =
::mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(
R"(
input_stream: "landmarks_in"
node {
calculator: "SplitNormalizedLandmarkListCalculator"
input_stream: "landmarks_in"
output_stream: "range_0"
options {
[mediapipe.SplitVectorCalculatorOptions.ext] {
ranges: { begin: 0 end: 0 }
}
}
}
)");
// Run the graph.
CalculatorGraph graph;
// The graph should fail running because of an invalid range (begin == end).
ASSERT_FALSE(graph.Initialize(graph_config).ok());
}
TEST_F(SplitNormalizedLandmarkListCalculatorTest,
InvalidOutputStreamCountTest) {
// Prepare a graph to use the SplitNormalizedLandmarkListCalculator.
CalculatorGraphConfig graph_config =
::mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(
R"(
input_stream: "landmarks_in"
node {
calculator: "SplitNormalizedLandmarkListCalculator"
input_stream: "landmarks_in"
output_stream: "range_0"
output_stream: "range_1"
options {
[mediapipe.SplitVectorCalculatorOptions.ext] {
ranges: { begin: 0 end: 1 }
}
}
}
)");
// Run the graph.
CalculatorGraph graph;
// The graph should fail running because the number of output streams does not
// match the number of range elements in the options.
ASSERT_FALSE(graph.Initialize(graph_config).ok());
}
TEST_F(SplitNormalizedLandmarkListCalculatorTest,
InvalidCombineOutputsMultipleOutputsTest) {
// Prepare a graph to use the SplitNormalizedLandmarkListCalculator.
CalculatorGraphConfig graph_config =
::mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(
R"(
input_stream: "landmarks_in"
node {
calculator: "SplitNormalizedLandmarkListCalculator"
input_stream: "landmarks_in"
output_stream: "range_0"
output_stream: "range_1"
options {
[mediapipe.SplitVectorCalculatorOptions.ext] {
ranges: { begin: 0 end: 1 }
ranges: { begin: 2 end: 3 }
combine_outputs: true
}
}
}
)");
// Run the graph.
CalculatorGraph graph;
// The graph should fail running because combine_outputs is true but more than
// one output stream is specified.
ASSERT_FALSE(graph.Initialize(graph_config).ok());
}
TEST_F(SplitNormalizedLandmarkListCalculatorTest,
InvalidOverlappingRangesTest) {
// Prepare a graph to use the SplitNormalizedLandmarkListCalculator.
CalculatorGraphConfig graph_config =
::mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(
R"(
input_stream: "landmarks_in"
node {
calculator: "SplitNormalizedLandmarkListCalculator"
input_stream: "landmarks_in"
output_stream: "range_0"
options {
[mediapipe.SplitVectorCalculatorOptions.ext] {
ranges: { begin: 0 end: 3 }
ranges: { begin: 1 end: 4 }
combine_outputs: true
}
}
}
)");
// Run the graph.
CalculatorGraph graph;
// The graph should fail running because there are overlapping ranges.
ASSERT_FALSE(graph.Initialize(graph_config).ok());
}
TEST_F(SplitNormalizedLandmarkListCalculatorTest, SmokeTestElementOnly) {
PrepareNormalizedLandmarkList(/*list_size=*/5);
ASSERT_NE(input_landmarks_, nullptr);
// Prepare a graph to use the SplitNormalizedLandmarkListCalculator.
CalculatorGraphConfig graph_config =
::mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(
R"(
input_stream: "landmarks_in"
node {
calculator: "SplitNormalizedLandmarkListCalculator"
input_stream: "landmarks_in"
output_stream: "range_0"
output_stream: "range_1"
output_stream: "range_2"
options {
[mediapipe.SplitVectorCalculatorOptions.ext] {
ranges: { begin: 0 end: 1 }
ranges: { begin: 2 end: 3 }
ranges: { begin: 4 end: 5 }
element_only: true
}
}
}
)");
std::vector<Packet> range_0_packets;
tool::AddVectorSink("range_0", &graph_config, &range_0_packets);
std::vector<Packet> range_1_packets;
tool::AddVectorSink("range_1", &graph_config, &range_1_packets);
std::vector<Packet> range_2_packets;
tool::AddVectorSink("range_2", &graph_config, &range_2_packets);
// Run the graph.
CalculatorGraph graph;
MP_ASSERT_OK(graph.Initialize(graph_config));
MP_ASSERT_OK(graph.StartRun({}));
MP_ASSERT_OK(graph.AddPacketToInputStream(
"landmarks_in", Adopt(input_landmarks_.release()).At(Timestamp(0))));
// Wait until the calculator finishes processing.
MP_ASSERT_OK(graph.WaitUntilIdle());
ValidateElementOutput(range_0_packets,
/*input_begin_index=*/0);
ValidateElementOutput(range_1_packets,
/*input_begin_index=*/2);
ValidateElementOutput(range_2_packets,
/*input_begin_index=*/4);
// Fully close the graph at the end.
MP_ASSERT_OK(graph.CloseInputStream("landmarks_in"));
MP_ASSERT_OK(graph.WaitUntilDone());
}
TEST_F(SplitNormalizedLandmarkListCalculatorTest, SmokeTestCombiningOutputs) {
PrepareNormalizedLandmarkList(/*list_size=*/5);
ASSERT_NE(input_landmarks_, nullptr);
// Prepare a graph to use the SplitNormalizedLandmarkListCalculator.
CalculatorGraphConfig graph_config =
::mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(
R"(
input_stream: "landmarks_in"
node {
calculator: "SplitNormalizedLandmarkListCalculator"
input_stream: "landmarks_in"
output_stream: "range_0"
options {
[mediapipe.SplitVectorCalculatorOptions.ext] {
ranges: { begin: 0 end: 1 }
ranges: { begin: 2 end: 3 }
ranges: { begin: 4 end: 5 }
combine_outputs: true
}
}
}
)");
std::vector<Packet> range_0_packets;
tool::AddVectorSink("range_0", &graph_config, &range_0_packets);
// Run the graph.
CalculatorGraph graph;
MP_ASSERT_OK(graph.Initialize(graph_config));
MP_ASSERT_OK(graph.StartRun({}));
MP_ASSERT_OK(graph.AddPacketToInputStream(
"landmarks_in", Adopt(input_landmarks_.release()).At(Timestamp(0))));
// Wait until the calculator finishes processing.
MP_ASSERT_OK(graph.WaitUntilIdle());
std::vector<int> input_begin_indices = {0, 2, 4};
std::vector<int> input_end_indices = {1, 3, 5};
ValidateCombinedListOutput(range_0_packets, /*expected_elements=*/3,
input_begin_indices, input_end_indices);
// Fully close the graph at the end.
MP_ASSERT_OK(graph.CloseInputStream("landmarks_in"));
MP_ASSERT_OK(graph.WaitUntilDone());
}
TEST_F(SplitNormalizedLandmarkListCalculatorTest,
ElementOnlyDisablesVectorOutputs) {
// Prepare a graph to use the SplitNormalizedLandmarkListCalculator.
CalculatorGraphConfig graph_config =
::mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(
R"(
input_stream: "landmarks_in"
node {
calculator: "SplitNormalizedLandmarkListCalculator"
input_stream: "landmarks_in"
output_stream: "range_0"
output_stream: "range_1"
output_stream: "range_2"
options {
[mediapipe.SplitVectorCalculatorOptions.ext] {
ranges: { begin: 0 end: 1 }
ranges: { begin: 1 end: 4 }
ranges: { begin: 4 end: 5 }
element_only: true
}
}
}
)");
// Run the graph.
CalculatorGraph graph;
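// The graph should fail running because element_only is true but the range
// {begin: 1 end: 4} has size 3, and element_only requires all ranges to be of
// size 1.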
ASSERT_FALSE(graph.Initialize(graph_config).ok());
}
} // namespace mediapipe

View File

@ -376,13 +376,12 @@ REGISTER_CALCULATOR(ImageTransformationCalculator);
::mediapipe::Status ImageTransformationCalculator::RenderCpu(
CalculatorContext* cc) {
int input_width = cc->Inputs().Tag("IMAGE").Get<ImageFrame>().Width();
int input_height = cc->Inputs().Tag("IMAGE").Get<ImageFrame>().Height();
const auto& input_img = cc->Inputs().Tag("IMAGE").Get<ImageFrame>();
cv::Mat input_mat = formats::MatView(&input_img);
cv::Mat scaled_mat;
const int input_width = input_img.Width();
const int input_height = input_img.Height();
if (!output_height_ || !output_width_) {
output_height_ = input_height;
output_width_ = input_width;
@ -455,8 +454,9 @@ REGISTER_CALCULATOR(ImageTransformationCalculator);
::mediapipe::Status ImageTransformationCalculator::RenderGpu(
CalculatorContext* cc) {
#if !defined(MEDIAPIPE_DISABLE_GPU)
int input_width = cc->Inputs().Tag("IMAGE_GPU").Get<GpuBuffer>().width();
int input_height = cc->Inputs().Tag("IMAGE_GPU").Get<GpuBuffer>().height();
const auto& input = cc->Inputs().Tag("IMAGE_GPU").Get<GpuBuffer>();
const int input_width = input.width();
const int input_height = input.height();
int output_width;
int output_height;
@ -472,7 +472,6 @@ REGISTER_CALCULATOR(ImageTransformationCalculator);
.Add(padding.release(), cc->InputTimestamp());
}
const auto& input = cc->Inputs().Tag("IMAGE_GPU").Get<GpuBuffer>();
QuadRenderer* renderer = nullptr;
GlTexture src1;

View File

@ -244,7 +244,7 @@ class PackMediaSequenceCalculator : public CalculatorBase {
::mediapipe::Status VerifySequence() {
std::string error_msg = "Missing features - ";
bool all_present = true;
for (auto iter : features_present_) {
for (const auto& iter : features_present_) {
if (!iter.second) {
all_present = false;
absl::StrAppend(&error_msg, iter.first, ", ");

View File

@ -126,7 +126,7 @@ class TensorFlowSessionFromSavedModelCalculator : public CalculatorBase {
// Set user specified tags properly.
// If no tags specified will use tensorflow::kSavedModelTagServe by default.
std::unordered_set<std::string> tags_set;
for (std::string tag : options.saved_model_tag()) {
for (const std::string& tag : options.saved_model_tag()) {
tags_set.insert(tag);
}
if (tags_set.empty()) {

View File

@ -121,7 +121,7 @@ class TensorFlowSessionFromSavedModelGenerator : public PacketGenerator {
// Set user specified tags properly.
// If no tags specified will use tensorflow::kSavedModelTagServe by default.
std::unordered_set<std::string> tags_set;
for (std::string tag : options.saved_model_tag()) {
for (const std::string& tag : options.saved_model_tag()) {
tags_set.insert(tag);
}
if (tags_set.empty()) {

View File

@ -513,6 +513,7 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator);
if (gpu_inference_) {
#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE)
MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext([this]() -> Status {
interpreter_ = nullptr;
delegate_ = nullptr;
for (int i = 0; i < gpu_data_in_.size(); ++i) {
gpu_data_in_[i].reset();
@ -523,6 +524,7 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator);
return ::mediapipe::OkStatus();
}));
#elif defined(MEDIAPIPE_IOS)
interpreter_ = nullptr;
delegate_ = nullptr;
for (int i = 0; i < gpu_data_in_.size(); ++i) {
gpu_data_in_[i].reset();
@ -532,6 +534,7 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator);
}
#endif
} else {
interpreter_ = nullptr;
delegate_ = nullptr;
}
}

View File

@ -99,7 +99,7 @@ void ConvertAnchorsToRawValues(const std::vector<Anchor>& anchors,
int num_boxes, float* raw_anchors) {
CHECK_EQ(anchors.size(), num_boxes);
int box = 0;
for (auto anchor : anchors) {
for (const auto& anchor : anchors) {
raw_anchors[box * kNumCoordsPerBox + 0] = anchor.y_center();
raw_anchors[box * kNumCoordsPerBox + 1] = anchor.x_center();
raw_anchors[box * kNumCoordsPerBox + 2] = anchor.h();

View File

@ -71,8 +71,10 @@ REGISTER_CALCULATOR(DetectionLabelIdToTextCalculator);
const auto& options =
cc->Options<::mediapipe::DetectionLabelIdToTextCalculatorOptions>();
if (options.has_label_map_path()) {
std::string string_path;
ASSIGN_OR_RETURN(string_path, PathToResourceAsFile(options.label_map_path()));
ASSIGN_OR_RETURN(string_path,
PathToResourceAsFile(options.label_map_path()));
std::string label_map_string;
MP_RETURN_IF_ERROR(file::GetContents(string_path, &label_map_string));
@ -82,6 +84,11 @@ REGISTER_CALCULATOR(DetectionLabelIdToTextCalculator);
while (std::getline(stream, line)) {
label_map_[i++] = line;
}
} else {
for (int i = 0; i < options.label_size(); ++i) {
label_map_[i] = options.label(i);
}
}
return ::mediapipe::OkStatus();
}

View File

@ -25,4 +25,10 @@ message DetectionLabelIdToTextCalculatorOptions {
// Path to a label map file for getting the actual name of detected classes.
optional string label_map_path = 1;
// Alternative way to specify label map
// label: "label for id 0"
// label: "label for id 1"
// ...
repeated string label = 2;
}
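
As a hedged illustration (not part of the commit; the label strings and include path are assumptions), the new repeated `label` field can be populated programmatically instead of providing `label_map_path`:

```c++
// Sketch: detection label IDs 0 and 1 map to these placeholder strings,
// matching the new else-branch in DetectionLabelIdToTextCalculator above.
#include "mediapipe/calculators/util/detection_label_id_to_text_calculator.pb.h"

mediapipe::DetectionLabelIdToTextCalculatorOptions MakeExampleLabelOptions() {
  mediapipe::DetectionLabelIdToTextCalculatorOptions options;
  options.add_label("background");  // label for id 0
  options.add_label("person");      // label for id 1
  return options;
}
```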

View File

@ -186,6 +186,7 @@ class LandmarksToRenderDataCalculator : public CalculatorBase {
private:
LandmarksToRenderDataCalculatorOptions options_;
std::vector<int> landmark_connections_;
};
REGISTER_CALCULATOR(LandmarksToRenderDataCalculator);
@ -217,6 +218,14 @@ REGISTER_CALCULATOR(LandmarksToRenderDataCalculator);
cc->SetOffset(TimestampDiff(0));
options_ = cc->Options<LandmarksToRenderDataCalculatorOptions>();
// Parse landmarks connections to a vector.
RET_CHECK_EQ(options_.landmark_connections_size() % 2, 0)
<< "Number of entries in landmark connections must be a multiple of 2";
for (int i = 0; i < options_.landmark_connections_size(); ++i) {
landmark_connections_.push_back(options_.landmark_connections(i));
}
return ::mediapipe::OkStatus();
}
@ -236,14 +245,6 @@ REGISTER_CALCULATOR(LandmarksToRenderDataCalculator);
thickness *= render_scale;
}
// Parse landmarks connections to a vector.
RET_CHECK_EQ(options_.landmark_connections_size() % 2, 0)
<< "Number of entries in landmark connections must be a multiple of 2";
std::vector<int> landmark_connections;
for (int i = 0; i < options_.landmark_connections_size(); i += 1) {
landmark_connections.push_back(options_.landmark_connections(i));
}
if (cc->Inputs().HasTag(kLandmarksTag)) {
const LandmarkList& landmarks =
cc->Inputs().Tag(kLandmarksTag).Get<LandmarkList>();
@ -252,6 +253,15 @@ REGISTER_CALCULATOR(LandmarksToRenderDataCalculator);
}
// Only change rendering if there are actually z values other than 0.
visualize_depth &= ((z_max - z_min) > 1e-3);
if (visualize_depth) {
AddConnectionsWithDepth<LandmarkList, Landmark>(
landmarks, landmark_connections_, thickness, /*normalized=*/false,
z_min, z_max, render_data.get());
} else {
AddConnections<LandmarkList, Landmark>(
landmarks, landmark_connections_, options_.connection_color(),
thickness, /*normalized=*/false, render_data.get());
}
for (int i = 0; i < landmarks.landmark_size(); ++i) {
const Landmark& landmark = landmarks.landmark(i);
auto* landmark_data_render = AddPointRenderData(
@ -265,15 +275,6 @@ REGISTER_CALCULATOR(LandmarksToRenderDataCalculator);
landmark_data->set_x(landmark.x());
landmark_data->set_y(landmark.y());
}
if (visualize_depth) {
AddConnectionsWithDepth<LandmarkList, Landmark>(
landmarks, landmark_connections, thickness, /*normalized=*/false,
z_min, z_max, render_data.get());
} else {
AddConnections<LandmarkList, Landmark>(
landmarks, landmark_connections, options_.connection_color(),
thickness, /*normalized=*/false, render_data.get());
}
}
if (cc->Inputs().HasTag(kNormLandmarksTag)) {
@ -285,6 +286,15 @@ REGISTER_CALCULATOR(LandmarksToRenderDataCalculator);
}
// Only change rendering if there are actually z values other than 0.
visualize_depth &= ((z_max - z_min) > 1e-3);
if (visualize_depth) {
AddConnectionsWithDepth<NormalizedLandmarkList, NormalizedLandmark>(
landmarks, landmark_connections_, thickness, /*normalized=*/true,
z_min, z_max, render_data.get());
} else {
AddConnections<NormalizedLandmarkList, NormalizedLandmark>(
landmarks, landmark_connections_, options_.connection_color(),
thickness, /*normalized=*/true, render_data.get());
}
for (int i = 0; i < landmarks.landmark_size(); ++i) {
const NormalizedLandmark& landmark = landmarks.landmark(i);
auto* landmark_data_render = AddPointRenderData(
@ -298,15 +308,6 @@ REGISTER_CALCULATOR(LandmarksToRenderDataCalculator);
landmark_data->set_x(landmark.x());
landmark_data->set_y(landmark.y());
}
if (visualize_depth) {
AddConnectionsWithDepth<NormalizedLandmarkList, NormalizedLandmark>(
landmarks, landmark_connections, thickness, /*normalized=*/true,
z_min, z_max, render_data.get());
} else {
AddConnections<NormalizedLandmarkList, NormalizedLandmark>(
landmarks, landmark_connections, options_.connection_color(),
thickness, /*normalized=*/true, render_data.get());
}
}
cc->Outputs()
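
The connection parsing moved into `Open()` above consumes a flat list of landmark index pairs. A hedged sketch (indices and the include path are placeholders, not from the commit):

```c++
// Sketch: connections are flat (from, to) index pairs, so the entry count
// must be even, matching the RET_CHECK_EQ(... % 2, 0) in Open().
#include "mediapipe/calculators/util/landmarks_to_render_data_calculator.pb.h"

mediapipe::LandmarksToRenderDataCalculatorOptions MakeExampleConnections() {
  mediapipe::LandmarksToRenderDataCalculatorOptions options;
  options.add_landmark_connections(0);  // segment from landmark 0 ...
  options.add_landmark_connections(1);  // ... to landmark 1
  options.add_landmark_connections(1);  // segment from landmark 1 ...
  options.add_landmark_connections(2);  // ... to landmark 2
  return options;
}
```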

View File

@ -73,6 +73,18 @@ can be easily adapted to run on CPU v.s. GPU.
* [Android](./face_detection_mobile_cpu.md)
* [iOS](./face_detection_mobile_cpu.md)
### Face Mesh with GPU
[Face Mesh with GPU](./face_mesh_mobile_gpu.md) illustrates how to run the
MediaPipe Face Mesh pipeline to perform 3D face landmark estimation in real-time
on mobile devices, utilizing GPU acceleration. The pipeline is based on
["Real-time Facial Surface Geometry from Monocular Video on Mobile GPUs"](https://arxiv.org/abs/1907.06724),
and details of the underlying ML models are described in the
[model card](https://drive.google.com/file/d/1VFC_wIpw4O7xBOiTgUldl79d9LA-LsnA/view).
* [Android](./face_mesh_mobile_gpu.md)
* [iOS](./face_mesh_mobile_gpu.md)
### Hand Detection with GPU
[Hand Detection with GPU](./hand_detection_mobile_gpu.md) illustrates how to use
@ -84,7 +96,7 @@ MediaPipe with a TFLite model for hand detection in a GPU-accelerated pipeline.
### Hand Tracking with GPU
[Hand Tracking with GPU](./hand_tracking_mobile_gpu.md) illustrates how to use
MediaPipe with a TFLite model for hand tracking in a GPU-accelerated pipeline.
MediaPipe with TFLite models for hand tracking in a GPU-accelerated pipeline.
* [Android](./hand_tracking_mobile_gpu.md)
* [iOS](./hand_tracking_mobile_gpu.md)
@ -92,7 +104,7 @@ MediaPipe with a TFLite model for hand tracking in a GPU-accelerated pipeline.
### Multi-Hand Tracking with GPU
[Multi-Hand Tracking with GPU](./multi_hand_tracking_mobile_gpu.md) illustrates
how to use MediaPipe with a TFLite model for multi-hand tracking in a
how to use MediaPipe with TFLite models for multi-hand tracking in a
GPU-accelerated pipeline.
* [Android](./multi_hand_tracking_mobile_gpu.md)
@ -150,11 +162,20 @@ GPU with live video from a webcam.
* [Desktop GPU](./face_detection_desktop.md)
* [Desktop CPU](./face_detection_desktop.md)
### Face Mesh on Desktop with Webcam
[Face Mesh on Desktop with Webcam](./face_mesh_desktop.md) shows how to run the
MediaPipe Face Mesh pipeline to perform 3D face landmark estimation in real-time
on desktop with webcam input.
* [Desktop GPU](./face_mesh_desktop.md)
* [Desktop CPU](./face_mesh_desktop.md)
### Hand Tracking on Desktop with Webcam
[Hand Tracking on Desktop with Webcam](./hand_tracking_desktop.md) shows how to
use MediaPipe with a TFLite model for hand tracking on desktop using CPU or GPU
use MediaPipe with TFLite models for hand tracking on desktop using CPU or GPU
with live video from a webcam.
* [Desktop GPU](./hand_tracking_desktop.md)
@ -163,8 +184,8 @@ with live video from a webcam.
### Multi-Hand Tracking on Desktop with Webcam
[Multi-Hand Tracking on Desktop with Webcam](./multi_hand_tracking_desktop.md)
shows how to use MediaPipe with a TFLite model for multi-hand tracking on
desktop using CPU or GPU with live video from a webcam.
shows how to use MediaPipe with TFLite models for multi-hand tracking on desktop
using CPU or GPU with live video from a webcam.
* [Desktop GPU](./multi_hand_tracking_desktop.md)
* [Desktop CPU](./multi_hand_tracking_desktop.md)

View File

@ -4,6 +4,8 @@ This doc focuses on the
[example graph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/face_detection/face_detection_mobile_cpu.pbtxt)
that performs face detection with TensorFlow Lite on CPU.
![face_detection_android_gpu_gif](images/mobile/face_detection_android_gpu.gif)
## Android
[Source](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facedetectioncpu)

View File

@ -0,0 +1,58 @@
## Face Mesh on Desktop with Webcam
This doc focuses on running the **MediaPipe Face Mesh** pipeline to perform 3D
face landmark estimation in real-time on desktop with webcam input. The pipeline
internally incorporates TensorFlow Lite models. To learn more about the models,
please refer to the model
[README file](https://github.com/google/mediapipe/tree/master/mediapipe/models/README.md#face-mesh).
Moreover, if you are interested in running the same pipeline on Android/iOS,
please see [Face Mesh on Android/iOS](face_mesh_mobile_gpu.md).
- [Face Mesh on Desktop with Webcam (CPU)](#face-mesh-on-desktop-with-webcam-cpu)
- [Face Mesh on Desktop with Webcam (GPU)](#face-mesh-on-desktop-with-webcam-gpu)
Note: Desktop GPU works only on Linux. Mesa drivers need to be installed. Please
see
[step 4 of "Installing on Debian and Ubuntu" in the installation guide](./install.md).
Note: If MediaPipe depends on OpenCV 2, please see the [known issues with OpenCV 2](#known-issues-with-opencv-2) section.
### Face Mesh on Desktop with Webcam (CPU)
To build and run Face Mesh on desktop with webcam (CPU), run:
```bash
$ bazel build -c opt --define MEDIAPIPE_DISABLE_GPU=1 \
mediapipe/examples/desktop/face_mesh:face_mesh_cpu
# It should print:
# Target //mediapipe/examples/desktop/face_mesh:face_mesh_cpu up-to-date:
# bazel-bin/mediapipe/examples/desktop/face_mesh/face_mesh_cpu
# This will open up your webcam as long as it is connected. Errors are likely
# due to your webcam not being accessible.
$ GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/face_mesh/face_mesh_cpu \
--calculator_graph_config_file=mediapipe/graphs/face_mesh/face_mesh_desktop_live.pbtxt
```
### Face Mesh on Desktop with Webcam (GPU)
Note: please first [check that your GPU is supported](gpu.md#desktop-gpu-linux).
To build and run Face Mesh on desktop with webcam (GPU), run:
```bash
# This currently works only on Linux
$ bazel build -c opt --copt -DMESA_EGL_NO_X11_HEADERS --copt -DEGL_NO_X11 \
mediapipe/examples/desktop/face_mesh:face_mesh_gpu
# It should print:
# Target //mediapipe/examples/desktop/face_mesh:face_mesh_gpu up-to-date:
# bazel-bin/mediapipe/examples/desktop/face_mesh/face_mesh_gpu
# This will open up your webcam as long as it is connected. Errors are likely
# due to your webcam not being accessible, or GPU drivers not being set up properly.
$ GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/face_mesh/face_mesh_gpu \
--calculator_graph_config_file=mediapipe/graphs/face_mesh/face_mesh_desktop_live_gpu.pbtxt
```

View File

@ -0,0 +1,90 @@
# Face Mesh (GPU)
This example focuses on running the **MediaPipe Face Mesh** pipeline on mobile
devices to perform 3D face landmark estimation in real-time, utilizing GPU
acceleration. The pipeline internally incorporates TensorFlow Lite models. To
learn more about the models, please refer to the model
[README file](https://github.com/google/mediapipe/tree/master/mediapipe/models/README.md#face-mesh).
The pipeline is related to the
[face detection example](./face_detection_mobile_gpu.md) as it internally
utilizes face detection and performs landmark estimation only within the
detected region.
![face_mesh_android_gpu.gif](images/mobile/face_mesh_android_gpu.gif)
**MediaPipe Face Mesh** generates 468 3D face landmarks in real-time on mobile
devices. In the visualization above, the red dots represent the landmarks, and
the green lines connecting landmarks illustrate the contours around the eyes,
eyebrows, lips and the entire face.
## Android
[Source](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu)
A prebuilt arm64 APK can be
[downloaded here](https://drive.google.com/open?id=1pUmd7CXCL_onYMbsZo5p91cH0oNnR4gi).
To build the app yourself, run:
```bash
bazel build -c opt --config=android_arm64 mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu
```
Once the app is built, install it on an Android device with:
```bash
adb install bazel-bin/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu/facemeshgpu.apk
```
## iOS
[Source](https://github.com/google/mediapipe/tree/master/mediapipe/examples/ios/facemeshgpu).
See the general [instructions](./mediapipe_ios_setup.md) for building iOS
examples and generating an Xcode project. This will be the FaceMeshGpuApp
target.
To build on the command line:
```bash
bazel build -c opt --config=ios_arm64 mediapipe/examples/ios/facemeshgpu:FaceMeshGpuApp
```
## Graph
The face mesh [main graph](#main-graph) utilizes a
[face landmark subgraph](#face-landmark-subgraph) from the
[face landmark module](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_landmark),
and renders using a dedicated [face renderer subgraph](#face-renderer-subgraph).
The subgraphs show up in the main graph visualization as nodes colored in
purple, and each subgraph can also be visualized just like a regular graph. For
more information on how to visualize a graph that includes subgraphs,
see the Visualizing Subgraphs section in the
[visualizer documentation](./visualizer.md).
### Main Graph
![face_mesh_mobile_graph](images/mobile/face_mesh_mobile.png)
[Source pbtxt file](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/face_mesh/face_mesh_mobile.pbtxt)
### Face Landmark Subgraph
The
[face landmark module](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_landmark)
contains several subgraphs that can be used to detect and track face landmarks.
In particular, in this example the
[FaceLandmarkFrontGPU](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_landmark/face_landmark_front_gpu.pbtxt)
subgraph, suitable for images from front-facing cameras (i.e., selfie images)
and utilizing GPU acceleration, is selected.
![face_landmark_front_gpu_subgraph](images/mobile/face_landmark_front_gpu_subgraph.png)
[Source pbtxt file](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_landmark/face_landmark_front_gpu.pbtxt)
### Face Renderer Subgraph
![face_renderer_gpu_subgraph](images/mobile/face_renderer_gpu_subgraph.png)
[Source pbtxt file](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/face_mesh/subgraphs/face_renderer_gpu.pbtxt)

Binary image files changed (30 files; contents not shown).

View File

@ -12,7 +12,7 @@ We show the object detection demo with both TensorFlow model and TensorFlow Lite
- [TensorFlow Object Detection Demo](#tensorflow-object-detection-demo)
- [TensorFlow Lite Object Detection Demo](#tensorflow-lite-object-detection-demo)
- [TensorFlow Lite Object Detection Demo with Webcam (CPU)](#tensorflow-lite-object-detection-demo)
- [TensorFlow Lite Object Detection Demo with Webcam (CPU)](#tensorflow-lite-object-detection-demo-with-webcam-cpu)
Note: If MediaPipe depends on OpenCV 2, please see the [known issues with OpenCV 2](#known-issues-with-opencv-2) section.

View File

@ -0,0 +1,33 @@
<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
package="com.google.mediapipe.apps.facemeshgpu">
<uses-sdk
android:minSdkVersion="21"
android:targetSdkVersion="27" />
<!-- For using the camera -->
<uses-permission android:name="android.permission.CAMERA" />
<uses-feature android:name="android.hardware.camera" />
<uses-feature android:name="android.hardware.camera.autofocus" />
<!-- For MediaPipe -->
<uses-feature android:glEsVersion="0x00020000" android:required="true" />
<application
android:allowBackup="true"
android:label="@string/app_name"
android:supportsRtl="true"
android:theme="@style/AppTheme">
<activity
android:name=".MainActivity"
android:exported="true"
android:screenOrientation="portrait">
<intent-filter>
<action android:name="android.intent.action.MAIN" />
<category android:name="android.intent.category.LAUNCHER" />
</intent-filter>
</activity>
</application>
</manifest>

View File

@ -0,0 +1,82 @@
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
licenses(["notice"]) # Apache 2.0
package(default_visibility = ["//visibility:private"])
cc_binary(
name = "libmediapipe_jni.so",
linkshared = 1,
linkstatic = 1,
deps = [
"//mediapipe/graphs/face_mesh:mobile_calculators",
"//mediapipe/java/com/google/mediapipe/framework/jni:mediapipe_framework_jni",
],
)
cc_library(
name = "mediapipe_jni_lib",
srcs = [":libmediapipe_jni.so"],
alwayslink = 1,
)
# Maps the binary graph to an alias (e.g., the app name) for convenience so that the alias can be
# easily incorporated into the app via, for example,
# MainActivity.BINARY_GRAPH_NAME = "appname.binarypb".
genrule(
name = "binary_graph",
srcs = ["//mediapipe/graphs/face_mesh:face_mesh_mobile_gpu_binary_graph"],
outs = ["facemeshgpu.binarypb"],
cmd = "cp $< $@",
)
android_library(
name = "mediapipe_lib",
srcs = glob(["*.java"]),
assets = [
":binary_graph",
"//mediapipe/modules/face_landmark:face_landmark.tflite",
"//mediapipe/modules/face_detection:face_detection_front.tflite",
],
assets_dir = "",
manifest = "AndroidManifest.xml",
resource_files = glob(["res/**"]),
deps = [
":mediapipe_jni_lib",
"//mediapipe/framework/formats:landmark_java_proto_lite",
"//mediapipe/java/com/google/mediapipe/components:android_camerax_helper",
"//mediapipe/java/com/google/mediapipe/components:android_components",
"//mediapipe/java/com/google/mediapipe/framework:android_framework",
"//mediapipe/java/com/google/mediapipe/glutil",
"//third_party:androidx_appcompat",
"//third_party:androidx_constraint_layout",
"//third_party:androidx_legacy_support_v4",
"//third_party:androidx_recyclerview",
"//third_party:opencv",
"@maven//:androidx_concurrent_concurrent_futures",
"@maven//:androidx_lifecycle_lifecycle_common",
"@maven//:com_google_guava_guava",
],
)
android_binary(
name = "facemeshgpu",
manifest = "AndroidManifest.xml",
manifest_values = {"applicationId": "com.google.mediapipe.apps.facemeshgpu"},
multidex = "native",
deps = [
":mediapipe_lib",
],
)

View File

@ -0,0 +1,232 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.mediapipe.apps.facemeshgpu;
import android.graphics.SurfaceTexture;
import android.os.Bundle;
import androidx.appcompat.app.AppCompatActivity;
import android.util.Log;
import android.util.Size;
import android.view.SurfaceHolder;
import android.view.SurfaceView;
import android.view.View;
import android.view.ViewGroup;
import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark;
import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmarkList;
import com.google.mediapipe.components.CameraHelper;
import com.google.mediapipe.components.CameraXPreviewHelper;
import com.google.mediapipe.components.ExternalTextureConverter;
import com.google.mediapipe.components.FrameProcessor;
import com.google.mediapipe.components.PermissionHelper;
import com.google.mediapipe.framework.AndroidAssetUtil;
import com.google.mediapipe.framework.AndroidPacketCreator;
import com.google.mediapipe.framework.Packet;
import com.google.mediapipe.framework.PacketGetter;
import com.google.mediapipe.glutil.EglManager;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/** Main activity of MediaPipe example apps. */
public class MainActivity extends AppCompatActivity {
private static final String TAG = "MainActivity";
private static final String BINARY_GRAPH_NAME = "facemeshgpu.binarypb";
private static final String INPUT_VIDEO_STREAM_NAME = "input_video";
private static final String INPUT_NUM_FACES_SIDE_PACKET_NAME = "num_faces";
private static final String OUTPUT_VIDEO_STREAM_NAME = "output_video";
private static final String OUTPUT_LANDMARKS_STREAM_NAME = "multi_face_landmarks";
private static final CameraHelper.CameraFacing CAMERA_FACING = CameraHelper.CameraFacing.FRONT;
// Max number of faces to detect/process.
private static final int NUM_FACES = 1;
// Flips the camera-preview frames vertically before sending them into FrameProcessor to be
// processed in a MediaPipe graph, and flips the processed frames back when they are displayed.
// This is needed because OpenGL represents images assuming the image origin is at the bottom-left
// corner, whereas MediaPipe in general assumes the image origin is at top-left.
private static final boolean FLIP_FRAMES_VERTICALLY = true;
static {
// Load all native libraries needed by the app.
System.loadLibrary("mediapipe_jni");
System.loadLibrary("opencv_java3");
}
// {@link SurfaceTexture} where the camera-preview frames can be accessed.
private SurfaceTexture previewFrameTexture;
// {@link SurfaceView} that displays the camera-preview frames processed by a MediaPipe graph.
private SurfaceView previewDisplayView;
// Creates and manages an {@link EGLContext}.
private EglManager eglManager;
// Sends camera-preview frames into a MediaPipe graph for processing, and displays the processed
// frames onto a {@link Surface}.
private FrameProcessor processor;
// Converts the GL_TEXTURE_EXTERNAL_OES texture from Android camera into a regular texture to be
// consumed by {@link FrameProcessor} and the underlying MediaPipe graph.
private ExternalTextureConverter converter;
// Handles camera access via the {@link CameraX} Jetpack support library.
private CameraXPreviewHelper cameraHelper;
@Override
protected void onCreate(Bundle savedInstanceState) {
super.onCreate(savedInstanceState);
setContentView(R.layout.activity_main);
previewDisplayView = new SurfaceView(this);
setupPreviewDisplayView();
// Initialize asset manager so that MediaPipe native libraries can access the app assets, e.g.,
// binary graphs.
AndroidAssetUtil.initializeNativeAssetManager(this);
eglManager = new EglManager(null);
processor =
new FrameProcessor(
this,
eglManager.getNativeContext(),
BINARY_GRAPH_NAME,
INPUT_VIDEO_STREAM_NAME,
OUTPUT_VIDEO_STREAM_NAME);
processor.getVideoSurfaceOutput().setFlipY(FLIP_FRAMES_VERTICALLY);
AndroidPacketCreator packetCreator = processor.getPacketCreator();
Map<String, Packet> inputSidePackets = new HashMap<>();
inputSidePackets.put(INPUT_NUM_FACES_SIDE_PACKET_NAME, packetCreator.createInt32(NUM_FACES));
processor.setInputSidePackets(inputSidePackets);
processor.addPacketCallback(
OUTPUT_LANDMARKS_STREAM_NAME,
(packet) -> {
Log.d(TAG, "Received multi face landmarks packet.");
List<NormalizedLandmarkList> multiFaceLandmarks =
PacketGetter.getProtoVector(packet, NormalizedLandmarkList.parser());
Log.d(
TAG,
"[TS:"
+ packet.getTimestamp()
+ "] "
+ getMultiFaceLandmarksDebugString(multiFaceLandmarks));
});
PermissionHelper.checkAndRequestCameraPermissions(this);
}
@Override
protected void onResume() {
super.onResume();
converter = new ExternalTextureConverter(eglManager.getContext());
converter.setFlipY(FLIP_FRAMES_VERTICALLY);
converter.setConsumer(processor);
if (PermissionHelper.cameraPermissionsGranted(this)) {
startCamera();
}
}
@Override
protected void onPause() {
super.onPause();
converter.close();
}
@Override
public void onRequestPermissionsResult(
int requestCode, String[] permissions, int[] grantResults) {
super.onRequestPermissionsResult(requestCode, permissions, grantResults);
PermissionHelper.onRequestPermissionsResult(requestCode, permissions, grantResults);
}
private void setupPreviewDisplayView() {
previewDisplayView.setVisibility(View.GONE);
ViewGroup viewGroup = findViewById(R.id.preview_display_layout);
viewGroup.addView(previewDisplayView);
previewDisplayView
.getHolder()
.addCallback(
new SurfaceHolder.Callback() {
@Override
public void surfaceCreated(SurfaceHolder holder) {
processor.getVideoSurfaceOutput().setSurface(holder.getSurface());
}
@Override
public void surfaceChanged(SurfaceHolder holder, int format, int width, int height) {
// (Re-)Compute the ideal size of the camera-preview display (the area that the
// camera-preview frames get rendered onto, potentially with scaling and rotation)
// based on the size of the SurfaceView that contains the display.
Size viewSize = new Size(width, height);
Size displaySize = cameraHelper.computeDisplaySizeFromViewSize(viewSize);
boolean isCameraRotated = cameraHelper.isCameraRotated();
// Connect the converter to the camera-preview frames as its input (via
// previewFrameTexture), and configure the output width and height as the computed
// display size.
converter.setSurfaceTextureAndAttachToGLContext(
previewFrameTexture,
isCameraRotated ? displaySize.getHeight() : displaySize.getWidth(),
isCameraRotated ? displaySize.getWidth() : displaySize.getHeight());
}
@Override
public void surfaceDestroyed(SurfaceHolder holder) {
processor.getVideoSurfaceOutput().setSurface(null);
}
});
}
private void startCamera() {
cameraHelper = new CameraXPreviewHelper();
cameraHelper.setOnCameraStartedListener(
surfaceTexture -> {
previewFrameTexture = surfaceTexture;
// Make the display view visible to start showing the preview. This triggers the
// SurfaceHolder.Callback added to (the holder of) previewDisplayView.
previewDisplayView.setVisibility(View.VISIBLE);
});
cameraHelper.startCamera(this, CAMERA_FACING, /*surfaceTexture=*/ null);
}
private static String getMultiFaceLandmarksDebugString(
List<NormalizedLandmarkList> multiFaceLandmarks) {
if (multiFaceLandmarks.isEmpty()) {
return "No face landmarks";
}
String multiFaceLandmarksStr = "Number of faces detected: " + multiFaceLandmarks.size() + "\n";
int faceIndex = 0;
for (NormalizedLandmarkList landmarks : multiFaceLandmarks) {
multiFaceLandmarksStr +=
"\t#Face landmarks for face[" + faceIndex + "]: " + landmarks.getLandmarkCount() + "\n";
int landmarkIndex = 0;
for (NormalizedLandmark landmark : landmarks.getLandmarkList()) {
multiFaceLandmarksStr +=
"\t\tLandmark ["
+ landmarkIndex
+ "]: ("
+ landmark.getX()
+ ", "
+ landmark.getY()
+ ", "
+ landmark.getZ()
+ ")\n";
++landmarkIndex;
}
++faceIndex;
}
return multiFaceLandmarksStr;
}
}

View File

@ -0,0 +1,20 @@
<?xml version="1.0" encoding="utf-8"?>
<androidx.constraintlayout.widget.ConstraintLayout xmlns:android="http://schemas.android.com/apk/res/android"
xmlns:app="http://schemas.android.com/apk/res-auto"
xmlns:tools="http://schemas.android.com/tools"
android:layout_width="match_parent"
android:layout_height="match_parent">
<FrameLayout
android:id="@+id/preview_display_layout"
android:layout_width="fill_parent"
android:layout_height="fill_parent"
android:layout_weight="1">
<TextView
android:id="@+id/no_camera_access_view"
android:layout_height="fill_parent"
android:layout_width="fill_parent"
android:gravity="center"
android:text="@string/no_camera_access" />
</FrameLayout>
</androidx.constraintlayout.widget.ConstraintLayout>

View File

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="utf-8"?>
<resources>
<color name="colorPrimary">#008577</color>
<color name="colorPrimaryDark">#00574B</color>
<color name="colorAccent">#D81B60</color>
</resources>

View File

@ -0,0 +1,4 @@
<resources>
<string name="app_name" translatable="false">Face Mesh GPU</string>
<string name="no_camera_access" translatable="false">Please grant camera permissions.</string>
</resources>

View File

@ -0,0 +1,11 @@
<resources>
<!-- Base application theme. -->
<style name="AppTheme" parent="Theme.AppCompat.Light.DarkActionBar">
<!-- Customize your theme here. -->
<item name="colorPrimary">@color/colorPrimary</item>
<item name="colorPrimaryDark">@color/colorPrimaryDark</item>
<item name="colorAccent">@color/colorAccent</item>
</style>
</resources>

View File

@ -150,4 +150,34 @@ message ConversionOptions {
optional int32 target_height = 2;
}
// TODO: Move other autoflip messages into this area.
// Self-contained message that provides all needed information to render
// autoflip with an external renderer. One of these messages is required for
// each frame of the video.
message ExternalRenderFrame {
// Rectangle using opencv standard.
message Rect {
optional float x = 1;
optional float y = 2;
optional float width = 3;
optional float height = 4;
}
// RGB color [0...255]
message Color {
optional int32 r = 1;
optional int32 g = 2;
optional int32 b = 3;
}
// Rect that must be cropped out of the input frame. It is in the
// original dimensions of the input video. The first step to render this
// frame is to crop this rect from the input frame.
optional Rect crop_from_location = 1;
// The placement location where the above rect is placed on the output frame.
// This will always have the same aspect ratio as the above rect but scaling
// may be required.
optional Rect render_to_location = 2;
// If render_to_location is smaller than the output dimensions of the frame,
// fill the rest of the frame with this color.
optional Color padding_color = 3;
// Timestamp in microseconds of this frame.
optional uint64 timestamp_us = 4;
}
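As a rough sketch of how an external renderer might consume one of these messages (the helper name, the BGR-frame assumption, and the include paths are illustrative, not part of AutoFlip), the three steps described above map onto OpenCV calls like this:

// Hypothetical consumer of one ExternalRenderFrame message (not part of
// AutoFlip). Assumes an 8-bit BGR cv::Mat input, e.g. from cv::VideoCapture,
// and caller-supplied output dimensions.
#include "mediapipe/examples/desktop/autoflip/autoflip_messages.pb.h"  // path assumed; use the header generated from this proto
#include "opencv2/imgproc.hpp"

cv::Mat RenderExternally(const mediapipe::autoflip::ExternalRenderFrame& msg,
                         const cv::Mat& input, int out_width, int out_height) {
  // Step 1: crop the "crop from" rect out of the input frame.
  const auto& from = msg.crop_from_location();
  const cv::Rect crop(static_cast<int>(from.x()), static_cast<int>(from.y()),
                      static_cast<int>(from.width()),
                      static_cast<int>(from.height()));
  cv::Mat cropped = input(crop).clone();
  // Step 2: start from an output canvas filled with the padding color.
  // The message stores RGB; the BGR input assumed here needs the order swapped.
  const auto& pad = msg.padding_color();
  cv::Mat output(out_height, out_width, input.type(),
                 cv::Scalar(pad.b(), pad.g(), pad.r()));
  // Step 3: scale the crop into the "render to" rect; the aspect ratios match
  // by construction, so only uniform scaling happens here.
  const auto& to = msg.render_to_location();
  const cv::Rect dst(static_cast<int>(to.x()), static_cast<int>(to.y()),
                     static_cast<int>(to.width()),
                     static_cast<int>(to.height()));
  cv::Mat dst_view = output(dst);
  cv::resize(cropped, dst_view, dst.size());
  return output;
}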

View File

@ -44,11 +44,19 @@ constexpr char kInputExternalSettings[] = "EXTERNAL_SETTINGS";
// TargetSizeType::MAXIMIZE_TARGET_DIMENSION
constexpr char kAspectRatio[] = "EXTERNAL_ASPECT_RATIO";
// Output the cropped frames, as well as visualization of crop regions and focus
// points. Note that KEY_FRAME_CROP_REGION_VIZ_FRAMES and
// SALIENT_POINT_FRAME_VIZ_FRAMES can only be enabled when CROPPED_FRAMES is
// enabled.
constexpr char kOutputCroppedFrames[] = "CROPPED_FRAMES";
constexpr char kOutputKeyFrameCropViz[] = "KEY_FRAME_CROP_REGION_VIZ_FRAMES";
constexpr char kOutputFocusPointFrameViz[] = "SALIENT_POINT_FRAME_VIZ_FRAMES";
constexpr char kOutputSummary[] = "CROPPING_SUMMARY";
// External rendering outputs
constexpr char kExternalRenderingPerFrame[] = "EXTERNAL_RENDERING_PER_FRAME";
constexpr char kExternalRenderingFullVid[] = "EXTERNAL_RENDERING_FULL_VID";
::mediapipe::Status SceneCroppingCalculator::GetContract(
::mediapipe::CalculatorContract* cc) {
if (cc->InputSidePackets().HasTag(kInputExternalSettings)) {
@ -67,16 +75,36 @@ constexpr char kOutputSummary[] = "CROPPING_SUMMARY";
}
cc->Inputs().Tag(kInputShotBoundaries).Set<bool>();
if (cc->Outputs().HasTag(kOutputCroppedFrames)) {
cc->Outputs().Tag(kOutputCroppedFrames).Set<ImageFrame>();
}
if (cc->Outputs().HasTag(kOutputKeyFrameCropViz)) {
RET_CHECK(cc->Outputs().HasTag(kOutputCroppedFrames))
<< "KEY_FRAME_CROP_REGION_VIZ_FRAMES can only be used when "
"CROPPED_FRAMES is specified.";
cc->Outputs().Tag(kOutputKeyFrameCropViz).Set<ImageFrame>();
}
if (cc->Outputs().HasTag(kOutputFocusPointFrameViz)) {
RET_CHECK(cc->Outputs().HasTag(kOutputCroppedFrames))
<< "SALIENT_POINT_FRAME_VIZ_FRAMES can only be used when "
"CROPPED_FRAMES is specified.";
cc->Outputs().Tag(kOutputFocusPointFrameViz).Set<ImageFrame>();
}
if (cc->Outputs().HasTag(kOutputSummary)) {
cc->Outputs().Tag(kOutputSummary).Set<VideoCroppingSummary>();
}
if (cc->Outputs().HasTag(kExternalRenderingPerFrame)) {
cc->Outputs().Tag(kExternalRenderingPerFrame).Set<ExternalRenderFrame>();
}
if (cc->Outputs().HasTag(kExternalRenderingFullVid)) {
cc->Outputs()
.Tag(kExternalRenderingFullVid)
.Set<std::vector<ExternalRenderFrame>>();
}
RET_CHECK(cc->Outputs().HasTag(kExternalRenderingPerFrame) ||
cc->Outputs().HasTag(kExternalRenderingFullVid) ||
cc->Outputs().HasTag(kOutputCroppedFrames))
<< "At leaset one output stream must be specified";
return ::mediapipe::OkStatus();
}
@ -104,6 +132,11 @@ constexpr char kOutputSummary[] = "CROPPING_SUMMARY";
if (cc->Outputs().HasTag(kOutputSummary)) {
summary_ = absl::make_unique<VideoCroppingSummary>();
}
if (cc->Outputs().HasTag(kExternalRenderingFullVid)) {
external_render_list_ =
absl::make_unique<std::vector<ExternalRenderFrame>>();
}
should_perform_frame_cropping_ = cc->Outputs().HasTag(kOutputCroppedFrames);
return ::mediapipe::OkStatus();
}
@ -127,6 +160,28 @@ namespace {
*aspect_ratio = width_ratio / height_ratio;
return ::mediapipe::OkStatus();
}
void ConstructExternalRenderMessage(
const cv::Rect& crop_from_location, const cv::Rect& render_to_location,
const cv::Scalar& padding_color, const uint64 timestamp_us,
ExternalRenderFrame* external_render_message) {
auto crop_from_message =
external_render_message->mutable_crop_from_location();
crop_from_message->set_x(crop_from_location.x);
crop_from_message->set_y(crop_from_location.y);
crop_from_message->set_width(crop_from_location.width);
crop_from_message->set_height(crop_from_location.height);
auto render_to_message =
external_render_message->mutable_render_to_location();
render_to_message->set_x(render_to_location.x);
render_to_message->set_y(render_to_location.y);
render_to_message->set_width(render_to_location.width);
render_to_message->set_height(render_to_location.height);
auto padding_color_message = external_render_message->mutable_padding_color();
padding_color_message->set_r(padding_color[0]);
padding_color_message->set_g(padding_color[1]);
padding_color_message->set_b(padding_color[2]);
external_render_message->set_timestamp_us(timestamp_us);
}
} // namespace
::mediapipe::Status SceneCroppingCalculator::Process(
@ -230,8 +285,9 @@ namespace {
is_end_of_scene = cc->Inputs().Tag(kInputShotBoundaries).Get<bool>();
}
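// Scene length is tracked via scene_frame_timestamps_, since
// scene_frames_or_empty_ may stay empty when frame cropping is disabled.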
const bool force_buffer_flush =
scene_frames_.size() >= options_.max_scene_size();
if (!scene_frames_.empty() && (is_end_of_scene || force_buffer_flush)) {
scene_frame_timestamps_.size() >= options_.max_scene_size();
if (!scene_frame_timestamps_.empty() &&
(is_end_of_scene || force_buffer_flush)) {
MP_RETURN_IF_ERROR(ProcessScene(is_end_of_scene, cc));
}
@ -240,11 +296,14 @@ namespace {
LOG_EVERY_N(ERROR, 10)
<< "------------------------ (Breathing) Time(s): "
<< cc->Inputs().Tag(kInputVideoFrames).Value().Timestamp().Seconds();
// Only buffer frames if |should_perform_frame_cropping_| is true.
if (should_perform_frame_cropping_) {
const auto& frame = cc->Inputs().Tag(kInputVideoFrames).Get<ImageFrame>();
const cv::Mat frame_mat = formats::MatView(&frame);
cv::Mat copy_mat;
frame_mat.copyTo(copy_mat);
scene_frames_.push_back(copy_mat);
scene_frames_or_empty_.push_back(copy_mat);
}
scene_frame_timestamps_.push_back(cc->InputTimestamp().Value());
is_key_frames_.push_back(
!cc->Inputs().Tag(kInputDetections).Value().IsEmpty());
@ -274,7 +333,7 @@ namespace {
::mediapipe::Status SceneCroppingCalculator::Close(
::mediapipe::CalculatorContext* cc) {
if (!scene_frames_.empty()) {
if (!scene_frame_timestamps_.empty()) {
MP_RETURN_IF_ERROR(ProcessScene(/* is_end_of_scene = */ true, cc));
}
if (cc->Outputs().HasTag(kOutputSummary)) {
@ -282,16 +341,25 @@ namespace {
.Tag(kOutputSummary)
.Add(summary_.release(), Timestamp::PostStream());
}
if (cc->Outputs().HasTag(kExternalRenderingFullVid)) {
cc->Outputs()
.Tag(kExternalRenderingFullVid)
.Add(external_render_list_.release(), Timestamp::PostStream());
}
return ::mediapipe::OkStatus();
}
::mediapipe::Status SceneCroppingCalculator::RemoveStaticBorders() {
int top_border_size = 0, bottom_border_size = 0;
// TODO: split this function into two, one for calculating the border
// sizes, the other for the actual removal of borders from the frames.
::mediapipe::Status SceneCroppingCalculator::RemoveStaticBorders(
int* top_border_size, int* bottom_border_size) {
*top_border_size = 0;
*bottom_border_size = 0;
MP_RETURN_IF_ERROR(ComputeSceneStaticBordersSize(
static_features_, &top_border_size, &bottom_border_size));
static_features_, top_border_size, bottom_border_size));
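// Border sizes are measured at the key-frame (detection) resolution; scale
// them to the input frame height before removing them from the buffered frames.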
const double scale = static_cast<double>(frame_height_) / key_frame_height_;
top_border_distance_ = std::round(scale * top_border_size);
const int bottom_border_distance = std::round(scale * bottom_border_size);
top_border_distance_ = std::round(scale * *top_border_size);
const int bottom_border_distance = std::round(scale * *bottom_border_size);
effective_frame_height_ =
frame_height_ - top_border_distance_ - bottom_border_distance;
@ -301,10 +369,10 @@ namespace {
// Remove borders from frames.
cv::Rect roi(0, top_border_distance_, frame_width_,
effective_frame_height_);
for (int i = 0; i < scene_frames_.size(); ++i) {
for (int i = 0; i < scene_frames_or_empty_.size(); ++i) {
cv::Mat tmp;
scene_frames_[i](roi).copyTo(tmp);
scene_frames_[i] = tmp;
scene_frames_or_empty_[i](roi).copyTo(tmp);
scene_frames_or_empty_[i] = tmp;
}
// Adjust detection bounding boxes.
for (int i = 0; i < key_frame_infos_.size(); ++i) {
@ -373,7 +441,9 @@ void SceneCroppingCalculator::FilterKeyFrameInfo() {
FilterKeyFrameInfo();
// Removes any static borders.
MP_RETURN_IF_ERROR(RemoveStaticBorders());
int top_static_border_size, bottom_static_border_size;
MP_RETURN_IF_ERROR(
RemoveStaticBorders(&top_static_border_size, &bottom_static_border_size));
// Decides if solid background color padding is possible and sets up color
// interpolation functions in CIELAB. Uses linear interpolation by default.
@ -409,20 +479,31 @@ void SceneCroppingCalculator::FilterKeyFrameInfo() {
// Crops scene frames.
std::vector<cv::Mat> cropped_frames;
std::vector<cv::Rect> crop_from_locations;
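// When frame cropping is disabled, pass nullptr so CropFrames only computes
// the crop windows and skips producing cropped pixels.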
auto* cropped_frames_ptr =
should_perform_frame_cropping_ ? &cropped_frames : nullptr;
MP_RETURN_IF_ERROR(scene_cropper_->CropFrames(
scene_summary, scene_frames_, focus_point_frames,
prior_focus_point_frames_, &cropped_frames));
scene_summary, scene_frame_timestamps_.size(), scene_frames_or_empty_,
focus_point_frames, prior_focus_point_frames_, top_static_border_size,
bottom_static_border_size, &crop_from_locations, cropped_frames_ptr));
// Formats and outputs cropped frames.
bool apply_padding = false;
float vertical_fill_precent;
std::vector<cv::Rect> render_to_locations;
cv::Scalar padding_color;
if (should_perform_frame_cropping_) {
MP_RETURN_IF_ERROR(FormatAndOutputCroppedFrames(
cropped_frames, &apply_padding, &vertical_fill_precent, cc));
cropped_frames, &render_to_locations, &apply_padding, &padding_color,
&vertical_fill_precent, cc));
}
// Caches prior FocusPointFrames if this was not the end of a scene.
prior_focus_point_frames_.clear();
if (!is_end_of_scene) {
const int start = std::max(0, static_cast<int>(scene_frames_.size()) -
const int start =
std::max(0, static_cast<int>(scene_frame_timestamps_.size()) -
options_.prior_frame_buffer_size());
for (int i = start; i < num_key_frames; ++i) {
prior_focus_point_frames_.push_back(focus_point_frames[i]);
@ -449,8 +530,31 @@ void SceneCroppingCalculator::FilterKeyFrameInfo() {
scene_summary->set_is_padded(apply_padding);
}
if (cc->Outputs().HasTag(kExternalRenderingPerFrame)) {
for (int i = 0; i < scene_frame_timestamps_.size(); i++) {
auto external_render_message = absl::make_unique<ExternalRenderFrame>();
ConstructExternalRenderMessage(
crop_from_locations[i], render_to_locations[i], padding_color,
scene_frame_timestamps_[i], external_render_message.get());
cc->Outputs()
.Tag(kExternalRenderingPerFrame)
.Add(external_render_message.release(),
Timestamp(scene_frame_timestamps_[i]));
}
}
if (cc->Outputs().HasTag(kExternalRenderingFullVid)) {
for (int i = 0; i < scene_frame_timestamps_.size(); i++) {
ExternalRenderFrame render_frame;
ConstructExternalRenderMessage(crop_from_locations[i],
render_to_locations[i], padding_color,
scene_frame_timestamps_[i], &render_frame);
external_render_list_->push_back(render_frame);
}
}
key_frame_infos_.clear();
scene_frames_.clear();
scene_frames_or_empty_.clear();
scene_frame_timestamps_.clear();
is_key_frames_.clear();
static_features_.clear();
@ -459,8 +563,10 @@ void SceneCroppingCalculator::FilterKeyFrameInfo() {
}
::mediapipe::Status SceneCroppingCalculator::FormatAndOutputCroppedFrames(
const std::vector<cv::Mat>& cropped_frames, bool* apply_padding,
float* vertical_fill_precent, CalculatorContext* cc) {
const std::vector<cv::Mat>& cropped_frames,
std::vector<cv::Rect>* render_to_locations, bool* apply_padding,
cv::Scalar* padding_color, float* vertical_fill_precent,
CalculatorContext* cc) {
RET_CHECK(apply_padding) << "Has padding boolean is null.";
if (cropped_frames.empty()) {
return ::mediapipe::OkStatus();
@ -493,10 +599,22 @@ void SceneCroppingCalculator::FilterKeyFrameInfo() {
<< " target height = " << target_height_;
}
// Compute the "render to" location. This is where the rect taken from the
// input video gets pasted on the output frame. For use with external
// rendering solutions.
const int num_frames = cropped_frames.size();
for (int i = 0; i < num_frames; i++) {
if (*apply_padding) {
render_to_locations->push_back(padder_->ComputeOutputLocation());
} else {
render_to_locations->push_back(
cv::Rect(0, 0, target_width_, target_height_));
}
}
// Resizes cropped frames, pads frames, and output frames.
cv::Scalar* background_color = nullptr;
cv::Scalar interpolated_color;
const int num_frames = cropped_frames.size();
for (int i = 0; i < num_frames; ++i) {
const int64 time_ms = scene_frame_timestamps_[i];
const Timestamp timestamp(time_ms);
@ -561,9 +679,9 @@ mediapipe::Status SceneCroppingCalculator::OutputVizFrames(
if (cc->Outputs().HasTag(kOutputKeyFrameCropViz)) {
std::vector<std::unique_ptr<ImageFrame>> viz_frames;
MP_RETURN_IF_ERROR(DrawDetectionsAndCropRegions(
scene_frames_, is_key_frames_, key_frame_infos_, key_frame_crop_results,
frame_format_, &viz_frames));
for (int i = 0; i < scene_frames_.size(); ++i) {
scene_frames_or_empty_, is_key_frames_, key_frame_infos_,
key_frame_crop_results, frame_format_, &viz_frames));
for (int i = 0; i < scene_frames_or_empty_.size(); ++i) {
cc->Outputs()
.Tag(kOutputKeyFrameCropViz)
.Add(viz_frames[i].release(), Timestamp(scene_frame_timestamps_[i]));
@ -572,9 +690,10 @@ mediapipe::Status SceneCroppingCalculator::OutputVizFrames(
if (cc->Outputs().HasTag(kOutputFocusPointFrameViz)) {
std::vector<std::unique_ptr<ImageFrame>> viz_frames;
MP_RETURN_IF_ERROR(DrawFocusPointAndCropWindow(
scene_frames_, focus_point_frames, options_.viz_overlay_opacity(),
crop_window_width, crop_window_height, frame_format_, &viz_frames));
for (int i = 0; i < scene_frames_.size(); ++i) {
scene_frames_or_empty_, focus_point_frames,
options_.viz_overlay_opacity(), crop_window_width, crop_window_height,
frame_format_, &viz_frames));
for (int i = 0; i < scene_frames_or_empty_.size(); ++i) {
cc->Outputs()
.Tag(kOutputFocusPointFrameViz)
.Add(viz_frames[i].release(), Timestamp(scene_frame_timestamps_[i]));

View File

@ -79,8 +79,10 @@ namespace autoflip {
// Indicators for shot boundaries (output of shot boundary detection).
// - optional tag KEY_FRAMES (type ImageFrame):
// Key frames on which features are detected. This is only used to set the
// detection features frame size, and when it is omitted, the features frame
// size is assumed to be the original scene frame size.
// detection features frame size. Alternatively, set
// video_features_width/video_features_height within the options proto to
// define this value. When neither is set, the features frame size is
// assumed to be the original scene frame size.
//
// Output streams:
// - required tag CROPPED_FRAMES (type ImageFrame):
@ -95,6 +97,12 @@ namespace autoflip {
// - optional tag CROPPING_SUMMARY (type VideoCroppingSummary):
// Debug summary information for the video. Only generates one packet when
// calculator closes.
// - optional tag EXTERNAL_RENDERING_PER_FRAME (type ExternalRenderFrame)
// Provides a per-frame message that can be used to render autoflip using an
// external renderer.
// - optional tag EXTERNAL_RENDERING_FULL_VID (type Vector<ExternalRenderFrame>)
// Provides an end-stream message that can be used to render autoflip using
// an external renderer.
//
// Example config:
// node {
@ -134,8 +142,11 @@ class SceneCroppingCalculator : public CalculatorBase {
::mediapipe::Status Close(::mediapipe::CalculatorContext* cc) override;
private:
// Removes any static borders from the scene frames before cropping.
::mediapipe::Status RemoveStaticBorders();
// Removes any static borders from the scene frames before cropping. The
// arguments |top_border_size| and |bottom_border_size| report the size of the
// removed borders.
::mediapipe::Status RemoveStaticBorders(int* top_border_size,
int* bottom_border_size);
// Initializes a FrameCropRegionComputer given input and target frame sizes.
::mediapipe::Status InitializeFrameCropRegionComputer();
@ -158,8 +169,10 @@ class SceneCroppingCalculator : public CalculatorBase {
// solid background from static features if possible, otherwise uses blurred
// background. Sets apply_padding to true if the scene is padded.
::mediapipe::Status FormatAndOutputCroppedFrames(
const std::vector<cv::Mat>& cropped_frames, bool* apply_padding,
float* vertical_fill_precent, CalculatorContext* cc);
const std::vector<cv::Mat>& cropped_frames,
std::vector<cv::Rect>* render_to_locations, bool* apply_padding,
cv::Scalar* padding_color, float* vertical_fill_precent,
CalculatorContext* cc);
// Draws and outputs visualization frames if those streams are present.
::mediapipe::Status OutputVizFrames(
@ -193,7 +206,11 @@ class SceneCroppingCalculator : public CalculatorBase {
// Buffered frames, timestamps, and indicators for key frames in the current
// scene (size = number of input video frames).
std::vector<cv::Mat> scene_frames_;
// Note: scene_frames_or_empty_ may be empty if the actual cropping operation
// of frames is turned off, e.g. when |should_perform_frame_cropping_| is
// false, so rely on scene_frame_timestamps_.size() to query the number of
// accumulated timestamps rather than scene_frames_or_empty_.size().
std::vector<cv::Mat> scene_frames_or_empty_;
std::vector<int64> scene_frame_timestamps_;
std::vector<bool> is_key_frames_;
@ -242,6 +259,17 @@ class SceneCroppingCalculator : public CalculatorBase {
// Optional diagnostic summary output emitted in Close().
std::unique_ptr<VideoCroppingSummary> summary_ = nullptr;
// Optional list of external rendering messages for each processed frame.
std::unique_ptr<std::vector<ExternalRenderFrame>> external_render_list_;
// Determines whether to perform real cropping on input frames. This flag is
// useful when the user only needs to compute cropping windows, in which case
// setting this flag to false can avoid buffering as well as cropping frames.
// This can significantly reduce memory usage and speed up processing. Note
// that some debug visualizations will necessarily be unavailable when this
// flag is false.
bool should_perform_frame_cropping_ = false;
};
} // namespace autoflip
} // namespace mediapipe

View File

@ -68,6 +68,22 @@ constexpr char kNoKeyFrameConfig[] = R"(
}
})";
constexpr char kDebugConfigNoCroppedFrame[] = R"(
calculator: "SceneCroppingCalculator"
input_stream: "VIDEO_FRAMES:camera_frames_org"
input_stream: "KEY_FRAMES:down_sampled_frames"
input_stream: "DETECTION_FEATURES:salient_regions"
input_stream: "STATIC_FEATURES:border_features"
input_stream: "SHOT_BOUNDARIES:shot_boundary_frames"
output_stream: "KEY_FRAME_CROP_REGION_VIZ_FRAMES:key_frame_crop_viz_frames"
output_stream: "SALIENT_POINT_FRAME_VIZ_FRAMES:salient_point_viz_frames"
options: {
[mediapipe.autoflip.SceneCroppingCalculatorOptions.ext]: {
target_width: $0
target_height: $1
}
})";
constexpr char kDebugConfig[] = R"(
calculator: "SceneCroppingCalculator"
input_stream: "VIDEO_FRAMES:camera_frames_org"
@ -79,6 +95,8 @@ constexpr char kDebugConfig[] = R"(
output_stream: "KEY_FRAME_CROP_REGION_VIZ_FRAMES:key_frame_crop_viz_frames"
output_stream: "SALIENT_POINT_FRAME_VIZ_FRAMES:salient_point_viz_frames"
output_stream: "CROPPING_SUMMARY:cropping_summaries"
output_stream: "EXTERNAL_RENDERING_PER_FRAME:external_rendering_per_frame"
output_stream: "EXTERNAL_RENDERING_FULL_VID:external_rendering_full_vid"
options: {
[mediapipe.autoflip.SceneCroppingCalculatorOptions.ext]: {
target_width: $0
@ -257,6 +275,17 @@ TEST(SceneCroppingCalculatorTest, ChecksPriorFrameBufferSize) {
HasSubstr("Prior frame buffer size is negative."));
}
TEST(SceneCroppingCalculatorTest, ChecksDebugConfigWithoutCroppedFrame) {
const CalculatorGraphConfig::Node config =
ParseTextProtoOrDie<CalculatorGraphConfig::Node>(absl::Substitute(
kDebugConfigNoCroppedFrame, kTargetWidth, kTargetHeight,
kTargetSizeType, 0, kPriorFrameBufferSize));
auto runner = absl::make_unique<CalculatorRunner>(config);
const auto status = runner->Run();
EXPECT_FALSE(status.ok());
EXPECT_THAT(status.ToString(), HasSubstr("can only be used when"));
}
// Checks that the calculator crops scene frames when there is no input key
// frames stream.
TEST(SceneCroppingCalculatorTest, HandlesNoKeyFrames) {
@ -299,14 +328,34 @@ TEST(SceneCroppingCalculatorTest, OutputsDebugStreams) {
EXPECT_TRUE(outputs.HasTag("KEY_FRAME_CROP_REGION_VIZ_FRAMES"));
EXPECT_TRUE(outputs.HasTag("SALIENT_POINT_FRAME_VIZ_FRAMES"));
EXPECT_TRUE(outputs.HasTag("CROPPING_SUMMARY"));
EXPECT_TRUE(outputs.HasTag("EXTERNAL_RENDERING_PER_FRAME"));
EXPECT_TRUE(outputs.HasTag("EXTERNAL_RENDERING_FULL_VID"));
const auto& crop_region_viz_frames_outputs =
outputs.Tag("KEY_FRAME_CROP_REGION_VIZ_FRAMES").packets;
const auto& salient_point_viz_frames_outputs =
outputs.Tag("SALIENT_POINT_FRAME_VIZ_FRAMES").packets;
const auto& summary_output = outputs.Tag("CROPPING_SUMMARY").packets;
const auto& ext_render_per_frame =
outputs.Tag("EXTERNAL_RENDERING_PER_FRAME").packets;
const auto& ext_render_full_vid =
outputs.Tag("EXTERNAL_RENDERING_FULL_VID").packets;
EXPECT_EQ(crop_region_viz_frames_outputs.size(), num_frames);
EXPECT_EQ(salient_point_viz_frames_outputs.size(), num_frames);
EXPECT_EQ(summary_output.size(), 1);
EXPECT_EQ(ext_render_per_frame.size(), num_frames);
EXPECT_EQ(ext_render_full_vid.size(), 1);
EXPECT_EQ(ext_render_per_frame[0].Get<ExternalRenderFrame>().timestamp_us(),
0);
EXPECT_EQ(ext_render_full_vid[0]
.Get<std::vector<ExternalRenderFrame>>()[0]
.timestamp_us(),
0);
EXPECT_EQ(ext_render_per_frame[1].Get<ExternalRenderFrame>().timestamp_us(),
20000);
EXPECT_EQ(ext_render_full_vid[0]
.Get<std::vector<ExternalRenderFrame>>()[1]
.timestamp_us(),
20000);
for (int i = 0; i < num_frames; ++i) {
const auto& crop_region_viz_frame =

View File

@ -173,5 +173,28 @@ PaddingEffectGenerator::PaddingEffectGenerator(const int input_width,
return ::mediapipe::OkStatus();
}
cv::Rect PaddingEffectGenerator::ComputeOutputLocation() {
const int effective_input_width =
is_vertical_padding_ ? input_width_ : input_height_;
const int effective_input_height =
is_vertical_padding_ ? input_height_ : input_width_;
const int effective_output_width =
is_vertical_padding_ ? output_width_ : output_height_;
const int effective_output_height =
is_vertical_padding_ ? output_height_ : output_width_;
// Step 3 from the "process" call above: compute the foreground location.
const int foreground_height =
effective_input_height * effective_output_width / effective_input_width;
const int x = 0;
const int y = (effective_output_height - foreground_height) / 2;
const int width = effective_output_width;
const int height = foreground_height;
cv::Rect region_to_embed_foreground(x, y, width, height);
return region_to_embed_foreground;
}
} // namespace autoflip
} // namespace mediapipe

View File

@ -55,6 +55,10 @@ class PaddingEffectGenerator {
ImageFrame* output_frame,
const cv::Scalar* background_color_in_rgb = nullptr);
// Compute the "render location" on the output frame where the "crop from"
// location is to be placed. For use with external rendering solutions.
cv::Rect ComputeOutputLocation();
private:
double target_aspect_ratio_;
int input_width_ = -1;

View File

@ -182,6 +182,16 @@ TEST(PaddingEffectGeneratorTest, ScaleToMultipleOfTwo) {
EXPECT_EQ(result_frame.Width(), expect_width);
EXPECT_EQ(result_frame.Height(), expect_height);
}
TEST(PaddingEffectGeneratorTest, ComputeOutputLocation) {
PaddingEffectGenerator generator(1920, 1080, 1.0);
auto result_rect = generator.ComputeOutputLocation();
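// Expected values below follow from ComputeOutputLocation's math, assuming a
// 1080x1080 output for a 1.0 target aspect ratio: foreground height =
// 1080 * 1080 / 1920 = 607 and y = (1080 - 607) / 2 = 236.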
EXPECT_EQ(result_rect.x, 0);
EXPECT_EQ(result_rect.y, 236);
EXPECT_EQ(result_rect.width, 1080);
EXPECT_EQ(result_rect.height, 607);
}
} // namespace
} // namespace autoflip
} // namespace mediapipe

View File

@ -25,14 +25,13 @@ namespace mediapipe {
namespace autoflip {
::mediapipe::Status SceneCropper::CropFrames(
const SceneKeyFrameCropSummary& scene_summary,
const std::vector<cv::Mat>& scene_frames,
const SceneKeyFrameCropSummary& scene_summary, const int num_scene_frames,
const std::vector<cv::Mat>& scene_frames_or_empty,
const std::vector<FocusPointFrame>& focus_point_frames,
const std::vector<FocusPointFrame>& prior_focus_point_frames,
int top_static_border_size, int bottom_static_border_size,
std::vector<cv::Rect>* crop_from_location,
std::vector<cv::Mat>* cropped_frames) const {
RET_CHECK_NE(cropped_frames, nullptr) << "Output cropped frames is null.";
const int num_scene_frames = scene_frames.size();
RET_CHECK_GT(num_scene_frames, 0) << "No scene frames.";
RET_CHECK_EQ(focus_point_frames.size(), num_scene_frames)
<< "Wrong size of FocusPointFrames.";
@ -69,15 +68,36 @@ namespace autoflip {
xform = affine_opencv;
}
// If no cropped_frames is passed in, return directly.
if (!cropped_frames) {
return ::mediapipe::OkStatus();
}
RET_CHECK(!scene_frames_or_empty.empty())
<< "If |cropped_frames| != nullptr, scene_frames_or_empty must not be "
"empty.";
// Prepares cropped frames.
cropped_frames->resize(num_scene_frames);
for (int i = 0; i < num_scene_frames; ++i) {
(*cropped_frames)[i] =
cv::Mat::zeros(crop_height, crop_width, scene_frames[i].type());
(*cropped_frames)[i] = cv::Mat::zeros(crop_height, crop_width,
scene_frames_or_empty[i].type());
}
return AffineRetarget(cv::Size(crop_width, crop_height), scene_frames,
scene_frame_xforms, cropped_frames);
// Store the "crop from" location on the input frame for use with an external
// renderer.
for (int i = 0; i < num_scene_frames; i++) {
const int left = scene_frame_xforms[i].at<float>(0, 2);
const int right = left + crop_width;
const int top = top_static_border_size;
const int bottom =
top_static_border_size +
(crop_height - top_static_border_size - bottom_static_border_size);
crop_from_location->push_back(
cv::Rect(left, top, right - left, bottom - top));
}
return AffineRetarget(cv::Size(crop_width, crop_height),
scene_frames_or_empty, scene_frame_xforms,
cropped_frames);
}
} // namespace autoflip

View File

@ -48,14 +48,19 @@ class SceneCropper {
SceneCropper() {}
~SceneCropper() {}
// Crops scene frames given SceneKeyFrameCropSummary, FocusPointFrames, and
// any prior FocusPointFrames (to ensure smoothness when there was no actual
// scene change).
// Computes transformation matrix given SceneKeyFrameCropSummary,
// FocusPointFrames, and any prior FocusPointFrames (to ensure smoothness when
// there was no actual scene change). Optionally crops the input frames based
// on the transform matrix if |cropped_frames| is not nullptr and
// |scene_frames_or_empty| isn't empty.
// TODO: split this function into two separate functions.
::mediapipe::Status CropFrames(
const SceneKeyFrameCropSummary& scene_summary,
const std::vector<cv::Mat>& scene_frames,
const SceneKeyFrameCropSummary& scene_summary, const int num_scene_frames,
const std::vector<cv::Mat>& scene_frames_or_empty,
const std::vector<FocusPointFrame>& focus_point_frames,
const std::vector<FocusPointFrame>& prior_focus_point_frames,
int top_static_border_size, int bottom_static_border_size,
std::vector<cv::Rect>* all_scene_frame_xforms,
std::vector<cv::Mat>* cropped_frames) const;
};

View File

@ -71,24 +71,16 @@ std::vector<FocusPointFrame> GetDefaultFocusPointFrames() {
return GetFocusPointFrames(kNumSceneFrames);
}
// Checks that CropFrames checks output pointer is not null.
TEST(SceneCropperTest, CropFramesChecksOutputNotNull) {
SceneCropper scene_cropper;
const auto status = scene_cropper.CropFrames(
GetDefaultSceneKeyFrameCropSummary(), GetDefaultSceneFrames(),
GetDefaultFocusPointFrames(), GetFocusPointFrames(0), nullptr);
EXPECT_FALSE(status.ok());
EXPECT_THAT(status.ToString(), HasSubstr("Output cropped frames is null."));
}
// Checks that CropFrames checks that scene frames size is positive.
TEST(SceneCropperTest, CropFramesChecksSceneFramesSize) {
SceneCropper scene_cropper;
std::vector<cv::Mat> scene_frames(0);
std::vector<cv::Mat> cropped_frames;
std::vector<cv::Rect> crop_from_locations;
const auto status = scene_cropper.CropFrames(
GetDefaultSceneKeyFrameCropSummary(), scene_frames,
GetDefaultFocusPointFrames(), GetFocusPointFrames(0), &cropped_frames);
GetDefaultSceneKeyFrameCropSummary(), scene_frames.size(), scene_frames,
GetDefaultFocusPointFrames(), GetFocusPointFrames(0), 0, 0,
&crop_from_locations, &cropped_frames);
EXPECT_FALSE(status.ok());
EXPECT_THAT(status.ToString(), HasSubstr("No scene frames."));
}
@ -97,10 +89,12 @@ TEST(SceneCropperTest, CropFramesChecksSceneFramesSize) {
TEST(SceneCropperTest, CropFramesChecksFocusPointFramesSize) {
SceneCropper scene_cropper;
std::vector<cv::Mat> cropped_frames;
std::vector<cv::Rect> crop_from_locations;
const auto& scene_frames = GetDefaultSceneFrames();
const auto status = scene_cropper.CropFrames(
GetDefaultSceneKeyFrameCropSummary(), GetDefaultSceneFrames(),
GetFocusPointFrames(kNumSceneFrames - 1), GetFocusPointFrames(0),
&cropped_frames);
GetDefaultSceneKeyFrameCropSummary(), scene_frames.size(), scene_frames,
GetFocusPointFrames(kNumSceneFrames - 1), GetFocusPointFrames(0), 0, 0,
&crop_from_locations, &cropped_frames);
EXPECT_FALSE(status.ok());
EXPECT_THAT(status.ToString(), HasSubstr("Wrong size of FocusPointFrames"));
}
@ -111,9 +105,12 @@ TEST(SceneCropperTest, CropFramesChecksCropSizePositive) {
scene_summary.set_crop_window_width(-1);
SceneCropper scene_cropper;
std::vector<cv::Mat> cropped_frames;
std::vector<cv::Rect> crop_from_locations;
const auto& scene_frames = GetDefaultSceneFrames();
const auto status = scene_cropper.CropFrames(
scene_summary, GetDefaultSceneFrames(), GetDefaultFocusPointFrames(),
GetFocusPointFrames(0), &cropped_frames);
scene_summary, scene_frames.size(), scene_frames,
GetDefaultFocusPointFrames(), GetFocusPointFrames(0), 0, 0,
&crop_from_locations, &cropped_frames);
EXPECT_FALSE(status.ok());
EXPECT_THAT(status.ToString(), HasSubstr("Crop width is non-positive."));
}
@ -124,9 +121,12 @@ TEST(SceneCropperTest, InitializesRetargeterChecksCropSizeNotExceedFrameSize) {
scene_summary.set_crop_window_height(kSceneHeight + 1);
SceneCropper scene_cropper;
std::vector<cv::Mat> cropped_frames;
std::vector<cv::Rect> crop_from_locations;
const auto& scene_frames = GetDefaultSceneFrames();
const auto status = scene_cropper.CropFrames(
scene_summary, GetDefaultSceneFrames(), GetDefaultFocusPointFrames(),
GetFocusPointFrames(0), &cropped_frames);
scene_summary, scene_frames.size(), scene_frames,
GetDefaultFocusPointFrames(), GetFocusPointFrames(0), 0, 0,
&crop_from_locations, &cropped_frames);
EXPECT_FALSE(status.ok());
EXPECT_THAT(status.ToString(),
HasSubstr("Crop height exceeds frame height."));
@ -136,9 +136,12 @@ TEST(SceneCropperTest, InitializesRetargeterChecksCropSizeNotExceedFrameSize) {
TEST(SceneCropperTest, CropFramesWorksWithoutPriorFocusPointFrames) {
SceneCropper scene_cropper;
std::vector<cv::Mat> cropped_frames;
std::vector<cv::Rect> crop_from_locations;
const auto& scene_frames = GetDefaultSceneFrames();
MP_ASSERT_OK(scene_cropper.CropFrames(
GetDefaultSceneKeyFrameCropSummary(), GetDefaultSceneFrames(),
GetDefaultFocusPointFrames(), GetFocusPointFrames(0), &cropped_frames));
GetDefaultSceneKeyFrameCropSummary(), scene_frames.size(), scene_frames,
GetDefaultFocusPointFrames(), GetFocusPointFrames(0), 0, 0,
&crop_from_locations, &cropped_frames));
ASSERT_EQ(cropped_frames.size(), kNumSceneFrames);
for (int i = 0; i < kNumSceneFrames; ++i) {
EXPECT_EQ(cropped_frames[i].rows, kCropHeight);
@ -150,9 +153,12 @@ TEST(SceneCropperTest, CropFramesWorksWithoutPriorFocusPointFrames) {
TEST(SceneCropperTest, CropFramesWorksWithPriorFocusPointFrames) {
SceneCropper scene_cropper;
std::vector<cv::Mat> cropped_frames;
std::vector<cv::Rect> crop_from_locations;
const auto& scene_frames = GetDefaultSceneFrames();
MP_EXPECT_OK(scene_cropper.CropFrames(
GetDefaultSceneKeyFrameCropSummary(), GetDefaultSceneFrames(),
GetDefaultFocusPointFrames(), GetFocusPointFrames(3), &cropped_frames));
GetDefaultSceneKeyFrameCropSummary(), scene_frames.size(), scene_frames,
GetDefaultFocusPointFrames(), GetFocusPointFrames(3), 0, 0,
&crop_from_locations, &cropped_frames));
EXPECT_EQ(cropped_frames.size(), kNumSceneFrames);
for (int i = 0; i < kNumSceneFrames; ++i) {
EXPECT_EQ(cropped_frames[i].rows, kCropHeight);

View File

@ -0,0 +1,42 @@
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
licenses(["notice"]) # Apache 2.0
package(default_visibility = ["//mediapipe/examples:__subpackages__"])
cc_binary(
name = "face_mesh_tflite",
deps = [
"//mediapipe/examples/desktop:simple_run_graph_main",
"//mediapipe/graphs/face_mesh:desktop_calculators",
],
)
cc_binary(
name = "face_mesh_cpu",
deps = [
"//mediapipe/examples/desktop:demo_run_graph_main",
"//mediapipe/graphs/face_mesh:desktop_live_calculators",
],
)
# Linux only
cc_binary(
name = "face_mesh_gpu",
deps = [
"//mediapipe/examples/desktop:demo_run_graph_main_gpu",
"//mediapipe/graphs/face_mesh:desktop_live_gpu_calculators",
],
)

View File

@ -0,0 +1,21 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#import <UIKit/UIKit.h>
@interface AppDelegate : UIResponder <UIApplicationDelegate>
@property(strong, nonatomic) UIWindow *window;
@end

View File

@ -0,0 +1,59 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#import "AppDelegate.h"
@interface AppDelegate ()
@end
@implementation AppDelegate
- (BOOL)application:(UIApplication *)application
didFinishLaunchingWithOptions:(NSDictionary *)launchOptions {
// Override point for customization after application launch.
return YES;
}
- (void)applicationWillResignActive:(UIApplication *)application {
// Sent when the application is about to move from active to inactive state. This can occur for
// certain types of temporary interruptions (such as an incoming phone call or SMS message) or
// when the user quits the application and it begins the transition to the background state. Use
// this method to pause ongoing tasks, disable timers, and invalidate graphics rendering
// callbacks. Games should use this method to pause the game.
}
- (void)applicationDidEnterBackground:(UIApplication *)application {
// Use this method to release shared resources, save user data, invalidate timers, and store
// enough application state information to restore your application to its current state in case
// it is terminated later. If your application supports background execution, this method is
// called instead of applicationWillTerminate: when the user quits.
}
- (void)applicationWillEnterForeground:(UIApplication *)application {
// Called as part of the transition from the background to the active state; here you can undo
// many of the changes made on entering the background.
}
- (void)applicationDidBecomeActive:(UIApplication *)application {
// Restart any tasks that were paused (or not yet started) while the application was inactive. If
// the application was previously in the background, optionally refresh the user interface.
}
- (void)applicationWillTerminate:(UIApplication *)application {
// Called when the application is about to terminate. Save data if appropriate. See also
// applicationDidEnterBackground:.
}
@end

View File

@ -0,0 +1,99 @@
{
"images" : [
{
"idiom" : "iphone",
"size" : "20x20",
"scale" : "2x"
},
{
"idiom" : "iphone",
"size" : "20x20",
"scale" : "3x"
},
{
"idiom" : "iphone",
"size" : "29x29",
"scale" : "2x"
},
{
"idiom" : "iphone",
"size" : "29x29",
"scale" : "3x"
},
{
"idiom" : "iphone",
"size" : "40x40",
"scale" : "2x"
},
{
"idiom" : "iphone",
"size" : "40x40",
"scale" : "3x"
},
{
"idiom" : "iphone",
"size" : "60x60",
"scale" : "2x"
},
{
"idiom" : "iphone",
"size" : "60x60",
"scale" : "3x"
},
{
"idiom" : "ipad",
"size" : "20x20",
"scale" : "1x"
},
{
"idiom" : "ipad",
"size" : "20x20",
"scale" : "2x"
},
{
"idiom" : "ipad",
"size" : "29x29",
"scale" : "1x"
},
{
"idiom" : "ipad",
"size" : "29x29",
"scale" : "2x"
},
{
"idiom" : "ipad",
"size" : "40x40",
"scale" : "1x"
},
{
"idiom" : "ipad",
"size" : "40x40",
"scale" : "2x"
},
{
"idiom" : "ipad",
"size" : "76x76",
"scale" : "1x"
},
{
"idiom" : "ipad",
"size" : "76x76",
"scale" : "2x"
},
{
"idiom" : "ipad",
"size" : "83.5x83.5",
"scale" : "2x"
},
{
"idiom" : "ios-marketing",
"size" : "1024x1024",
"scale" : "1x"
}
],
"info" : {
"version" : 1,
"author" : "xcode"
}
}

View File

@ -0,0 +1,7 @@
{
"info" : {
"version" : 1,
"author" : "xcode"
}
}

View File

@ -0,0 +1,76 @@
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
load(
"@build_bazel_rules_apple//apple:ios.bzl",
"ios_application",
)
licenses(["notice"]) # Apache 2.0
MIN_IOS_VERSION = "10.0"
ios_application(
name = "FaceMeshGpuApp",
bundle_id = "com.google.mediapipe.FaceMeshGpu",
families = [
"iphone",
"ipad",
],
infoplists = ["Info.plist"],
minimum_os_version = MIN_IOS_VERSION,
provisioning_profile = "//mediapipe/examples/ios:provisioning_profile",
deps = [
":FaceMeshGpuAppLibrary",
"@ios_opencv//:OpencvFramework",
],
)
objc_library(
name = "FaceMeshGpuAppLibrary",
srcs = [
"AppDelegate.m",
"ViewController.mm",
"main.m",
],
hdrs = [
"AppDelegate.h",
"ViewController.h",
],
data = [
"Base.lproj/LaunchScreen.storyboard",
"Base.lproj/Main.storyboard",
"//mediapipe/graphs/face_mesh:face_mesh_mobile_gpu_binary_graph",
"//mediapipe/modules/face_detection:face_detection_front.tflite",
"//mediapipe/modules/face_landmark:face_landmark.tflite",
],
sdk_frameworks = [
"AVFoundation",
"CoreGraphics",
"CoreMedia",
"UIKit",
],
deps = [
"//mediapipe/objc:mediapipe_framework_ios",
"//mediapipe/objc:mediapipe_input_sources_ios",
"//mediapipe/objc:mediapipe_layer_renderer",
] + select({
"//mediapipe:ios_i386": [],
"//mediapipe:ios_x86_64": [],
"//conditions:default": [
"//mediapipe/graphs/face_mesh:mobile_calculators",
"//mediapipe/framework/formats:landmark_cc_proto",
],
}),
)

View File

@ -0,0 +1,25 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<document type="com.apple.InterfaceBuilder3.CocoaTouch.Storyboard.XIB" version="3.0" toolsVersion="13122.16" targetRuntime="iOS.CocoaTouch" propertyAccessControl="none" useAutolayout="YES" launchScreen="YES" useTraitCollections="YES" useSafeAreas="YES" colorMatched="YES" initialViewController="01J-lp-oVM">
<dependencies>
<plugIn identifier="com.apple.InterfaceBuilder.IBCocoaTouchPlugin" version="13104.12"/>
<capability name="Safe area layout guides" minToolsVersion="9.0"/>
<capability name="documents saved in the Xcode 8 format" minToolsVersion="8.0"/>
</dependencies>
<scenes>
<!--View Controller-->
<scene sceneID="EHf-IW-A2E">
<objects>
<viewController id="01J-lp-oVM" sceneMemberID="viewController">
<view key="view" contentMode="scaleToFill" id="Ze5-6b-2t3">
<rect key="frame" x="0.0" y="0.0" width="375" height="667"/>
<autoresizingMask key="autoresizingMask" widthSizable="YES" heightSizable="YES"/>
<color key="backgroundColor" red="1" green="1" blue="1" alpha="1" colorSpace="custom" customColorSpace="sRGB"/>
<viewLayoutGuide key="safeArea" id="6Tk-OE-BBY"/>
</view>
</viewController>
<placeholder placeholderIdentifier="IBFirstResponder" id="iYj-Kq-Ea1" userLabel="First Responder" sceneMemberID="firstResponder"/>
</objects>
<point key="canvasLocation" x="53" y="375"/>
</scene>
</scenes>
</document>

View File

@ -0,0 +1,51 @@
<?xml version="1.0" encoding="UTF-8"?>
<document type="com.apple.InterfaceBuilder3.CocoaTouch.Storyboard.XIB" version="3.0" toolsVersion="14490.70" targetRuntime="iOS.CocoaTouch" propertyAccessControl="none" useAutolayout="YES" useTraitCollections="YES" useSafeAreas="YES" colorMatched="YES" initialViewController="BYZ-38-t0r">
<device id="retina4_7" orientation="portrait">
<adaptation id="fullscreen"/>
</device>
<dependencies>
<plugIn identifier="com.apple.InterfaceBuilder.IBCocoaTouchPlugin" version="14490.49"/>
<capability name="Safe area layout guides" minToolsVersion="9.0"/>
<capability name="documents saved in the Xcode 8 format" minToolsVersion="8.0"/>
</dependencies>
<scenes>
<!--View Controller-->
<scene sceneID="tne-QT-ifu">
<objects>
<viewController id="BYZ-38-t0r" customClass="ViewController" sceneMemberID="viewController">
<view key="view" contentMode="scaleToFill" id="8bC-Xf-vdC">
<rect key="frame" x="0.0" y="0.0" width="375" height="667"/>
<autoresizingMask key="autoresizingMask" widthSizable="YES" heightSizable="YES"/>
<subviews>
<view contentMode="scaleToFill" fixedFrame="YES" translatesAutoresizingMaskIntoConstraints="NO" id="EfB-xq-knP">
<rect key="frame" x="0.0" y="20" width="375" height="647"/>
<autoresizingMask key="autoresizingMask" widthSizable="YES" heightSizable="YES"/>
<subviews>
<label opaque="NO" userInteractionEnabled="NO" contentMode="left" horizontalHuggingPriority="251" verticalHuggingPriority="251" fixedFrame="YES" text="Camera access needed for this demo. Please enable camera access in the Settings app." textAlignment="center" lineBreakMode="tailTruncation" numberOfLines="0" baselineAdjustment="alignBaselines" adjustsFontSizeToFit="NO" translatesAutoresizingMaskIntoConstraints="NO" id="emf-N5-sEd">
<rect key="frame" x="57" y="248" width="260" height="151"/>
<autoresizingMask key="autoresizingMask" flexibleMinX="YES" flexibleMaxX="YES" flexibleMinY="YES" flexibleMaxY="YES"/>
<fontDescription key="fontDescription" type="system" pointSize="17"/>
<color key="textColor" white="1" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
<nil key="highlightedColor"/>
</label>
</subviews>
<color key="backgroundColor" white="0.0" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
<accessibility key="accessibilityConfiguration" label="PreviewDisplayView">
<bool key="isElement" value="YES"/>
</accessibility>
</view>
</subviews>
<color key="backgroundColor" red="1" green="1" blue="1" alpha="1" colorSpace="custom" customColorSpace="sRGB"/>
<viewLayoutGuide key="safeArea" id="6Tk-OE-BBY"/>
</view>
<connections>
<outlet property="_liveView" destination="EfB-xq-knP" id="JQp-2n-q9q"/>
<outlet property="_noCameraLabel" destination="emf-N5-sEd" id="91G-3Z-cU3"/>
</connections>
</viewController>
<placeholder placeholderIdentifier="IBFirstResponder" id="dkx-z0-nzr" sceneMemberID="firstResponder"/>
</objects>
<point key="canvasLocation" x="48.799999999999997" y="20.239880059970016"/>
</scene>
</scenes>
</document>

View File

@ -0,0 +1,42 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>NSCameraUsageDescription</key>
<string>This app uses the camera to demonstrate live video processing.</string>
<key>CFBundleDevelopmentRegion</key>
<string>en</string>
<key>CFBundleExecutable</key>
<string>$(EXECUTABLE_NAME)</string>
<key>CFBundleIdentifier</key>
<string>$(PRODUCT_BUNDLE_IDENTIFIER)</string>
<key>CFBundleInfoDictionaryVersion</key>
<string>6.0</string>
<key>CFBundleName</key>
<string>$(PRODUCT_NAME)</string>
<key>CFBundlePackageType</key>
<string>APPL</string>
<key>CFBundleShortVersionString</key>
<string>1.0</string>
<key>CFBundleVersion</key>
<string>1</string>
<key>LSRequiresIPhoneOS</key>
<true/>
<key>UILaunchStoryboardName</key>
<string>LaunchScreen</string>
<key>UIMainStoryboardFile</key>
<string>Main</string>
<key>UIRequiredDeviceCapabilities</key>
<array>
<string>armv7</string>
</array>
<key>UISupportedInterfaceOrientations</key>
<array>
<string>UIInterfaceOrientationPortrait</string>
</array>
<key>UISupportedInterfaceOrientations~ipad</key>
<array>
<string>UIInterfaceOrientationPortrait</string>
</array>
</dict>
</plist>

View File

@ -0,0 +1,19 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#import <UIKit/UIKit.h>
@interface ViewController : UIViewController
@end

View File

@ -0,0 +1,210 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#import "ViewController.h"
#import "mediapipe/objc/MPPCameraInputSource.h"
#import "mediapipe/objc/MPPGraph.h"
#import "mediapipe/objc/MPPLayerRenderer.h"
#include "mediapipe/framework/formats/landmark.pb.h"
static NSString* const kGraphName = @"face_mesh_mobile_gpu";
static const char* kInputStream = "input_video";
static const char* kNumFacesInputSidePacket = "num_faces";
static const char* kOutputStream = "output_video";
static const char* kLandmarksOutputStream = "multi_face_landmarks";
static const char* kVideoQueueLabel = "com.google.mediapipe.example.videoQueue";
// Max number of faces to detect/process.
static const int kNumFaces = 1;
@interface ViewController () <MPPGraphDelegate, MPPInputSourceDelegate>
// The MediaPipe graph currently in use. Initialized in viewDidLoad, started in viewWillAppear: and
// sent video frames on _videoQueue.
@property(nonatomic) MPPGraph* mediapipeGraph;
@end
@implementation ViewController {
/// Handles camera access via AVCaptureSession library.
MPPCameraInputSource* _cameraSource;
/// Inform the user when camera is unavailable.
IBOutlet UILabel* _noCameraLabel;
/// Display the camera preview frames.
IBOutlet UIView* _liveView;
/// Render frames in a layer.
MPPLayerRenderer* _renderer;
/// Process camera frames on this queue.
dispatch_queue_t _videoQueue;
}
#pragma mark - Cleanup methods
- (void)dealloc {
self.mediapipeGraph.delegate = nil;
[self.mediapipeGraph cancel];
// Ignore errors since we're cleaning up.
[self.mediapipeGraph closeAllInputStreamsWithError:nil];
[self.mediapipeGraph waitUntilDoneWithError:nil];
}
#pragma mark - MediaPipe graph methods
+ (MPPGraph*)loadGraphFromResource:(NSString*)resource {
// Load the graph config resource.
NSError* configLoadError = nil;
NSBundle* bundle = [NSBundle bundleForClass:[self class]];
if (!resource || resource.length == 0) {
return nil;
}
NSURL* graphURL = [bundle URLForResource:resource withExtension:@"binarypb"];
NSData* data = [NSData dataWithContentsOfURL:graphURL options:0 error:&configLoadError];
if (!data) {
NSLog(@"Failed to load MediaPipe graph config: %@", configLoadError);
return nil;
}
// Parse the graph config resource into mediapipe::CalculatorGraphConfig proto object.
mediapipe::CalculatorGraphConfig config;
config.ParseFromArray(data.bytes, data.length);
// Create MediaPipe graph with mediapipe::CalculatorGraphConfig proto object.
MPPGraph* newGraph = [[MPPGraph alloc] initWithGraphConfig:config];
[newGraph addFrameOutputStream:kOutputStream outputPacketType:MPPPacketTypePixelBuffer];
[newGraph addFrameOutputStream:kLandmarksOutputStream outputPacketType:MPPPacketTypeRaw];
[newGraph setSidePacket:(mediapipe::MakePacket<int>(kNumFaces)) named:kNumFacesInputSidePacket];
return newGraph;
}
#pragma mark - UIViewController methods
- (void)viewDidLoad {
[super viewDidLoad];
_renderer = [[MPPLayerRenderer alloc] init];
_renderer.layer.frame = _liveView.layer.bounds;
[_liveView.layer addSublayer:_renderer.layer];
_renderer.frameScaleMode = MPPFrameScaleModeFillAndCrop;
// When using the front camera, mirror the input for a more natural look.
_renderer.mirrored = YES;
dispatch_queue_attr_t qosAttribute = dispatch_queue_attr_make_with_qos_class(
DISPATCH_QUEUE_SERIAL, QOS_CLASS_USER_INTERACTIVE, /*relative_priority=*/0);
_videoQueue = dispatch_queue_create(kVideoQueueLabel, qosAttribute);
_cameraSource = [[MPPCameraInputSource alloc] init];
[_cameraSource setDelegate:self queue:_videoQueue];
_cameraSource.sessionPreset = AVCaptureSessionPresetHigh;
_cameraSource.cameraPosition = AVCaptureDevicePositionFront;
// The frame's native format is rotated with respect to the portrait orientation.
_cameraSource.orientation = AVCaptureVideoOrientationPortrait;
self.mediapipeGraph = [[self class] loadGraphFromResource:kGraphName];
self.mediapipeGraph.delegate = self;
// Set maxFramesInFlight to a small value to avoid memory contention for real-time processing.
self.mediapipeGraph.maxFramesInFlight = 2;
}
// In this application, there is only one ViewController which has no navigation to other view
// controllers, and there is only one View with live display showing the result of running the
// MediaPipe graph on the live video feed. If more view controllers are needed later, the graph
// setup/teardown and camera start/stop logic should be updated appropriately in response to the
// appearance/disappearance of this ViewController, as viewWillAppear: can be invoked multiple times
// depending on the application navigation flow in that case.
- (void)viewWillAppear:(BOOL)animated {
[super viewWillAppear:animated];
[_cameraSource requestCameraAccessWithCompletionHandler:^void(BOOL granted) {
if (granted) {
[self startGraphAndCamera];
dispatch_async(dispatch_get_main_queue(), ^{
_noCameraLabel.hidden = YES;
});
}
}];
}
- (void)startGraphAndCamera {
// Start running self.mediapipeGraph.
NSError* error;
if (![self.mediapipeGraph startWithError:&error]) {
NSLog(@"Failed to start graph: %@", error);
}
// Start fetching frames from the camera.
dispatch_async(_videoQueue, ^{
[_cameraSource start];
});
}
#pragma mark - MPPGraphDelegate methods
// Receives CVPixelBufferRef from the MediaPipe graph. Invoked on a MediaPipe worker thread.
- (void)mediapipeGraph:(MPPGraph*)graph
didOutputPixelBuffer:(CVPixelBufferRef)pixelBuffer
fromStream:(const std::string&)streamName {
if (streamName == kOutputStream) {
// Display the captured image on the screen.
CVPixelBufferRetain(pixelBuffer);
dispatch_async(dispatch_get_main_queue(), ^{
[_renderer renderPixelBuffer:pixelBuffer];
CVPixelBufferRelease(pixelBuffer);
});
}
}
// Receives a raw packet from the MediaPipe graph. Invoked on a MediaPipe worker thread.
- (void)mediapipeGraph:(MPPGraph*)graph
didOutputPacket:(const ::mediapipe::Packet&)packet
fromStream:(const std::string&)streamName {
if (streamName == kLandmarksOutputStream) {
if (packet.IsEmpty()) {
NSLog(@"[TS:%lld] No face landmarks", packet.Timestamp().Value());
return;
}
const auto& multi_face_landmarks = packet.Get<std::vector<::mediapipe::NormalizedLandmarkList>>();
NSLog(@"[TS:%lld] Number of face instances with landmarks: %lu", packet.Timestamp().Value(),
multi_face_landmarks.size());
for (int face_index = 0; face_index < multi_face_landmarks.size(); ++face_index) {
const auto& landmarks = multi_face_landmarks[face_index];
NSLog(@"\tNumber of landmarks for face[%d]: %d", face_index, landmarks.landmark_size());
for (int i = 0; i < landmarks.landmark_size(); ++i) {
NSLog(@"\t\tLandmark[%d]: (%f, %f, %f)", i, landmarks.landmark(i).x(),
landmarks.landmark(i).y(), landmarks.landmark(i).z());
}
}
}
}
#pragma mark - MPPInputSourceDelegate methods
// Must be invoked on _videoQueue.
- (void)processVideoFrame:(CVPixelBufferRef)imageBuffer
timestamp:(CMTime)timestamp
fromSource:(MPPInputSource*)source {
if (source != _cameraSource) {
NSLog(@"Unknown source: %@", source);
return;
}
[self.mediapipeGraph sendPixelBuffer:imageBuffer
intoStream:kInputStream
packetType:MPPPacketTypePixelBuffer];
}
@end
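The delegate above only logs landmark coordinates. As a rough sketch of further consumption (not part of this change; the helper name is illustrative), the C++ snippet below computes a per-face bounding box, assuming the landmarks are normalized to [0, 1] as described in the graph files later in this commit:

#include <algorithm>
#include "mediapipe/framework/formats/landmark.pb.h"

// Axis-aligned bounding box in normalized [0, 1] coordinates.
struct NormalizedBox {
  float xmin = 1.0f, ymin = 1.0f, xmax = 0.0f, ymax = 0.0f;
};

// Computes the bounding box of all landmarks of one face.
NormalizedBox BoundingBoxForFace(
    const mediapipe::NormalizedLandmarkList& landmarks) {
  NormalizedBox box;
  for (int i = 0; i < landmarks.landmark_size(); ++i) {
    const auto& lm = landmarks.landmark(i);
    box.xmin = std::min(box.xmin, lm.x());
    box.ymin = std::min(box.ymin, lm.y());
    box.xmax = std::max(box.xmax, lm.x());
    box.ymax = std::max(box.ymax, lm.y());
  }
  return box;
}

Scaling the resulting box by the view's pixel dimensions would place it in screen space, e.g. for positioning native UI elements next to a detected face.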

View File

@ -0,0 +1,22 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#import <UIKit/UIKit.h>
#import "AppDelegate.h"
int main(int argc, char * argv[]) {
@autoreleasepool {
return UIApplicationMain(argc, argv, nil, NSStringFromClass([AppDelegate class]));
}
}

View File

@ -28,6 +28,8 @@
namespace mediapipe {
namespace {
constexpr int kIntTestValue = 33;
typedef std::function<::mediapipe::Status(CalculatorContext* cc)>
CalculatorContextFunction;
@ -617,8 +619,9 @@ TEST(CalculatorGraphBoundsTest, ImmediateHandlerBounds) {
MP_ASSERT_OK(graph.WaitUntilIdle());
// Add four packets into the graph.
for (int i = 0; i < 4; ++i) {
Packet p = MakePacket<int>(33).At(Timestamp(i));
constexpr int kNumInputs = 4;
for (int i = 0; i < kNumInputs; ++i) {
Packet p = MakePacket<int>(kIntTestValue).At(Timestamp(i));
MP_ASSERT_OK(graph.AddPacketToInputStream("input", p));
}
@ -709,7 +712,7 @@ REGISTER_CALCULATOR(FuturePacketCalculator);
// produces no output packets.
TEST(CalculatorGraphBoundsTest, OffsetBoundPropagation) {
// OffsetBoundCalculator produces only timestamp bounds.
// The PassthroughCalculator delivers an output packet whenever the
// The PassThroughCalculator delivers an output packet whenever the
// OffsetBoundCalculator delivers a timestamp bound.
CalculatorGraphConfig config =
::mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(R"(
@ -740,7 +743,7 @@ TEST(CalculatorGraphBoundsTest, OffsetBoundPropagation) {
// Add four packets into the graph.
constexpr int kNumInputs = 4;
for (int i = 0; i < kNumInputs; ++i) {
Packet p = MakePacket<int>(33).At(Timestamp(i));
Packet p = MakePacket<int>(kIntTestValue).At(Timestamp(i));
MP_ASSERT_OK(graph.AddPacketToInputStream("input", p));
}
@ -791,12 +794,15 @@ TEST(CalculatorGraphBoundsTest, BoundWithoutInputPackets) {
// Add four packets into the graph.
constexpr int kNumInputs = 4;
for (int i = 0; i < kNumInputs; ++i) {
Packet p = MakePacket<int>(33).At(Timestamp(i));
Packet p = MakePacket<int>(kIntTestValue).At(Timestamp(i));
MP_ASSERT_OK(graph.AddPacketToInputStream("input", p));
MP_ASSERT_OK(graph.WaitUntilIdle());
}
// No packets arrive, because updated timestamp bounds do not invoke
// No packets arrive, because FuturePacketCalculator produces 4 packets but
// OffsetBoundCalculator relays only the 4 timestamps without any packets, and
// BoundToPacketCalculator does not opt in to processing timestamp bounds via
// SetProcessTimestampBounds. Thus, the graph does not invoke
// BoundToPacketCalculator::Process.
MP_ASSERT_OK(graph.WaitUntilIdle());
EXPECT_EQ(output_packets.size(), 0);
@ -1138,6 +1144,8 @@ class ProcessBoundToPacketCalculator : public CalculatorBase {
::mediapipe::Status Process(CalculatorContext* cc) final {
for (int i = 0; i < cc->Outputs().NumEntries(); ++i) {
Timestamp t = cc->Inputs().Index(i).Value().Timestamp();
// Create a new packet for each input stream with a new timestamp bound,
// as long as the new timestamp satisfies the output timestamp bound.
if (t == cc->InputTimestamp() &&
t >= cc->Outputs().Index(i).NextTimestampBound()) {
cc->Outputs().Index(i).Add(new auto(t), t);
@ -1168,6 +1176,8 @@ class ImmediatePassthroughCalculator : public CalculatorBase {
if (!cc->Inputs().Index(i).IsEmpty()) {
cc->Outputs().Index(i).AddPacket(cc->Inputs().Index(i).Value());
} else {
// Update the output stream "i" nextTimestampBound to the timestamp at
// which a packet may next be available in input stream "i".
Timestamp input_bound =
cc->Inputs().Index(i).Value().Timestamp().NextAllowedInStream();
if (cc->Outputs().Index(i).NextTimestampBound() < input_bound) {
@ -1219,33 +1229,22 @@ void TestProcessForEmptyInputs(const std::string& input_stream_handler) {
MP_ASSERT_OK(graph.StartRun({}));
MP_ASSERT_OK(graph.WaitUntilIdle());
// Add four packets into the graph.
// Add four packets into the graph at ts {0, 10, 20, 30}.
constexpr int kFutureMicros = FuturePacketCalculator::kOutputFutureMicros;
Packet p;
p = MakePacket<int>(33).At(Timestamp(0));
constexpr int kNumInputs = 4;
std::vector<Timestamp> expected;
for (int i = 0; i < kNumInputs; ++i) {
const int ts = i * 10;
Packet p = MakePacket<int>(kIntTestValue).At(Timestamp(ts));
MP_ASSERT_OK(graph.AddPacketToInputStream("input", p));
MP_ASSERT_OK(graph.WaitUntilIdle());
p = MakePacket<int>(33).At(Timestamp(10));
MP_ASSERT_OK(graph.AddPacketToInputStream("input", p));
MP_ASSERT_OK(graph.WaitUntilIdle());
p = MakePacket<int>(33).At(Timestamp(20));
MP_ASSERT_OK(graph.AddPacketToInputStream("input", p));
MP_ASSERT_OK(graph.WaitUntilIdle());
p = MakePacket<int>(33).At(Timestamp(30));
MP_ASSERT_OK(graph.AddPacketToInputStream("input", p));
MP_ASSERT_OK(graph.WaitUntilIdle());
expected.emplace_back(Timestamp(ts + kFutureMicros));
}
// Packets arrive.
MP_ASSERT_OK(graph.WaitUntilIdle());
EXPECT_EQ(bounds_ts_packets.size(), 4);
std::vector<Timestamp> expected = {
Timestamp(0 + kFutureMicros), Timestamp(10 + kFutureMicros),
Timestamp(20 + kFutureMicros), Timestamp(30 + kFutureMicros)};
EXPECT_EQ(GetContents<Timestamp>(bounds_ts_packets), expected);
EXPECT_EQ(bounds_ts_packets.size(), kNumInputs);
// Shutdown the graph.
MP_ASSERT_OK(graph.CloseAllPacketSources());
@ -1335,34 +1334,41 @@ TEST(CalculatorGraphBoundsTest, ProcessTimestampBounds_Passthrough) {
MP_ASSERT_OK(graph.WaitUntilIdle());
// Add four packets to input_0.
for (int i = 0; i < 4; ++i) {
Packet p = MakePacket<int>(33).At(Timestamp(i * 10));
constexpr int kNumInputs0 = 4;
std::vector<Timestamp> expected_output_0;
for (int i = 0; i < kNumInputs0; ++i) {
const int ts = i * 10;
Packet p = MakePacket<int>(kIntTestValue).At(Timestamp(ts));
MP_ASSERT_OK(graph.AddPacketToInputStream("input_0", p));
MP_ASSERT_OK(graph.WaitUntilIdle());
expected_output_0.emplace_back(Timestamp(ts));
}
// Packets arrive.
MP_ASSERT_OK(graph.WaitUntilIdle());
EXPECT_EQ(output_0_packets.size(), 4);
EXPECT_EQ(output_0_packets.size(), kNumInputs0);
// No packets were pushed in "input_1".
EXPECT_EQ(output_1_packets.size(), 0);
std::vector<Timestamp> expected = //
{Timestamp(0), Timestamp(10), Timestamp(20), Timestamp(30)};
EXPECT_EQ(GetContents<Timestamp>(output_0_packets), expected);
EXPECT_EQ(GetContents<Timestamp>(output_0_packets), expected_output_0);
// Add two timestamp bounds to bound_1.
for (int i = 0; i < 2; ++i) {
Packet p = MakePacket<int>(33).At(Timestamp(10 + i * 10));
// Add two timestamp bounds to "input_1" and update "bound_1" at {10, 20}.
constexpr int kNumInputs1 = 2;
std::vector<Timestamp> expected_output_1;
for (int i = 0; i < kNumInputs1; ++i) {
const int ts = 10 + i * 10;
Packet p = MakePacket<int>(kIntTestValue).At(Timestamp(ts));
MP_ASSERT_OK(graph.AddPacketToInputStream("input_1", p));
MP_ASSERT_OK(graph.WaitUntilIdle());
expected_output_1.emplace_back(Timestamp(ts));
}
// Bounds arrive.
MP_ASSERT_OK(graph.WaitUntilIdle());
EXPECT_EQ(output_0_packets.size(), 4);
EXPECT_EQ(output_1_packets.size(), 2);
expected = //
{Timestamp(10), Timestamp(20)};
EXPECT_EQ(GetContents<Timestamp>(output_1_packets), expected);
EXPECT_EQ(output_0_packets.size(), kNumInputs0);
EXPECT_EQ(output_1_packets.size(), kNumInputs1);
EXPECT_EQ(GetContents<Timestamp>(output_1_packets), expected_output_1);
// Shutdown the graph.
MP_ASSERT_OK(graph.CloseAllPacketSources());
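For context on the ProcessTimestampBounds tests above: a calculator may ask the framework to invoke Process() when an input's timestamp bound advances even though no packet arrived. The sketch below is not part of this change and only illustrates the opt-in, assuming SetProcessTimestampBounds(true) is requested in GetContract as the test comments describe:

#include "mediapipe/framework/calculator_framework.h"

namespace mediapipe {

// Emits the current input timestamp whenever Process() is invoked with an
// empty input, which only happens because SetProcessTimestampBounds(true)
// was requested below.
class BoundEchoCalculator : public CalculatorBase {
 public:
  static ::mediapipe::Status GetContract(CalculatorContract* cc) {
    cc->Inputs().Index(0).Set<int>();
    cc->Outputs().Index(0).Set<Timestamp>();
    // Opt in: Process() also runs when the input timestamp bound advances
    // without a packet.
    cc->SetProcessTimestampBounds(true);
    return ::mediapipe::OkStatus();
  }
  ::mediapipe::Status Process(CalculatorContext* cc) final {
    if (cc->Inputs().Index(0).IsEmpty()) {
      Timestamp bound = cc->InputTimestamp();
      cc->Outputs().Index(0).Add(new Timestamp(bound), bound);
    }
    return ::mediapipe::OkStatus();
  }
};
REGISTER_CALCULATOR(BoundEchoCalculator);

}  // namespace mediapipe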

View File

@ -186,6 +186,7 @@ cc_library(
"//mediapipe/framework:packet",
"//mediapipe/framework:timestamp",
"//mediapipe/framework/port:integral_types",
"@com_google_absl//absl/container:node_hash_map",
"@com_google_absl//absl/time",
],
)

View File

@ -24,6 +24,7 @@
#include <utility>
#include <vector>
#include "absl/container/node_hash_map.h"
#include "mediapipe/framework/calculator_profile.pb.h"
#include "mediapipe/framework/packet.h"
#include "mediapipe/framework/port/integral_types.h"
@ -130,10 +131,10 @@ class AddressIdMap {
return pointer_id_map_[id] = next_id++;
}
void clear() { pointer_id_map_.clear(); }
const std::unordered_map<int64, int32>& map() { return pointer_id_map_; }
const absl::node_hash_map<int64, int32>& map() { return pointer_id_map_; }
private:
std::unordered_map<int64, int32> pointer_id_map_;
absl::node_hash_map<int64, int32> pointer_id_map_;
int32 next_id = 0;
};

View File

@ -568,7 +568,7 @@ class LambdaCalculator : public CalculatorBase {
if (cc->InputSidePackets().HasTag("") > 0) {
cc->InputSidePackets().Tag("").Set<ProcessFunction>();
}
for (std::string tag : {"OPEN", "PROCESS", "CLOSE"}) {
for (const std::string& tag : {"OPEN", "PROCESS", "CLOSE"}) {
if (cc->InputSidePackets().HasTag(tag)) {
cc->InputSidePackets().Tag(tag).Set<CalculatorContextFunction>();
}

View File

@ -150,7 +150,7 @@ static ::mediapipe::Status PrefixNames(std::string prefix,
const proto_ns::RepeatedPtrField<ProtoString>& dst_streams) {
ASSIGN_OR_RETURN(auto src_map, tool::TagMap::Create(src_streams));
ASSIGN_OR_RETURN(auto dst_map, tool::TagMap::Create(dst_streams));
for (auto it : dst_map->Mapping()) {
for (const auto& it : dst_map->Mapping()) {
const std::string& tag = it.first;
const TagMap::TagData* src_tag_data =
::mediapipe::FindOrNull(src_map->Mapping(), tag);

View File

@ -0,0 +1,69 @@
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
load(
"//mediapipe/framework/tool:mediapipe_graph.bzl",
"mediapipe_binary_graph",
)
licenses(["notice"]) # Apache 2.0
package(default_visibility = ["//visibility:public"])
cc_library(
name = "desktop_calculators",
deps = [
"//mediapipe/calculators/core:constant_side_packet_calculator",
"//mediapipe/calculators/video:opencv_video_decoder_calculator",
"//mediapipe/calculators/video:opencv_video_encoder_calculator",
"//mediapipe/graphs/face_mesh/subgraphs:face_renderer_cpu",
"//mediapipe/modules/face_landmark:face_landmark_front_cpu",
],
)
cc_library(
name = "desktop_live_calculators",
deps = [
"//mediapipe/calculators/core:constant_side_packet_calculator",
"//mediapipe/calculators/core:flow_limiter_calculator",
"//mediapipe/graphs/face_mesh/subgraphs:face_renderer_cpu",
"//mediapipe/modules/face_landmark:face_landmark_front_cpu",
],
)
cc_library(
name = "desktop_live_gpu_calculators",
deps = [
"//mediapipe/calculators/core:constant_side_packet_calculator",
"//mediapipe/calculators/core:flow_limiter_calculator",
"//mediapipe/graphs/face_mesh/subgraphs:face_renderer_gpu",
"//mediapipe/modules/face_landmark:face_landmark_front_gpu",
],
)
cc_library(
name = "mobile_calculators",
deps = [
"//mediapipe/calculators/core:flow_limiter_calculator",
"//mediapipe/graphs/face_mesh/subgraphs:face_renderer_gpu",
"//mediapipe/modules/face_landmark:face_landmark_front_gpu",
],
)
mediapipe_binary_graph(
name = "face_mesh_mobile_gpu_binary_graph",
graph = "face_mesh_mobile.pbtxt",
output_name = "face_mesh_mobile_gpu.binarypb",
deps = [":mobile_calculators"],
)

View File

@ -0,0 +1,67 @@
# MediaPipe graph that performs face mesh on desktop with TensorFlow Lite
# on CPU.
# Path to the input video file. (string)
input_side_packet: "input_video_path"
# Path to the output video file. (string)
input_side_packet: "output_video_path"
# max_queue_size limits the number of packets enqueued on any input stream
# by throttling inputs to the graph. This makes the graph only process one
# frame at a time.
max_queue_size: 1
# Decodes an input video file into images and a video header.
node {
calculator: "OpenCvVideoDecoderCalculator"
input_side_packet: "INPUT_FILE_PATH:input_video_path"
output_stream: "VIDEO:input_video"
output_stream: "VIDEO_PRESTREAM:input_video_header"
}
# Defines side packets for further use in the graph.
node {
calculator: "ConstantSidePacketCalculator"
output_side_packet: "PACKET:num_faces"
node_options: {
[type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
packet { int_value: 1 }
}
}
}
# Subgraph that detects faces and corresponding landmarks.
node {
calculator: "FaceLandmarkFrontCpu"
input_stream: "IMAGE:input_video"
input_side_packet: "NUM_FACES:num_faces"
output_stream: "LANDMARKS:multi_face_landmarks"
output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
output_stream: "DETECTIONS:face_detections"
output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
}
# Subgraph that renders face-landmark annotation onto the input video.
node {
calculator: "FaceRendererCpu"
input_stream: "IMAGE:input_video"
input_stream: "LANDMARKS:multi_face_landmarks"
input_stream: "NORM_RECTS:face_rects_from_landmarks"
input_stream: "DETECTIONS:face_detections"
output_stream: "IMAGE:output_video"
}
# Encodes the annotated images into a video file, adopting properties specified
# in the input video header, e.g., video framerate.
node {
calculator: "OpenCvVideoEncoderCalculator"
input_stream: "VIDEO:output_video"
input_stream: "VIDEO_PRESTREAM:input_video_header"
input_side_packet: "OUTPUT_FILE_PATH:output_video_path"
node_options: {
[type.googleapis.com/mediapipe.OpenCvVideoEncoderCalculatorOptions]: {
codec: "avc1"
video_format: "mp4"
}
}
}
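A minimal C++ runner for the desktop graph above might look as follows. This sketch is not part of this change; the file paths are placeholders and the pbtxt contents are assumed to be already loaded into a string:

#include <map>
#include <string>

#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/parse_text_proto.h"
#include "mediapipe/framework/port/status.h"

::mediapipe::Status RunFaceMeshDesktop(
    const std::string& graph_config_contents) {
  // graph_config_contents is assumed to hold the pbtxt above.
  mediapipe::CalculatorGraphConfig config =
      mediapipe::ParseTextProtoOrDie<mediapipe::CalculatorGraphConfig>(
          graph_config_contents);

  mediapipe::CalculatorGraph graph;
  MP_RETURN_IF_ERROR(graph.Initialize(config));

  // Provide the two input side packets declared at the top of the graph.
  std::map<std::string, mediapipe::Packet> side_packets;
  side_packets["input_video_path"] =
      mediapipe::MakePacket<std::string>("/tmp/input.mp4");   // placeholder
  side_packets["output_video_path"] =
      mediapipe::MakePacket<std::string>("/tmp/output.mp4");  // placeholder

  MP_RETURN_IF_ERROR(graph.StartRun(side_packets));
  // The OpenCvVideoDecoderCalculator closes its output streams at end of
  // file, after which the graph drains and finishes.
  return graph.WaitUntilDone();
}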

View File

@ -0,0 +1,63 @@
# MediaPipe graph that performs face mesh with TensorFlow Lite on CPU.
# Input image. (ImageFrame)
input_stream: "input_video"
# Output image with rendered results. (ImageFrame)
output_stream: "output_video"
# Collection of detected/processed faces, each represented as a list of
# landmarks. (std::vector<NormalizedLandmarkList>)
output_stream: "multi_face_landmarks"
# Throttles the images flowing downstream for flow control. It passes through
# the very first incoming image unaltered, and waits for downstream nodes
# (calculators and subgraphs) in the graph to finish their tasks before it
# passes through another image. All images that come in while waiting are
# dropped, limiting the number of in-flight images in most parts of the graph to
# 1. This prevents the downstream nodes from queuing up incoming images and data
# excessively, which leads to increased latency and memory usage, both undesirable
# in real-time mobile applications. It also eliminates unnecessary computation,
# e.g., the output produced by a node may get dropped downstream if the
# subsequent nodes are still busy processing previous inputs.
node {
calculator: "FlowLimiterCalculator"
input_stream: "input_video"
input_stream: "FINISHED:output_video"
input_stream_info: {
tag_index: "FINISHED"
back_edge: true
}
output_stream: "throttled_input_video"
}
# Defines side packets for further use in the graph.
node {
calculator: "ConstantSidePacketCalculator"
output_side_packet: "PACKET:num_faces"
node_options: {
[type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
packet { int_value: 1 }
}
}
}
# Subgraph that detects faces and corresponding landmarks.
node {
calculator: "FaceLandmarkFrontCpu"
input_stream: "IMAGE:throttled_input_video"
input_side_packet: "NUM_FACES:num_faces"
output_stream: "LANDMARKS:multi_face_landmarks"
output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
output_stream: "DETECTIONS:face_detections"
output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
}
# Subgraph that renders face-landmark annotation onto the input image.
node {
calculator: "FaceRendererCpu"
input_stream: "IMAGE:throttled_input_video"
input_stream: "LANDMARKS:multi_face_landmarks"
input_stream: "NORM_RECTS:face_rects_from_landmarks"
input_stream: "DETECTIONS:face_detections"
output_stream: "IMAGE:output_video"
}
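To read the multi_face_landmarks stream of the live graph above from C++, an output stream poller can be attached before the run starts. This is a sketch, not part of this change, under the assumption that ImageFrame packets are fed to "input_video" elsewhere:

#include <vector>

#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/port/logging.h"
#include "mediapipe/framework/port/status.h"

// `graph` is assumed to be a CalculatorGraph already initialized with the
// config above; frames must be sent to "input_video" by other code.
::mediapipe::Status PollFaceLandmarks(mediapipe::CalculatorGraph* graph) {
  ASSIGN_OR_RETURN(mediapipe::OutputStreamPoller poller,
                   graph->AddOutputStreamPoller("multi_face_landmarks"));
  MP_RETURN_IF_ERROR(graph->StartRun({}));

  mediapipe::Packet packet;
  while (poller.Next(&packet)) {
    const auto& faces =
        packet.Get<std::vector<mediapipe::NormalizedLandmarkList>>();
    for (const auto& face : faces) {
      // The face mesh model is expected to output 468 landmarks per face.
      LOG(INFO) << "Face with " << face.landmark_size() << " landmarks at ts "
                << packet.Timestamp().Value();
    }
  }
  return graph->WaitUntilDone();
}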

View File

@ -0,0 +1,63 @@
# MediaPipe graph that performs face mesh with TensorFlow Lite on GPU.
# Input image. (GpuBuffer)
input_stream: "input_video"
# Output image with rendered results. (GpuBuffer)
output_stream: "output_video"
# Collection of detected/processed faces, each represented as a list of
# landmarks. (std::vector<NormalizedLandmarkList>)
output_stream: "multi_face_landmarks"
# Throttles the images flowing downstream for flow control. It passes through
# the very first incoming image unaltered, and waits for downstream nodes
# (calculators and subgraphs) in the graph to finish their tasks before it
# passes through another image. All images that come in while waiting are
# dropped, limiting the number of in-flight images in most parts of the graph to
# 1. This prevents the downstream nodes from queuing up incoming images and data
# excessively, which leads to increased latency and memory usage, both undesirable
# in real-time mobile applications. It also eliminates unnecessary computation,
# e.g., the output produced by a node may get dropped downstream if the
# subsequent nodes are still busy processing previous inputs.
node {
calculator: "FlowLimiterCalculator"
input_stream: "input_video"
input_stream: "FINISHED:output_video"
input_stream_info: {
tag_index: "FINISHED"
back_edge: true
}
output_stream: "throttled_input_video"
}
# Defines side packets for further use in the graph.
node {
calculator: "ConstantSidePacketCalculator"
output_side_packet: "PACKET:num_faces"
node_options: {
[type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
packet { int_value: 1 }
}
}
}
# Subgraph that detects faces and corresponding landmarks.
node {
calculator: "FaceLandmarkFrontGpu"
input_stream: "IMAGE:throttled_input_video"
input_side_packet: "NUM_FACES:num_faces"
output_stream: "LANDMARKS:multi_face_landmarks"
output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
output_stream: "DETECTIONS:face_detections"
output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
}
# Subgraph that renders face-landmark annotation onto the input image.
node {
calculator: "FaceRendererGpu"
input_stream: "IMAGE:throttled_input_video"
input_stream: "LANDMARKS:multi_face_landmarks"
input_stream: "NORM_RECTS:face_rects_from_landmarks"
input_stream: "DETECTIONS:face_detections"
output_stream: "IMAGE:output_video"
}

View File

@ -0,0 +1,55 @@
# MediaPipe graph that performs face mesh with TensorFlow Lite on GPU.
# GPU buffer. (GpuBuffer)
input_stream: "input_video"
# Max number of faces to detect/process. (int)
input_side_packet: "num_faces"
# Output image with rendered results. (GpuBuffer)
output_stream: "output_video"
# Collection of detected/processed faces, each represented as a list of
# landmarks. (std::vector<NormalizedLandmarkList>)
output_stream: "multi_face_landmarks"
# Throttles the images flowing downstream for flow control. It passes through
# the very first incoming image unaltered, and waits for downstream nodes
# (calculators and subgraphs) in the graph to finish their tasks before it
# passes through another image. All images that come in while waiting are
# dropped, limiting the number of in-flight images in most parts of the graph to
# 1. This prevents the downstream nodes from queuing up incoming images and data
# excessively, which leads to increased latency and memory usage, both undesirable
# in real-time mobile applications. It also eliminates unnecessary computation,
# e.g., the output produced by a node may get dropped downstream if the
# subsequent nodes are still busy processing previous inputs.
node {
calculator: "FlowLimiterCalculator"
input_stream: "input_video"
input_stream: "FINISHED:output_video"
input_stream_info: {
tag_index: "FINISHED"
back_edge: true
}
output_stream: "throttled_input_video"
}
# Subgraph that detects faces and corresponding landmarks.
node {
calculator: "FaceLandmarkFrontGpu"
input_stream: "IMAGE:throttled_input_video"
input_side_packet: "NUM_FACES:num_faces"
output_stream: "LANDMARKS:multi_face_landmarks"
output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
output_stream: "DETECTIONS:face_detections"
output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
}
# Subgraph that renders face-landmark annotation onto the input image.
node {
calculator: "FaceRendererGpu"
input_stream: "IMAGE:throttled_input_video"
input_stream: "LANDMARKS:multi_face_landmarks"
input_stream: "NORM_RECTS:face_rects_from_landmarks"
input_stream: "DETECTIONS:face_detections"
output_stream: "IMAGE:output_video"
}

View File

@ -0,0 +1,51 @@
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
load(
"//mediapipe/framework/tool:mediapipe_graph.bzl",
"mediapipe_simple_subgraph",
)
licenses(["notice"]) # Apache 2.0
package(default_visibility = ["//visibility:public"])
cc_library(
name = "renderer_calculators",
deps = [
"//mediapipe/calculators/core:split_normalized_landmark_list_calculator",
"//mediapipe/calculators/util:annotation_overlay_calculator",
"//mediapipe/calculators/util:detections_to_render_data_calculator",
"//mediapipe/calculators/util:landmarks_to_render_data_calculator",
"//mediapipe/calculators/util:rect_to_render_data_calculator",
],
)
mediapipe_simple_subgraph(
name = "face_renderer_gpu",
graph = "face_renderer_gpu.pbtxt",
register_as = "FaceRendererGpu",
deps = [
":renderer_calculators",
],
)
mediapipe_simple_subgraph(
name = "face_renderer_cpu",
graph = "face_renderer_cpu.pbtxt",
register_as = "FaceRendererCpu",
deps = [
":renderer_calculators",
],
)

View File

@ -0,0 +1,350 @@
# MediaPipe face mesh rendering subgraph.
type: "FaceRendererCpu"
# CPU image. (ImageFrame)
input_stream: "IMAGE:input_image"
# Collection of detected/predicted faces, each represented as a list of
# landmarks. (std::vector<NormalizedLandmarkList>)
input_stream: "LANDMARKS:multi_face_landmarks"
# Regions of interest calculated based on face landmarks.
# (std::vector<NormalizedRect>)
input_stream: "NORM_RECTS:rects"
# Detected faces. (std::vector<Detection>)
input_stream: "DETECTIONS:detections"
# CPU image with rendered data. (ImageFrame)
output_stream: "IMAGE:output_image"
node {
calculator: "ImagePropertiesCalculator"
input_stream: "IMAGE:input_image"
output_stream: "SIZE:image_size"
}
# Converts detections to drawing primitives for annotation overlay.
node {
calculator: "DetectionsToRenderDataCalculator"
input_stream: "DETECTIONS:detections"
output_stream: "RENDER_DATA:detections_render_data"
node_options: {
[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
thickness: 4.0
color { r: 0 g: 255 b: 0 }
}
}
}
# Outputs each element of multi_face_landmarks at a fake timestamp for the rest
# of the graph to process. At the end of the loop, outputs the BATCH_END
# timestamp for downstream calculators to inform them that all elements in the
# vector have been processed.
node {
calculator: "BeginLoopNormalizedLandmarkListVectorCalculator"
input_stream: "ITERABLE:multi_face_landmarks"
output_stream: "ITEM:face_landmarks"
output_stream: "BATCH_END:landmark_timestamp"
}
# Converts landmarks to drawing primitives for annotation overlay.
node {
calculator: "LandmarksToRenderDataCalculator"
input_stream: "NORM_LANDMARKS:face_landmarks"
output_stream: "RENDER_DATA:landmark_render_data"
node_options: {
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
# Lips.
landmark_connections: 61
landmark_connections: 146
landmark_connections: 146
landmark_connections: 91
landmark_connections: 91
landmark_connections: 181
landmark_connections: 181
landmark_connections: 84
landmark_connections: 84
landmark_connections: 17
landmark_connections: 17
landmark_connections: 314
landmark_connections: 314
landmark_connections: 405
landmark_connections: 405
landmark_connections: 321
landmark_connections: 321
landmark_connections: 375
landmark_connections: 375
landmark_connections: 291
landmark_connections: 61
landmark_connections: 185
landmark_connections: 185
landmark_connections: 40
landmark_connections: 40
landmark_connections: 39
landmark_connections: 39
landmark_connections: 37
landmark_connections: 37
landmark_connections: 0
landmark_connections: 0
landmark_connections: 267
landmark_connections: 267
landmark_connections: 269
landmark_connections: 269
landmark_connections: 270
landmark_connections: 270
landmark_connections: 409
landmark_connections: 409
landmark_connections: 291
landmark_connections: 78
landmark_connections: 95
landmark_connections: 95
landmark_connections: 88
landmark_connections: 88
landmark_connections: 178
landmark_connections: 178
landmark_connections: 87
landmark_connections: 87
landmark_connections: 14
landmark_connections: 14
landmark_connections: 317
landmark_connections: 317
landmark_connections: 402
landmark_connections: 402
landmark_connections: 318
landmark_connections: 318
landmark_connections: 324
landmark_connections: 324
landmark_connections: 308
landmark_connections: 78
landmark_connections: 191
landmark_connections: 191
landmark_connections: 80
landmark_connections: 80
landmark_connections: 81
landmark_connections: 81
landmark_connections: 82
landmark_connections: 82
landmark_connections: 13
landmark_connections: 13
landmark_connections: 312
landmark_connections: 312
landmark_connections: 311
landmark_connections: 311
landmark_connections: 310
landmark_connections: 310
landmark_connections: 415
landmark_connections: 415
landmark_connections: 308
# Left eye.
landmark_connections: 33
landmark_connections: 7
landmark_connections: 7
landmark_connections: 163
landmark_connections: 163
landmark_connections: 144
landmark_connections: 144
landmark_connections: 145
landmark_connections: 145
landmark_connections: 153
landmark_connections: 153
landmark_connections: 154
landmark_connections: 154
landmark_connections: 155
landmark_connections: 155
landmark_connections: 133
landmark_connections: 33
landmark_connections: 246
landmark_connections: 246
landmark_connections: 161
landmark_connections: 161
landmark_connections: 160
landmark_connections: 160
landmark_connections: 159
landmark_connections: 159
landmark_connections: 158
landmark_connections: 158
landmark_connections: 157
landmark_connections: 157
landmark_connections: 173
landmark_connections: 173
landmark_connections: 133
# Left eyebrow.
landmark_connections: 46
landmark_connections: 53
landmark_connections: 53
landmark_connections: 52
landmark_connections: 52
landmark_connections: 65
landmark_connections: 65
landmark_connections: 55
landmark_connections: 70
landmark_connections: 63
landmark_connections: 63
landmark_connections: 105
landmark_connections: 105
landmark_connections: 66
landmark_connections: 66
landmark_connections: 107
# Right eye.
landmark_connections: 263
landmark_connections: 249
landmark_connections: 249
landmark_connections: 390
landmark_connections: 390
landmark_connections: 373
landmark_connections: 373
landmark_connections: 374
landmark_connections: 374
landmark_connections: 380
landmark_connections: 380
landmark_connections: 381
landmark_connections: 381
landmark_connections: 382
landmark_connections: 382
landmark_connections: 362
landmark_connections: 263
landmark_connections: 466
landmark_connections: 466
landmark_connections: 388
landmark_connections: 388
landmark_connections: 387
landmark_connections: 387
landmark_connections: 386
landmark_connections: 386
landmark_connections: 385
landmark_connections: 385
landmark_connections: 384
landmark_connections: 384
landmark_connections: 398
landmark_connections: 398
landmark_connections: 362
# Right eyebrow.
landmark_connections: 276
landmark_connections: 283
landmark_connections: 283
landmark_connections: 282
landmark_connections: 282
landmark_connections: 295
landmark_connections: 295
landmark_connections: 285
landmark_connections: 300
landmark_connections: 293
landmark_connections: 293
landmark_connections: 334
landmark_connections: 334
landmark_connections: 296
landmark_connections: 296
landmark_connections: 336
# Face oval.
landmark_connections: 10
landmark_connections: 338
landmark_connections: 338
landmark_connections: 297
landmark_connections: 297
landmark_connections: 332
landmark_connections: 332
landmark_connections: 284
landmark_connections: 284
landmark_connections: 251
landmark_connections: 251
landmark_connections: 389
landmark_connections: 389
landmark_connections: 356
landmark_connections: 356
landmark_connections: 454
landmark_connections: 454
landmark_connections: 323
landmark_connections: 323
landmark_connections: 361
landmark_connections: 361
landmark_connections: 288
landmark_connections: 288
landmark_connections: 397
landmark_connections: 397
landmark_connections: 365
landmark_connections: 365
landmark_connections: 379
landmark_connections: 379
landmark_connections: 378
landmark_connections: 378
landmark_connections: 400
landmark_connections: 400
landmark_connections: 377
landmark_connections: 377
landmark_connections: 152
landmark_connections: 152
landmark_connections: 148
landmark_connections: 148
landmark_connections: 176
landmark_connections: 176
landmark_connections: 149
landmark_connections: 149
landmark_connections: 150
landmark_connections: 150
landmark_connections: 136
landmark_connections: 136
landmark_connections: 172
landmark_connections: 172
landmark_connections: 58
landmark_connections: 58
landmark_connections: 132
landmark_connections: 132
landmark_connections: 93
landmark_connections: 93
landmark_connections: 234
landmark_connections: 234
landmark_connections: 127
landmark_connections: 127
landmark_connections: 162
landmark_connections: 162
landmark_connections: 21
landmark_connections: 21
landmark_connections: 54
landmark_connections: 54
landmark_connections: 103
landmark_connections: 103
landmark_connections: 67
landmark_connections: 67
landmark_connections: 109
landmark_connections: 109
landmark_connections: 10
landmark_color { r: 255 g: 0 b: 0 }
connection_color { r: 0 g: 255 b: 0 }
thickness: 1.5
visualize_landmark_depth: false
}
}
}
# Collects a RenderData object for each face into a vector. Upon receiving the
# BATCH_END timestamp, outputs the vector of RenderData at the BATCH_END
# timestamp.
node {
calculator: "EndLoopRenderDataCalculator"
input_stream: "ITEM:landmark_render_data"
input_stream: "BATCH_END:landmark_timestamp"
output_stream: "ITERABLE:multi_face_landmarks_render_data"
}
# Converts normalized rects to drawing primitives for annotation overlay.
node {
calculator: "RectToRenderDataCalculator"
input_stream: "NORM_RECTS:rects"
output_stream: "RENDER_DATA:rects_render_data"
node_options: {
[type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] {
filled: false
color { r: 255 g: 0 b: 0 }
thickness: 4.0
}
}
}
# Draws annotations and overlays them on top of the input images.
node {
calculator: "AnnotationOverlayCalculator"
input_stream: "IMAGE:input_image"
input_stream: "detections_render_data"
input_stream: "VECTOR:0:multi_face_landmarks_render_data"
input_stream: "rects_render_data"
output_stream: "IMAGE:output_image"
}

View File

@ -0,0 +1,350 @@
# MediaPipe face mesh rendering subgraph.
type: "FaceRendererGpu"
# GPU image. (GpuBuffer)
input_stream: "IMAGE:input_image"
# Collection of detected/predicted faces, each represented as a list of
# landmarks. (std::vector<NormalizedLandmarkList>)
input_stream: "LANDMARKS:multi_face_landmarks"
# Regions of interest calculated based on face landmarks.
# (std::vector<NormalizedRect>)
input_stream: "NORM_RECTS:rects"
# Detected faces. (std::vector<Detection>)
input_stream: "DETECTIONS:detections"
# GPU image with rendered data. (GpuBuffer)
output_stream: "IMAGE:output_image"
node {
calculator: "ImagePropertiesCalculator"
input_stream: "IMAGE_GPU:input_image"
output_stream: "SIZE:image_size"
}
# Converts detections to drawing primitives for annotation overlay.
node {
calculator: "DetectionsToRenderDataCalculator"
input_stream: "DETECTIONS:detections"
output_stream: "RENDER_DATA:detections_render_data"
node_options: {
[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
thickness: 4.0
color { r: 0 g: 255 b: 0 }
}
}
}
# Outputs each element of multi_face_landmarks at a fake timestamp for the rest
# of the graph to process. At the end of the loop, outputs the BATCH_END
# timestamp for downstream calculators to inform them that all elements in the
# vector have been processed.
node {
calculator: "BeginLoopNormalizedLandmarkListVectorCalculator"
input_stream: "ITERABLE:multi_face_landmarks"
output_stream: "ITEM:face_landmarks"
output_stream: "BATCH_END:end_timestamp"
}
# Converts landmarks to drawing primitives for annotation overlay.
node {
calculator: "LandmarksToRenderDataCalculator"
input_stream: "NORM_LANDMARKS:face_landmarks"
output_stream: "RENDER_DATA:landmarks_render_data"
node_options: {
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
# Lips.
landmark_connections: 61
landmark_connections: 146
landmark_connections: 146
landmark_connections: 91
landmark_connections: 91
landmark_connections: 181
landmark_connections: 181
landmark_connections: 84
landmark_connections: 84
landmark_connections: 17
landmark_connections: 17
landmark_connections: 314
landmark_connections: 314
landmark_connections: 405
landmark_connections: 405
landmark_connections: 321
landmark_connections: 321
landmark_connections: 375
landmark_connections: 375
landmark_connections: 291
landmark_connections: 61
landmark_connections: 185
landmark_connections: 185
landmark_connections: 40
landmark_connections: 40
landmark_connections: 39
landmark_connections: 39
landmark_connections: 37
landmark_connections: 37
landmark_connections: 0
landmark_connections: 0
landmark_connections: 267
landmark_connections: 267
landmark_connections: 269
landmark_connections: 269
landmark_connections: 270
landmark_connections: 270
landmark_connections: 409
landmark_connections: 409
landmark_connections: 291
landmark_connections: 78
landmark_connections: 95
landmark_connections: 95
landmark_connections: 88
landmark_connections: 88
landmark_connections: 178
landmark_connections: 178
landmark_connections: 87
landmark_connections: 87
landmark_connections: 14
landmark_connections: 14
landmark_connections: 317
landmark_connections: 317
landmark_connections: 402
landmark_connections: 402
landmark_connections: 318
landmark_connections: 318
landmark_connections: 324
landmark_connections: 324
landmark_connections: 308
landmark_connections: 78
landmark_connections: 191
landmark_connections: 191
landmark_connections: 80
landmark_connections: 80
landmark_connections: 81
landmark_connections: 81
landmark_connections: 82
landmark_connections: 82
landmark_connections: 13
landmark_connections: 13
landmark_connections: 312
landmark_connections: 312
landmark_connections: 311
landmark_connections: 311
landmark_connections: 310
landmark_connections: 310
landmark_connections: 415
landmark_connections: 415
landmark_connections: 308
# Left eye.
landmark_connections: 33
landmark_connections: 7
landmark_connections: 7
landmark_connections: 163
landmark_connections: 163
landmark_connections: 144
landmark_connections: 144
landmark_connections: 145
landmark_connections: 145
landmark_connections: 153
landmark_connections: 153
landmark_connections: 154
landmark_connections: 154
landmark_connections: 155
landmark_connections: 155
landmark_connections: 133
landmark_connections: 33
landmark_connections: 246
landmark_connections: 246
landmark_connections: 161
landmark_connections: 161
landmark_connections: 160
landmark_connections: 160
landmark_connections: 159
landmark_connections: 159
landmark_connections: 158
landmark_connections: 158
landmark_connections: 157
landmark_connections: 157
landmark_connections: 173
landmark_connections: 173
landmark_connections: 133
# Left eyebrow.
landmark_connections: 46
landmark_connections: 53
landmark_connections: 53
landmark_connections: 52
landmark_connections: 52
landmark_connections: 65
landmark_connections: 65
landmark_connections: 55
landmark_connections: 70
landmark_connections: 63
landmark_connections: 63
landmark_connections: 105
landmark_connections: 105
landmark_connections: 66
landmark_connections: 66
landmark_connections: 107
# Right eye.
landmark_connections: 263
landmark_connections: 249
landmark_connections: 249
landmark_connections: 390
landmark_connections: 390
landmark_connections: 373
landmark_connections: 373
landmark_connections: 374
landmark_connections: 374
landmark_connections: 380
landmark_connections: 380
landmark_connections: 381
landmark_connections: 381
landmark_connections: 382
landmark_connections: 382
landmark_connections: 362
landmark_connections: 263
landmark_connections: 466
landmark_connections: 466
landmark_connections: 388
landmark_connections: 388
landmark_connections: 387
landmark_connections: 387
landmark_connections: 386
landmark_connections: 386
landmark_connections: 385
landmark_connections: 385
landmark_connections: 384
landmark_connections: 384
landmark_connections: 398
landmark_connections: 398
landmark_connections: 362
# Right eyebrow.
landmark_connections: 276
landmark_connections: 283
landmark_connections: 283
landmark_connections: 282
landmark_connections: 282
landmark_connections: 295
landmark_connections: 295
landmark_connections: 285
landmark_connections: 300
landmark_connections: 293
landmark_connections: 293
landmark_connections: 334
landmark_connections: 334
landmark_connections: 296
landmark_connections: 296
landmark_connections: 336
# Face oval.
landmark_connections: 10
landmark_connections: 338
landmark_connections: 338
landmark_connections: 297
landmark_connections: 297
landmark_connections: 332
landmark_connections: 332
landmark_connections: 284
landmark_connections: 284
landmark_connections: 251
landmark_connections: 251
landmark_connections: 389
landmark_connections: 389
landmark_connections: 356
landmark_connections: 356
landmark_connections: 454
landmark_connections: 454
landmark_connections: 323
landmark_connections: 323
landmark_connections: 361
landmark_connections: 361
landmark_connections: 288
landmark_connections: 288
landmark_connections: 397
landmark_connections: 397
landmark_connections: 365
landmark_connections: 365
landmark_connections: 379
landmark_connections: 379
landmark_connections: 378
landmark_connections: 378
landmark_connections: 400
landmark_connections: 400
landmark_connections: 377
landmark_connections: 377
landmark_connections: 152
landmark_connections: 152
landmark_connections: 148
landmark_connections: 148
landmark_connections: 176
landmark_connections: 176
landmark_connections: 149
landmark_connections: 149
landmark_connections: 150
landmark_connections: 150
landmark_connections: 136
landmark_connections: 136
landmark_connections: 172
landmark_connections: 172
landmark_connections: 58
landmark_connections: 58
landmark_connections: 132
landmark_connections: 132
landmark_connections: 93
landmark_connections: 93
landmark_connections: 234
landmark_connections: 234
landmark_connections: 127
landmark_connections: 127
landmark_connections: 162
landmark_connections: 162
landmark_connections: 21
landmark_connections: 21
landmark_connections: 54
landmark_connections: 54
landmark_connections: 103
landmark_connections: 103
landmark_connections: 67
landmark_connections: 67
landmark_connections: 109
landmark_connections: 109
landmark_connections: 10
landmark_color { r: 255 g: 0 b: 0 }
connection_color { r: 0 g: 255 b: 0 }
thickness: 2
visualize_landmark_depth: false
}
}
}
# Collects a RenderData object for each face into a vector. Upon receiving the
# BATCH_END timestamp, outputs the vector of RenderData at the BATCH_END
# timestamp.
node {
calculator: "EndLoopRenderDataCalculator"
input_stream: "ITEM:landmarks_render_data"
input_stream: "BATCH_END:end_timestamp"
output_stream: "ITERABLE:multi_face_landmarks_render_data"
}
# Converts normalized rects to drawing primitives for annotation overlay.
node {
calculator: "RectToRenderDataCalculator"
input_stream: "NORM_RECTS:rects"
output_stream: "RENDER_DATA:rects_render_data"
node_options: {
[type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] {
filled: false
color { r: 255 g: 0 b: 0 }
thickness: 4.0
}
}
}
# Draws annotations and overlays them on top of the input images.
node {
calculator: "AnnotationOverlayCalculator"
input_stream: "IMAGE_GPU:input_image"
input_stream: "detections_render_data"
input_stream: "VECTOR:0:multi_face_landmarks_render_data"
input_stream: "rects_render_data"
output_stream: "IMAGE_GPU:output_image"
}

View File

@ -1,6 +1,6 @@
## MediaPipe Models
Here are descriptions of the models used in the [example applications](../docs/examples.md).
Here are the descriptions of the models used in the [example applications](../docs/examples.md).
### Object Detection
* [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/models/ssdlite_object_detection.tflite)
@ -8,24 +8,29 @@ Here are descriptions of the models used in the [example applications](../docs/e
### Face Detection
* [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/models/face_detection_front.tflite)
* [Model page](https://sites.google.com/corp/view/perception-cv4arvr/blazeface)
* Paper: ["BlazeFace: Sub-millisecond Neural Face Detection on Mobile GPUs"](https://arxiv.org/abs/1907.05047)
* [Model card](https://sites.google.com/corp/view/perception-cv4arvr/blazeface#h.p_21ojPZDx3cqq)
### Face Mesh
* [TF.js model](https://tfhub.dev/mediapipe/facemesh/1)
* Face detection: [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/models/face_detection_front.tflite) (see above)
* 3D face landmarks: [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/models/face_landmark.tflite), [TF.js model](https://tfhub.dev/mediapipe/facemesh/1)
* [Model page](https://sites.google.com/corp/view/perception-cv4arvr/facemesh)
* Paper: ["Real-time Facial Surface Geometry from Monocular Video on Mobile GPUs"](https://arxiv.org/abs/1907.06724)
* [Google AI Blog post](https://ai.googleblog.com/2019/03/real-time-ar-self-expression-with.html)
* [TensorFlow Blog post](https://blog.tensorflow.org/2020/03/face-and-hand-tracking-in-browser-with-mediapipe-and-tensorflowjs.html)
* [Model card](https://drive.google.com/file/d/1VFC_wIpw4O7xBOiTgUldl79d9LA-LsnA/view)
### Hand Detection and Tracking
* Palm detection: [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/models/palm_detection.tflite), [TF.js model](https://tfhub.dev/mediapipe/handdetector/1)
* 2D hand landmark: [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/models/hand_landmark.tflite)
* 3D hand landmark: [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/models/hand_landmark_3d.tflite), [TF.js model](https://tfhub.dev/mediapipe/handskeleton/1)
* 2D hand landmarks: [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/models/hand_landmark.tflite)
* 3D hand landmarks: [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/models/hand_landmark_3d.tflite), [TF.js model](https://tfhub.dev/mediapipe/handskeleton/1)
* [Google AI Blog post](https://mediapipe.page.link/handgoogleaiblog)
* [TensorFlow Blog post](https://blog.tensorflow.org/2020/03/face-and-hand-tracking-in-browser-with-mediapipe-and-tensorflowjs.html)
* [Model card](https://mediapipe.page.link/handmc)
### Hair Segmentation
* [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/models/hair_segmentation.tflite)
* [Model page](https://sites.google.com/corp/view/perception-cv4arvr/hair-segmentation)
* Paper: ["Real-time Hair segmentation and recoloring on Mobile GPUs"](https://arxiv.org/abs/1907.06740)
* [Model card](https://sites.google.com/corp/view/perception-cv4arvr/hair-segmentation#h.p_NimuO7PgHxlY)
* [Model card](https://drive.google.com/file/d/1lPwJ8BD_-3UUor4LayQ0xpa_RIC_hoRh/view)

Binary file not shown.

View File

@ -0,0 +1,11 @@
# Modules
Each module (represented as a subfolder) provides subgraphs and corresponding resources (e.g., TFLite models) to perform domain-specific tasks (e.g., detecting faces or face landmarks).
*The modules listed below are already used in some of the graphs under `mediapipe/graphs`, and more graphs are being migrated to use existing and upcoming modules.*
| Module | Description |
| :--- | :--- |
| [`face_detection`](face_detection/README.md) | Subgraphs to detect faces. |
| [`face_landmark`](face_landmark/README.md) | Subgraphs to detect and track face landmarks. |

View File

@ -0,0 +1,58 @@
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
load(
"//mediapipe/framework/tool:mediapipe_graph.bzl",
"mediapipe_simple_subgraph",
)
licenses(["notice"]) # Apache 2.0
package(default_visibility = ["//visibility:public"])
mediapipe_simple_subgraph(
name = "face_detection_front_cpu",
graph = "face_detection_front_cpu.pbtxt",
register_as = "FaceDetectionFrontCpu",
deps = [
"//mediapipe/calculators/image:image_transformation_calculator",
"//mediapipe/calculators/tflite:ssd_anchors_calculator",
"//mediapipe/calculators/tflite:tflite_converter_calculator",
"//mediapipe/calculators/tflite:tflite_inference_calculator",
"//mediapipe/calculators/tflite:tflite_tensors_to_detections_calculator",
"//mediapipe/calculators/util:detection_letterbox_removal_calculator",
"//mediapipe/calculators/util:non_max_suppression_calculator",
],
)
mediapipe_simple_subgraph(
name = "face_detection_front_gpu",
graph = "face_detection_front_gpu.pbtxt",
register_as = "FaceDetectionFrontGpu",
deps = [
"//mediapipe/calculators/image:image_transformation_calculator",
"//mediapipe/calculators/tflite:ssd_anchors_calculator",
"//mediapipe/calculators/tflite:tflite_converter_calculator",
"//mediapipe/calculators/tflite:tflite_inference_calculator",
"//mediapipe/calculators/tflite:tflite_tensors_to_detections_calculator",
"//mediapipe/calculators/util:detection_letterbox_removal_calculator",
"//mediapipe/calculators/util:non_max_suppression_calculator",
],
)
exports_files(
srcs = [
"face_detection_front.tflite",
],
)

View File

@ -0,0 +1,7 @@
# face_detection
Subgraphs|Details
:--- | :---
[`FaceDetectionFrontCpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection/face_detection_front_cpu.pbtxt)| Detects faces. Works best for images from front-facing cameras (i.e. selfie images). (CPU input, and inference is executed on CPU.)
[`FaceDetectionFrontGpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection/face_detection_front_gpu.pbtxt)| Detects faces. Works best for images from front-facing cameras (i.e. selfie images). (GPU input, and inference is executed on GPU.)

Binary file not shown.

View File

@ -0,0 +1,143 @@
# MediaPipe graph to detect faces. (CPU input, and inference is executed on
# CPU.)
#
# It is required that "face_detection_front.tflite" is available at
# "mediapipe/modules/face_detection/face_detection_front.tflite"
# path during execution.
#
# EXAMPLE:
# node {
# calculator: "FaceDetectionFrontCpu"
# input_stream: "IMAGE:image"
# output_stream: "DETECTIONS:face_detections"
# }
type: "FaceDetectionFrontCpu"
# CPU image. (ImageFrame)
input_stream: "IMAGE:image"
# Detected faces. (std::vector<Detection>)
# NOTE: there will not be an output packet in the DETECTIONS stream for this
# particular timestamp if none of faces detected. However, the MediaPipe
# framework will internally inform the downstream calculators of the absence of
# this packet so that they don't wait for it unnecessarily.
output_stream: "DETECTIONS:detections"
# Transforms the input image on CPU to a 128x128 image. To scale the input
# image, the scale_mode option is set to FIT to preserve the aspect ratio
# (what is expected by the corresponding face detection model), resulting in
# potential letterboxing in the transformed image.
node: {
calculator: "ImageTransformationCalculator"
input_stream: "IMAGE:image"
output_stream: "IMAGE:transformed_image"
output_stream: "LETTERBOX_PADDING:letterbox_padding"
options: {
[mediapipe.ImageTransformationCalculatorOptions.ext] {
output_width: 128
output_height: 128
scale_mode: FIT
}
}
}
# Converts the transformed input image on CPU into an image tensor stored as a
# TfLiteTensor.
node {
calculator: "TfLiteConverterCalculator"
input_stream: "IMAGE:transformed_image"
output_stream: "TENSORS:input_tensors"
}
# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a
# vector of tensors representing, for instance, detection boxes/keypoints and
# scores.
node {
calculator: "TfLiteInferenceCalculator"
input_stream: "TENSORS:input_tensors"
output_stream: "TENSORS:detection_tensors"
options: {
[mediapipe.TfLiteInferenceCalculatorOptions.ext] {
model_path: "mediapipe/modules/face_detection/face_detection_front.tflite"
}
}
}
# Generates a single side packet containing a vector of SSD anchors based on
# the specification in the options.
node {
calculator: "SsdAnchorsCalculator"
output_side_packet: "anchors"
options: {
[mediapipe.SsdAnchorsCalculatorOptions.ext] {
num_layers: 4
min_scale: 0.1484375
max_scale: 0.75
input_size_height: 128
input_size_width: 128
anchor_offset_x: 0.5
anchor_offset_y: 0.5
strides: 8
strides: 16
strides: 16
strides: 16
aspect_ratios: 1.0
fixed_anchor_size: true
}
}
}
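# With the options above, the anchor generator should produce 896 anchors,
# matching num_boxes in the detection decoder below (assuming the default
# interpolated-scale anchor adds a second anchor per cell): the stride-8 layer
# covers a 16x16 grid and the three stride-16 layers an 8x8 grid each, so
# 16*16*2 + 3*(8*8*2) = 512 + 384 = 896.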
# Decodes the detection tensors generated by the TensorFlow Lite model, based on
# the SSD anchors and the specification in the options, into a vector of
# detections. Each detection describes a detected object.
node {
calculator: "TfLiteTensorsToDetectionsCalculator"
input_stream: "TENSORS:detection_tensors"
input_side_packet: "ANCHORS:anchors"
output_stream: "DETECTIONS:unfiltered_detections"
options: {
[mediapipe.TfLiteTensorsToDetectionsCalculatorOptions.ext] {
num_classes: 1
num_boxes: 896
num_coords: 16
box_coord_offset: 0
keypoint_coord_offset: 4
num_keypoints: 6
num_values_per_keypoint: 2
sigmoid_score: true
score_clipping_thresh: 100.0
reverse_output_order: true
x_scale: 128.0
y_scale: 128.0
h_scale: 128.0
w_scale: 128.0
min_score_thresh: 0.75
}
}
}
# Performs non-max suppression to remove excessive detections.
node {
calculator: "NonMaxSuppressionCalculator"
input_stream: "unfiltered_detections"
output_stream: "filtered_detections"
options: {
[mediapipe.NonMaxSuppressionCalculatorOptions.ext] {
min_suppression_threshold: 0.3
overlap_type: INTERSECTION_OVER_UNION
algorithm: WEIGHTED
}
}
}
# Adjusts detection locations (already normalized to [0.f, 1.f]) on the
# letterboxed image (after image transformation with the FIT scale mode) to the
# corresponding locations on the same image with the letterbox removed (the
# input image to the graph before image transformation).
node {
calculator: "DetectionLetterboxRemovalCalculator"
input_stream: "DETECTIONS:filtered_detections"
input_stream: "LETTERBOX_PADDING:letterbox_padding"
output_stream: "DETECTIONS:detections"
}
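Downstream code typically converts the normalized detections emitted above into pixel coordinates. The C++ sketch below is not part of this change; it assumes the detections carry a relative (normalized) bounding box, which is what the letterbox-removal step preserves:

#include "mediapipe/framework/formats/detection.pb.h"
#include "mediapipe/framework/formats/location_data.pb.h"

// Simple pixel-space box; fields are in image pixels.
struct PixelBox {
  int x = 0, y = 0, width = 0, height = 0;
};

// Converts one detection's relative bounding box (normalized to [0, 1]) to
// pixel coordinates for an image of the given size.
PixelBox ToPixelBox(const mediapipe::Detection& detection, int image_width,
                    int image_height) {
  const auto& rel_box = detection.location_data().relative_bounding_box();
  PixelBox box;
  box.x = static_cast<int>(rel_box.xmin() * image_width);
  box.y = static_cast<int>(rel_box.ymin() * image_height);
  box.width = static_cast<int>(rel_box.width() * image_width);
  box.height = static_cast<int>(rel_box.height() * image_height);
  return box;
}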

View File

@ -0,0 +1,143 @@
# MediaPipe graph to detect faces. (GPU input, and inference is executed on
# GPU.)
#
# It is required that "face_detection_front.tflite" is available at
# "mediapipe/modules/face_detection/face_detection_front.tflite"
# path during execution.
#
# EXAMPLE:
# node {
# calculator: "FaceDetectionFrontGpu"
# input_stream: "IMAGE:image"
# output_stream: "DETECTIONS:face_detections"
# }
type: "FaceDetectionFrontGpu"
# GPU image. (GpuBuffer)
input_stream: "IMAGE:image"
# Detected faces. (std::vector<Detection>)
# NOTE: there will not be an output packet in the DETECTIONS stream for this
# particular timestamp if no faces are detected. However, the MediaPipe
# framework will internally inform the downstream calculators of the absence of
# this packet so that they don't wait for it unnecessarily.
output_stream: "DETECTIONS:detections"
# Transforms the input image on GPU to a 128x128 image. To scale the input
# image, the scale_mode option is set to FIT to preserve the aspect ratio
# (what is expected by the corresponding face detection model), resulting in
# potential letterboxing in the transformed image.
node: {
calculator: "ImageTransformationCalculator"
input_stream: "IMAGE_GPU:image"
output_stream: "IMAGE_GPU:transformed_image"
output_stream: "LETTERBOX_PADDING:letterbox_padding"
options: {
[mediapipe.ImageTransformationCalculatorOptions.ext] {
output_width: 128
output_height: 128
scale_mode: FIT
}
}
}
# Converts the transformed input image on GPU into an image tensor stored as a
# TfLiteTensor.
node {
calculator: "TfLiteConverterCalculator"
input_stream: "IMAGE_GPU:transformed_image"
output_stream: "TENSORS_GPU:input_tensors"
}
# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a
# vector of tensors representing, for instance, detection boxes/keypoints and
# scores.
node {
calculator: "TfLiteInferenceCalculator"
input_stream: "TENSORS_GPU:input_tensors"
output_stream: "TENSORS_GPU:detection_tensors"
options: {
[mediapipe.TfLiteInferenceCalculatorOptions.ext] {
model_path: "mediapipe/modules/face_detection/face_detection_front.tflite"
}
}
}
# Generates a single side packet containing a vector of SSD anchors based on
# the specification in the options.
node {
calculator: "SsdAnchorsCalculator"
output_side_packet: "anchors"
options: {
[mediapipe.SsdAnchorsCalculatorOptions.ext] {
num_layers: 4
min_scale: 0.1484375
max_scale: 0.75
input_size_height: 128
input_size_width: 128
anchor_offset_x: 0.5
anchor_offset_y: 0.5
strides: 8
strides: 16
strides: 16
strides: 16
aspect_ratios: 1.0
fixed_anchor_size: true
}
}
}
# Decodes the detection tensors generated by the TensorFlow Lite model, based on
# the SSD anchors and the specification in the options, into a vector of
# detections. Each detection describes a detected object.
node {
calculator: "TfLiteTensorsToDetectionsCalculator"
input_stream: "TENSORS_GPU:detection_tensors"
input_side_packet: "ANCHORS:anchors"
output_stream: "DETECTIONS:unfiltered_detections"
options: {
[mediapipe.TfLiteTensorsToDetectionsCalculatorOptions.ext] {
num_classes: 1
num_boxes: 896
num_coords: 16
box_coord_offset: 0
keypoint_coord_offset: 4
num_keypoints: 6
num_values_per_keypoint: 2
sigmoid_score: true
score_clipping_thresh: 100.0
reverse_output_order: true
x_scale: 128.0
y_scale: 128.0
h_scale: 128.0
w_scale: 128.0
min_score_thresh: 0.75
}
}
}
# Performs non-max suppression to remove excessive detections.
node {
calculator: "NonMaxSuppressionCalculator"
input_stream: "unfiltered_detections"
output_stream: "filtered_detections"
options: {
[mediapipe.NonMaxSuppressionCalculatorOptions.ext] {
min_suppression_threshold: 0.3
overlap_type: INTERSECTION_OVER_UNION
algorithm: WEIGHTED
}
}
}
# Adjusts detection locations (already normalized to [0.f, 1.f]) on the
# letterboxed image (after image transformation with the FIT scale mode) to the
# corresponding locations on the same image with the letterbox removed (the
# input image to the graph before image transformation).
node {
calculator: "DetectionLetterboxRemovalCalculator"
input_stream: "DETECTIONS:filtered_detections"
input_stream: "LETTERBOX_PADDING:letterbox_padding"
output_stream: "DETECTIONS:detections"
}

Some files were not shown because too many files have changed in this diff.