Project import generated by Copybara.
GitOrigin-RevId: 72ff4ae24943c2ccf9905bc9e516042b0aa3dd86
README.md
|
@ -9,26 +9,32 @@
|
|||
|
||||
## ML Solutions in MediaPipe
|
||||
|
||||
* [Face Detection](mediapipe/docs/face_detection_mobile_gpu.md) [[Web Demo]](https://viz.mediapipe.dev/runner/demos/face_detection/face_detection.html)
|
||||
* [Face Detection](mediapipe/docs/face_detection_mobile_gpu.md) [(web demo)](https://viz.mediapipe.dev/runner/demos/face_detection/face_detection.html)
|
||||
* [Face Mesh](mediapipe/docs/face_mesh_mobile_gpu.md)
|
||||
* [Hand Detection](mediapipe/docs/hand_detection_mobile_gpu.md)
|
||||
* [Hand Tracking](mediapipe/docs/hand_tracking_mobile_gpu.md) [(web demo)](https://viz.mediapipe.dev/runner/demos/hand_tracking/hand_tracking.html)
|
||||
* [Multi-hand Tracking](mediapipe/docs/multi_hand_tracking_mobile_gpu.md)
|
||||
* [Hand Tracking](mediapipe/docs/hand_tracking_mobile_gpu.md) [[Web Demo]](https://viz.mediapipe.dev/runner/demos/hand_tracking/hand_tracking.html)
|
||||
* [Hair Segmentation](mediapipe/docs/hair_segmentation_mobile_gpu.md) [[Web Demo]](https://viz.mediapipe.dev/runner/demos/hair_segmentation/hair_segmentation.html)
|
||||
* [Hair Segmentation](mediapipe/docs/hair_segmentation_mobile_gpu.md) [(web demo)](https://viz.mediapipe.dev/runner/demos/hair_segmentation/hair_segmentation.html)
|
||||
* [Object Detection](mediapipe/docs/object_detection_mobile_gpu.md)
|
||||
* [Object Detection and Tracking](mediapipe/docs/object_tracking_mobile_gpu.md)
|
||||
* [Objectron: 3D Object Detection and Tracking](mediapipe/docs/objectron_mobile_gpu.md)
|
||||
* [AutoFlip](mediapipe/docs/autoflip.md)
|
||||
* [AutoFlip: Intelligent Video Reframing](mediapipe/docs/autoflip.md)
|
||||
|
||||
![face_detection](mediapipe/docs/images/mobile/face_detection_android_gpu_small.gif)
|
||||
![multi-hand_tracking](mediapipe/docs/images/mobile/multi_hand_tracking_android_gpu_small.gif)
|
||||
![hand_tracking](mediapipe/docs/images/mobile/hand_tracking_3d_android_gpu_small.gif)
|
||||
![face_mesh](mediapipe/docs/images/mobile/face_mesh_android_gpu_small.gif)
|
||||
![hand_tracking](mediapipe/docs/images/mobile/hand_tracking_android_gpu_small.gif)
|
||||
![multi-hand_tracking](mediapipe/docs/images/mobile/multi_hand_tracking_3d_android_gpu_small.gif)
|
||||
![hair_segmentation](mediapipe/docs/images/mobile/hair_segmentation_android_gpu_small.gif)
|
||||
![object_detection](mediapipe/docs/images/mobile/object_detection_android_gpu_small.gif)
|
||||
![object_tracking](mediapipe/docs/images/mobile/object_tracking_android_gpu_small.gif)
|
||||
![objectron_shoes](mediapipe/docs/images/mobile/objectron_shoe_android_gpu_small.gif)
|
||||
![objectron_chair](mediapipe/docs/images/mobile/objectron_chair_android_gpu_small.gif)
|
||||
|
||||
## Installation
|
||||
Follow these [instructions](mediapipe/docs/install.md).
|
||||
|
||||
## Getting started
|
||||
See mobile, desktop and Google Coral [examples](mediapipe/docs/examples.md).
|
||||
See mobile, desktop, web and Google Coral [examples](mediapipe/docs/examples.md).
|
||||
|
||||
Check out some web demos [[Edge detection]](https://viz.mediapipe.dev/runner/demos/edge_detection/edge_detection.html) [[Face detection]](https://viz.mediapipe.dev/runner/demos/face_detection/face_detection.html) [[Hand Tracking]](https://viz.mediapipe.dev/runner/demos/hand_tracking/hand_tracking.html)
|
||||
|
||||
|
@ -40,10 +46,14 @@ Check out the [Examples page](https://mediapipe.readthedocs.io/en/latest/example
|
|||
## Visualizing MediaPipe graphs
|
||||
A web-based visualizer is hosted on [viz.mediapipe.dev](https://viz.mediapipe.dev/). Please also see instructions [here](mediapipe/docs/visualizer.md).
|
||||
|
||||
## Google Open Source Code search
|
||||
Search the MediaPipe GitHub repository using [Google Open Source code search](https://t.co/LSZnbMUUnT?amp=1).
|
||||
|
||||
## Videos
|
||||
* [YouTube Channel](https://www.youtube.com/channel/UCObqmpuSMx-usADtL_qdMAw)
|
||||
|
||||
## Publications
|
||||
* [Alfred Camera: Smart camera features using MediaPipe](https://developers.googleblog.com/2020/03/alfred-camera-smart-camera-features-using-mediapipe.html)
|
||||
* [MediaPipe Objectron: Real-time 3D Object Detection on Mobile Devices](https://mediapipe.page.link/objectron-aiblog)
|
||||
* [AutoFlip: An Open Source Framework for Intelligent Video Reframing](https://mediapipe.page.link/autoflip)
|
||||
* [Google Developer Blog: MediaPipe on the Web](https://mediapipe.page.link/webdevblog)
|
||||
|
@ -52,6 +62,7 @@ A web-based visualizer is hosted on [viz.mediapipe.dev](https://viz.mediapipe.de
|
|||
* [MediaPipe: A Framework for Building Perception Pipelines](https://arxiv.org/abs/1906.08172)
|
||||
|
||||
## Events
|
||||
* [MediaPipe Seattle Meetup, Google Building Waterside, 13 Feb 2020](https://mediapipe.page.link/seattle2020)
|
||||
* [AI Nextcon 2020, 12-16 Feb 2020, Seattle](http://aisea20.xnextcon.com/)
|
||||
* [MediaPipe Madrid Meetup, 16 Dec 2019](https://www.meetup.com/Madrid-AI-Developers-Group/events/266329088/)
|
||||
* [MediaPipe London Meetup, Google 123 Building, 12 Dec 2019](https://www.meetup.com/London-AI-Tech-Talk/events/266329038)
|
||||
|
|
|
@ -184,23 +184,14 @@ const float SpectrogramCalculator::kLnPowerToDb = 4.342944819032518;
|
|||
use_local_timestamp_ = spectrogram_options.use_local_timestamp();
|
||||
|
||||
if (spectrogram_options.frame_duration_seconds() <= 0.0) {
|
||||
::mediapipe::InvalidArgumentErrorBuilder(MEDIAPIPE_LOC)
|
||||
<< "Invalid or missing frame_duration_seconds.\n"
|
||||
"frame_duration_seconds: "
|
||||
<< spectrogram_options.frame_duration_seconds();
|
||||
// TODO: return an error.
|
||||
}
|
||||
if (spectrogram_options.frame_overlap_seconds() >=
|
||||
spectrogram_options.frame_duration_seconds()) {
|
||||
::mediapipe::InvalidArgumentErrorBuilder(MEDIAPIPE_LOC)
|
||||
<< "Invalid frame_overlap_seconds.\nframe_overlap_seconds: "
|
||||
<< spectrogram_options.frame_overlap_seconds()
|
||||
<< "\nframe_duration_seconds: "
|
||||
<< spectrogram_options.frame_duration_seconds();
|
||||
// TODO: return an error.
|
||||
}
|
||||
if (spectrogram_options.frame_overlap_seconds() < 0.0) {
|
||||
::mediapipe::InvalidArgumentErrorBuilder(MEDIAPIPE_LOC)
|
||||
<< "Frame_overlap_seconds is < 0.0.\nframe_overlap_seconds: "
|
||||
<< spectrogram_options.frame_overlap_seconds();
|
||||
// TODO: return an error.
|
||||
}
|
||||
|
||||
TimeSeriesHeader input_header;
|
||||
|
@ -212,9 +203,7 @@ const float SpectrogramCalculator::kLnPowerToDb = 4.342944819032518;
|
|||
|
||||
if (!spectrogram_options.allow_multichannel_input() &&
|
||||
num_input_channels_ != 1) {
|
||||
::mediapipe::InvalidArgumentErrorBuilder(MEDIAPIPE_LOC)
|
||||
<< "The current setting only supports single-channel input. Please set "
|
||||
"allow_multichannel_input.\n";
|
||||
// TODO: return an error.
|
||||
}
|
||||
|
||||
frame_duration_samples_ =
|
||||
|
@ -293,10 +282,7 @@ const float SpectrogramCalculator::kLnPowerToDb = 4.342944819032518;
|
|||
|
||||
const Matrix& input_stream = cc->Inputs().Index(0).Get<Matrix>();
|
||||
if (input_stream.rows() != num_input_channels_) {
|
||||
::mediapipe::InvalidArgumentErrorBuilder(MEDIAPIPE_LOC)
|
||||
<< "Number of input channels do not correspond to the number of rows "
|
||||
<< "in the input matrix: " << num_input_channels_ << "channels vs "
|
||||
<< input_stream.rows() << " rows";
|
||||
// TODO: return an error.
|
||||
}
|
||||
|
||||
cumulative_input_samples_ += input_stream.cols();
|
||||
|
|
|
@ -815,6 +815,38 @@ cc_test(
|
|||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "split_normalized_landmark_list_calculator",
|
||||
srcs = ["split_normalized_landmark_list_calculator.cc"],
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
":split_vector_calculator_cc_proto",
|
||||
"//mediapipe/framework:calculator_framework",
|
||||
"//mediapipe/framework/formats:landmark_cc_proto",
|
||||
"//mediapipe/framework/port:ret_check",
|
||||
"//mediapipe/framework/port:status",
|
||||
"//mediapipe/util:resource_util",
|
||||
],
|
||||
alwayslink = 1,
|
||||
)
|
||||
|
||||
cc_test(
|
||||
name = "split_normalized_landmark_list_calculator_test",
|
||||
srcs = ["split_normalized_landmark_list_calculator_test.cc"],
|
||||
deps = [
|
||||
":split_normalized_landmark_list_calculator",
|
||||
":split_vector_calculator_cc_proto",
|
||||
"//mediapipe/framework:calculator_framework",
|
||||
"//mediapipe/framework:calculator_runner",
|
||||
"//mediapipe/framework/deps:file_path",
|
||||
"//mediapipe/framework/formats:landmark_cc_proto",
|
||||
"//mediapipe/framework/port:gtest_main",
|
||||
"//mediapipe/framework/port:integral_types",
|
||||
"//mediapipe/framework/port:parse_text_proto",
|
||||
"//mediapipe/framework/tool:validate_type",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "dequantize_byte_array_calculator",
|
||||
srcs = ["dequantize_byte_array_calculator.cc"],
|
||||
|
|
|
@ -51,8 +51,8 @@ namespace mediapipe {
|
|||
class ConstantSidePacketCalculator : public CalculatorBase {
|
||||
public:
|
||||
static ::mediapipe::Status GetContract(CalculatorContract* cc) {
|
||||
const auto& options = cc->Options().GetExtension(
|
||||
::mediapipe::ConstantSidePacketCalculatorOptions::ext);
|
||||
const auto& options =
|
||||
cc->Options<::mediapipe::ConstantSidePacketCalculatorOptions>();
|
||||
RET_CHECK_EQ(cc->OutputSidePackets().NumEntries(kPacketTag),
|
||||
options.packet_size())
|
||||
<< "Number of output side packets has to be same as number of packets "
|
||||
|
@ -80,8 +80,8 @@ class ConstantSidePacketCalculator : public CalculatorBase {
|
|||
}
|
||||
|
||||
::mediapipe::Status Open(CalculatorContext* cc) override {
|
||||
const auto& options = cc->Options().GetExtension(
|
||||
::mediapipe::ConstantSidePacketCalculatorOptions::ext);
|
||||
const auto& options =
|
||||
cc->Options<::mediapipe::ConstantSidePacketCalculatorOptions>();
|
||||
int index = 0;
|
||||
for (CollectionItemId id = cc->OutputSidePackets().BeginId(kPacketTag);
|
||||
id != cc->OutputSidePackets().EndId(kPacketTag); ++id, ++index) {
|
||||
|
|
|
@ -0,0 +1,165 @@
|
|||
// Copyright 2019 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef MEDIAPIPE_CALCULATORS_CORE_SPLIT_NORMALIZED_LANDMARK_LIST_CALCULATOR_H_ // NOLINT
|
||||
#define MEDIAPIPE_CALCULATORS_CORE_SPLIT_NORMALIZED_LANDMARK_LIST_CALCULATOR_H_ // NOLINT
|
||||
|
||||
#include "mediapipe/calculators/core/split_vector_calculator.pb.h"
|
||||
#include "mediapipe/framework/calculator_framework.h"
|
||||
#include "mediapipe/framework/formats/landmark.pb.h"
|
||||
#include "mediapipe/framework/port/canonical_errors.h"
|
||||
#include "mediapipe/framework/port/ret_check.h"
|
||||
#include "mediapipe/util/resource_util.h"
|
||||
|
||||
namespace mediapipe {
|
||||
|
||||
// Splits an input packet with NormalizedLandmarkList into
|
||||
// multiple NormalizedLandmarkList output packets using the [begin, end) ranges
|
||||
// specified in SplitVectorCalculatorOptions. If the option "element_only" is
|
||||
// set to true, all ranges should be of size 1 and all outputs will be elements
|
||||
// of type NormalizedLandmark. If "element_only" is false, ranges can be
|
||||
// of any non-zero size and all outputs will be of type NormalizedLandmarkList.
|
||||
// If the option "combine_outputs" is set to true, only one output stream can be
|
||||
// specified and all ranges of elements will be combined into one
|
||||
// NormalizedLandmarkList.
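//
// Example node config (a sketch based on the test configs in this change;
// stream names are placeholders):
//   node {
//     calculator: "SplitNormalizedLandmarkListCalculator"
//     input_stream: "landmarks_in"
//     output_stream: "range_0"
//     output_stream: "range_1"
//     options {
//       [mediapipe.SplitVectorCalculatorOptions.ext] {
//         ranges: { begin: 0 end: 4 }
//         ranges: { begin: 4 end: 6 }
//       }
//     }
//   }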
|
||||
class SplitNormalizedLandmarkListCalculator : public CalculatorBase {
|
||||
public:
|
||||
static ::mediapipe::Status GetContract(CalculatorContract* cc) {
|
||||
RET_CHECK(cc->Inputs().NumEntries() == 1);
|
||||
RET_CHECK(cc->Outputs().NumEntries() != 0);
|
||||
|
||||
cc->Inputs().Index(0).Set<NormalizedLandmarkList>();
|
||||
|
||||
const auto& options =
|
||||
cc->Options<::mediapipe::SplitVectorCalculatorOptions>();
|
||||
|
||||
if (options.combine_outputs()) {
|
||||
RET_CHECK_EQ(cc->Outputs().NumEntries(), 1);
|
||||
cc->Outputs().Index(0).Set<NormalizedLandmarkList>();
|
||||
for (int i = 0; i < options.ranges_size() - 1; ++i) {
|
||||
for (int j = i + 1; j < options.ranges_size(); ++j) {
|
||||
const auto& range_0 = options.ranges(i);
|
||||
const auto& range_1 = options.ranges(j);
|
||||
if ((range_0.begin() >= range_1.begin() &&
|
||||
range_0.begin() < range_1.end()) ||
|
||||
(range_1.begin() >= range_0.begin() &&
|
||||
range_1.begin() < range_0.end())) {
|
||||
return ::mediapipe::InvalidArgumentError(
|
||||
"Ranges must be non-overlapping when using combine_outputs "
|
||||
"option.");
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (cc->Outputs().NumEntries() != options.ranges_size()) {
|
||||
return ::mediapipe::InvalidArgumentError(
|
||||
"The number of output streams should match the number of ranges "
|
||||
"specified in the CalculatorOptions.");
|
||||
}
|
||||
|
||||
// Set the output types for each output stream.
|
||||
for (int i = 0; i < cc->Outputs().NumEntries(); ++i) {
|
||||
if (options.ranges(i).begin() < 0 || options.ranges(i).end() < 0 ||
|
||||
options.ranges(i).begin() >= options.ranges(i).end()) {
|
||||
return ::mediapipe::InvalidArgumentError(
|
||||
"Indices should be non-negative and begin index should be less "
|
||||
"than the end index.");
|
||||
}
|
||||
if (options.element_only()) {
|
||||
if (options.ranges(i).end() - options.ranges(i).begin() != 1) {
|
||||
return ::mediapipe::InvalidArgumentError(
|
||||
"Since element_only is true, all ranges should be of size 1.");
|
||||
}
|
||||
cc->Outputs().Index(i).Set<NormalizedLandmark>();
|
||||
} else {
|
||||
cc->Outputs().Index(i).Set<NormalizedLandmarkList>();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return ::mediapipe::OkStatus();
|
||||
}
|
||||
|
||||
::mediapipe::Status Open(CalculatorContext* cc) override {
|
||||
cc->SetOffset(TimestampDiff(0));
|
||||
|
||||
const auto& options =
|
||||
cc->Options<::mediapipe::SplitVectorCalculatorOptions>();
|
||||
|
||||
element_only_ = options.element_only();
|
||||
combine_outputs_ = options.combine_outputs();
|
||||
|
||||
for (const auto& range : options.ranges()) {
|
||||
ranges_.push_back({range.begin(), range.end()});
|
||||
max_range_end_ = std::max(max_range_end_, range.end());
|
||||
total_elements_ += range.end() - range.begin();
|
||||
}
|
||||
|
||||
return ::mediapipe::OkStatus();
|
||||
}
|
||||
|
||||
::mediapipe::Status Process(CalculatorContext* cc) override {
|
||||
const NormalizedLandmarkList& input =
|
||||
cc->Inputs().Index(0).Get<NormalizedLandmarkList>();
|
||||
RET_CHECK_GE(input.landmark_size(), max_range_end_);
|
||||
|
||||
if (combine_outputs_) {
|
||||
NormalizedLandmarkList output;
|
||||
for (int i = 0; i < ranges_.size(); ++i) {
|
||||
for (int j = ranges_[i].first; j < ranges_[i].second; ++j) {
|
||||
const NormalizedLandmark& input_landmark = input.landmark(j);
|
||||
*output.add_landmark() = input_landmark;
|
||||
}
|
||||
}
|
||||
RET_CHECK_EQ(output.landmark_size(), total_elements_);
|
||||
cc->Outputs().Index(0).AddPacket(
|
||||
MakePacket<NormalizedLandmarkList>(output).At(cc->InputTimestamp()));
|
||||
} else {
|
||||
if (element_only_) {
|
||||
for (int i = 0; i < ranges_.size(); ++i) {
|
||||
cc->Outputs().Index(i).AddPacket(
|
||||
MakePacket<NormalizedLandmark>(input.landmark(ranges_[i].first))
|
||||
.At(cc->InputTimestamp()));
|
||||
}
|
||||
} else {
|
||||
for (int i = 0; i < ranges_.size(); ++i) {
|
||||
NormalizedLandmarkList output;
|
||||
for (int j = ranges_[i].first; j < ranges_[i].second; ++j) {
|
||||
const NormalizedLandmark& input_landmark = input.landmark(j);
|
||||
*output.add_landmark() = input_landmark;
|
||||
}
|
||||
cc->Outputs().Index(i).AddPacket(
|
||||
MakePacket<NormalizedLandmarkList>(output).At(
|
||||
cc->InputTimestamp()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return ::mediapipe::OkStatus();
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<std::pair<int32, int32>> ranges_;
|
||||
int32 max_range_end_ = -1;
|
||||
int32 total_elements_ = 0;
|
||||
bool element_only_ = false;
|
||||
bool combine_outputs_ = false;
|
||||
};
|
||||
|
||||
REGISTER_CALCULATOR(SplitNormalizedLandmarkListCalculator);
|
||||
|
||||
} // namespace mediapipe
|
||||
|
||||
// NOLINTNEXTLINE
|
||||
#endif // MEDIAPIPE_CALCULATORS_CORE_SPLIT_NORMALIZED_LANDMARK_LIST_CALCULATOR_H_
|
|
@ -0,0 +1,404 @@
|
|||
// Copyright 2019 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "mediapipe/calculators/core/split_vector_calculator.pb.h"
|
||||
#include "mediapipe/framework/calculator_framework.h"
|
||||
#include "mediapipe/framework/calculator_runner.h"
|
||||
#include "mediapipe/framework/deps/file_path.h"
|
||||
#include "mediapipe/framework/formats/landmark.pb.h"
|
||||
#include "mediapipe/framework/port/gmock.h"
|
||||
#include "mediapipe/framework/port/gtest.h"
|
||||
#include "mediapipe/framework/port/integral_types.h"
|
||||
#include "mediapipe/framework/port/parse_text_proto.h"
|
||||
#include "mediapipe/framework/port/status_matchers.h" // NOLINT
|
||||
#include "mediapipe/framework/tool/validate_type.h"
|
||||
|
||||
namespace mediapipe {
|
||||
|
||||
constexpr float kLocationVal = 3;
|
||||
|
||||
class SplitNormalizedLandmarkListCalculatorTest : public ::testing::Test {
|
||||
protected:
|
||||
void TearDown() { expected_landmarks_.reset(); }
|
||||
|
||||
void PrepareNormalizedLandmarkList(int list_size) {
|
||||
// Prepare input landmark list.
|
||||
input_landmarks_ = absl::make_unique<NormalizedLandmarkList>();
|
||||
expected_landmarks_ = absl::make_unique<NormalizedLandmarkList>();
|
||||
for (int i = 0; i < list_size; ++i) {
|
||||
NormalizedLandmark* landmark = input_landmarks_->add_landmark();
|
||||
landmark->set_x(i * kLocationVal);
|
||||
landmark->set_y(i * kLocationVal);
|
||||
landmark->set_z(i * kLocationVal);
|
||||
// Save the landmarks for comparison after the graph runs.
|
||||
*expected_landmarks_->add_landmark() = *landmark;
|
||||
}
|
||||
}
|
||||
|
||||
void ValidateListOutput(std::vector<Packet>& output_packets,
|
||||
int expected_elements, int input_begin_index) {
|
||||
ASSERT_EQ(1, output_packets.size());
|
||||
const NormalizedLandmarkList& output_landmarks =
|
||||
output_packets[0].Get<NormalizedLandmarkList>();
|
||||
ASSERT_EQ(expected_elements, output_landmarks.landmark_size());
|
||||
|
||||
for (int i = 0; i < expected_elements; ++i) {
|
||||
const NormalizedLandmark& expected_landmark =
|
||||
expected_landmarks_->landmark(input_begin_index + i);
|
||||
const NormalizedLandmark& result = output_landmarks.landmark(i);
|
||||
EXPECT_FLOAT_EQ(expected_landmark.x(), result.x());
|
||||
EXPECT_FLOAT_EQ(expected_landmark.y(), result.y());
|
||||
EXPECT_FLOAT_EQ(expected_landmark.z(), result.z());
|
||||
}
|
||||
}
|
||||
|
||||
void ValidateCombinedListOutput(std::vector<Packet>& output_packets,
|
||||
int expected_elements,
|
||||
std::vector<int>& input_begin_indices,
|
||||
std::vector<int>& input_end_indices) {
|
||||
ASSERT_EQ(1, output_packets.size());
|
||||
ASSERT_EQ(input_begin_indices.size(), input_end_indices.size());
|
||||
const NormalizedLandmarkList& output_landmarks =
|
||||
output_packets[0].Get<NormalizedLandmarkList>();
|
||||
ASSERT_EQ(expected_elements, output_landmarks.landmark_size());
|
||||
const int num_ranges = input_begin_indices.size();
|
||||
|
||||
int element_id = 0;
|
||||
for (int range_id = 0; range_id < num_ranges; ++range_id) {
|
||||
for (int i = input_begin_indices[range_id];
|
||||
i < input_end_indices[range_id]; ++i) {
|
||||
const NormalizedLandmark& expected_landmark =
|
||||
expected_landmarks_->landmark(i);
|
||||
const NormalizedLandmark& result =
|
||||
output_landmarks.landmark(element_id);
|
||||
EXPECT_FLOAT_EQ(expected_landmark.x(), result.x());
|
||||
EXPECT_FLOAT_EQ(expected_landmark.y(), result.y());
|
||||
EXPECT_FLOAT_EQ(expected_landmark.z(), result.z());
|
||||
element_id++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void ValidateElementOutput(std::vector<Packet>& output_packets,
|
||||
int input_begin_index) {
|
||||
ASSERT_EQ(1, output_packets.size());
|
||||
|
||||
const NormalizedLandmark& output_landmark =
|
||||
output_packets[0].Get<NormalizedLandmark>();
|
||||
ASSERT_TRUE(output_landmark.IsInitialized());
|
||||
|
||||
const NormalizedLandmark& expected_landmark =
|
||||
expected_landmarks_->landmark(input_begin_index);
|
||||
|
||||
EXPECT_FLOAT_EQ(expected_landmark.x(), output_landmark.x());
|
||||
EXPECT_FLOAT_EQ(expected_landmark.y(), output_landmark.y());
|
||||
EXPECT_FLOAT_EQ(expected_landmark.z(), output_landmark.z());
|
||||
}
|
||||
|
||||
std::unique_ptr<NormalizedLandmarkList> input_landmarks_ = nullptr;
|
||||
std::unique_ptr<NormalizedLandmarkList> expected_landmarks_ = nullptr;
|
||||
std::unique_ptr<CalculatorRunner> runner_ = nullptr;
|
||||
};
|
||||
|
||||
TEST_F(SplitNormalizedLandmarkListCalculatorTest, SmokeTest) {
|
||||
PrepareNormalizedLandmarkList(/*list_size=*/5);
|
||||
ASSERT_NE(input_landmarks_, nullptr);
|
||||
|
||||
// Prepare a graph to use the SplitNormalizedLandmarkListCalculator.
|
||||
CalculatorGraphConfig graph_config =
|
||||
::mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(
|
||||
R"(
|
||||
input_stream: "landmarks_in"
|
||||
node {
|
||||
calculator: "SplitNormalizedLandmarkListCalculator"
|
||||
input_stream: "landmarks_in"
|
||||
output_stream: "range_0"
|
||||
output_stream: "range_1"
|
||||
output_stream: "range_2"
|
||||
options {
|
||||
[mediapipe.SplitVectorCalculatorOptions.ext] {
|
||||
ranges: { begin: 0 end: 1 }
|
||||
ranges: { begin: 1 end: 4 }
|
||||
ranges: { begin: 4 end: 5 }
|
||||
}
|
||||
}
|
||||
}
|
||||
)");
|
||||
std::vector<Packet> range_0_packets;
|
||||
tool::AddVectorSink("range_0", &graph_config, &range_0_packets);
|
||||
std::vector<Packet> range_1_packets;
|
||||
tool::AddVectorSink("range_1", &graph_config, &range_1_packets);
|
||||
std::vector<Packet> range_2_packets;
|
||||
tool::AddVectorSink("range_2", &graph_config, &range_2_packets);
|
||||
|
||||
// Run the graph.
|
||||
CalculatorGraph graph;
|
||||
MP_ASSERT_OK(graph.Initialize(graph_config));
|
||||
MP_ASSERT_OK(graph.StartRun({}));
|
||||
MP_ASSERT_OK(graph.AddPacketToInputStream(
|
||||
"landmarks_in", Adopt(input_landmarks_.release()).At(Timestamp(0))));
|
||||
// Wait until the calculator finishes processing.
|
||||
MP_ASSERT_OK(graph.WaitUntilIdle());
|
||||
|
||||
ValidateListOutput(range_0_packets, /*expected_elements=*/1,
|
||||
/*input_begin_index=*/0);
|
||||
ValidateListOutput(range_1_packets, /*expected_elements=*/3,
|
||||
/*input_begin_index=*/1);
|
||||
ValidateListOutput(range_2_packets, /*expected_elements=*/1,
|
||||
/*input_begin_index=*/4);
|
||||
|
||||
// Fully close the graph at the end.
|
||||
MP_ASSERT_OK(graph.CloseInputStream("landmarks_in"));
|
||||
MP_ASSERT_OK(graph.WaitUntilDone());
|
||||
}
|
||||
|
||||
TEST_F(SplitNormalizedLandmarkListCalculatorTest, InvalidRangeTest) {
|
||||
// Prepare a graph to use the SplitNormalizedLandmarkListCalculator.
|
||||
CalculatorGraphConfig graph_config =
|
||||
::mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(
|
||||
R"(
|
||||
input_stream: "landmarks_in"
|
||||
node {
|
||||
calculator: "SplitNormalizedLandmarkListCalculator"
|
||||
input_stream: "landmarks_in"
|
||||
output_stream: "range_0"
|
||||
options {
|
||||
[mediapipe.SplitVectorCalculatorOptions.ext] {
|
||||
ranges: { begin: 0 end: 0 }
|
||||
}
|
||||
}
|
||||
}
|
||||
)");
|
||||
|
||||
// Run the graph.
|
||||
CalculatorGraph graph;
|
||||
// The graph should fail running because of an invalid range (begin == end).
|
||||
ASSERT_FALSE(graph.Initialize(graph_config).ok());
|
||||
}
|
||||
|
||||
TEST_F(SplitNormalizedLandmarkListCalculatorTest,
|
||||
InvalidOutputStreamCountTest) {
|
||||
// Prepare a graph to use the SplitNormalizedLandmarkListCalculator.
|
||||
CalculatorGraphConfig graph_config =
|
||||
::mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(
|
||||
R"(
|
||||
input_stream: "landmarks_in"
|
||||
node {
|
||||
calculator: "SplitNormalizedLandmarkListCalculator"
|
||||
input_stream: "landmarks_in"
|
||||
output_stream: "range_0"
|
||||
output_stream: "range_1"
|
||||
options {
|
||||
[mediapipe.SplitVectorCalculatorOptions.ext] {
|
||||
ranges: { begin: 0 end: 1 }
|
||||
}
|
||||
}
|
||||
}
|
||||
)");
|
||||
|
||||
// Run the graph.
|
||||
CalculatorGraph graph;
|
||||
// The graph should fail running because the number of output streams does not
|
||||
// match the number of range elements in the options.
|
||||
ASSERT_FALSE(graph.Initialize(graph_config).ok());
|
||||
}
|
||||
|
||||
TEST_F(SplitNormalizedLandmarkListCalculatorTest,
|
||||
InvalidCombineOutputsMultipleOutputsTest) {
|
||||
// Prepare a graph to use the SplitNormalizedLandmarkListCalculator.
|
||||
CalculatorGraphConfig graph_config =
|
||||
::mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(
|
||||
R"(
|
||||
input_stream: "landmarks_in"
|
||||
node {
|
||||
calculator: "SplitNormalizedLandmarkListCalculator"
|
||||
input_stream: "landmarks_in"
|
||||
output_stream: "range_0"
|
||||
output_stream: "range_1"
|
||||
options {
|
||||
[mediapipe.SplitVectorCalculatorOptions.ext] {
|
||||
ranges: { begin: 0 end: 1 }
|
||||
ranges: { begin: 2 end: 3 }
|
||||
combine_outputs: true
|
||||
}
|
||||
}
|
||||
}
|
||||
)");
|
||||
|
||||
// Run the graph.
|
||||
CalculatorGraph graph;
|
||||
// The graph should fail running because combine_outputs is true but more
|
||||
// than one output stream is specified.
|
||||
ASSERT_FALSE(graph.Initialize(graph_config).ok());
|
||||
}
|
||||
|
||||
TEST_F(SplitNormalizedLandmarkListCalculatorTest,
|
||||
InvalidOverlappingRangesTest) {
|
||||
// Prepare a graph to use the SplitNormalizedLandmarkListCalculator.
|
||||
CalculatorGraphConfig graph_config =
|
||||
::mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(
|
||||
R"(
|
||||
input_stream: "landmarks_in"
|
||||
node {
|
||||
calculator: "SplitNormalizedLandmarkListCalculator"
|
||||
input_stream: "landmarks_in"
|
||||
output_stream: "range_0"
|
||||
options {
|
||||
[mediapipe.SplitVectorCalculatorOptions.ext] {
|
||||
ranges: { begin: 0 end: 3 }
|
||||
ranges: { begin: 1 end: 4 }
|
||||
combine_outputs: true
|
||||
}
|
||||
}
|
||||
}
|
||||
)");
|
||||
|
||||
// Run the graph.
|
||||
CalculatorGraph graph;
|
||||
// The graph should fail running because there are overlapping ranges.
|
||||
ASSERT_FALSE(graph.Initialize(graph_config).ok());
|
||||
}
|
||||
|
||||
TEST_F(SplitNormalizedLandmarkListCalculatorTest, SmokeTestElementOnly) {
|
||||
PrepareNormalizedLandmarkList(/*list_size=*/5);
|
||||
ASSERT_NE(input_landmarks_, nullptr);
|
||||
|
||||
// Prepare a graph to use the SplitNormalizedLandmarkListCalculator.
|
||||
CalculatorGraphConfig graph_config =
|
||||
::mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(
|
||||
R"(
|
||||
input_stream: "landmarks_in"
|
||||
node {
|
||||
calculator: "SplitNormalizedLandmarkListCalculator"
|
||||
input_stream: "landmarks_in"
|
||||
output_stream: "range_0"
|
||||
output_stream: "range_1"
|
||||
output_stream: "range_2"
|
||||
options {
|
||||
[mediapipe.SplitVectorCalculatorOptions.ext] {
|
||||
ranges: { begin: 0 end: 1 }
|
||||
ranges: { begin: 2 end: 3 }
|
||||
ranges: { begin: 4 end: 5 }
|
||||
element_only: true
|
||||
}
|
||||
}
|
||||
}
|
||||
)");
|
||||
std::vector<Packet> range_0_packets;
|
||||
tool::AddVectorSink("range_0", &graph_config, &range_0_packets);
|
||||
std::vector<Packet> range_1_packets;
|
||||
tool::AddVectorSink("range_1", &graph_config, &range_1_packets);
|
||||
std::vector<Packet> range_2_packets;
|
||||
tool::AddVectorSink("range_2", &graph_config, &range_2_packets);
|
||||
|
||||
// Run the graph.
|
||||
CalculatorGraph graph;
|
||||
MP_ASSERT_OK(graph.Initialize(graph_config));
|
||||
MP_ASSERT_OK(graph.StartRun({}));
|
||||
MP_ASSERT_OK(graph.AddPacketToInputStream(
|
||||
"landmarks_in", Adopt(input_landmarks_.release()).At(Timestamp(0))));
|
||||
// Wait until the calculator finishes processing.
|
||||
MP_ASSERT_OK(graph.WaitUntilIdle());
|
||||
|
||||
ValidateElementOutput(range_0_packets,
|
||||
/*input_begin_index=*/0);
|
||||
ValidateElementOutput(range_1_packets,
|
||||
/*input_begin_index=*/2);
|
||||
ValidateElementOutput(range_2_packets,
|
||||
/*input_begin_index=*/4);
|
||||
|
||||
// Fully close the graph at the end.
|
||||
MP_ASSERT_OK(graph.CloseInputStream("landmarks_in"));
|
||||
MP_ASSERT_OK(graph.WaitUntilDone());
|
||||
}
|
||||
|
||||
TEST_F(SplitNormalizedLandmarkListCalculatorTest, SmokeTestCombiningOutputs) {
|
||||
PrepareNormalizedLandmarkList(/*list_size=*/5);
|
||||
ASSERT_NE(input_landmarks_, nullptr);
|
||||
|
||||
// Prepare a graph to use the SplitNormalizedLandmarkListCalculator.
|
||||
CalculatorGraphConfig graph_config =
|
||||
::mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(
|
||||
R"(
|
||||
input_stream: "landmarks_in"
|
||||
node {
|
||||
calculator: "SplitNormalizedLandmarkListCalculator"
|
||||
input_stream: "landmarks_in"
|
||||
output_stream: "range_0"
|
||||
options {
|
||||
[mediapipe.SplitVectorCalculatorOptions.ext] {
|
||||
ranges: { begin: 0 end: 1 }
|
||||
ranges: { begin: 2 end: 3 }
|
||||
ranges: { begin: 4 end: 5 }
|
||||
combine_outputs: true
|
||||
}
|
||||
}
|
||||
}
|
||||
)");
|
||||
std::vector<Packet> range_0_packets;
|
||||
tool::AddVectorSink("range_0", &graph_config, &range_0_packets);
|
||||
|
||||
// Run the graph.
|
||||
CalculatorGraph graph;
|
||||
MP_ASSERT_OK(graph.Initialize(graph_config));
|
||||
MP_ASSERT_OK(graph.StartRun({}));
|
||||
MP_ASSERT_OK(graph.AddPacketToInputStream(
|
||||
"landmarks_in", Adopt(input_landmarks_.release()).At(Timestamp(0))));
|
||||
// Wait until the calculator finishes processing.
|
||||
MP_ASSERT_OK(graph.WaitUntilIdle());
|
||||
|
||||
std::vector<int> input_begin_indices = {0, 2, 4};
|
||||
std::vector<int> input_end_indices = {1, 3, 5};
|
||||
ValidateCombinedListOutput(range_0_packets, /*expected_elements=*/3,
|
||||
input_begin_indices, input_end_indices);
|
||||
|
||||
// Fully close the graph at the end.
|
||||
MP_ASSERT_OK(graph.CloseInputStream("landmarks_in"));
|
||||
MP_ASSERT_OK(graph.WaitUntilDone());
|
||||
}
|
||||
|
||||
TEST_F(SplitNormalizedLandmarkListCalculatorTest,
|
||||
ElementOnlyDisablesVectorOutputs) {
|
||||
// Prepare a graph to use the SplitNormalizedLandmarkListCalculator.
|
||||
CalculatorGraphConfig graph_config =
|
||||
::mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(
|
||||
R"(
|
||||
input_stream: "landmarks_in"
|
||||
node {
|
||||
calculator: "SplitNormalizedLandmarkListCalculator"
|
||||
input_stream: "landmarks_in"
|
||||
output_stream: "range_0"
|
||||
output_stream: "range_1"
|
||||
output_stream: "range_2"
|
||||
options {
|
||||
[mediapipe.SplitVectorCalculatorOptions.ext] {
|
||||
ranges: { begin: 0 end: 1 }
|
||||
ranges: { begin: 1 end: 4 }
|
||||
ranges: { begin: 4 end: 5 }
|
||||
element_only: true
|
||||
}
|
||||
}
|
||||
}
|
||||
)");
|
||||
|
||||
// Run the graph.
|
||||
CalculatorGraph graph;
|
||||
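// The graph should fail to initialize because element_only is true but the
// second range ({ begin: 1 end: 4 }) spans more than one element.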
ASSERT_FALSE(graph.Initialize(graph_config).ok());
|
||||
}
|
||||
|
||||
} // namespace mediapipe
|
|
@ -376,13 +376,12 @@ REGISTER_CALCULATOR(ImageTransformationCalculator);
|
|||
|
||||
::mediapipe::Status ImageTransformationCalculator::RenderCpu(
|
||||
CalculatorContext* cc) {
|
||||
int input_width = cc->Inputs().Tag("IMAGE").Get<ImageFrame>().Width();
|
||||
int input_height = cc->Inputs().Tag("IMAGE").Get<ImageFrame>().Height();
|
||||
|
||||
const auto& input_img = cc->Inputs().Tag("IMAGE").Get<ImageFrame>();
|
||||
cv::Mat input_mat = formats::MatView(&input_img);
|
||||
cv::Mat scaled_mat;
|
||||
|
||||
const int input_width = input_img.Width();
|
||||
const int input_height = input_img.Height();
|
||||
if (!output_height_ || !output_width_) {
|
||||
output_height_ = input_height;
|
||||
output_width_ = input_width;
|
||||
|
@ -455,8 +454,9 @@ REGISTER_CALCULATOR(ImageTransformationCalculator);
|
|||
::mediapipe::Status ImageTransformationCalculator::RenderGpu(
|
||||
CalculatorContext* cc) {
|
||||
#if !defined(MEDIAPIPE_DISABLE_GPU)
|
||||
int input_width = cc->Inputs().Tag("IMAGE_GPU").Get<GpuBuffer>().width();
|
||||
int input_height = cc->Inputs().Tag("IMAGE_GPU").Get<GpuBuffer>().height();
|
||||
const auto& input = cc->Inputs().Tag("IMAGE_GPU").Get<GpuBuffer>();
|
||||
const int input_width = input.width();
|
||||
const int input_height = input.height();
|
||||
|
||||
int output_width;
|
||||
int output_height;
|
||||
|
@ -472,7 +472,6 @@ REGISTER_CALCULATOR(ImageTransformationCalculator);
|
|||
.Add(padding.release(), cc->InputTimestamp());
|
||||
}
|
||||
|
||||
const auto& input = cc->Inputs().Tag("IMAGE_GPU").Get<GpuBuffer>();
|
||||
QuadRenderer* renderer = nullptr;
|
||||
GlTexture src1;
|
||||
|
||||
|
|
|
@ -244,7 +244,7 @@ class PackMediaSequenceCalculator : public CalculatorBase {
|
|||
::mediapipe::Status VerifySequence() {
|
||||
std::string error_msg = "Missing features - ";
|
||||
bool all_present = true;
|
||||
for (auto iter : features_present_) {
|
||||
for (const auto& iter : features_present_) {
|
||||
if (!iter.second) {
|
||||
all_present = false;
|
||||
absl::StrAppend(&error_msg, iter.first, ", ");
|
||||
|
|
|
@ -126,7 +126,7 @@ class TensorFlowSessionFromSavedModelCalculator : public CalculatorBase {
|
|||
// Set user specified tags properly.
|
||||
// If no tags are specified, tensorflow::kSavedModelTagServe is used by default.
|
||||
std::unordered_set<std::string> tags_set;
|
||||
for (std::string tag : options.saved_model_tag()) {
|
||||
for (const std::string& tag : options.saved_model_tag()) {
|
||||
tags_set.insert(tag);
|
||||
}
|
||||
if (tags_set.empty()) {
|
||||
|
|
|
@ -121,7 +121,7 @@ class TensorFlowSessionFromSavedModelGenerator : public PacketGenerator {
|
|||
// Set user specified tags properly.
|
||||
// If no tags are specified, tensorflow::kSavedModelTagServe is used by default.
|
||||
std::unordered_set<std::string> tags_set;
|
||||
for (std::string tag : options.saved_model_tag()) {
|
||||
for (const std::string& tag : options.saved_model_tag()) {
|
||||
tags_set.insert(tag);
|
||||
}
|
||||
if (tags_set.empty()) {
|
||||
|
|
|
@ -513,6 +513,7 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator);
|
|||
if (gpu_inference_) {
|
||||
#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE)
|
||||
MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext([this]() -> Status {
|
||||
interpreter_ = nullptr;
|
||||
delegate_ = nullptr;
|
||||
for (int i = 0; i < gpu_data_in_.size(); ++i) {
|
||||
gpu_data_in_[i].reset();
|
||||
|
@ -523,6 +524,7 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator);
|
|||
return ::mediapipe::OkStatus();
|
||||
}));
|
||||
#elif defined(MEDIAPIPE_IOS)
|
||||
interpreter_ = nullptr;
|
||||
delegate_ = nullptr;
|
||||
for (int i = 0; i < gpu_data_in_.size(); ++i) {
|
||||
gpu_data_in_[i].reset();
|
||||
|
@ -532,6 +534,7 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator);
|
|||
}
|
||||
#endif
|
||||
} else {
|
||||
interpreter_ = nullptr;
|
||||
delegate_ = nullptr;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -99,7 +99,7 @@ void ConvertAnchorsToRawValues(const std::vector<Anchor>& anchors,
|
|||
int num_boxes, float* raw_anchors) {
|
||||
CHECK_EQ(anchors.size(), num_boxes);
|
||||
int box = 0;
|
||||
for (auto anchor : anchors) {
|
||||
for (const auto& anchor : anchors) {
|
||||
raw_anchors[box * kNumCoordsPerBox + 0] = anchor.y_center();
|
||||
raw_anchors[box * kNumCoordsPerBox + 1] = anchor.x_center();
|
||||
raw_anchors[box * kNumCoordsPerBox + 2] = anchor.h();
|
||||
|
|
|
@ -71,8 +71,10 @@ REGISTER_CALCULATOR(DetectionLabelIdToTextCalculator);
|
|||
const auto& options =
|
||||
cc->Options<::mediapipe::DetectionLabelIdToTextCalculatorOptions>();
|
||||
|
||||
if (options.has_label_map_path()) {
|
||||
std::string string_path;
|
||||
ASSIGN_OR_RETURN(string_path, PathToResourceAsFile(options.label_map_path()));
|
||||
ASSIGN_OR_RETURN(string_path,
|
||||
PathToResourceAsFile(options.label_map_path()));
|
||||
std::string label_map_string;
|
||||
MP_RETURN_IF_ERROR(file::GetContents(string_path, &label_map_string));
|
||||
|
||||
|
@ -82,6 +84,11 @@ REGISTER_CALCULATOR(DetectionLabelIdToTextCalculator);
|
|||
while (std::getline(stream, line)) {
|
||||
label_map_[i++] = line;
|
||||
}
|
||||
} else {
|
||||
for (int i = 0; i < options.label_size(); ++i) {
|
||||
label_map_[i] = options.label(i);
|
||||
}
|
||||
}
|
||||
return ::mediapipe::OkStatus();
|
||||
}
|
||||
|
||||
|
|
|
@ -25,4 +25,10 @@ message DetectionLabelIdToTextCalculatorOptions {
|
|||
|
||||
// Path to a label map file for getting the actual name of detected classes.
|
||||
optional string label_map_path = 1;
|
||||
|
||||
// Alternative way to specify the label map:
|
||||
// label: "label for id 0"
|
||||
// label: "label for id 1"
|
||||
// ...
|
||||
repeated string label = 2;
|
||||
}
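// A sketch of supplying labels inline in a graph config (stream names here are
// placeholders, not taken from this change):
//   node {
//     calculator: "DetectionLabelIdToTextCalculator"
//     input_stream: "input_detections"
//     output_stream: "labeled_detections"
//     options {
//       [mediapipe.DetectionLabelIdToTextCalculatorOptions.ext] {
//         label: "label for id 0"
//         label: "label for id 1"
//       }
//     }
//   }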
|
||||
|
|
|
@ -186,6 +186,7 @@ class LandmarksToRenderDataCalculator : public CalculatorBase {
|
|||
|
||||
private:
|
||||
LandmarksToRenderDataCalculatorOptions options_;
|
||||
std::vector<int> landmark_connections_;
|
||||
};
|
||||
REGISTER_CALCULATOR(LandmarksToRenderDataCalculator);
|
||||
|
||||
|
@ -217,6 +218,14 @@ REGISTER_CALCULATOR(LandmarksToRenderDataCalculator);
|
|||
cc->SetOffset(TimestampDiff(0));
|
||||
options_ = cc->Options<LandmarksToRenderDataCalculatorOptions>();
|
||||
|
||||
// Parse landmarks connections to a vector.
|
||||
RET_CHECK_EQ(options_.landmark_connections_size() % 2, 0)
|
||||
<< "Number of entries in landmark connections must be a multiple of 2";
|
||||
|
||||
for (int i = 0; i < options_.landmark_connections_size(); ++i) {
|
||||
landmark_connections_.push_back(options_.landmark_connections(i));
|
||||
}
|
||||
|
||||
return ::mediapipe::OkStatus();
|
||||
}
|
||||
|
||||
|
@ -236,14 +245,6 @@ REGISTER_CALCULATOR(LandmarksToRenderDataCalculator);
|
|||
thickness *= render_scale;
|
||||
}
|
||||
|
||||
// Parse landmarks connections to a vector.
|
||||
RET_CHECK_EQ(options_.landmark_connections_size() % 2, 0)
|
||||
<< "Number of entries in landmark connections must be a multiple of 2";
|
||||
std::vector<int> landmark_connections;
|
||||
for (int i = 0; i < options_.landmark_connections_size(); i += 1) {
|
||||
landmark_connections.push_back(options_.landmark_connections(i));
|
||||
}
|
||||
|
||||
if (cc->Inputs().HasTag(kLandmarksTag)) {
|
||||
const LandmarkList& landmarks =
|
||||
cc->Inputs().Tag(kLandmarksTag).Get<LandmarkList>();
|
||||
|
@ -252,6 +253,15 @@ REGISTER_CALCULATOR(LandmarksToRenderDataCalculator);
|
|||
}
|
||||
// Only change rendering if there are actually z values other than 0.
|
||||
visualize_depth &= ((z_max - z_min) > 1e-3);
|
||||
if (visualize_depth) {
|
||||
AddConnectionsWithDepth<LandmarkList, Landmark>(
|
||||
landmarks, landmark_connections_, thickness, /*normalized=*/false,
|
||||
z_min, z_max, render_data.get());
|
||||
} else {
|
||||
AddConnections<LandmarkList, Landmark>(
|
||||
landmarks, landmark_connections_, options_.connection_color(),
|
||||
thickness, /*normalized=*/false, render_data.get());
|
||||
}
|
||||
for (int i = 0; i < landmarks.landmark_size(); ++i) {
|
||||
const Landmark& landmark = landmarks.landmark(i);
|
||||
auto* landmark_data_render = AddPointRenderData(
|
||||
|
@ -265,15 +275,6 @@ REGISTER_CALCULATOR(LandmarksToRenderDataCalculator);
|
|||
landmark_data->set_x(landmark.x());
|
||||
landmark_data->set_y(landmark.y());
|
||||
}
|
||||
if (visualize_depth) {
|
||||
AddConnectionsWithDepth<LandmarkList, Landmark>(
|
||||
landmarks, landmark_connections, thickness, /*normalized=*/false,
|
||||
z_min, z_max, render_data.get());
|
||||
} else {
|
||||
AddConnections<LandmarkList, Landmark>(
|
||||
landmarks, landmark_connections, options_.connection_color(),
|
||||
thickness, /*normalized=*/false, render_data.get());
|
||||
}
|
||||
}
|
||||
|
||||
if (cc->Inputs().HasTag(kNormLandmarksTag)) {
|
||||
|
@ -285,6 +286,15 @@ REGISTER_CALCULATOR(LandmarksToRenderDataCalculator);
|
|||
}
|
||||
// Only change rendering if there are actually z values other than 0.
|
||||
visualize_depth &= ((z_max - z_min) > 1e-3);
|
||||
if (visualize_depth) {
|
||||
AddConnectionsWithDepth<NormalizedLandmarkList, NormalizedLandmark>(
|
||||
landmarks, landmark_connections_, thickness, /*normalized=*/true,
|
||||
z_min, z_max, render_data.get());
|
||||
} else {
|
||||
AddConnections<NormalizedLandmarkList, NormalizedLandmark>(
|
||||
landmarks, landmark_connections_, options_.connection_color(),
|
||||
thickness, /*normalized=*/true, render_data.get());
|
||||
}
|
||||
for (int i = 0; i < landmarks.landmark_size(); ++i) {
|
||||
const NormalizedLandmark& landmark = landmarks.landmark(i);
|
||||
auto* landmark_data_render = AddPointRenderData(
|
||||
|
@ -298,15 +308,6 @@ REGISTER_CALCULATOR(LandmarksToRenderDataCalculator);
|
|||
landmark_data->set_x(landmark.x());
|
||||
landmark_data->set_y(landmark.y());
|
||||
}
|
||||
if (visualize_depth) {
|
||||
AddConnectionsWithDepth<NormalizedLandmarkList, NormalizedLandmark>(
|
||||
landmarks, landmark_connections, thickness, /*normalized=*/true,
|
||||
z_min, z_max, render_data.get());
|
||||
} else {
|
||||
AddConnections<NormalizedLandmarkList, NormalizedLandmark>(
|
||||
landmarks, landmark_connections, options_.connection_color(),
|
||||
thickness, /*normalized=*/true, render_data.get());
|
||||
}
|
||||
}
|
||||
|
||||
cc->Outputs()
|
||||
|
|
|
@ -73,6 +73,18 @@ can be easily adapted to run on CPU v.s. GPU.
|
|||
* [Android](./face_detection_mobile_cpu.md)
|
||||
* [iOS](./face_detection_mobile_cpu.md)
|
||||
|
||||
### Face Mesh with GPU
|
||||
|
||||
[Face Mesh with GPU](./face_mesh_mobile_gpu.md) illustrates how to run the
|
||||
MediaPipe Face Mesh pipeline to perform 3D face landmark estimation in real-time
|
||||
on mobile devices, utilizing GPU acceleration. The pipeline is based on
|
||||
["Real-time Facial Surface Geometry from Monocular Video on Mobile GPUs"](https://arxiv.org/abs/1907.06724),
|
||||
and details of the underlying ML models are described in the
|
||||
[model card](https://drive.google.com/file/d/1VFC_wIpw4O7xBOiTgUldl79d9LA-LsnA/view).
|
||||
|
||||
* [Android](./face_mesh_mobile_gpu.md)
|
||||
* [iOS](./face_mesh_mobile_gpu.md)
|
||||
|
||||
### Hand Detection with GPU
|
||||
|
||||
[Hand Detection with GPU](./hand_detection_mobile_gpu.md) illustrates how to use
|
||||
|
@ -84,7 +96,7 @@ MediaPipe with a TFLite model for hand detection in a GPU-accelerated pipeline.
|
|||
### Hand Tracking with GPU
|
||||
|
||||
[Hand Tracking with GPU](./hand_tracking_mobile_gpu.md) illustrates how to use
|
||||
MediaPipe with a TFLite model for hand tracking in a GPU-accelerated pipeline.
|
||||
MediaPipe with TFLite models for hand tracking in a GPU-accelerated pipeline.
|
||||
|
||||
* [Android](./hand_tracking_mobile_gpu.md)
|
||||
* [iOS](./hand_tracking_mobile_gpu.md)
|
||||
|
@ -92,7 +104,7 @@ MediaPipe with a TFLite model for hand tracking in a GPU-accelerated pipeline.
|
|||
### Multi-Hand Tracking with GPU
|
||||
|
||||
[Multi-Hand Tracking with GPU](./multi_hand_tracking_mobile_gpu.md) illustrates
|
||||
how to use MediaPipe with a TFLite model for multi-hand tracking in a
|
||||
how to use MediaPipe with TFLite models for multi-hand tracking in a
|
||||
GPU-accelerated pipeline.
|
||||
|
||||
* [Android](./multi_hand_tracking_mobile_gpu.md)
|
||||
|
@ -150,11 +162,20 @@ GPU with live video from a webcam.
|
|||
* [Desktop GPU](./face_detection_desktop.md)
|
||||
* [Desktop CPU](./face_detection_desktop.md)
|
||||
|
||||
### Face Mesh on Desktop with Webcam
|
||||
|
||||
[Face Mesh on Desktop with Webcam](./face_mesh_desktop.md) shows how to run the
|
||||
MediaPipe Face Mesh pipeline to perform 3D face landmark estimation in real-time
|
||||
on desktop with webcam input.
|
||||
|
||||
* [Desktop GPU](./face_mesh_desktop.md)
|
||||
* [Desktop CPU](./face_mesh_desktop.md)
|
||||
|
||||
|
||||
### Hand Tracking on Desktop with Webcam
|
||||
|
||||
[Hand Tracking on Desktop with Webcam](./hand_tracking_desktop.md) shows how to
|
||||
use MediaPipe with a TFLite model for hand tracking on desktop using CPU or GPU
|
||||
use MediaPipe with TFLite models for hand tracking on desktop using CPU or GPU
|
||||
with live video from a webcam.
|
||||
|
||||
* [Desktop GPU](./hand_tracking_desktop.md)
|
||||
|
@ -163,8 +184,8 @@ with live video from a webcam.
|
|||
### Multi-Hand Tracking on Desktop with Webcam
|
||||
|
||||
[Multi-Hand Tracking on Desktop with Webcam](./multi_hand_tracking_desktop.md)
|
||||
shows how to use MediaPipe with a TFLite model for multi-hand tracking on
|
||||
desktop using CPU or GPU with live video from a webcam.
|
||||
shows how to use MediaPipe with TFLite models for multi-hand tracking on desktop
|
||||
using CPU or GPU with live video from a webcam.
|
||||
|
||||
* [Desktop GPU](./multi_hand_tracking_desktop.md)
|
||||
* [Desktop CPU](./multi_hand_tracking_desktop.md)
|
||||
|
|
|
@ -4,6 +4,8 @@ This doc focuses on the
|
|||
[example graph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/face_detection/face_detection_mobile_cpu.pbtxt)
|
||||
that performs face detection with TensorFlow Lite on CPU.
|
||||
|
||||
![face_detection_android_gpu_gif](images/mobile/face_detection_android_gpu.gif)
|
||||
|
||||
## Android
|
||||
|
||||
[Source](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facedetectioncpu)
|
||||
|
|
mediapipe/docs/face_mesh_desktop.md (new file)
|
@ -0,0 +1,58 @@
|
|||
## Face Mesh on Desktop with Webcam
|
||||
|
||||
This doc focuses on running the **MediaPipe Face Mesh** pipeline to perform 3D
|
||||
face landmark estimation in real-time on desktop with webcam input. The pipeline
|
||||
internally incorporates TensorFlow Lite models. To learn more about the models,
|
||||
please refer to the model
|
||||
[README file](https://github.com/google/mediapipe/tree/master/mediapipe/models/README.md#face-mesh).
|
||||
Moreover, if you are interested in running the same pipeline on Android/iOS,
|
||||
please see [Face Mesh on Android/iOS](face_mesh_mobile_gpu.md).
|
||||
|
||||
- [Face Mesh on Desktop with Webcam (CPU)](#face-mesh-on-desktop-with-webcam-cpu)
|
||||
|
||||
- [Face Mesh on Desktop with Webcam (GPU)](#face-mesh-on-desktop-with-webcam-gpu)
|
||||
|
||||
Note: Desktop GPU works only on Linux. Mesa drivers need to be installed. Please
|
||||
see
|
||||
[step 4 of "Installing on Debian and Ubuntu" in the installation guide](./install.md).
|
||||
|
||||
Note: If MediaPipe depends on OpenCV 2, please see the [known issues with OpenCV 2](#known-issues-with-opencv-2) section.
|
||||
|
||||
### Face Mesh on Desktop with Webcam (CPU)
|
||||
|
||||
To build and run Face Mesh on desktop with webcam (CPU), run:
|
||||
|
||||
```bash
|
||||
$ bazel build -c opt --define MEDIAPIPE_DISABLE_GPU=1 \
|
||||
mediapipe/examples/desktop/face_mesh:face_mesh_cpu
|
||||
|
||||
# It should print:
|
||||
# Target //mediapipe/examples/desktop/face_mesh:face_mesh_cpu up-to-date:
|
||||
# bazel-bin/mediapipe/examples/desktop/face_mesh/face_mesh_cpu
|
||||
|
||||
# This will open up your webcam as long as it is connected. Errors are likely
|
||||
# due to your webcam not being accessible.
|
||||
$ GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/face_mesh/face_mesh_cpu \
|
||||
--calculator_graph_config_file=mediapipe/graphs/face_mesh/face_mesh_desktop_live.pbtxt
|
||||
```
|
||||
|
||||
### Face Mesh on Desktop with Webcam (GPU)
|
||||
|
||||
Note: please first [check that your GPU is supported](gpu.md#desktop-gpu-linux).
|
||||
|
||||
To build and run Face Mesh on desktop with webcam (GPU), run:
|
||||
|
||||
```bash
|
||||
# This currently works only on Linux
|
||||
$ bazel build -c opt --copt -DMESA_EGL_NO_X11_HEADERS --copt -DEGL_NO_X11 \
|
||||
mediapipe/examples/desktop/face_mesh:face_mesh_gpu
|
||||
|
||||
# It should print:
|
||||
# Target //mediapipe/examples/desktop/face_mesh:face_mesh_gpu up-to-date:
|
||||
# bazel-bin/mediapipe/examples/desktop/face_mesh/face_mesh_gpu
|
||||
|
||||
# This will open up your webcam as long as it is connected. Errors are likely
|
||||
# due to your webcam not being accessible, or GPU drivers not being set up properly.
|
||||
$ GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/face_mesh/face_mesh_gpu \
|
||||
--calculator_graph_config_file=mediapipe/graphs/face_mesh/face_mesh_desktop_live_gpu.pbtxt
|
||||
```
|
mediapipe/docs/face_mesh_mobile_gpu.md (new file)
|
@ -0,0 +1,90 @@
|
|||
# Face Mesh (GPU)
|
||||
|
||||
This example focuses on running the **MediaPipe Face Mesh** pipeline on mobile
|
||||
devices to perform 3D face landmark estimation in real-time, utilizing GPU
|
||||
acceleration. The pipeline internally incorporates TensorFlow Lite models. To
|
||||
learn more about the models, please refer to the model
|
||||
[README file](https://github.com/google/mediapipe/tree/master/mediapipe/models/README.md#face-mesh).
|
||||
The pipeline is related to the
|
||||
[face detection example](./face_detection_mobile_gpu.md) as it internally
|
||||
utilizes face detection and performs landmark estimation only within the
|
||||
detected region.
|
||||
|
||||
![face_mesh_android_gpu.gif](images/mobile/face_mesh_android_gpu.gif)
|
||||
|
||||
**MediaPipe Face Mesh** generates 468 3D face landmarks in real-time on mobile
|
||||
devices. In the visualization above, the red dots represent the landmarks, and
|
||||
the green lines connecting landmarks illustrate the contours around the eyes,
|
||||
eyebrows, lips and the entire face.
|
||||
|
||||
## Android
|
||||
|
||||
[Source](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu)
|
||||
|
||||
A prebuilt arm64 APK can be
|
||||
[downloaded here](https://drive.google.com/open?id=1pUmd7CXCL_onYMbsZo5p91cH0oNnR4gi).
|
||||
|
||||
To build the app yourself, run:
|
||||
|
||||
```bash
|
||||
bazel build -c opt --config=android_arm64 mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu
|
||||
```
|
||||
|
||||
Once the app is built, install it on an Android device with:
|
||||
|
||||
```bash
|
||||
adb install bazel-bin/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu/facemeshgpu.apk
|
||||
```
|
||||
|
||||
## iOS
|
||||
|
||||
[Source](https://github.com/google/mediapipe/tree/master/mediapipe/examples/ios/facemeshgpu).
|
||||
|
||||
See the general [instructions](./mediapipe_ios_setup.md) for building iOS
|
||||
examples and generating an Xcode project. This will be the FaceMeshGpuApp
|
||||
target.
|
||||
|
||||
To build on the command line:
|
||||
|
||||
```bash
|
||||
bazel build -c opt --config=ios_arm64 mediapipe/examples/ios/facemeshgpu:FaceMeshGpuApp
|
||||
```
|
||||
|
||||
## Graph
|
||||
|
||||
The face mesh [main graph](#main-graph) utilizes a
|
||||
[face landmark subgraph](#face-landmark-subgraph) from the
|
||||
[face landmark module](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_landmark),
|
||||
and renders using a dedicated [face renderer subgraph](#face-renderer-subgraph).
|
||||
|
||||
The subgraphs show up in the main graph visualization as nodes colored in
|
||||
purple, and the subgraph itself can also be visualized just like a regular
|
||||
graph. For more information on how to visualize a graph that includes subgraphs,
|
||||
see the Visualizing Subgraphs section in the
|
||||
[visualizer documentation](./visualizer.md).
|
||||
|
||||
### Main Graph
|
||||
|
||||
![face_mesh_mobile_graph](images/mobile/face_mesh_mobile.png)
|
||||
|
||||
[Source pbtxt file](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/face_mesh/face_mesh_mobile.pbtxt)
|
||||
|
||||
### Face Landmark Subgraph
|
||||
|
||||
The
|
||||
[face landmark module](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_landmark)
|
||||
contains several subgraphs that can be used to detect and track face landmarks.
|
||||
In particular, in this example the
|
||||
[FaceLandmarkFrontGPU](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_landmark/face_landmark_front_gpu.pbtxt)
|
||||
subgraph, suitable for images from front-facing cameras (i.e., selfie images)
|
||||
and utilizing GPU acceleration, is selected.
|
||||
|
||||
![face_landmark_front_gpu_subgraph](images/mobile/face_landmark_front_gpu_subgraph.png)
|
||||
|
||||
[Source pbtxt file](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_landmark/face_landmark_front_gpu.pbtxt)
|
||||
|
||||
### Face Renderer Subgraph
|
||||
|
||||
![face_renderer_gpu_subgraph](images/mobile/face_renderer_gpu_subgraph.png)
|
||||
|
||||
[Source pbtxt file](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/face_mesh/subgraphs/face_renderer_gpu.pbtxt)
|
Binary image changes: added mediapipe/docs/images/logo_horizontal_black.png, mediapipe/docs/images/logo_horizontal_white.png, mediapipe/docs/images/mobile/face_mesh_android_gpu.gif, mediapipe/docs/images/mobile/face_mesh_android_gpu_small.gif, mediapipe/docs/images/mobile/face_mesh_mobile.png, and mediapipe/docs/images/mobile/face_renderer_gpu_subgraph.png; several existing image assets under mediapipe/docs/images/ were also updated.
|
@ -12,7 +12,7 @@ We show the object detection demo with both TensorFlow model and TensorFlow Lite
|
|||
|
||||
- [TensorFlow Object Detection Demo](#tensorflow-object-detection-demo)
|
||||
- [TensorFlow Lite Object Detection Demo](#tensorflow-lite-object-detection-demo)
|
||||
- [TensorFlow Lite Object Detection Demo with Webcam (CPU)](#tensorflow-lite-object-detection-demo)
|
||||
- [TensorFlow Lite Object Detection Demo with Webcam (CPU)](#tensorflow-lite-object-detection-demo-with-webcam-cpu)
|
||||
|
||||
Note: If MediaPipe depends on OpenCV 2, please see the [known issues with OpenCV 2](#known-issues-with-opencv-2) section.
|
||||
|
||||
|
|
|
@ -0,0 +1,33 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
|
||||
package="com.google.mediapipe.apps.facemeshgpu">
|
||||
|
||||
<uses-sdk
|
||||
android:minSdkVersion="21"
|
||||
android:targetSdkVersion="27" />
|
||||
|
||||
<!-- For using the camera -->
|
||||
<uses-permission android:name="android.permission.CAMERA" />
|
||||
<uses-feature android:name="android.hardware.camera" />
|
||||
<uses-feature android:name="android.hardware.camera.autofocus" />
|
||||
<!-- For MediaPipe -->
|
||||
<uses-feature android:glEsVersion="0x00020000" android:required="true" />
|
||||
|
||||
|
||||
<application
|
||||
android:allowBackup="true"
|
||||
android:label="@string/app_name"
|
||||
android:supportsRtl="true"
|
||||
android:theme="@style/AppTheme">
|
||||
<activity
|
||||
android:name=".MainActivity"
|
||||
android:exported="true"
|
||||
android:screenOrientation="portrait">
|
||||
<intent-filter>
|
||||
<action android:name="android.intent.action.MAIN" />
|
||||
<category android:name="android.intent.category.LAUNCHER" />
|
||||
</intent-filter>
|
||||
</activity>
|
||||
</application>
|
||||
|
||||
</manifest>
|
|
@ -0,0 +1,82 @@
|
|||
# Copyright 2019 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
licenses(["notice"]) # Apache 2.0
|
||||
|
||||
package(default_visibility = ["//visibility:private"])
|
||||
|
||||
cc_binary(
|
||||
name = "libmediapipe_jni.so",
|
||||
linkshared = 1,
|
||||
linkstatic = 1,
|
||||
deps = [
|
||||
"//mediapipe/graphs/face_mesh:mobile_calculators",
|
||||
"//mediapipe/java/com/google/mediapipe/framework/jni:mediapipe_framework_jni",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "mediapipe_jni_lib",
|
||||
srcs = [":libmediapipe_jni.so"],
|
||||
alwayslink = 1,
|
||||
)
|
||||
|
||||
# Maps the binary graph to an alias (e.g., the app name) for convenience so that the alias can be
|
||||
# easily incorporated into the app via, for example,
|
||||
# MainActivity.BINARY_GRAPH_NAME = "appname.binarypb".
|
||||
genrule(
|
||||
name = "binary_graph",
|
||||
srcs = ["//mediapipe/graphs/face_mesh:face_mesh_mobile_gpu_binary_graph"],
|
||||
outs = ["facemeshgpu.binarypb"],
|
||||
cmd = "cp $< $@",
|
||||
)
|
||||
|
||||
android_library(
|
||||
name = "mediapipe_lib",
|
||||
srcs = glob(["*.java"]),
|
||||
assets = [
|
||||
":binary_graph",
|
||||
"//mediapipe/modules/face_landmark:face_landmark.tflite",
|
||||
"//mediapipe/modules/face_detection:face_detection_front.tflite",
|
||||
],
|
||||
assets_dir = "",
|
||||
manifest = "AndroidManifest.xml",
|
||||
resource_files = glob(["res/**"]),
|
||||
deps = [
|
||||
":mediapipe_jni_lib",
|
||||
"//mediapipe/framework/formats:landmark_java_proto_lite",
|
||||
"//mediapipe/java/com/google/mediapipe/components:android_camerax_helper",
|
||||
"//mediapipe/java/com/google/mediapipe/components:android_components",
|
||||
"//mediapipe/java/com/google/mediapipe/framework:android_framework",
|
||||
"//mediapipe/java/com/google/mediapipe/glutil",
|
||||
"//third_party:androidx_appcompat",
|
||||
"//third_party:androidx_constraint_layout",
|
||||
"//third_party:androidx_legacy_support_v4",
|
||||
"//third_party:androidx_recyclerview",
|
||||
"//third_party:opencv",
|
||||
"@maven//:androidx_concurrent_concurrent_futures",
|
||||
"@maven//:androidx_lifecycle_lifecycle_common",
|
||||
"@maven//:com_google_guava_guava",
|
||||
],
|
||||
)
|
||||
|
||||
android_binary(
|
||||
name = "facemeshgpu",
|
||||
manifest = "AndroidManifest.xml",
|
||||
manifest_values = {"applicationId": "com.google.mediapipe.apps.facemeshgpu"},
|
||||
multidex = "native",
|
||||
deps = [
|
||||
":mediapipe_lib",
|
||||
],
|
||||
)
|
|
@ -0,0 +1,232 @@
|
|||
// Copyright 2019 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package com.google.mediapipe.apps.facemeshgpu;
|
||||
|
||||
import android.graphics.SurfaceTexture;
|
||||
import android.os.Bundle;
|
||||
import androidx.appcompat.app.AppCompatActivity;
|
||||
import android.util.Log;
|
||||
import android.util.Size;
|
||||
import android.view.SurfaceHolder;
|
||||
import android.view.SurfaceView;
|
||||
import android.view.View;
|
||||
import android.view.ViewGroup;
|
||||
import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark;
|
||||
import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmarkList;
|
||||
import com.google.mediapipe.components.CameraHelper;
|
||||
import com.google.mediapipe.components.CameraXPreviewHelper;
|
||||
import com.google.mediapipe.components.ExternalTextureConverter;
|
||||
import com.google.mediapipe.components.FrameProcessor;
|
||||
import com.google.mediapipe.components.PermissionHelper;
|
||||
import com.google.mediapipe.framework.AndroidAssetUtil;
|
||||
import com.google.mediapipe.framework.AndroidPacketCreator;
|
||||
import com.google.mediapipe.framework.Packet;
|
||||
import com.google.mediapipe.framework.PacketGetter;
|
||||
import com.google.mediapipe.glutil.EglManager;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/** Main activity of MediaPipe example apps. */
|
||||
public class MainActivity extends AppCompatActivity {
|
||||
private static final String TAG = "MainActivity";
|
||||
|
||||
private static final String BINARY_GRAPH_NAME = "facemeshgpu.binarypb";
|
||||
private static final String INPUT_VIDEO_STREAM_NAME = "input_video";
|
||||
private static final String INPUT_NUM_FACES_SIDE_PACKET_NAME = "num_faces";
|
||||
private static final String OUTPUT_VIDEO_STREAM_NAME = "output_video";
|
||||
private static final String OUTPUT_LANDMARKS_STREAM_NAME = "multi_face_landmarks";
|
||||
private static final CameraHelper.CameraFacing CAMERA_FACING = CameraHelper.CameraFacing.FRONT;
|
||||
|
||||
// Max number of faces to detect/process.
|
||||
private static final int NUM_FACES = 1;
|
||||
|
||||
// Flips the camera-preview frames vertically before sending them into FrameProcessor to be
|
||||
// processed in a MediaPipe graph, and flips the processed frames back when they are displayed.
|
||||
// This is needed because OpenGL represents images assuming the image origin is at the bottom-left
|
||||
// corner, whereas MediaPipe in general assumes the image origin is at top-left.
|
||||
private static final boolean FLIP_FRAMES_VERTICALLY = true;
|
||||
|
||||
static {
|
||||
// Load all native libraries needed by the app.
|
||||
System.loadLibrary("mediapipe_jni");
|
||||
System.loadLibrary("opencv_java3");
|
||||
}
|
||||
|
||||
// {@link SurfaceTexture} where the camera-preview frames can be accessed.
|
||||
private SurfaceTexture previewFrameTexture;
|
||||
// {@link SurfaceView} that displays the camera-preview frames processed by a MediaPipe graph.
|
||||
private SurfaceView previewDisplayView;
|
||||
|
||||
// Creates and manages an {@link EGLContext}.
|
||||
private EglManager eglManager;
|
||||
// Sends camera-preview frames into a MediaPipe graph for processing, and displays the processed
|
||||
// frames onto a {@link Surface}.
|
||||
private FrameProcessor processor;
|
||||
// Converts the GL_TEXTURE_EXTERNAL_OES texture from Android camera into a regular texture to be
|
||||
// consumed by {@link FrameProcessor} and the underlying MediaPipe graph.
|
||||
private ExternalTextureConverter converter;
|
||||
|
||||
// Handles camera access via the {@link CameraX} Jetpack support library.
|
||||
private CameraXPreviewHelper cameraHelper;
|
||||
|
||||
@Override
|
||||
protected void onCreate(Bundle savedInstanceState) {
|
||||
super.onCreate(savedInstanceState);
|
||||
setContentView(R.layout.activity_main);
|
||||
|
||||
previewDisplayView = new SurfaceView(this);
|
||||
setupPreviewDisplayView();
|
||||
|
||||
// Initialize asset manager so that MediaPipe native libraries can access the app assets, e.g.,
|
||||
// binary graphs.
|
||||
AndroidAssetUtil.initializeNativeAssetManager(this);
|
||||
|
||||
eglManager = new EglManager(null);
|
||||
processor =
|
||||
new FrameProcessor(
|
||||
this,
|
||||
eglManager.getNativeContext(),
|
||||
BINARY_GRAPH_NAME,
|
||||
INPUT_VIDEO_STREAM_NAME,
|
||||
OUTPUT_VIDEO_STREAM_NAME);
|
||||
processor.getVideoSurfaceOutput().setFlipY(FLIP_FRAMES_VERTICALLY);
|
||||
|
||||
AndroidPacketCreator packetCreator = processor.getPacketCreator();
|
||||
Map<String, Packet> inputSidePackets = new HashMap<>();
|
||||
inputSidePackets.put(INPUT_NUM_FACES_SIDE_PACKET_NAME, packetCreator.createInt32(NUM_FACES));
|
||||
processor.setInputSidePackets(inputSidePackets);
|
||||
|
||||
processor.addPacketCallback(
|
||||
OUTPUT_LANDMARKS_STREAM_NAME,
|
||||
(packet) -> {
|
||||
Log.d(TAG, "Received multi face landmarks packet.");
|
||||
List<NormalizedLandmarkList> multiFaceLandmarks =
|
||||
PacketGetter.getProtoVector(packet, NormalizedLandmarkList.parser());
|
||||
Log.d(
|
||||
TAG,
|
||||
"[TS:"
|
||||
+ packet.getTimestamp()
|
||||
+ "] "
|
||||
+ getMultiFaceLandmarksDebugString(multiFaceLandmarks));
|
||||
});
|
||||
|
||||
PermissionHelper.checkAndRequestCameraPermissions(this);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void onResume() {
|
||||
super.onResume();
|
||||
converter = new ExternalTextureConverter(eglManager.getContext());
|
||||
converter.setFlipY(FLIP_FRAMES_VERTICALLY);
|
||||
converter.setConsumer(processor);
|
||||
if (PermissionHelper.cameraPermissionsGranted(this)) {
|
||||
startCamera();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void onPause() {
|
||||
super.onPause();
|
||||
converter.close();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onRequestPermissionsResult(
|
||||
int requestCode, String[] permissions, int[] grantResults) {
|
||||
super.onRequestPermissionsResult(requestCode, permissions, grantResults);
|
||||
PermissionHelper.onRequestPermissionsResult(requestCode, permissions, grantResults);
|
||||
}
|
||||
|
||||
private void setupPreviewDisplayView() {
|
||||
previewDisplayView.setVisibility(View.GONE);
|
||||
ViewGroup viewGroup = findViewById(R.id.preview_display_layout);
|
||||
viewGroup.addView(previewDisplayView);
|
||||
|
||||
previewDisplayView
|
||||
.getHolder()
|
||||
.addCallback(
|
||||
new SurfaceHolder.Callback() {
|
||||
@Override
|
||||
public void surfaceCreated(SurfaceHolder holder) {
|
||||
processor.getVideoSurfaceOutput().setSurface(holder.getSurface());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void surfaceChanged(SurfaceHolder holder, int format, int width, int height) {
|
||||
// (Re-)Compute the ideal size of the camera-preview display (the area that the
|
||||
// camera-preview frames get rendered onto, potentially with scaling and rotation)
|
||||
// based on the size of the SurfaceView that contains the display.
|
||||
Size viewSize = new Size(width, height);
|
||||
Size displaySize = cameraHelper.computeDisplaySizeFromViewSize(viewSize);
|
||||
boolean isCameraRotated = cameraHelper.isCameraRotated();
|
||||
|
||||
// Connect the converter to the camera-preview frames as its input (via
|
||||
// previewFrameTexture), and configure the output width and height as the computed
|
||||
// display size.
|
||||
converter.setSurfaceTextureAndAttachToGLContext(
|
||||
previewFrameTexture,
|
||||
isCameraRotated ? displaySize.getHeight() : displaySize.getWidth(),
|
||||
isCameraRotated ? displaySize.getWidth() : displaySize.getHeight());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void surfaceDestroyed(SurfaceHolder holder) {
|
||||
processor.getVideoSurfaceOutput().setSurface(null);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
private void startCamera() {
|
||||
cameraHelper = new CameraXPreviewHelper();
|
||||
cameraHelper.setOnCameraStartedListener(
|
||||
surfaceTexture -> {
|
||||
previewFrameTexture = surfaceTexture;
|
||||
// Make the display view visible to start showing the preview. This triggers the
|
||||
// SurfaceHolder.Callback added to (the holder of) previewDisplayView.
|
||||
previewDisplayView.setVisibility(View.VISIBLE);
|
||||
});
|
||||
cameraHelper.startCamera(this, CAMERA_FACING, /*surfaceTexture=*/ null);
|
||||
}
|
||||
|
||||
private static String getMultiFaceLandmarksDebugString(
|
||||
List<NormalizedLandmarkList> multiFaceLandmarks) {
|
||||
if (multiFaceLandmarks.isEmpty()) {
|
||||
return "No face landmarks";
|
||||
}
|
||||
String multiFaceLandmarksStr = "Number of faces detected: " + multiFaceLandmarks.size() + "\n";
|
||||
int faceIndex = 0;
|
||||
for (NormalizedLandmarkList landmarks : multiFaceLandmarks) {
|
||||
multiFaceLandmarksStr +=
|
||||
"\t#Face landmarks for face[" + faceIndex + "]: " + landmarks.getLandmarkCount() + "\n";
|
||||
int landmarkIndex = 0;
|
||||
for (NormalizedLandmark landmark : landmarks.getLandmarkList()) {
|
||||
multiFaceLandmarksStr +=
|
||||
"\t\tLandmark ["
|
||||
+ landmarkIndex
|
||||
+ "]: ("
|
||||
+ landmark.getX()
|
||||
+ ", "
|
||||
+ landmark.getY()
|
||||
+ ", "
|
||||
+ landmark.getZ()
|
||||
+ ")\n";
|
||||
++landmarkIndex;
|
||||
}
|
||||
++faceIndex;
|
||||
}
|
||||
return multiFaceLandmarksStr;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,20 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<androidx.constraintlayout.widget.ConstraintLayout xmlns:android="http://schemas.android.com/apk/res/android"
|
||||
xmlns:app="http://schemas.android.com/apk/res-auto"
|
||||
xmlns:tools="http://schemas.android.com/tools"
|
||||
android:layout_width="match_parent"
|
||||
android:layout_height="match_parent">
|
||||
|
||||
<FrameLayout
|
||||
android:id="@+id/preview_display_layout"
|
||||
android:layout_width="fill_parent"
|
||||
android:layout_height="fill_parent"
|
||||
android:layout_weight="1">
|
||||
<TextView
|
||||
android:id="@+id/no_camera_access_view"
|
||||
android:layout_height="fill_parent"
|
||||
android:layout_width="fill_parent"
|
||||
android:gravity="center"
|
||||
android:text="@string/no_camera_access" />
|
||||
</FrameLayout>
|
||||
</androidx.constraintlayout.widget.ConstraintLayout>
|
|
@ -0,0 +1,6 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<resources>
|
||||
<color name="colorPrimary">#008577</color>
|
||||
<color name="colorPrimaryDark">#00574B</color>
|
||||
<color name="colorAccent">#D81B60</color>
|
||||
</resources>
|
|
@ -0,0 +1,4 @@
|
|||
<resources>
|
||||
<string name="app_name" translatable="false">Face Mesh GPU</string>
|
||||
<string name="no_camera_access" translatable="false">Please grant camera permissions.</string>
|
||||
</resources>
|
|
@ -0,0 +1,11 @@
|
|||
<resources>
|
||||
|
||||
<!-- Base application theme. -->
|
||||
<style name="AppTheme" parent="Theme.AppCompat.Light.DarkActionBar">
|
||||
<!-- Customize your theme here. -->
|
||||
<item name="colorPrimary">@color/colorPrimary</item>
|
||||
<item name="colorPrimaryDark">@color/colorPrimaryDark</item>
|
||||
<item name="colorAccent">@color/colorAccent</item>
|
||||
</style>
|
||||
|
||||
</resources>
|
|
@ -150,4 +150,34 @@ message ConversionOptions {
|
|||
optional int32 target_height = 2;
|
||||
}
|
||||
|
||||
// TODO: Move other autoflip messages into this area.
|
||||
// Self-contained message that provides all needed information to render
|
||||
// autoflip with an external renderer. One of these messages is required for
|
||||
// each frame of the video.
|
||||
message ExternalRenderFrame {
|
||||
// Rectangle using the OpenCV convention (x, y, width, height).
|
||||
message Rect {
|
||||
optional float x = 1;
|
||||
optional float y = 2;
|
||||
optional float width = 3;
|
||||
optional float height = 4;
|
||||
}
|
||||
// RGB color [0...255]
|
||||
message Color {
|
||||
optional int32 r = 1;
|
||||
optional int32 g = 2;
|
||||
optional int32 b = 3;
|
||||
}
|
||||
// Rect that must be cropped out of the input frame. It is in the
|
||||
// original dimensions of the input video. The first step to render this
|
||||
// frame is to crop this rect from the input frame.
|
||||
optional Rect crop_from_location = 1;
|
||||
// The placement location where the above rect is placed on the output frame.
|
||||
// This will always have the same aspect ratio as the above rect but scaling
|
||||
// may be required.
|
||||
optional Rect render_to_location = 2;
|
||||
// If render_to_location is smaller than the output dimensions of the frame,
|
||||
// fill the rest of the frame with this color.
|
||||
optional Color padding_color = 3;
|
||||
// Timestamp in microseconds of this frame.
|
||||
optional uint64 timestamp_us = 4;
|
||||
}
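For orientation only, a hedged C++/OpenCV sketch of how an external renderer could consume one of these messages; it mirrors the three steps described in the field comments above. The generated header path, the `RenderExternally` name, the output dimensions, and the RGB channel order are illustrative assumptions, not part of this change.

```cpp
#include <opencv2/imgproc.hpp>
// Assumed path of the generated header for this proto file.
#include "mediapipe/examples/desktop/autoflip/autoflip_messages.pb.h"

// Renders one output frame from the original-resolution input frame and a
// single ExternalRenderFrame message.
cv::Mat RenderExternally(const cv::Mat& input_frame,
                         const mediapipe::autoflip::ExternalRenderFrame& msg,
                         int output_width, int output_height) {
  // Step 1: crop crop_from_location out of the input frame (input-video
  // coordinates).
  const auto& from = msg.crop_from_location();
  const cv::Mat cropped = input_frame(
      cv::Rect(static_cast<int>(from.x()), static_cast<int>(from.y()),
               static_cast<int>(from.width()),
               static_cast<int>(from.height())));
  // Step 2: start from a frame filled with padding_color; it remains visible
  // only where render_to_location does not cover the output frame.
  const auto& pad = msg.padding_color();
  cv::Mat output(output_height, output_width, input_frame.type(),
                 cv::Scalar(pad.r(), pad.g(), pad.b()));
  // Step 3: scale the crop into render_to_location (same aspect ratio by
  // construction) and paste it onto the output frame.
  const auto& to = msg.render_to_location();
  const cv::Rect to_rect(static_cast<int>(to.x()), static_cast<int>(to.y()),
                         static_cast<int>(to.width()),
                         static_cast<int>(to.height()));
  cv::Mat destination = output(to_rect);
  cv::resize(cropped, destination, to_rect.size());
  return output;
}
```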
|
||||
|
|
|
@ -44,11 +44,19 @@ constexpr char kInputExternalSettings[] = "EXTERNAL_SETTINGS";
|
|||
// TargetSizeType::MAXIMIZE_TARGET_DIMENSION
|
||||
constexpr char kAspectRatio[] = "EXTERNAL_ASPECT_RATIO";
|
||||
|
||||
// Output the cropped frames, as well as visualization of crop regions and focus
|
||||
// points. Note that KEY_FRAME_CROP_REGION_VIZ_FRAMES and
|
||||
// SALIENT_POINT_FRAME_VIZ_FRAMES can only be enabled when CROPPED_FRAMES is
|
||||
// enabled.
|
||||
constexpr char kOutputCroppedFrames[] = "CROPPED_FRAMES";
|
||||
constexpr char kOutputKeyFrameCropViz[] = "KEY_FRAME_CROP_REGION_VIZ_FRAMES";
|
||||
constexpr char kOutputFocusPointFrameViz[] = "SALIENT_POINT_FRAME_VIZ_FRAMES";
|
||||
constexpr char kOutputSummary[] = "CROPPING_SUMMARY";
|
||||
|
||||
// External rendering outputs
|
||||
constexpr char kExternalRenderingPerFrame[] = "EXTERNAL_RENDERING_PER_FRAME";
|
||||
constexpr char kExternalRenderingFullVid[] = "EXTERNAL_RENDERING_FULL_VID";
|
||||
|
||||
::mediapipe::Status SceneCroppingCalculator::GetContract(
|
||||
::mediapipe::CalculatorContract* cc) {
|
||||
if (cc->InputSidePackets().HasTag(kInputExternalSettings)) {
|
||||
|
@ -67,16 +75,36 @@ constexpr char kOutputSummary[] = "CROPPING_SUMMARY";
|
|||
}
|
||||
cc->Inputs().Tag(kInputShotBoundaries).Set<bool>();
|
||||
|
||||
if (cc->Outputs().HasTag(kOutputCroppedFrames)) {
|
||||
cc->Outputs().Tag(kOutputCroppedFrames).Set<ImageFrame>();
|
||||
}
|
||||
if (cc->Outputs().HasTag(kOutputKeyFrameCropViz)) {
|
||||
RET_CHECK(cc->Outputs().HasTag(kOutputCroppedFrames))
|
||||
<< "KEY_FRAME_CROP_REGION_VIZ_FRAMES can only be used when "
|
||||
"CROPPED_FRAMES is specified.";
|
||||
cc->Outputs().Tag(kOutputKeyFrameCropViz).Set<ImageFrame>();
|
||||
}
|
||||
if (cc->Outputs().HasTag(kOutputFocusPointFrameViz)) {
|
||||
RET_CHECK(cc->Outputs().HasTag(kOutputCroppedFrames))
|
||||
<< "SALIENT_POINT_FRAME_VIZ_FRAMES can only be used when "
|
||||
"CROPPED_FRAMES is specified.";
|
||||
cc->Outputs().Tag(kOutputFocusPointFrameViz).Set<ImageFrame>();
|
||||
}
|
||||
if (cc->Outputs().HasTag(kOutputSummary)) {
|
||||
cc->Outputs().Tag(kOutputSummary).Set<VideoCroppingSummary>();
|
||||
}
|
||||
if (cc->Outputs().HasTag(kExternalRenderingPerFrame)) {
|
||||
cc->Outputs().Tag(kExternalRenderingPerFrame).Set<ExternalRenderFrame>();
|
||||
}
|
||||
if (cc->Outputs().HasTag(kExternalRenderingFullVid)) {
|
||||
cc->Outputs()
|
||||
.Tag(kExternalRenderingFullVid)
|
||||
.Set<std::vector<ExternalRenderFrame>>();
|
||||
}
|
||||
RET_CHECK(cc->Outputs().HasTag(kExternalRenderingPerFrame) ||
|
||||
cc->Outputs().HasTag(kExternalRenderingFullVid) ||
|
||||
cc->Outputs().HasTag(kOutputCroppedFrames))
|
||||
<< "At leaset one output stream must be specified";
|
||||
return ::mediapipe::OkStatus();
|
||||
}
|
||||
|
||||
|
@ -104,6 +132,11 @@ constexpr char kOutputSummary[] = "CROPPING_SUMMARY";
|
|||
if (cc->Outputs().HasTag(kOutputSummary)) {
|
||||
summary_ = absl::make_unique<VideoCroppingSummary>();
|
||||
}
|
||||
if (cc->Outputs().HasTag(kExternalRenderingFullVid)) {
|
||||
external_render_list_ =
|
||||
absl::make_unique<std::vector<ExternalRenderFrame>>();
|
||||
}
|
||||
should_perform_frame_cropping_ = cc->Outputs().HasTag(kOutputCroppedFrames);
|
||||
return ::mediapipe::OkStatus();
|
||||
}
|
||||
|
||||
|
@ -127,6 +160,28 @@ namespace {
|
|||
*aspect_ratio = width_ratio / height_ratio;
|
||||
return ::mediapipe::OkStatus();
|
||||
}
|
||||
void ConstructExternalRenderMessage(
|
||||
const cv::Rect& crop_from_location, const cv::Rect& render_to_location,
|
||||
const cv::Scalar& padding_color, const uint64 timestamp_us,
|
||||
ExternalRenderFrame* external_render_message) {
|
||||
auto crop_from_message =
|
||||
external_render_message->mutable_crop_from_location();
|
||||
crop_from_message->set_x(crop_from_location.x);
|
||||
crop_from_message->set_y(crop_from_location.y);
|
||||
crop_from_message->set_width(crop_from_location.width);
|
||||
crop_from_message->set_height(crop_from_location.height);
|
||||
auto render_to_message =
|
||||
external_render_message->mutable_render_to_location();
|
||||
render_to_message->set_x(render_to_location.x);
|
||||
render_to_message->set_y(render_to_location.y);
|
||||
render_to_message->set_width(render_to_location.width);
|
||||
render_to_message->set_height(render_to_location.height);
|
||||
auto padding_color_message = external_render_message->mutable_padding_color();
|
||||
padding_color_message->set_r(padding_color[0]);
|
||||
padding_color_message->set_g(padding_color[1]);
|
||||
padding_color_message->set_b(padding_color[2]);
|
||||
external_render_message->set_timestamp_us(timestamp_us);
|
||||
}
|
||||
} // namespace
|
||||
|
||||
::mediapipe::Status SceneCroppingCalculator::Process(
|
||||
|
@ -230,8 +285,9 @@ namespace {
|
|||
is_end_of_scene = cc->Inputs().Tag(kInputShotBoundaries).Get<bool>();
|
||||
}
|
||||
const bool force_buffer_flush =
|
||||
scene_frames_.size() >= options_.max_scene_size();
|
||||
if (!scene_frames_.empty() && (is_end_of_scene || force_buffer_flush)) {
|
||||
scene_frame_timestamps_.size() >= options_.max_scene_size();
|
||||
if (!scene_frame_timestamps_.empty() &&
|
||||
(is_end_of_scene || force_buffer_flush)) {
|
||||
MP_RETURN_IF_ERROR(ProcessScene(is_end_of_scene, cc));
|
||||
}
|
||||
|
||||
|
@ -240,11 +296,14 @@ namespace {
|
|||
LOG_EVERY_N(ERROR, 10)
|
||||
<< "------------------------ (Breathing) Time(s): "
|
||||
<< cc->Inputs().Tag(kInputVideoFrames).Value().Timestamp().Seconds();
|
||||
// Only buffer frames if |should_perform_frame_cropping_| is true.
|
||||
if (should_perform_frame_cropping_) {
|
||||
const auto& frame = cc->Inputs().Tag(kInputVideoFrames).Get<ImageFrame>();
|
||||
const cv::Mat frame_mat = formats::MatView(&frame);
|
||||
cv::Mat copy_mat;
|
||||
frame_mat.copyTo(copy_mat);
|
||||
scene_frames_.push_back(copy_mat);
|
||||
scene_frames_or_empty_.push_back(copy_mat);
|
||||
}
|
||||
scene_frame_timestamps_.push_back(cc->InputTimestamp().Value());
|
||||
is_key_frames_.push_back(
|
||||
!cc->Inputs().Tag(kInputDetections).Value().IsEmpty());
|
||||
|
@ -274,7 +333,7 @@ namespace {
|
|||
|
||||
::mediapipe::Status SceneCroppingCalculator::Close(
|
||||
::mediapipe::CalculatorContext* cc) {
|
||||
if (!scene_frames_.empty()) {
|
||||
if (!scene_frame_timestamps_.empty()) {
|
||||
MP_RETURN_IF_ERROR(ProcessScene(/* is_end_of_scene = */ true, cc));
|
||||
}
|
||||
if (cc->Outputs().HasTag(kOutputSummary)) {
|
||||
|
@ -282,16 +341,25 @@ namespace {
|
|||
.Tag(kOutputSummary)
|
||||
.Add(summary_.release(), Timestamp::PostStream());
|
||||
}
|
||||
if (cc->Outputs().HasTag(kExternalRenderingFullVid)) {
|
||||
cc->Outputs()
|
||||
.Tag(kExternalRenderingFullVid)
|
||||
.Add(external_render_list_.release(), Timestamp::PostStream());
|
||||
}
|
||||
return ::mediapipe::OkStatus();
|
||||
}
|
||||
|
||||
::mediapipe::Status SceneCroppingCalculator::RemoveStaticBorders() {
|
||||
int top_border_size = 0, bottom_border_size = 0;
|
||||
// TODO: split this function into two, one for calculating the border
|
||||
// sizes, the other for the actual removal of borders from the frames.
|
||||
::mediapipe::Status SceneCroppingCalculator::RemoveStaticBorders(
|
||||
int* top_border_size, int* bottom_border_size) {
|
||||
*top_border_size = 0;
|
||||
*bottom_border_size = 0;
|
||||
MP_RETURN_IF_ERROR(ComputeSceneStaticBordersSize(
|
||||
static_features_, &top_border_size, &bottom_border_size));
|
||||
static_features_, top_border_size, bottom_border_size));
|
||||
const double scale = static_cast<double>(frame_height_) / key_frame_height_;
|
||||
top_border_distance_ = std::round(scale * top_border_size);
|
||||
const int bottom_border_distance = std::round(scale * bottom_border_size);
|
||||
top_border_distance_ = std::round(scale * *top_border_size);
|
||||
const int bottom_border_distance = std::round(scale * *bottom_border_size);
|
||||
effective_frame_height_ =
|
||||
frame_height_ - top_border_distance_ - bottom_border_distance;
|
||||
|
||||
|
@ -301,10 +369,10 @@ namespace {
|
|||
// Remove borders from frames.
|
||||
cv::Rect roi(0, top_border_distance_, frame_width_,
|
||||
effective_frame_height_);
|
||||
for (int i = 0; i < scene_frames_.size(); ++i) {
|
||||
for (int i = 0; i < scene_frames_or_empty_.size(); ++i) {
|
||||
cv::Mat tmp;
|
||||
scene_frames_[i](roi).copyTo(tmp);
|
||||
scene_frames_[i] = tmp;
|
||||
scene_frames_or_empty_[i](roi).copyTo(tmp);
|
||||
scene_frames_or_empty_[i] = tmp;
|
||||
}
|
||||
// Adjust detection bounding boxes.
|
||||
for (int i = 0; i < key_frame_infos_.size(); ++i) {
|
||||
|
@ -373,7 +441,9 @@ void SceneCroppingCalculator::FilterKeyFrameInfo() {
|
|||
FilterKeyFrameInfo();
|
||||
|
||||
// Removes any static borders.
|
||||
MP_RETURN_IF_ERROR(RemoveStaticBorders());
|
||||
int top_static_border_size, bottom_static_border_size;
|
||||
MP_RETURN_IF_ERROR(
|
||||
RemoveStaticBorders(&top_static_border_size, &bottom_static_border_size));
|
||||
|
||||
// Decides if solid background color padding is possible and sets up color
|
||||
// interpolation functions in CIELAB. Uses linear interpolation by default.
|
||||
|
@ -409,20 +479,31 @@ void SceneCroppingCalculator::FilterKeyFrameInfo() {
|
|||
|
||||
// Crops scene frames.
|
||||
std::vector<cv::Mat> cropped_frames;
|
||||
std::vector<cv::Rect> crop_from_locations;
|
||||
|
||||
auto* cropped_frames_ptr =
|
||||
should_perform_frame_cropping_ ? &cropped_frames : nullptr;
|
||||
|
||||
MP_RETURN_IF_ERROR(scene_cropper_->CropFrames(
|
||||
scene_summary, scene_frames_, focus_point_frames,
|
||||
prior_focus_point_frames_, &cropped_frames));
|
||||
scene_summary, scene_frame_timestamps_.size(), scene_frames_or_empty_,
|
||||
focus_point_frames, prior_focus_point_frames_, top_static_border_size,
|
||||
bottom_static_border_size, &crop_from_locations, cropped_frames_ptr));
|
||||
|
||||
// Formats and outputs cropped frames.
|
||||
bool apply_padding = false;
|
||||
float vertical_fill_precent;
|
||||
std::vector<cv::Rect> render_to_locations;
|
||||
cv::Scalar padding_color;
|
||||
if (should_perform_frame_cropping_) {
|
||||
MP_RETURN_IF_ERROR(FormatAndOutputCroppedFrames(
|
||||
cropped_frames, &apply_padding, &vertical_fill_precent, cc));
|
||||
|
||||
cropped_frames, &render_to_locations, &apply_padding, &padding_color,
|
||||
&vertical_fill_precent, cc));
|
||||
}
|
||||
// Caches prior FocusPointFrames if this was not the end of a scene.
|
||||
prior_focus_point_frames_.clear();
|
||||
if (!is_end_of_scene) {
|
||||
const int start = std::max(0, static_cast<int>(scene_frames_.size()) -
|
||||
const int start =
|
||||
std::max(0, static_cast<int>(scene_frame_timestamps_.size()) -
|
||||
options_.prior_frame_buffer_size());
|
||||
for (int i = start; i < num_key_frames; ++i) {
|
||||
prior_focus_point_frames_.push_back(focus_point_frames[i]);
|
||||
|
@ -449,8 +530,31 @@ void SceneCroppingCalculator::FilterKeyFrameInfo() {
|
|||
scene_summary->set_is_padded(apply_padding);
|
||||
}
|
||||
|
||||
if (cc->Outputs().HasTag(kExternalRenderingPerFrame)) {
|
||||
for (int i = 0; i < scene_frame_timestamps_.size(); i++) {
|
||||
auto external_render_message = absl::make_unique<ExternalRenderFrame>();
|
||||
ConstructExternalRenderMessage(
|
||||
crop_from_locations[i], render_to_locations[i], padding_color,
|
||||
scene_frame_timestamps_[i], external_render_message.get());
|
||||
cc->Outputs()
|
||||
.Tag(kExternalRenderingPerFrame)
|
||||
.Add(external_render_message.release(),
|
||||
Timestamp(scene_frame_timestamps_[i]));
|
||||
}
|
||||
}
|
||||
|
||||
if (cc->Outputs().HasTag(kExternalRenderingFullVid)) {
|
||||
for (int i = 0; i < scene_frame_timestamps_.size(); i++) {
|
||||
ExternalRenderFrame render_frame;
|
||||
ConstructExternalRenderMessage(crop_from_locations[i],
|
||||
render_to_locations[i], padding_color,
|
||||
scene_frame_timestamps_[i], &render_frame);
|
||||
external_render_list_->push_back(render_frame);
|
||||
}
|
||||
}
|
||||
|
||||
key_frame_infos_.clear();
|
||||
scene_frames_.clear();
|
||||
scene_frames_or_empty_.clear();
|
||||
scene_frame_timestamps_.clear();
|
||||
is_key_frames_.clear();
|
||||
static_features_.clear();
|
||||
|
@ -459,8 +563,10 @@ void SceneCroppingCalculator::FilterKeyFrameInfo() {
|
|||
}
|
||||
|
||||
::mediapipe::Status SceneCroppingCalculator::FormatAndOutputCroppedFrames(
|
||||
const std::vector<cv::Mat>& cropped_frames, bool* apply_padding,
|
||||
float* vertical_fill_precent, CalculatorContext* cc) {
|
||||
const std::vector<cv::Mat>& cropped_frames,
|
||||
std::vector<cv::Rect>* render_to_locations, bool* apply_padding,
|
||||
cv::Scalar* padding_color, float* vertical_fill_precent,
|
||||
CalculatorContext* cc) {
|
||||
RET_CHECK(apply_padding) << "Output apply_padding pointer is null.";
|
||||
if (cropped_frames.empty()) {
|
||||
return ::mediapipe::OkStatus();
|
||||
|
@ -493,10 +599,22 @@ void SceneCroppingCalculator::FilterKeyFrameInfo() {
|
|||
<< " target height = " << target_height_;
|
||||
}
|
||||
|
||||
// Compute the "render to" location. This is where the rect taken from the
|
||||
// input video gets pasted on the output frame. For use with external
|
||||
// rendering solutions.
|
||||
const int num_frames = cropped_frames.size();
|
||||
for (int i = 0; i < num_frames; i++) {
|
||||
if (*apply_padding) {
|
||||
render_to_locations->push_back(padder_->ComputeOutputLocation());
|
||||
} else {
|
||||
render_to_locations->push_back(
|
||||
cv::Rect(0, 0, target_width_, target_height_));
|
||||
}
|
||||
}
|
||||
|
||||
// Resizes cropped frames, pads frames, and output frames.
|
||||
cv::Scalar* background_color = nullptr;
|
||||
cv::Scalar interpolated_color;
|
||||
const int num_frames = cropped_frames.size();
|
||||
for (int i = 0; i < num_frames; ++i) {
|
||||
const int64 time_ms = scene_frame_timestamps_[i];
|
||||
const Timestamp timestamp(time_ms);
|
||||
|
@ -561,9 +679,9 @@ mediapipe::Status SceneCroppingCalculator::OutputVizFrames(
|
|||
if (cc->Outputs().HasTag(kOutputKeyFrameCropViz)) {
|
||||
std::vector<std::unique_ptr<ImageFrame>> viz_frames;
|
||||
MP_RETURN_IF_ERROR(DrawDetectionsAndCropRegions(
|
||||
scene_frames_, is_key_frames_, key_frame_infos_, key_frame_crop_results,
|
||||
frame_format_, &viz_frames));
|
||||
for (int i = 0; i < scene_frames_.size(); ++i) {
|
||||
scene_frames_or_empty_, is_key_frames_, key_frame_infos_,
|
||||
key_frame_crop_results, frame_format_, &viz_frames));
|
||||
for (int i = 0; i < scene_frames_or_empty_.size(); ++i) {
|
||||
cc->Outputs()
|
||||
.Tag(kOutputKeyFrameCropViz)
|
||||
.Add(viz_frames[i].release(), Timestamp(scene_frame_timestamps_[i]));
|
||||
|
@ -572,9 +690,10 @@ mediapipe::Status SceneCroppingCalculator::OutputVizFrames(
|
|||
if (cc->Outputs().HasTag(kOutputFocusPointFrameViz)) {
|
||||
std::vector<std::unique_ptr<ImageFrame>> viz_frames;
|
||||
MP_RETURN_IF_ERROR(DrawFocusPointAndCropWindow(
|
||||
scene_frames_, focus_point_frames, options_.viz_overlay_opacity(),
|
||||
crop_window_width, crop_window_height, frame_format_, &viz_frames));
|
||||
for (int i = 0; i < scene_frames_.size(); ++i) {
|
||||
scene_frames_or_empty_, focus_point_frames,
|
||||
options_.viz_overlay_opacity(), crop_window_width, crop_window_height,
|
||||
frame_format_, &viz_frames));
|
||||
for (int i = 0; i < scene_frames_or_empty_.size(); ++i) {
|
||||
cc->Outputs()
|
||||
.Tag(kOutputFocusPointFrameViz)
|
||||
.Add(viz_frames[i].release(), Timestamp(scene_frame_timestamps_[i]));
|
||||
|
|
|
@ -79,8 +79,10 @@ namespace autoflip {
|
|||
// Indicators for shot boundaries (output of shot boundary detection).
|
||||
// - optional tag KEY_FRAMES (type ImageFrame):
|
||||
// Key frames on which features are detected. This is only used to set the
|
||||
// detection features frame size, and when it is omitted, the features frame
|
||||
// size is assumed to be the original scene frame size.
|
||||
// detection features frame size. Alternatively, set
|
||||
// video_feature_width/video_features_height within the options proto to
|
||||
// define this value. When neither is set, the features frame size is
|
||||
// assumed to be the original scene frame size.
|
||||
//
|
||||
// Output streams:
|
||||
// - required tag CROPPED_FRAMES (type ImageFrame):
|
||||
|
@ -95,6 +97,12 @@ namespace autoflip {
|
|||
// - optional tag CROPPING_SUMMARY (type VideoCroppingSummary):
|
||||
// Debug summary information for the video. Only generates one packet when
|
||||
// calculator closes.
|
||||
// - optional tag EXTERNAL_RENDERING_PER_FRAME (type ExternalRenderFrame)
|
||||
// Provides a per-frame message that can be used to render autoflip using an
|
||||
// external renderer.
|
||||
// - optional tag EXTERNAL_RENDERING_FULL_VID (type Vector<ExternalRenderFrame>)
|
||||
// Provides an end-stream message that can be used to render autoflip using
|
||||
// an external renderer.
|
||||
//
|
||||
// Example config:
|
||||
// node {
|
||||
|
@ -134,8 +142,11 @@ class SceneCroppingCalculator : public CalculatorBase {
|
|||
::mediapipe::Status Close(::mediapipe::CalculatorContext* cc) override;
|
||||
|
||||
private:
|
||||
// Removes any static borders from the scene frames before cropping.
|
||||
::mediapipe::Status RemoveStaticBorders();
|
||||
// Removes any static borders from the scene frames before cropping. The
|
||||
// arguments |top_border_size| and |bottom_border_size| report the size of the
|
||||
// removed borders.
|
||||
::mediapipe::Status RemoveStaticBorders(int* top_border_size,
|
||||
int* bottom_border_size);
|
||||
|
||||
// Initializes a FrameCropRegionComputer given input and target frame sizes.
|
||||
::mediapipe::Status InitializeFrameCropRegionComputer();
|
||||
|
@ -158,8 +169,10 @@ class SceneCroppingCalculator : public CalculatorBase {
|
|||
// solid background from static features if possible, otherwise uses blurred
|
||||
// background. Sets apply_padding to true if the scene is padded.
|
||||
::mediapipe::Status FormatAndOutputCroppedFrames(
|
||||
const std::vector<cv::Mat>& cropped_frames, bool* apply_padding,
|
||||
float* vertical_fill_precent, CalculatorContext* cc);
|
||||
const std::vector<cv::Mat>& cropped_frames,
|
||||
std::vector<cv::Rect>* render_to_locations, bool* apply_padding,
|
||||
cv::Scalar* padding_color, float* vertical_fill_precent,
|
||||
CalculatorContext* cc);
|
||||
|
||||
// Draws and outputs visualization frames if those streams are present.
|
||||
::mediapipe::Status OutputVizFrames(
|
||||
|
@ -193,7 +206,11 @@ class SceneCroppingCalculator : public CalculatorBase {
|
|||
|
||||
// Buffered frames, timestamps, and indicators for key frames in the current
|
||||
// scene (size = number of input video frames).
|
||||
std::vector<cv::Mat> scene_frames_;
|
||||
// Note: scene_frames_or_empty_ may be empty if the actual cropping operation
|
||||
// of frames is turned off, e.g. when |should_perform_frame_cropping_| is
|
||||
// false, so rely on scene_frame_timestamps_.size() to query the number of
|
||||
// accumulated timestamps rather than scene_frames_or_empty_.size().
|
||||
std::vector<cv::Mat> scene_frames_or_empty_;
|
||||
std::vector<int64> scene_frame_timestamps_;
|
||||
std::vector<bool> is_key_frames_;
|
||||
|
||||
|
@ -242,6 +259,17 @@ class SceneCroppingCalculator : public CalculatorBase {
|
|||
|
||||
// Optional diagnostic summary output emitted in Close().
|
||||
std::unique_ptr<VideoCroppingSummary> summary_ = nullptr;
|
||||
|
||||
// Optional list of external rendering messages for each processed frame.
|
||||
std::unique_ptr<std::vector<ExternalRenderFrame>> external_render_list_;
|
||||
|
||||
// Determines whether to perform real cropping on input frames. This flag is
|
||||
// useful when the user only needs to compute cropping windows, in which case
|
||||
// setting this flag to false can avoid buffering as well as cropping frames.
|
||||
// This can significantly reduce memory usage and speed up processing. Note
// that some debugging visualizations are inevitably disabled when this flag
// is false.
|
||||
bool should_perform_frame_cropping_ = false;
|
||||
};
|
||||
} // namespace autoflip
|
||||
} // namespace mediapipe
|
||||
|
|
|
@ -68,6 +68,22 @@ constexpr char kNoKeyFrameConfig[] = R"(
|
|||
}
|
||||
})";
|
||||
|
||||
constexpr char kDebugConfigNoCroppedFrame[] = R"(
|
||||
calculator: "SceneCroppingCalculator"
|
||||
input_stream: "VIDEO_FRAMES:camera_frames_org"
|
||||
input_stream: "KEY_FRAMES:down_sampled_frames"
|
||||
input_stream: "DETECTION_FEATURES:salient_regions"
|
||||
input_stream: "STATIC_FEATURES:border_features"
|
||||
input_stream: "SHOT_BOUNDARIES:shot_boundary_frames"
|
||||
output_stream: "KEY_FRAME_CROP_REGION_VIZ_FRAMES:key_frame_crop_viz_frames"
|
||||
output_stream: "SALIENT_POINT_FRAME_VIZ_FRAMES:salient_point_viz_frames"
|
||||
options: {
|
||||
[mediapipe.autoflip.SceneCroppingCalculatorOptions.ext]: {
|
||||
target_width: $0
|
||||
target_height: $1
|
||||
}
|
||||
})";
|
||||
|
||||
constexpr char kDebugConfig[] = R"(
|
||||
calculator: "SceneCroppingCalculator"
|
||||
input_stream: "VIDEO_FRAMES:camera_frames_org"
|
||||
|
@ -79,6 +95,8 @@ constexpr char kDebugConfig[] = R"(
|
|||
output_stream: "KEY_FRAME_CROP_REGION_VIZ_FRAMES:key_frame_crop_viz_frames"
|
||||
output_stream: "SALIENT_POINT_FRAME_VIZ_FRAMES:salient_point_viz_frames"
|
||||
output_stream: "CROPPING_SUMMARY:cropping_summaries"
|
||||
output_stream: "EXTERNAL_RENDERING_PER_FRAME:external_rendering_per_frame"
|
||||
output_stream: "EXTERNAL_RENDERING_FULL_VID:external_rendering_full_vid"
|
||||
options: {
|
||||
[mediapipe.autoflip.SceneCroppingCalculatorOptions.ext]: {
|
||||
target_width: $0
|
||||
|
@ -257,6 +275,17 @@ TEST(SceneCroppingCalculatorTest, ChecksPriorFrameBufferSize) {
|
|||
HasSubstr("Prior frame buffer size is negative."));
|
||||
}
|
||||
|
||||
TEST(SceneCroppingCalculatorTest, ChecksDebugConfigWithoutCroppedFrame) {
|
||||
const CalculatorGraphConfig::Node config =
|
||||
ParseTextProtoOrDie<CalculatorGraphConfig::Node>(absl::Substitute(
|
||||
kDebugConfigNoCroppedFrame, kTargetWidth, kTargetHeight,
|
||||
kTargetSizeType, 0, kPriorFrameBufferSize));
|
||||
auto runner = absl::make_unique<CalculatorRunner>(config);
|
||||
const auto status = runner->Run();
|
||||
EXPECT_FALSE(status.ok());
|
||||
EXPECT_THAT(status.ToString(), HasSubstr("can only be used when"));
|
||||
}
|
||||
|
||||
// Checks that the calculator crops scene frames when there is no input key
|
||||
// frames stream.
|
||||
TEST(SceneCroppingCalculatorTest, HandlesNoKeyFrames) {
|
||||
|
@ -299,14 +328,34 @@ TEST(SceneCroppingCalculatorTest, OutputsDebugStreams) {
|
|||
EXPECT_TRUE(outputs.HasTag("KEY_FRAME_CROP_REGION_VIZ_FRAMES"));
|
||||
EXPECT_TRUE(outputs.HasTag("SALIENT_POINT_FRAME_VIZ_FRAMES"));
|
||||
EXPECT_TRUE(outputs.HasTag("CROPPING_SUMMARY"));
|
||||
EXPECT_TRUE(outputs.HasTag("EXTERNAL_RENDERING_PER_FRAME"));
|
||||
EXPECT_TRUE(outputs.HasTag("EXTERNAL_RENDERING_FULL_VID"));
|
||||
const auto& crop_region_viz_frames_outputs =
|
||||
outputs.Tag("KEY_FRAME_CROP_REGION_VIZ_FRAMES").packets;
|
||||
const auto& salient_point_viz_frames_outputs =
|
||||
outputs.Tag("SALIENT_POINT_FRAME_VIZ_FRAMES").packets;
|
||||
const auto& summary_output = outputs.Tag("CROPPING_SUMMARY").packets;
|
||||
const auto& ext_render_per_frame =
|
||||
outputs.Tag("EXTERNAL_RENDERING_PER_FRAME").packets;
|
||||
const auto& ext_render_full_vid =
|
||||
outputs.Tag("EXTERNAL_RENDERING_FULL_VID").packets;
|
||||
EXPECT_EQ(crop_region_viz_frames_outputs.size(), num_frames);
|
||||
EXPECT_EQ(salient_point_viz_frames_outputs.size(), num_frames);
|
||||
EXPECT_EQ(summary_output.size(), 1);
|
||||
EXPECT_EQ(ext_render_per_frame.size(), num_frames);
|
||||
EXPECT_EQ(ext_render_full_vid.size(), 1);
|
||||
EXPECT_EQ(ext_render_per_frame[0].Get<ExternalRenderFrame>().timestamp_us(),
|
||||
0);
|
||||
EXPECT_EQ(ext_render_full_vid[0]
|
||||
.Get<std::vector<ExternalRenderFrame>>()[0]
|
||||
.timestamp_us(),
|
||||
0);
|
||||
EXPECT_EQ(ext_render_per_frame[1].Get<ExternalRenderFrame>().timestamp_us(),
|
||||
20000);
|
||||
EXPECT_EQ(ext_render_full_vid[0]
|
||||
.Get<std::vector<ExternalRenderFrame>>()[1]
|
||||
.timestamp_us(),
|
||||
20000);
|
||||
|
||||
for (int i = 0; i < num_frames; ++i) {
|
||||
const auto& crop_region_viz_frame =
|
||||
|
|
|
@ -173,5 +173,28 @@ PaddingEffectGenerator::PaddingEffectGenerator(const int input_width,
|
|||
return ::mediapipe::OkStatus();
|
||||
}
|
||||
|
||||
cv::Rect PaddingEffectGenerator::ComputeOutputLocation() {
|
||||
const int effective_input_width =
|
||||
is_vertical_padding_ ? input_width_ : input_height_;
|
||||
const int effective_input_height =
|
||||
is_vertical_padding_ ? input_height_ : input_width_;
|
||||
const int effective_output_width =
|
||||
is_vertical_padding_ ? output_width_ : output_height_;
|
||||
const int effective_output_height =
|
||||
is_vertical_padding_ ? output_height_ : output_width_;
|
||||
|
||||
// Step 3 from "process" call above, compute foreground location.
|
||||
const int foreground_height =
|
||||
effective_input_height * effective_output_width / effective_input_width;
|
||||
const int x = 0;
|
||||
const int y = (effective_output_height - foreground_height) / 2;
|
||||
const int width = effective_output_width;
|
||||
const int height = foreground_height;
|
||||
|
||||
cv::Rect region_to_embed_foreground(x, y, width, height);
|
||||
|
||||
return region_to_embed_foreground;
|
||||
}
|
||||
|
||||
} // namespace autoflip
|
||||
} // namespace mediapipe
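A quick worked example, using the same numbers as the ComputeOutputLocation unit test added later in this change (a 1920x1080 input with a 1.0 target aspect ratio, i.e. a 1080x1080 padded output with vertical padding), spells out the integer arithmetic above:

```cpp
// foreground_height = effective_input_height * effective_output_width
//                       / effective_input_width
//                   = 1080 * 1080 / 1920 = 607   (integer division)
// y                 = (1080 - 607) / 2   = 236
// => ComputeOutputLocation() returns cv::Rect(0, 236, /*width=*/1080,
//    /*height=*/607).
```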
|
||||
|
|
|
@ -55,6 +55,10 @@ class PaddingEffectGenerator {
|
|||
ImageFrame* output_frame,
|
||||
const cv::Scalar* background_color_in_rgb = nullptr);
|
||||
|
||||
// Compute the "render location" on the output frame where the "crop from"
|
||||
// location is to be placed. For use with external rendering solutions.
|
||||
cv::Rect ComputeOutputLocation();
|
||||
|
||||
private:
|
||||
double target_aspect_ratio_;
|
||||
int input_width_ = -1;
|
||||
|
|
|
@ -182,6 +182,16 @@ TEST(PaddingEffectGeneratorTest, ScaleToMultipleOfTwo) {
|
|||
EXPECT_EQ(result_frame.Width(), expect_width);
|
||||
EXPECT_EQ(result_frame.Height(), expect_height);
|
||||
}
|
||||
|
||||
TEST(PaddingEffectGeneratorTest, ComputeOutputLocation) {
|
||||
PaddingEffectGenerator generator(1920, 1080, 1.0);
|
||||
|
||||
auto result_rect = generator.ComputeOutputLocation();
|
||||
EXPECT_EQ(result_rect.x, 0);
|
||||
EXPECT_EQ(result_rect.y, 236);
|
||||
EXPECT_EQ(result_rect.width, 1080);
|
||||
EXPECT_EQ(result_rect.height, 607);
|
||||
}
|
||||
} // namespace
|
||||
} // namespace autoflip
|
||||
} // namespace mediapipe
|
||||
|
|
|
@ -25,14 +25,13 @@ namespace mediapipe {
|
|||
namespace autoflip {
|
||||
|
||||
::mediapipe::Status SceneCropper::CropFrames(
|
||||
const SceneKeyFrameCropSummary& scene_summary,
|
||||
const std::vector<cv::Mat>& scene_frames,
|
||||
const SceneKeyFrameCropSummary& scene_summary, const int num_scene_frames,
|
||||
const std::vector<cv::Mat>& scene_frames_or_empty,
|
||||
const std::vector<FocusPointFrame>& focus_point_frames,
|
||||
const std::vector<FocusPointFrame>& prior_focus_point_frames,
|
||||
int top_static_border_size, int bottom_static_border_size,
|
||||
std::vector<cv::Rect>* crop_from_location,
|
||||
std::vector<cv::Mat>* cropped_frames) const {
|
||||
RET_CHECK_NE(cropped_frames, nullptr) << "Output cropped frames is null.";
|
||||
|
||||
const int num_scene_frames = scene_frames.size();
|
||||
RET_CHECK_GT(num_scene_frames, 0) << "No scene frames.";
|
||||
RET_CHECK_EQ(focus_point_frames.size(), num_scene_frames)
|
||||
<< "Wrong size of FocusPointFrames.";
|
||||
|
@ -69,15 +68,36 @@ namespace autoflip {
|
|||
xform = affine_opencv;
|
||||
}
|
||||
|
||||
// If no cropped_frames is passed in, return directly.
|
||||
if (!cropped_frames) {
|
||||
return ::mediapipe::OkStatus();
|
||||
}
|
||||
RET_CHECK(!scene_frames_or_empty.empty())
|
||||
<< "If |cropped_frames| != nullptr, scene_frames_or_empty must not be "
|
||||
"empty.";
|
||||
// Prepares cropped frames.
|
||||
cropped_frames->resize(num_scene_frames);
|
||||
for (int i = 0; i < num_scene_frames; ++i) {
|
||||
(*cropped_frames)[i] =
|
||||
cv::Mat::zeros(crop_height, crop_width, scene_frames[i].type());
|
||||
(*cropped_frames)[i] = cv::Mat::zeros(crop_height, crop_width,
|
||||
scene_frames_or_empty[i].type());
|
||||
}
|
||||
|
||||
return AffineRetarget(cv::Size(crop_width, crop_height), scene_frames,
|
||||
scene_frame_xforms, cropped_frames);
|
||||
// Store the "crop from" location on the input frame for use with an external
|
||||
// renderer.
|
||||
for (int i = 0; i < num_scene_frames; i++) {
|
||||
const int left = scene_frame_xforms[i].at<float>(0, 2);
|
||||
const int right = left + crop_width;
|
||||
const int top = top_static_border_size;
|
||||
const int bottom =
|
||||
top_static_border_size +
|
||||
(crop_height - top_static_border_size - bottom_static_border_size);
|
||||
crop_from_location->push_back(
|
||||
cv::Rect(left, top, right - left, bottom - top));
|
||||
}
|
||||
|
||||
return AffineRetarget(cv::Size(crop_width, crop_height),
|
||||
scene_frames_or_empty, scene_frame_xforms,
|
||||
cropped_frames);
|
||||
}
|
||||
|
||||
} // namespace autoflip
|
||||
|
|
|
@ -48,14 +48,19 @@ class SceneCropper {
|
|||
SceneCropper() {}
|
||||
~SceneCropper() {}
|
||||
|
||||
// Crops scene frames given SceneKeyFrameCropSummary, FocusPointFrames, and
|
||||
// any prior FocusPointFrames (to ensure smoothness when there was no actual
|
||||
// scene change).
|
||||
// Computes transformation matrix given SceneKeyFrameCropSummary,
|
||||
// FocusPointFrames, and any prior FocusPointFrames (to ensure smoothness when
|
||||
// there was no actual scene change). Optionally crops the input frames based
|
||||
// on the transform matrix if |cropped_frames| is not nullptr and
|
||||
// |scene_frames_or_empty| isn't empty.
|
||||
// TODO: split this function into two separate functions.
|
||||
::mediapipe::Status CropFrames(
|
||||
const SceneKeyFrameCropSummary& scene_summary,
|
||||
const std::vector<cv::Mat>& scene_frames,
|
||||
const SceneKeyFrameCropSummary& scene_summary, const int num_scene_frames,
|
||||
const std::vector<cv::Mat>& scene_frames_or_empty,
|
||||
const std::vector<FocusPointFrame>& focus_point_frames,
|
||||
const std::vector<FocusPointFrame>& prior_focus_point_frames,
|
||||
int top_static_border_size, int bottom_static_border_size,
|
||||
std::vector<cv::Rect>* all_scene_frame_xforms,
|
||||
std::vector<cv::Mat>* cropped_frames) const;
|
||||
};
|
||||
|
||||
|
|
|
@ -71,24 +71,16 @@ std::vector<FocusPointFrame> GetDefaultFocusPointFrames() {
|
|||
return GetFocusPointFrames(kNumSceneFrames);
|
||||
}
|
||||
|
||||
// Checks that CropFrames checks output pointer is not null.
|
||||
TEST(SceneCropperTest, CropFramesChecksOutputNotNull) {
|
||||
SceneCropper scene_cropper;
|
||||
const auto status = scene_cropper.CropFrames(
|
||||
GetDefaultSceneKeyFrameCropSummary(), GetDefaultSceneFrames(),
|
||||
GetDefaultFocusPointFrames(), GetFocusPointFrames(0), nullptr);
|
||||
EXPECT_FALSE(status.ok());
|
||||
EXPECT_THAT(status.ToString(), HasSubstr("Output cropped frames is null."));
|
||||
}
|
||||
|
||||
// Checks that CropFrames checks that scene frames size is positive.
|
||||
TEST(SceneCropperTest, CropFramesChecksSceneFramesSize) {
|
||||
SceneCropper scene_cropper;
|
||||
std::vector<cv::Mat> scene_frames(0);
|
||||
std::vector<cv::Mat> cropped_frames;
|
||||
std::vector<cv::Rect> crop_from_locations;
|
||||
const auto status = scene_cropper.CropFrames(
|
||||
GetDefaultSceneKeyFrameCropSummary(), scene_frames,
|
||||
GetDefaultFocusPointFrames(), GetFocusPointFrames(0), &cropped_frames);
|
||||
GetDefaultSceneKeyFrameCropSummary(), scene_frames.size(), scene_frames,
|
||||
GetDefaultFocusPointFrames(), GetFocusPointFrames(0), 0, 0,
|
||||
&crop_from_locations, &cropped_frames);
|
||||
EXPECT_FALSE(status.ok());
|
||||
EXPECT_THAT(status.ToString(), HasSubstr("No scene frames."));
|
||||
}
|
||||
|
@ -97,10 +89,12 @@ TEST(SceneCropperTest, CropFramesChecksSceneFramesSize) {
|
|||
TEST(SceneCropperTest, CropFramesChecksFocusPointFramesSize) {
|
||||
SceneCropper scene_cropper;
|
||||
std::vector<cv::Mat> cropped_frames;
|
||||
std::vector<cv::Rect> crop_from_locations;
|
||||
const auto& scene_frames = GetDefaultSceneFrames();
|
||||
const auto status = scene_cropper.CropFrames(
|
||||
GetDefaultSceneKeyFrameCropSummary(), GetDefaultSceneFrames(),
|
||||
GetFocusPointFrames(kNumSceneFrames - 1), GetFocusPointFrames(0),
|
||||
&cropped_frames);
|
||||
GetDefaultSceneKeyFrameCropSummary(), scene_frames.size(), scene_frames,
|
||||
GetFocusPointFrames(kNumSceneFrames - 1), GetFocusPointFrames(0), 0, 0,
|
||||
&crop_from_locations, &cropped_frames);
|
||||
EXPECT_FALSE(status.ok());
|
||||
EXPECT_THAT(status.ToString(), HasSubstr("Wrong size of FocusPointFrames"));
|
||||
}
|
||||
|
@ -111,9 +105,12 @@ TEST(SceneCropperTest, CropFramesChecksCropSizePositive) {
|
|||
scene_summary.set_crop_window_width(-1);
|
||||
SceneCropper scene_cropper;
|
||||
std::vector<cv::Mat> cropped_frames;
|
||||
std::vector<cv::Rect> crop_from_locations;
|
||||
const auto& scene_frames = GetDefaultSceneFrames();
|
||||
const auto status = scene_cropper.CropFrames(
|
||||
scene_summary, GetDefaultSceneFrames(), GetDefaultFocusPointFrames(),
|
||||
GetFocusPointFrames(0), &cropped_frames);
|
||||
scene_summary, scene_frames.size(), scene_frames,
|
||||
GetDefaultFocusPointFrames(), GetFocusPointFrames(0), 0, 0,
|
||||
&crop_from_locations, &cropped_frames);
|
||||
EXPECT_FALSE(status.ok());
|
||||
EXPECT_THAT(status.ToString(), HasSubstr("Crop width is non-positive."));
|
||||
}
|
||||
|
@ -124,9 +121,12 @@ TEST(SceneCropperTest, InitializesRetargeterChecksCropSizeNotExceedFrameSize) {
|
|||
scene_summary.set_crop_window_height(kSceneHeight + 1);
|
||||
SceneCropper scene_cropper;
|
||||
std::vector<cv::Mat> cropped_frames;
|
||||
std::vector<cv::Rect> crop_from_locations;
|
||||
const auto& scene_frames = GetDefaultSceneFrames();
|
||||
const auto status = scene_cropper.CropFrames(
|
||||
scene_summary, GetDefaultSceneFrames(), GetDefaultFocusPointFrames(),
|
||||
GetFocusPointFrames(0), &cropped_frames);
|
||||
scene_summary, scene_frames.size(), scene_frames,
|
||||
GetDefaultFocusPointFrames(), GetFocusPointFrames(0), 0, 0,
|
||||
&crop_from_locations, &cropped_frames);
|
||||
EXPECT_FALSE(status.ok());
|
||||
EXPECT_THAT(status.ToString(),
|
||||
HasSubstr("Crop height exceeds frame height."));
|
||||
|
@ -136,9 +136,12 @@ TEST(SceneCropperTest, InitializesRetargeterChecksCropSizeNotExceedFrameSize) {
|
|||
TEST(SceneCropperTest, CropFramesWorksWithoutPriorFocusPointFrames) {
|
||||
SceneCropper scene_cropper;
|
||||
std::vector<cv::Mat> cropped_frames;
|
||||
std::vector<cv::Rect> crop_from_locations;
|
||||
const auto& scene_frames = GetDefaultSceneFrames();
|
||||
MP_ASSERT_OK(scene_cropper.CropFrames(
|
||||
GetDefaultSceneKeyFrameCropSummary(), GetDefaultSceneFrames(),
|
||||
GetDefaultFocusPointFrames(), GetFocusPointFrames(0), &cropped_frames));
|
||||
GetDefaultSceneKeyFrameCropSummary(), scene_frames.size(), scene_frames,
|
||||
GetDefaultFocusPointFrames(), GetFocusPointFrames(0), 0, 0,
|
||||
&crop_from_locations, &cropped_frames));
|
||||
ASSERT_EQ(cropped_frames.size(), kNumSceneFrames);
|
||||
for (int i = 0; i < kNumSceneFrames; ++i) {
|
||||
EXPECT_EQ(cropped_frames[i].rows, kCropHeight);
|
||||
|
@ -150,9 +153,12 @@ TEST(SceneCropperTest, CropFramesWorksWithoutPriorFocusPointFrames) {
|
|||
TEST(SceneCropperTest, CropFramesWorksWithPriorFocusPointFrames) {
|
||||
SceneCropper scene_cropper;
|
||||
std::vector<cv::Mat> cropped_frames;
|
||||
std::vector<cv::Rect> crop_from_locations;
|
||||
const auto& scene_frames = GetDefaultSceneFrames();
|
||||
MP_EXPECT_OK(scene_cropper.CropFrames(
|
||||
GetDefaultSceneKeyFrameCropSummary(), GetDefaultSceneFrames(),
|
||||
GetDefaultFocusPointFrames(), GetFocusPointFrames(3), &cropped_frames));
|
||||
GetDefaultSceneKeyFrameCropSummary(), scene_frames.size(), scene_frames,
|
||||
GetDefaultFocusPointFrames(), GetFocusPointFrames(3), 0, 0,
|
||||
&crop_from_locations, &cropped_frames));
|
||||
EXPECT_EQ(cropped_frames.size(), kNumSceneFrames);
|
||||
for (int i = 0; i < kNumSceneFrames; ++i) {
|
||||
EXPECT_EQ(cropped_frames[i].rows, kCropHeight);
|
||||
|
|
42
mediapipe/examples/desktop/face_mesh/BUILD
Normal file
|
@ -0,0 +1,42 @@
|
|||
# Copyright 2019 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
licenses(["notice"]) # Apache 2.0
|
||||
|
||||
package(default_visibility = ["//mediapipe/examples:__subpackages__"])
|
||||
|
||||
cc_binary(
|
||||
name = "face_mesh_tflite",
|
||||
deps = [
|
||||
"//mediapipe/examples/desktop:simple_run_graph_main",
|
||||
"//mediapipe/graphs/face_mesh:desktop_calculators",
|
||||
],
|
||||
)
|
||||
|
||||
cc_binary(
|
||||
name = "face_mesh_cpu",
|
||||
deps = [
|
||||
"//mediapipe/examples/desktop:demo_run_graph_main",
|
||||
"//mediapipe/graphs/face_mesh:desktop_live_calculators",
|
||||
],
|
||||
)
|
||||
|
||||
# Linux only
|
||||
cc_binary(
|
||||
name = "face_mesh_gpu",
|
||||
deps = [
|
||||
"//mediapipe/examples/desktop:demo_run_graph_main_gpu",
|
||||
"//mediapipe/graphs/face_mesh:desktop_live_gpu_calculators",
|
||||
],
|
||||
)
|
21
mediapipe/examples/ios/facemeshgpu/AppDelegate.h
Normal file
|
@ -0,0 +1,21 @@
|
|||
// Copyright 2019 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#import <UIKit/UIKit.h>
|
||||
|
||||
@interface AppDelegate : UIResponder <UIApplicationDelegate>
|
||||
|
||||
@property(strong, nonatomic) UIWindow *window;
|
||||
|
||||
@end
|
59
mediapipe/examples/ios/facemeshgpu/AppDelegate.m
Normal file
|
@ -0,0 +1,59 @@
|
|||
// Copyright 2019 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#import "AppDelegate.h"
|
||||
|
||||
@interface AppDelegate ()
|
||||
|
||||
@end
|
||||
|
||||
@implementation AppDelegate
|
||||
|
||||
- (BOOL)application:(UIApplication *)application
|
||||
didFinishLaunchingWithOptions:(NSDictionary *)launchOptions {
|
||||
// Override point for customization after application launch.
|
||||
return YES;
|
||||
}
|
||||
|
||||
- (void)applicationWillResignActive:(UIApplication *)application {
|
||||
// Sent when the application is about to move from active to inactive state. This can occur for
|
||||
// certain types of temporary interruptions (such as an incoming phone call or SMS message) or
|
||||
// when the user quits the application and it begins the transition to the background state. Use
|
||||
// this method to pause ongoing tasks, disable timers, and invalidate graphics rendering
|
||||
// callbacks. Games should use this method to pause the game.
|
||||
}
|
||||
|
||||
- (void)applicationDidEnterBackground:(UIApplication *)application {
|
||||
// Use this method to release shared resources, save user data, invalidate timers, and store
|
||||
// enough application state information to restore your application to its current state in case
|
||||
// it is terminated later. If your application supports background execution, this method is
|
||||
// called instead of applicationWillTerminate: when the user quits.
|
||||
}
|
||||
|
||||
- (void)applicationWillEnterForeground:(UIApplication *)application {
|
||||
// Called as part of the transition from the background to the active state; here you can undo
|
||||
// many of the changes made on entering the background.
|
||||
}
|
||||
|
||||
- (void)applicationDidBecomeActive:(UIApplication *)application {
|
||||
// Restart any tasks that were paused (or not yet started) while the application was inactive. If
|
||||
// the application was previously in the background, optionally refresh the user interface.
|
||||
}
|
||||
|
||||
- (void)applicationWillTerminate:(UIApplication *)application {
|
||||
// Called when the application is about to terminate. Save data if appropriate. See also
|
||||
// applicationDidEnterBackground:.
|
||||
}
|
||||
|
||||
@end
|
|
@ -0,0 +1,99 @@
|
|||
{
|
||||
"images" : [
|
||||
{
|
||||
"idiom" : "iphone",
|
||||
"size" : "20x20",
|
||||
"scale" : "2x"
|
||||
},
|
||||
{
|
||||
"idiom" : "iphone",
|
||||
"size" : "20x20",
|
||||
"scale" : "3x"
|
||||
},
|
||||
{
|
||||
"idiom" : "iphone",
|
||||
"size" : "29x29",
|
||||
"scale" : "2x"
|
||||
},
|
||||
{
|
||||
"idiom" : "iphone",
|
||||
"size" : "29x29",
|
||||
"scale" : "3x"
|
||||
},
|
||||
{
|
||||
"idiom" : "iphone",
|
||||
"size" : "40x40",
|
||||
"scale" : "2x"
|
||||
},
|
||||
{
|
||||
"idiom" : "iphone",
|
||||
"size" : "40x40",
|
||||
"scale" : "3x"
|
||||
},
|
||||
{
|
||||
"idiom" : "iphone",
|
||||
"size" : "60x60",
|
||||
"scale" : "2x"
|
||||
},
|
||||
{
|
||||
"idiom" : "iphone",
|
||||
"size" : "60x60",
|
||||
"scale" : "3x"
|
||||
},
|
||||
{
|
||||
"idiom" : "ipad",
|
||||
"size" : "20x20",
|
||||
"scale" : "1x"
|
||||
},
|
||||
{
|
||||
"idiom" : "ipad",
|
||||
"size" : "20x20",
|
||||
"scale" : "2x"
|
||||
},
|
||||
{
|
||||
"idiom" : "ipad",
|
||||
"size" : "29x29",
|
||||
"scale" : "1x"
|
||||
},
|
||||
{
|
||||
"idiom" : "ipad",
|
||||
"size" : "29x29",
|
||||
"scale" : "2x"
|
||||
},
|
||||
{
|
||||
"idiom" : "ipad",
|
||||
"size" : "40x40",
|
||||
"scale" : "1x"
|
||||
},
|
||||
{
|
||||
"idiom" : "ipad",
|
||||
"size" : "40x40",
|
||||
"scale" : "2x"
|
||||
},
|
||||
{
|
||||
"idiom" : "ipad",
|
||||
"size" : "76x76",
|
||||
"scale" : "1x"
|
||||
},
|
||||
{
|
||||
"idiom" : "ipad",
|
||||
"size" : "76x76",
|
||||
"scale" : "2x"
|
||||
},
|
||||
{
|
||||
"idiom" : "ipad",
|
||||
"size" : "83.5x83.5",
|
||||
"scale" : "2x"
|
||||
},
|
||||
{
|
||||
"idiom" : "ios-marketing",
|
||||
"size" : "1024x1024",
|
||||
"scale" : "1x"
|
||||
}
|
||||
],
|
||||
"info" : {
|
||||
"version" : 1,
|
||||
"author" : "xcode"
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,7 @@
|
|||
{
|
||||
"info" : {
|
||||
"version" : 1,
|
||||
"author" : "xcode"
|
||||
}
|
||||
}
|
||||
|
76
mediapipe/examples/ios/facemeshgpu/BUILD
Normal file
|
@ -0,0 +1,76 @@
|
|||
# Copyright 2019 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
load(
|
||||
"@build_bazel_rules_apple//apple:ios.bzl",
|
||||
"ios_application",
|
||||
)
|
||||
|
||||
licenses(["notice"]) # Apache 2.0
|
||||
|
||||
MIN_IOS_VERSION = "10.0"
|
||||
|
||||
ios_application(
|
||||
name = "FaceMeshGpuApp",
|
||||
bundle_id = "com.google.mediapipe.FaceMeshGpu",
|
||||
families = [
|
||||
"iphone",
|
||||
"ipad",
|
||||
],
|
||||
infoplists = ["Info.plist"],
|
||||
minimum_os_version = MIN_IOS_VERSION,
|
||||
provisioning_profile = "//mediapipe/examples/ios:provisioning_profile",
|
||||
deps = [
|
||||
":FaceMeshGpuAppLibrary",
|
||||
"@ios_opencv//:OpencvFramework",
|
||||
],
|
||||
)
|
||||
|
||||
objc_library(
|
||||
name = "FaceMeshGpuAppLibrary",
|
||||
srcs = [
|
||||
"AppDelegate.m",
|
||||
"ViewController.mm",
|
||||
"main.m",
|
||||
],
|
||||
hdrs = [
|
||||
"AppDelegate.h",
|
||||
"ViewController.h",
|
||||
],
|
||||
data = [
|
||||
"Base.lproj/LaunchScreen.storyboard",
|
||||
"Base.lproj/Main.storyboard",
|
||||
"//mediapipe/graphs/face_mesh:face_mesh_mobile_gpu_binary_graph",
|
||||
"//mediapipe/modules/face_detection:face_detection_front.tflite",
|
||||
"//mediapipe/modules/face_landmark:face_landmark.tflite",
|
||||
],
|
||||
sdk_frameworks = [
|
||||
"AVFoundation",
|
||||
"CoreGraphics",
|
||||
"CoreMedia",
|
||||
"UIKit",
|
||||
],
|
||||
deps = [
|
||||
"//mediapipe/objc:mediapipe_framework_ios",
|
||||
"//mediapipe/objc:mediapipe_input_sources_ios",
|
||||
"//mediapipe/objc:mediapipe_layer_renderer",
|
||||
] + select({
|
||||
"//mediapipe:ios_i386": [],
|
||||
"//mediapipe:ios_x86_64": [],
|
||||
"//conditions:default": [
|
||||
"//mediapipe/graphs/face_mesh:mobile_calculators",
|
||||
"//mediapipe/framework/formats:landmark_cc_proto",
|
||||
],
|
||||
}),
|
||||
)
|
|
@ -0,0 +1,25 @@
|
|||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<document type="com.apple.InterfaceBuilder3.CocoaTouch.Storyboard.XIB" version="3.0" toolsVersion="13122.16" targetRuntime="iOS.CocoaTouch" propertyAccessControl="none" useAutolayout="YES" launchScreen="YES" useTraitCollections="YES" useSafeAreas="YES" colorMatched="YES" initialViewController="01J-lp-oVM">
|
||||
<dependencies>
|
||||
<plugIn identifier="com.apple.InterfaceBuilder.IBCocoaTouchPlugin" version="13104.12"/>
|
||||
<capability name="Safe area layout guides" minToolsVersion="9.0"/>
|
||||
<capability name="documents saved in the Xcode 8 format" minToolsVersion="8.0"/>
|
||||
</dependencies>
|
||||
<scenes>
|
||||
<!--View Controller-->
|
||||
<scene sceneID="EHf-IW-A2E">
|
||||
<objects>
|
||||
<viewController id="01J-lp-oVM" sceneMemberID="viewController">
|
||||
<view key="view" contentMode="scaleToFill" id="Ze5-6b-2t3">
|
||||
<rect key="frame" x="0.0" y="0.0" width="375" height="667"/>
|
||||
<autoresizingMask key="autoresizingMask" widthSizable="YES" heightSizable="YES"/>
|
||||
<color key="backgroundColor" red="1" green="1" blue="1" alpha="1" colorSpace="custom" customColorSpace="sRGB"/>
|
||||
<viewLayoutGuide key="safeArea" id="6Tk-OE-BBY"/>
|
||||
</view>
|
||||
</viewController>
|
||||
<placeholder placeholderIdentifier="IBFirstResponder" id="iYj-Kq-Ea1" userLabel="First Responder" sceneMemberID="firstResponder"/>
|
||||
</objects>
|
||||
<point key="canvasLocation" x="53" y="375"/>
|
||||
</scene>
|
||||
</scenes>
|
||||
</document>
|
|
@ -0,0 +1,51 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<document type="com.apple.InterfaceBuilder3.CocoaTouch.Storyboard.XIB" version="3.0" toolsVersion="14490.70" targetRuntime="iOS.CocoaTouch" propertyAccessControl="none" useAutolayout="YES" useTraitCollections="YES" useSafeAreas="YES" colorMatched="YES" initialViewController="BYZ-38-t0r">
|
||||
<device id="retina4_7" orientation="portrait">
|
||||
<adaptation id="fullscreen"/>
|
||||
</device>
|
||||
<dependencies>
|
||||
<plugIn identifier="com.apple.InterfaceBuilder.IBCocoaTouchPlugin" version="14490.49"/>
|
||||
<capability name="Safe area layout guides" minToolsVersion="9.0"/>
|
||||
<capability name="documents saved in the Xcode 8 format" minToolsVersion="8.0"/>
|
||||
</dependencies>
|
||||
<scenes>
|
||||
<!--View Controller-->
|
||||
<scene sceneID="tne-QT-ifu">
|
||||
<objects>
|
||||
<viewController id="BYZ-38-t0r" customClass="ViewController" sceneMemberID="viewController">
|
||||
<view key="view" contentMode="scaleToFill" id="8bC-Xf-vdC">
|
||||
<rect key="frame" x="0.0" y="0.0" width="375" height="667"/>
|
||||
<autoresizingMask key="autoresizingMask" widthSizable="YES" heightSizable="YES"/>
|
||||
<subviews>
|
||||
<view contentMode="scaleToFill" fixedFrame="YES" translatesAutoresizingMaskIntoConstraints="NO" id="EfB-xq-knP">
|
||||
<rect key="frame" x="0.0" y="20" width="375" height="647"/>
|
||||
<autoresizingMask key="autoresizingMask" widthSizable="YES" heightSizable="YES"/>
|
||||
<subviews>
|
||||
<label opaque="NO" userInteractionEnabled="NO" contentMode="left" horizontalHuggingPriority="251" verticalHuggingPriority="251" fixedFrame="YES" text="Camera access needed for this demo. Please enable camera access in the Settings app." textAlignment="center" lineBreakMode="tailTruncation" numberOfLines="0" baselineAdjustment="alignBaselines" adjustsFontSizeToFit="NO" translatesAutoresizingMaskIntoConstraints="NO" id="emf-N5-sEd">
|
||||
<rect key="frame" x="57" y="248" width="260" height="151"/>
|
||||
<autoresizingMask key="autoresizingMask" flexibleMinX="YES" flexibleMaxX="YES" flexibleMinY="YES" flexibleMaxY="YES"/>
|
||||
<fontDescription key="fontDescription" type="system" pointSize="17"/>
|
||||
<color key="textColor" white="1" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
|
||||
<nil key="highlightedColor"/>
|
||||
</label>
|
||||
</subviews>
|
||||
<color key="backgroundColor" white="0.0" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
|
||||
<accessibility key="accessibilityConfiguration" label="PreviewDisplayView">
|
||||
<bool key="isElement" value="YES"/>
|
||||
</accessibility>
|
||||
</view>
|
||||
</subviews>
|
||||
<color key="backgroundColor" red="1" green="1" blue="1" alpha="1" colorSpace="custom" customColorSpace="sRGB"/>
|
||||
<viewLayoutGuide key="safeArea" id="6Tk-OE-BBY"/>
|
||||
</view>
|
||||
<connections>
|
||||
<outlet property="_liveView" destination="EfB-xq-knP" id="JQp-2n-q9q"/>
|
||||
<outlet property="_noCameraLabel" destination="emf-N5-sEd" id="91G-3Z-cU3"/>
|
||||
</connections>
|
||||
</viewController>
|
||||
<placeholder placeholderIdentifier="IBFirstResponder" id="dkx-z0-nzr" sceneMemberID="firstResponder"/>
|
||||
</objects>
|
||||
<point key="canvasLocation" x="48.799999999999997" y="20.239880059970016"/>
|
||||
</scene>
|
||||
</scenes>
|
||||
</document>
|
42
mediapipe/examples/ios/facemeshgpu/Info.plist
Normal file
|
@ -0,0 +1,42 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
||||
<plist version="1.0">
|
||||
<dict>
|
||||
<key>NSCameraUsageDescription</key>
|
||||
<string>This app uses the camera to demonstrate live video processing.</string>
|
||||
<key>CFBundleDevelopmentRegion</key>
|
||||
<string>en</string>
|
||||
<key>CFBundleExecutable</key>
|
||||
<string>$(EXECUTABLE_NAME)</string>
|
||||
<key>CFBundleIdentifier</key>
|
||||
<string>$(PRODUCT_BUNDLE_IDENTIFIER)</string>
|
||||
<key>CFBundleInfoDictionaryVersion</key>
|
||||
<string>6.0</string>
|
||||
<key>CFBundleName</key>
|
||||
<string>$(PRODUCT_NAME)</string>
|
||||
<key>CFBundlePackageType</key>
|
||||
<string>APPL</string>
|
||||
<key>CFBundleShortVersionString</key>
|
||||
<string>1.0</string>
|
||||
<key>CFBundleVersion</key>
|
||||
<string>1</string>
|
||||
<key>LSRequiresIPhoneOS</key>
|
||||
<true/>
|
||||
<key>UILaunchStoryboardName</key>
|
||||
<string>LaunchScreen</string>
|
||||
<key>UIMainStoryboardFile</key>
|
||||
<string>Main</string>
|
||||
<key>UIRequiredDeviceCapabilities</key>
|
||||
<array>
|
||||
<string>armv7</string>
|
||||
</array>
|
||||
<key>UISupportedInterfaceOrientations</key>
|
||||
<array>
|
||||
<string>UIInterfaceOrientationPortrait</string>
|
||||
</array>
|
||||
<key>UISupportedInterfaceOrientations~ipad</key>
|
||||
<array>
|
||||
<string>UIInterfaceOrientationPortrait</string>
|
||||
</array>
|
||||
</dict>
|
||||
</plist>
|
19
mediapipe/examples/ios/facemeshgpu/ViewController.h
Normal file
|
@ -0,0 +1,19 @@
|
|||
// Copyright 2019 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#import <UIKit/UIKit.h>
|
||||
|
||||
@interface ViewController : UIViewController
|
||||
|
||||
@end
|
210
mediapipe/examples/ios/facemeshgpu/ViewController.mm
Normal file
|
@ -0,0 +1,210 @@
|
|||
// Copyright 2019 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#import "ViewController.h"
|
||||
|
||||
#import "mediapipe/objc/MPPCameraInputSource.h"
|
||||
#import "mediapipe/objc/MPPGraph.h"
|
||||
#import "mediapipe/objc/MPPLayerRenderer.h"
|
||||
|
||||
#include "mediapipe/framework/formats/landmark.pb.h"
|
||||
|
||||
static NSString* const kGraphName = @"face_mesh_mobile_gpu";
|
||||
|
||||
static const char* kInputStream = "input_video";
|
||||
static const char* kNumFacesInputSidePacket = "num_faces";
|
||||
static const char* kOutputStream = "output_video";
|
||||
static const char* kLandmarksOutputStream = "multi_face_landmarks";
|
||||
static const char* kVideoQueueLabel = "com.google.mediapipe.example.videoQueue";
|
||||
|
||||
// Max number of faces to detect/process.
|
||||
static const int kNumFaces = 1;
|
||||
|
||||
@interface ViewController () <MPPGraphDelegate, MPPInputSourceDelegate>
|
||||
|
||||
// The MediaPipe graph currently in use. Initialized in viewDidLoad, started in viewWillAppear: and
|
||||
// sent video frames on _videoQueue.
|
||||
@property(nonatomic) MPPGraph* mediapipeGraph;
|
||||
|
||||
@end
|
||||
|
||||
@implementation ViewController {
|
||||
/// Handles camera access via AVCaptureSession library.
|
||||
MPPCameraInputSource* _cameraSource;
|
||||
|
||||
/// Inform the user when camera is unavailable.
|
||||
IBOutlet UILabel* _noCameraLabel;
|
||||
/// Display the camera preview frames.
|
||||
IBOutlet UIView* _liveView;
|
||||
/// Render frames in a layer.
|
||||
MPPLayerRenderer* _renderer;
|
||||
|
||||
/// Process camera frames on this queue.
|
||||
dispatch_queue_t _videoQueue;
|
||||
}
|
||||
|
||||
#pragma mark - Cleanup methods
|
||||
|
||||
- (void)dealloc {
|
||||
self.mediapipeGraph.delegate = nil;
|
||||
[self.mediapipeGraph cancel];
|
||||
// Ignore errors since we're cleaning up.
|
||||
[self.mediapipeGraph closeAllInputStreamsWithError:nil];
|
||||
[self.mediapipeGraph waitUntilDoneWithError:nil];
|
||||
}
|
||||
|
||||
#pragma mark - MediaPipe graph methods
|
||||
|
||||
+ (MPPGraph*)loadGraphFromResource:(NSString*)resource {
|
||||
// Load the graph config resource.
|
||||
NSError* configLoadError = nil;
|
||||
NSBundle* bundle = [NSBundle bundleForClass:[self class]];
|
||||
if (!resource || resource.length == 0) {
|
||||
return nil;
|
||||
}
|
||||
NSURL* graphURL = [bundle URLForResource:resource withExtension:@"binarypb"];
|
||||
NSData* data = [NSData dataWithContentsOfURL:graphURL options:0 error:&configLoadError];
|
||||
if (!data) {
|
||||
NSLog(@"Failed to load MediaPipe graph config: %@", configLoadError);
|
||||
return nil;
|
||||
}
|
||||
|
||||
// Parse the graph config resource into mediapipe::CalculatorGraphConfig proto object.
|
||||
mediapipe::CalculatorGraphConfig config;
|
||||
config.ParseFromArray(data.bytes, data.length);
|
||||
|
||||
// Create MediaPipe graph with mediapipe::CalculatorGraphConfig proto object.
|
||||
MPPGraph* newGraph = [[MPPGraph alloc] initWithGraphConfig:config];
|
||||
[newGraph addFrameOutputStream:kOutputStream outputPacketType:MPPPacketTypePixelBuffer];
|
||||
[newGraph addFrameOutputStream:kLandmarksOutputStream outputPacketType:MPPPacketTypeRaw];
|
||||
[newGraph setSidePacket:(mediapipe::MakePacket<int>(kNumFaces)) named:kNumFacesInputSidePacket];
|
||||
return newGraph;
|
||||
}
|
||||
|
||||
#pragma mark - UIViewController methods
|
||||
|
||||
- (void)viewDidLoad {
|
||||
[super viewDidLoad];
|
||||
|
||||
_renderer = [[MPPLayerRenderer alloc] init];
|
||||
_renderer.layer.frame = _liveView.layer.bounds;
|
||||
[_liveView.layer addSublayer:_renderer.layer];
|
||||
_renderer.frameScaleMode = MPPFrameScaleModeFillAndCrop;
|
||||
// When using the front camera, mirror the input for a more natural look.
|
||||
_renderer.mirrored = YES;
|
||||
|
||||
dispatch_queue_attr_t qosAttribute = dispatch_queue_attr_make_with_qos_class(
|
||||
DISPATCH_QUEUE_SERIAL, QOS_CLASS_USER_INTERACTIVE, /*relative_priority=*/0);
|
||||
_videoQueue = dispatch_queue_create(kVideoQueueLabel, qosAttribute);
|
||||
|
||||
_cameraSource = [[MPPCameraInputSource alloc] init];
|
||||
[_cameraSource setDelegate:self queue:_videoQueue];
|
||||
_cameraSource.sessionPreset = AVCaptureSessionPresetHigh;
|
||||
_cameraSource.cameraPosition = AVCaptureDevicePositionFront;
|
||||
// The frame's native format is rotated with respect to the portrait orientation.
|
||||
_cameraSource.orientation = AVCaptureVideoOrientationPortrait;
|
||||
|
||||
self.mediapipeGraph = [[self class] loadGraphFromResource:kGraphName];
|
||||
self.mediapipeGraph.delegate = self;
|
||||
// Set maxFramesInFlight to a small value to avoid memory contention for real-time processing.
|
||||
self.mediapipeGraph.maxFramesInFlight = 2;
|
||||
}
|
||||
|
||||
// In this application, there is only one ViewController which has no navigation to other view
|
||||
// controllers, and there is only one View with live display showing the result of running the
|
||||
// MediaPipe graph on the live video feed. If more view controllers are needed later, the graph
|
||||
// setup/teardown and camera start/stop logic should be updated appropriately in response to the
|
||||
// appearance/disappearance of this ViewController, as viewWillAppear: can be invoked multiple times
|
||||
// depending on the application navigation flow in that case.
|
||||
- (void)viewWillAppear:(BOOL)animated {
|
||||
[super viewWillAppear:animated];
|
||||
|
||||
[_cameraSource requestCameraAccessWithCompletionHandler:^void(BOOL granted) {
|
||||
if (granted) {
|
||||
[self startGraphAndCamera];
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
_noCameraLabel.hidden = YES;
|
||||
});
|
||||
}
|
||||
}];
|
||||
}
|
||||
|
||||
- (void)startGraphAndCamera {
|
||||
// Start running self.mediapipeGraph.
|
||||
NSError* error;
|
||||
if (![self.mediapipeGraph startWithError:&error]) {
|
||||
NSLog(@"Failed to start graph: %@", error);
|
||||
}
|
||||
|
||||
// Start fetching frames from the camera.
|
||||
dispatch_async(_videoQueue, ^{
|
||||
[_cameraSource start];
|
||||
});
|
||||
}
|
||||
|
||||
#pragma mark - MPPGraphDelegate methods
|
||||
|
||||
// Receives CVPixelBufferRef from the MediaPipe graph. Invoked on a MediaPipe worker thread.
|
||||
- (void)mediapipeGraph:(MPPGraph*)graph
|
||||
didOutputPixelBuffer:(CVPixelBufferRef)pixelBuffer
|
||||
fromStream:(const std::string&)streamName {
|
||||
if (streamName == kOutputStream) {
|
||||
// Display the captured image on the screen.
|
||||
CVPixelBufferRetain(pixelBuffer);
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
[_renderer renderPixelBuffer:pixelBuffer];
|
||||
CVPixelBufferRelease(pixelBuffer);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Receives a raw packet from the MediaPipe graph. Invoked on a MediaPipe worker thread.
|
||||
- (void)mediapipeGraph:(MPPGraph*)graph
|
||||
didOutputPacket:(const ::mediapipe::Packet&)packet
|
||||
fromStream:(const std::string&)streamName {
|
||||
if (streamName == kLandmarksOutputStream) {
|
||||
if (packet.IsEmpty()) {
|
||||
NSLog(@"[TS:%lld] No face landmarks", packet.Timestamp().Value());
|
||||
return;
|
||||
}
|
||||
const auto& multi_face_landmarks = packet.Get<std::vector<::mediapipe::NormalizedLandmarkList>>();
|
||||
NSLog(@"[TS:%lld] Number of face instances with landmarks: %lu", packet.Timestamp().Value(),
|
||||
multi_face_landmarks.size());
|
||||
for (int face_index = 0; face_index < multi_face_landmarks.size(); ++face_index) {
|
||||
const auto& landmarks = multi_face_landmarks[face_index];
|
||||
NSLog(@"\tNumber of landmarks for face[%d]: %d", face_index, landmarks.landmark_size());
|
||||
for (int i = 0; i < landmarks.landmark_size(); ++i) {
|
||||
NSLog(@"\t\tLandmark[%d]: (%f, %f, %f)", i, landmarks.landmark(i).x(),
|
||||
landmarks.landmark(i).y(), landmarks.landmark(i).z());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#pragma mark - MPPInputSourceDelegate methods
|
||||
|
||||
// Must be invoked on _videoQueue.
|
||||
- (void)processVideoFrame:(CVPixelBufferRef)imageBuffer
|
||||
timestamp:(CMTime)timestamp
|
||||
fromSource:(MPPInputSource*)source {
|
||||
if (source != _cameraSource) {
|
||||
NSLog(@"Unknown source: %@", source);
|
||||
return;
|
||||
}
|
||||
[self.mediapipeGraph sendPixelBuffer:imageBuffer
|
||||
intoStream:kInputStream
|
||||
packetType:MPPPacketTypePixelBuffer];
|
||||
}
|
||||
|
||||
@end
|
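The loadGraphFromResource: and viewDidLoad methods above parse the compiled face_mesh_mobile_gpu.binarypb into a CalculatorGraphConfig, register output-stream callbacks, and inject the num_faces side packet. A hedged C++ sketch of the same sequence with the CalculatorGraph API (the file reading and include paths are illustrative assumptions, not code from the import):

```cpp
#include <fstream>
#include <sstream>
#include <string>
#include <vector>

#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/port/canonical_errors.h"
#include "mediapipe/framework/port/logging.h"

// Sketch of the Objective-C setup above in C++: parse the serialized
// CalculatorGraphConfig, observe the landmark stream, and pass num_faces as a
// side packet when starting the run.
::mediapipe::Status SetUpFaceMeshGraph(const std::string& binary_graph_path,
                                       ::mediapipe::CalculatorGraph* graph) {
  std::ifstream stream(binary_graph_path, std::ios::binary);
  std::stringstream buffer;
  buffer << stream.rdbuf();

  ::mediapipe::CalculatorGraphConfig config;
  if (!config.ParseFromString(buffer.str())) {
    return ::mediapipe::InternalError("Failed to parse binary graph config.");
  }

  MP_RETURN_IF_ERROR(graph->Initialize(config));
  MP_RETURN_IF_ERROR(graph->ObserveOutputStream(
      "multi_face_landmarks", [](const ::mediapipe::Packet& packet) {
        // Mirrors the didOutputPacket:fromStream: callback above.
        const auto& faces =
            packet.Get<std::vector<::mediapipe::NormalizedLandmarkList>>();
        LOG(INFO) << "Faces with landmarks: " << faces.size();
        return ::mediapipe::OkStatus();
      }));
  return graph->StartRun({{"num_faces", ::mediapipe::MakePacket<int>(1)}});
}
```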
22
mediapipe/examples/ios/facemeshgpu/main.m
Normal file
|
@ -0,0 +1,22 @@
|
|||
// Copyright 2019 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#import <UIKit/UIKit.h>
|
||||
#import "AppDelegate.h"
|
||||
|
||||
int main(int argc, char * argv[]) {
|
||||
@autoreleasepool {
|
||||
return UIApplicationMain(argc, argv, nil, NSStringFromClass([AppDelegate class]));
|
||||
}
|
||||
}
|
|
@ -28,6 +28,8 @@
|
|||
namespace mediapipe {
|
||||
namespace {
|
||||
|
||||
constexpr int kIntTestValue = 33;
|
||||
|
||||
typedef std::function<::mediapipe::Status(CalculatorContext* cc)>
|
||||
CalculatorContextFunction;
|
||||
|
||||
|
@ -617,8 +619,9 @@ TEST(CalculatorGraphBoundsTest, ImmediateHandlerBounds) {
|
|||
MP_ASSERT_OK(graph.WaitUntilIdle());
|
||||
|
||||
// Add four packets into the graph.
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
Packet p = MakePacket<int>(33).At(Timestamp(i));
|
||||
constexpr int kNumInputs = 4;
|
||||
for (int i = 0; i < kNumInputs; ++i) {
|
||||
Packet p = MakePacket<int>(kIntTestValue).At(Timestamp(i));
|
||||
MP_ASSERT_OK(graph.AddPacketToInputStream("input", p));
|
||||
}
|
||||
|
||||
|
@ -709,7 +712,7 @@ REGISTER_CALCULATOR(FuturePacketCalculator);
|
|||
// produces no output packets.
|
||||
TEST(CalculatorGraphBoundsTest, OffsetBoundPropagation) {
|
||||
// OffsetBoundCalculator produces only timestamp bounds.
|
||||
// The PassthroughCalculator delivers an output packet whenever the
|
||||
// The PassThroughCalculator delivers an output packet whenever the
|
||||
// OffsetBoundCalculator delivers a timestamp bound.
|
||||
CalculatorGraphConfig config =
|
||||
::mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(R"(
|
||||
|
@ -740,7 +743,7 @@ TEST(CalculatorGraphBoundsTest, OffsetBoundPropagation) {
|
|||
// Add four packets into the graph.
|
||||
constexpr int kNumInputs = 4;
|
||||
for (int i = 0; i < kNumInputs; ++i) {
|
||||
Packet p = MakePacket<int>(33).At(Timestamp(i));
|
||||
Packet p = MakePacket<int>(kIntTestValue).At(Timestamp(i));
|
||||
MP_ASSERT_OK(graph.AddPacketToInputStream("input", p));
|
||||
}
|
||||
|
||||
|
@ -791,12 +794,15 @@ TEST(CalculatorGraphBoundsTest, BoundWithoutInputPackets) {
|
|||
// Add four packets into the graph.
|
||||
constexpr int kNumInputs = 4;
|
||||
for (int i = 0; i < kNumInputs; ++i) {
|
||||
Packet p = MakePacket<int>(33).At(Timestamp(i));
|
||||
Packet p = MakePacket<int>(kIntTestValue).At(Timestamp(i));
|
||||
MP_ASSERT_OK(graph.AddPacketToInputStream("input", p));
|
||||
MP_ASSERT_OK(graph.WaitUntilIdle());
|
||||
}
|
||||
|
||||
// No packets arrive, because updated timestamp bounds do not invoke
|
||||
// No packets arrive, because FuturePacketCalculator produces 4 packets but
|
||||
// OffsetBoundCalculator relays only the 4 timestamps without any packets, and
|
||||
// BoundToPacketCalculator does not process timestamps using
|
||||
// SetProcessTimestampBounds. Thus, the graph does not invoke
|
||||
// BoundToPacketCalculator::Process.
|
||||
MP_ASSERT_OK(graph.WaitUntilIdle());
|
||||
EXPECT_EQ(output_packets.size(), 0);
|
||||
|
@ -1138,6 +1144,8 @@ class ProcessBoundToPacketCalculator : public CalculatorBase {
|
|||
::mediapipe::Status Process(CalculatorContext* cc) final {
|
||||
for (int i = 0; i < cc->Outputs().NumEntries(); ++i) {
|
||||
Timestamp t = cc->Inputs().Index(i).Value().Timestamp();
|
||||
// Create a new packet for each input stream with a new timestamp bound,
|
||||
// as long as the new timestamp satisfies the output timestamp bound.
|
||||
if (t == cc->InputTimestamp() &&
|
||||
t >= cc->Outputs().Index(i).NextTimestampBound()) {
|
||||
cc->Outputs().Index(i).Add(new auto(t), t);
|
||||
|
@ -1168,6 +1176,8 @@ class ImmediatePassthroughCalculator : public CalculatorBase {
|
|||
if (!cc->Inputs().Index(i).IsEmpty()) {
|
||||
cc->Outputs().Index(i).AddPacket(cc->Inputs().Index(i).Value());
|
||||
} else {
|
||||
// Update the output stream "i" nextTimestampBound to the timestamp at
|
||||
// which a packet may next be available in input stream "i".
|
||||
Timestamp input_bound =
|
||||
cc->Inputs().Index(i).Value().Timestamp().NextAllowedInStream();
|
||||
if (cc->Outputs().Index(i).NextTimestampBound() < input_bound) {
|
||||
|
@ -1219,33 +1229,22 @@ void TestProcessForEmptyInputs(const std::string& input_stream_handler) {
|
|||
MP_ASSERT_OK(graph.StartRun({}));
|
||||
MP_ASSERT_OK(graph.WaitUntilIdle());
|
||||
|
||||
// Add four packets into the graph.
|
||||
// Add four packets into the graph at ts {0, 10, 20, 30}.
|
||||
constexpr int kFutureMicros = FuturePacketCalculator::kOutputFutureMicros;
|
||||
Packet p;
|
||||
p = MakePacket<int>(33).At(Timestamp(0));
|
||||
constexpr int kNumInputs = 4;
|
||||
std::vector<Timestamp> expected;
|
||||
for (int i = 0; i < kNumInputs; ++i) {
|
||||
const int ts = i * 10;
|
||||
Packet p = MakePacket<int>(kIntTestValue).At(Timestamp(ts));
|
||||
MP_ASSERT_OK(graph.AddPacketToInputStream("input", p));
|
||||
MP_ASSERT_OK(graph.WaitUntilIdle());
|
||||
|
||||
p = MakePacket<int>(33).At(Timestamp(10));
|
||||
MP_ASSERT_OK(graph.AddPacketToInputStream("input", p));
|
||||
MP_ASSERT_OK(graph.WaitUntilIdle());
|
||||
|
||||
p = MakePacket<int>(33).At(Timestamp(20));
|
||||
MP_ASSERT_OK(graph.AddPacketToInputStream("input", p));
|
||||
MP_ASSERT_OK(graph.WaitUntilIdle());
|
||||
|
||||
p = MakePacket<int>(33).At(Timestamp(30));
|
||||
MP_ASSERT_OK(graph.AddPacketToInputStream("input", p));
|
||||
MP_ASSERT_OK(graph.WaitUntilIdle());
|
||||
expected.emplace_back(Timestamp(ts + kFutureMicros));
|
||||
}
|
||||
|
||||
// Packets arrive.
|
||||
MP_ASSERT_OK(graph.WaitUntilIdle());
|
||||
EXPECT_EQ(bounds_ts_packets.size(), 4);
|
||||
|
||||
std::vector<Timestamp> expected = {
|
||||
Timestamp(0 + kFutureMicros), Timestamp(10 + kFutureMicros),
|
||||
Timestamp(20 + kFutureMicros), Timestamp(30 + kFutureMicros)};
|
||||
EXPECT_EQ(GetContents<Timestamp>(bounds_ts_packets), expected);
|
||||
EXPECT_EQ(bounds_ts_packets.size(), kNumInputs);
|
||||
|
||||
// Shutdown the graph.
|
||||
MP_ASSERT_OK(graph.CloseAllPacketSources());
|
||||
|
@ -1335,34 +1334,41 @@ TEST(CalculatorGraphBoundsTest, ProcessTimestampBounds_Passthrough) {
|
|||
MP_ASSERT_OK(graph.WaitUntilIdle());
|
||||
|
||||
// Add four packets to input_0.
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
Packet p = MakePacket<int>(33).At(Timestamp(i * 10));
|
||||
constexpr int kNumInputs0 = 4;
|
||||
std::vector<Timestamp> expected_output_0;
|
||||
for (int i = 0; i < kNumInputs0; ++i) {
|
||||
const int ts = i * 10;
|
||||
Packet p = MakePacket<int>(kIntTestValue).At(Timestamp(ts));
|
||||
MP_ASSERT_OK(graph.AddPacketToInputStream("input_0", p));
|
||||
MP_ASSERT_OK(graph.WaitUntilIdle());
|
||||
|
||||
expected_output_0.emplace_back(Timestamp(ts));
|
||||
}
|
||||
|
||||
// Packets arrive.
|
||||
MP_ASSERT_OK(graph.WaitUntilIdle());
|
||||
EXPECT_EQ(output_0_packets.size(), 4);
|
||||
EXPECT_EQ(output_0_packets.size(), kNumInputs0);
|
||||
// No packets were pushed in "input_1".
|
||||
EXPECT_EQ(output_1_packets.size(), 0);
|
||||
std::vector<Timestamp> expected = //
|
||||
{Timestamp(0), Timestamp(10), Timestamp(20), Timestamp(30)};
|
||||
EXPECT_EQ(GetContents<Timestamp>(output_0_packets), expected);
|
||||
EXPECT_EQ(GetContents<Timestamp>(output_0_packets), expected_output_0);
|
||||
|
||||
// Add two timestamp bounds to bound_1.
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
Packet p = MakePacket<int>(33).At(Timestamp(10 + i * 10));
|
||||
// Add two timestamp bounds to "input_1" and update "bound_1" at {10, 20}.
|
||||
constexpr int kNumInputs1 = 2;
|
||||
std::vector<Timestamp> expected_output_1;
|
||||
for (int i = 0; i < kNumInputs1; ++i) {
|
||||
const int ts = 10 + i * 10;
|
||||
Packet p = MakePacket<int>(kIntTestValue).At(Timestamp(ts));
|
||||
MP_ASSERT_OK(graph.AddPacketToInputStream("input_1", p));
|
||||
MP_ASSERT_OK(graph.WaitUntilIdle());
|
||||
|
||||
expected_output_1.emplace_back(Timestamp(ts));
|
||||
}
|
||||
|
||||
// Bounds arrive.
|
||||
MP_ASSERT_OK(graph.WaitUntilIdle());
|
||||
EXPECT_EQ(output_0_packets.size(), 4);
|
||||
EXPECT_EQ(output_1_packets.size(), 2);
|
||||
expected = //
|
||||
{Timestamp(10), Timestamp(20)};
|
||||
EXPECT_EQ(GetContents<Timestamp>(output_1_packets), expected);
|
||||
EXPECT_EQ(output_0_packets.size(), kNumInputs0);
|
||||
EXPECT_EQ(output_1_packets.size(), kNumInputs1);
|
||||
EXPECT_EQ(GetContents<Timestamp>(output_1_packets), expected_output_1);
|
||||
|
||||
// Shutdown the graph.
|
||||
MP_ASSERT_OK(graph.CloseAllPacketSources());
|
||||
|
|
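The bounds tests above revolve around calculators that advance timestamp bounds without emitting packets. As a point of reference, a minimal, hypothetical calculator of that kind could look like the sketch below, written against the same ::mediapipe::Status-based calculator API these tests use (not part of the import):

```cpp
#include "mediapipe/framework/calculator_framework.h"

namespace mediapipe {

// Hypothetical calculator that consumes int packets but never emits any;
// it only advances the output stream's timestamp bound, which is the kind of
// behavior the OffsetBoundCalculator-based tests above exercise.
class BoundOnlyCalculator : public CalculatorBase {
 public:
  static ::mediapipe::Status GetContract(CalculatorContract* cc) {
    cc->Inputs().Index(0).Set<int>();
    cc->Outputs().Index(0).Set<int>();
    return ::mediapipe::OkStatus();
  }

  ::mediapipe::Status Process(CalculatorContext* cc) final {
    // Promise downstream nodes that no packet at or before the current input
    // timestamp will ever appear on this output stream.
    cc->Outputs().Index(0).SetNextTimestampBound(
        cc->InputTimestamp().NextAllowedInStream());
    return ::mediapipe::OkStatus();
  }
};
REGISTER_CALCULATOR(BoundOnlyCalculator);

}  // namespace mediapipe
```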
|
@ -186,6 +186,7 @@ cc_library(
|
|||
"//mediapipe/framework:packet",
|
||||
"//mediapipe/framework:timestamp",
|
||||
"//mediapipe/framework/port:integral_types",
|
||||
"@com_google_absl//absl/container:node_hash_map",
|
||||
"@com_google_absl//absl/time",
|
||||
],
|
||||
)
|
||||
|
|
|
@ -24,6 +24,7 @@
|
|||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/container/node_hash_map.h"
|
||||
#include "mediapipe/framework/calculator_profile.pb.h"
|
||||
#include "mediapipe/framework/packet.h"
|
||||
#include "mediapipe/framework/port/integral_types.h"
|
||||
|
@ -130,10 +131,10 @@ class AddressIdMap {
|
|||
return pointer_id_map_[id] = next_id++;
|
||||
}
|
||||
void clear() { pointer_id_map_.clear(); }
|
||||
const std::unordered_map<int64, int32>& map() { return pointer_id_map_; }
|
||||
const absl::node_hash_map<int64, int32>& map() { return pointer_id_map_; }
|
||||
|
||||
private:
|
||||
std::unordered_map<int64, int32> pointer_id_map_;
|
||||
absl::node_hash_map<int64, int32> pointer_id_map_;
|
||||
int32 next_id = 0;
|
||||
};
|
||||
|
||||
|
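The hunk above switches AddressIdMap's backing container from std::unordered_map to absl::node_hash_map, matching the "@com_google_absl//absl/container:node_hash_map" dependency added to the BUILD file earlier. A small, self-contained sketch of the same lookup-or-assign pattern with that container (illustrative only; the original uses the framework's int64/int32 typedefs):

```cpp
#include <cstdint>

#include "absl/container/node_hash_map.h"

// Stand-alone version of AddressIdMap's lookup-or-assign logic using
// absl::node_hash_map, which keeps per-node pointer stability like
// std::unordered_map while using Abseil's hash table implementation.
int32_t GetOrAssignId(absl::node_hash_map<int64_t, int32_t>* pointer_id_map,
                      int32_t* next_id, int64_t address) {
  auto it = pointer_id_map->find(address);
  if (it != pointer_id_map->end()) return it->second;
  return (*pointer_id_map)[address] = (*next_id)++;
}
```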
|
|
@ -568,7 +568,7 @@ class LambdaCalculator : public CalculatorBase {
|
|||
if (cc->InputSidePackets().HasTag("") > 0) {
|
||||
cc->InputSidePackets().Tag("").Set<ProcessFunction>();
|
||||
}
|
||||
for (std::string tag : {"OPEN", "PROCESS", "CLOSE"}) {
|
||||
for (const std::string& tag : {"OPEN", "PROCESS", "CLOSE"}) {
|
||||
if (cc->InputSidePackets().HasTag(tag)) {
|
||||
cc->InputSidePackets().Tag(tag).Set<CalculatorContextFunction>();
|
||||
}
|
||||
|
|
|
@ -150,7 +150,7 @@ static ::mediapipe::Status PrefixNames(std::string prefix,
|
|||
const proto_ns::RepeatedPtrField<ProtoString>& dst_streams) {
|
||||
ASSIGN_OR_RETURN(auto src_map, tool::TagMap::Create(src_streams));
|
||||
ASSIGN_OR_RETURN(auto dst_map, tool::TagMap::Create(dst_streams));
|
||||
for (auto it : dst_map->Mapping()) {
|
||||
for (const auto& it : dst_map->Mapping()) {
|
||||
const std::string& tag = it.first;
|
||||
const TagMap::TagData* src_tag_data =
|
||||
::mediapipe::FindOrNull(src_map->Mapping(), tag);
|
||||
|
|
69
mediapipe/graphs/face_mesh/BUILD
Normal file
|
@ -0,0 +1,69 @@
|
|||
# Copyright 2019 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
load(
|
||||
"//mediapipe/framework/tool:mediapipe_graph.bzl",
|
||||
"mediapipe_binary_graph",
|
||||
)
|
||||
|
||||
licenses(["notice"]) # Apache 2.0
|
||||
|
||||
package(default_visibility = ["//visibility:public"])
|
||||
|
||||
cc_library(
|
||||
name = "desktop_calculators",
|
||||
deps = [
|
||||
"//mediapipe/calculators/core:constant_side_packet_calculator",
|
||||
"//mediapipe/calculators/video:opencv_video_decoder_calculator",
|
||||
"//mediapipe/calculators/video:opencv_video_encoder_calculator",
|
||||
"//mediapipe/graphs/face_mesh/subgraphs:face_renderer_cpu",
|
||||
"//mediapipe/modules/face_landmark:face_landmark_front_cpu",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "desktop_live_calculators",
|
||||
deps = [
|
||||
"//mediapipe/calculators/core:constant_side_packet_calculator",
|
||||
"//mediapipe/calculators/core:flow_limiter_calculator",
|
||||
"//mediapipe/graphs/face_mesh/subgraphs:face_renderer_cpu",
|
||||
"//mediapipe/modules/face_landmark:face_landmark_front_cpu",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "desktop_live_gpu_calculators",
|
||||
deps = [
|
||||
"//mediapipe/calculators/core:constant_side_packet_calculator",
|
||||
"//mediapipe/calculators/core:flow_limiter_calculator",
|
||||
"//mediapipe/graphs/face_mesh/subgraphs:face_renderer_gpu",
|
||||
"//mediapipe/modules/face_landmark:face_landmark_front_gpu",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "mobile_calculators",
|
||||
deps = [
|
||||
"//mediapipe/calculators/core:flow_limiter_calculator",
|
||||
"//mediapipe/graphs/face_mesh/subgraphs:face_renderer_gpu",
|
||||
"//mediapipe/modules/face_landmark:face_landmark_front_gpu",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_binary_graph(
|
||||
name = "face_mesh_mobile_gpu_binary_graph",
|
||||
graph = "face_mesh_mobile.pbtxt",
|
||||
output_name = "face_mesh_mobile_gpu.binarypb",
|
||||
deps = [":mobile_calculators"],
|
||||
)
|
67
mediapipe/graphs/face_mesh/face_mesh_desktop.pbtxt
Normal file
|
@ -0,0 +1,67 @@
|
|||
# MediaPipe graph that performs face mesh on desktop with TensorFlow Lite
|
||||
# on CPU.
|
||||
|
||||
# Path to the input video file. (string)
|
||||
input_side_packet: "input_video_path"
|
||||
# Path to the output video file. (string)
|
||||
input_side_packet: "output_video_path"
|
||||
|
||||
# max_queue_size limits the number of packets enqueued on any input stream
|
||||
# by throttling inputs to the graph. This makes the graph only process one
|
||||
# frame at a time.
|
||||
max_queue_size: 1
|
||||
|
||||
# Decodes an input video file into images and a video header.
|
||||
node {
|
||||
calculator: "OpenCvVideoDecoderCalculator"
|
||||
input_side_packet: "INPUT_FILE_PATH:input_video_path"
|
||||
output_stream: "VIDEO:input_video"
|
||||
output_stream: "VIDEO_PRESTREAM:input_video_header"
|
||||
}
|
||||
|
||||
# Defines side packets for further use in the graph.
|
||||
node {
|
||||
calculator: "ConstantSidePacketCalculator"
|
||||
output_side_packet: "PACKET:num_faces"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
|
||||
packet { int_value: 1 }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Subgraph that detects faces and corresponding landmarks.
|
||||
node {
|
||||
calculator: "FaceLandmarkFrontCpu"
|
||||
input_stream: "IMAGE:input_video"
|
||||
input_side_packet: "NUM_FACES:num_faces"
|
||||
output_stream: "LANDMARKS:multi_face_landmarks"
|
||||
output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
|
||||
output_stream: "DETECTIONS:face_detections"
|
||||
output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
|
||||
}
|
||||
|
||||
# Subgraph that renders face-landmark annotation onto the input video.
|
||||
node {
|
||||
calculator: "FaceRendererCpu"
|
||||
input_stream: "IMAGE:input_video"
|
||||
input_stream: "LANDMARKS:multi_face_landmarks"
|
||||
input_stream: "NORM_RECTS:face_rects_from_landmarks"
|
||||
input_stream: "DETECTIONS:face_detections"
|
||||
output_stream: "IMAGE:output_video"
|
||||
}
|
||||
|
||||
# Encodes the annotated images into a video file, adopting properties specified
|
||||
# in the input video header, e.g., video framerate.
|
||||
node {
|
||||
calculator: "OpenCvVideoEncoderCalculator"
|
||||
input_stream: "VIDEO:output_video"
|
||||
input_stream: "VIDEO_PRESTREAM:input_video_header"
|
||||
input_side_packet: "OUTPUT_FILE_PATH:output_video_path"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.OpenCvVideoEncoderCalculatorOptions]: {
|
||||
codec: "avc1"
|
||||
video_format: "mp4"
|
||||
}
|
||||
}
|
||||
}
|
63
mediapipe/graphs/face_mesh/face_mesh_desktop_live.pbtxt
Normal file
|
@ -0,0 +1,63 @@
|
|||
# MediaPipe graph that performs face mesh with TensorFlow Lite on CPU.
|
||||
|
||||
# Input image. (ImageFrame)
|
||||
input_stream: "input_video"
|
||||
|
||||
# Output image with rendered results. (ImageFrame)
|
||||
output_stream: "output_video"
|
||||
# Collection of detected/processed faces, each represented as a list of
|
||||
# landmarks. (std::vector<NormalizedLandmarkList>)
|
||||
output_stream: "multi_face_landmarks"
|
||||
|
||||
# Throttles the images flowing downstream for flow control. It passes through
|
||||
# the very first incoming image unaltered, and waits for downstream nodes
|
||||
# (calculators and subgraphs) in the graph to finish their tasks before it
|
||||
# passes through another image. All images that come in while waiting are
|
||||
# dropped, limiting the number of in-flight images in most parts of the graph to
|
||||
# 1. This prevents the downstream nodes from queuing up incoming images and data
|
||||
# excessively, which leads to increased latency and memory usage, unwanted in
|
||||
# real-time mobile applications. It also eliminates unnecessary computation,
|
||||
# e.g., the output produced by a node may get dropped downstream if the
|
||||
# subsequent nodes are still busy processing previous inputs.
|
||||
node {
|
||||
calculator: "FlowLimiterCalculator"
|
||||
input_stream: "input_video"
|
||||
input_stream: "FINISHED:output_video"
|
||||
input_stream_info: {
|
||||
tag_index: "FINISHED"
|
||||
back_edge: true
|
||||
}
|
||||
output_stream: "throttled_input_video"
|
||||
}
|
||||
|
||||
# Defines side packets for further use in the graph.
|
||||
node {
|
||||
calculator: "ConstantSidePacketCalculator"
|
||||
output_side_packet: "PACKET:num_faces"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
|
||||
packet { int_value: 1 }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Subgraph that detects faces and corresponding landmarks.
|
||||
node {
|
||||
calculator: "FaceLandmarkFrontCpu"
|
||||
input_stream: "IMAGE:throttled_input_video"
|
||||
input_side_packet: "NUM_FACES:num_faces"
|
||||
output_stream: "LANDMARKS:multi_face_landmarks"
|
||||
output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
|
||||
output_stream: "DETECTIONS:face_detections"
|
||||
output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
|
||||
}
|
||||
|
||||
# Subgraph that renders face-landmark annotation onto the input image.
|
||||
node {
|
||||
calculator: "FaceRendererCpu"
|
||||
input_stream: "IMAGE:throttled_input_video"
|
||||
input_stream: "LANDMARKS:multi_face_landmarks"
|
||||
input_stream: "NORM_RECTS:face_rects_from_landmarks"
|
||||
input_stream: "DETECTIONS:face_detections"
|
||||
output_stream: "IMAGE:output_video"
|
||||
}
|
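The live CPU graph above publishes the annotated frames on output_video and the per-face landmark lists on multi_face_landmarks. A hedged sketch of how a desktop driver could drain that landmark stream with the C++ API; it assumes a CalculatorGraph already initialized from this config, with frames being fed to input_video elsewhere (e.g., a capture loop):

```cpp
#include <vector>

#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/port/logging.h"

// Sketch only: poll the "multi_face_landmarks" stream declared by the graph
// above. `graph` is assumed to be initialized but not yet started.
::mediapipe::Status PollLandmarks(::mediapipe::CalculatorGraph* graph) {
  ASSIGN_OR_RETURN(::mediapipe::OutputStreamPoller poller,
                   graph->AddOutputStreamPoller("multi_face_landmarks"));
  MP_RETURN_IF_ERROR(graph->StartRun({}));

  ::mediapipe::Packet packet;
  while (poller.Next(&packet)) {
    const auto& faces =
        packet.Get<std::vector<::mediapipe::NormalizedLandmarkList>>();
    for (const auto& face : faces) {
      LOG(INFO) << "Face with " << face.landmark_size() << " landmarks at ts "
                << packet.Timestamp().Value();
    }
  }
  return graph->WaitUntilDone();
}
```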
63
mediapipe/graphs/face_mesh/face_mesh_desktop_live_gpu.pbtxt
Normal file
|
@ -0,0 +1,63 @@
|
|||
# MediaPipe graph that performs face mesh with TensorFlow Lite on GPU.
|
||||
|
||||
# Input image. (GpuBuffer)
|
||||
input_stream: "input_video"
|
||||
|
||||
# Output image with rendered results. (GpuBuffer)
|
||||
output_stream: "output_video"
|
||||
# Collection of detected/processed faces, each represented as a list of
|
||||
# landmarks. (std::vector<NormalizedLandmarkList>)
|
||||
output_stream: "multi_face_landmarks"
|
||||
|
||||
# Throttles the images flowing downstream for flow control. It passes through
|
||||
# the very first incoming image unaltered, and waits for downstream nodes
|
||||
# (calculators and subgraphs) in the graph to finish their tasks before it
|
||||
# passes through another image. All images that come in while waiting are
|
||||
# dropped, limiting the number of in-flight images in most parts of the graph to
|
||||
# 1. This prevents the downstream nodes from queuing up incoming images and data
|
||||
# excessively, which leads to increased latency and memory usage, unwanted in
|
||||
# real-time mobile applications. It also eliminates unnecessary computation,
|
||||
# e.g., the output produced by a node may get dropped downstream if the
|
||||
# subsequent nodes are still busy processing previous inputs.
|
||||
node {
|
||||
calculator: "FlowLimiterCalculator"
|
||||
input_stream: "input_video"
|
||||
input_stream: "FINISHED:output_video"
|
||||
input_stream_info: {
|
||||
tag_index: "FINISHED"
|
||||
back_edge: true
|
||||
}
|
||||
output_stream: "throttled_input_video"
|
||||
}
|
||||
|
||||
# Defines side packets for further use in the graph.
|
||||
node {
|
||||
calculator: "ConstantSidePacketCalculator"
|
||||
output_side_packet: "PACKET:num_faces"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
|
||||
packet { int_value: 1 }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Subgraph that detects faces and corresponding landmarks.
|
||||
node {
|
||||
calculator: "FaceLandmarkFrontGpu"
|
||||
input_stream: "IMAGE:throttled_input_video"
|
||||
input_side_packet: "NUM_FACES:num_faces"
|
||||
output_stream: "LANDMARKS:multi_face_landmarks"
|
||||
output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
|
||||
output_stream: "DETECTIONS:face_detections"
|
||||
output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
|
||||
}
|
||||
|
||||
# Subgraph that renders face-landmark annotation onto the input image.
|
||||
node {
|
||||
calculator: "FaceRendererGpu"
|
||||
input_stream: "IMAGE:throttled_input_video"
|
||||
input_stream: "LANDMARKS:multi_face_landmarks"
|
||||
input_stream: "NORM_RECTS:face_rects_from_landmarks"
|
||||
input_stream: "DETECTIONS:face_detections"
|
||||
output_stream: "IMAGE:output_video"
|
||||
}
|
55
mediapipe/graphs/face_mesh/face_mesh_mobile.pbtxt
Normal file
|
@ -0,0 +1,55 @@
|
|||
# MediaPipe graph that performs face mesh with TensorFlow Lite on GPU.
|
||||
|
||||
# GPU buffer. (GpuBuffer)
|
||||
input_stream: "input_video"
|
||||
|
||||
# Max number of faces to detect/process. (int)
|
||||
input_side_packet: "num_faces"
|
||||
|
||||
# Output image with rendered results. (GpuBuffer)
|
||||
output_stream: "output_video"
|
||||
# Collection of detected/processed faces, each represented as a list of
|
||||
# landmarks. (std::vector<NormalizedLandmarkList>)
|
||||
output_stream: "multi_face_landmarks"
|
||||
|
||||
# Throttles the images flowing downstream for flow control. It passes through
|
||||
# the very first incoming image unaltered, and waits for downstream nodes
|
||||
# (calculators and subgraphs) in the graph to finish their tasks before it
|
||||
# passes through another image. All images that come in while waiting are
|
||||
# dropped, limiting the number of in-flight images in most parts of the graph to
|
||||
# 1. This prevents the downstream nodes from queuing up incoming images and data
|
||||
# excessively, which leads to increased latency and memory usage, unwanted in
|
||||
# real-time mobile applications. It also eliminates unnecessary computation,
|
||||
# e.g., the output produced by a node may get dropped downstream if the
|
||||
# subsequent nodes are still busy processing previous inputs.
|
||||
node {
|
||||
calculator: "FlowLimiterCalculator"
|
||||
input_stream: "input_video"
|
||||
input_stream: "FINISHED:output_video"
|
||||
input_stream_info: {
|
||||
tag_index: "FINISHED"
|
||||
back_edge: true
|
||||
}
|
||||
output_stream: "throttled_input_video"
|
||||
}
|
||||
|
||||
# Subgraph that detects faces and corresponding landmarks.
|
||||
node {
|
||||
calculator: "FaceLandmarkFrontGpu"
|
||||
input_stream: "IMAGE:throttled_input_video"
|
||||
input_side_packet: "NUM_FACES:num_faces"
|
||||
output_stream: "LANDMARKS:multi_face_landmarks"
|
||||
output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
|
||||
output_stream: "DETECTIONS:face_detections"
|
||||
output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
|
||||
}
|
||||
|
||||
# Subgraph that renders face-landmark annotation onto the input image.
|
||||
node {
|
||||
calculator: "FaceRendererGpu"
|
||||
input_stream: "IMAGE:throttled_input_video"
|
||||
input_stream: "LANDMARKS:multi_face_landmarks"
|
||||
input_stream: "NORM_RECTS:face_rects_from_landmarks"
|
||||
input_stream: "DETECTIONS:face_detections"
|
||||
output_stream: "IMAGE:output_video"
|
||||
}
|
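Unlike the desktop graphs above, the mobile graph leaves the maximum face count to the caller via the num_faces input side packet; the iOS ViewController earlier in this import supplies it with setSidePacket:named:. With the C++ API the same thing is a one-liner at StartRun time (sketch, not from the import):

```cpp
#include "mediapipe/framework/calculator_framework.h"

// Sketch: supply the "num_faces" side packet expected by face_mesh_mobile.pbtxt
// when starting an already-initialized CalculatorGraph.
::mediapipe::Status StartFaceMeshMobile(::mediapipe::CalculatorGraph* graph,
                                        int max_num_faces) {
  return graph->StartRun(
      {{"num_faces", ::mediapipe::MakePacket<int>(max_num_faces)}});
}
```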
51
mediapipe/graphs/face_mesh/subgraphs/BUILD
Normal file
|
@ -0,0 +1,51 @@
|
|||
# Copyright 2019 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
load(
|
||||
"//mediapipe/framework/tool:mediapipe_graph.bzl",
|
||||
"mediapipe_simple_subgraph",
|
||||
)
|
||||
|
||||
licenses(["notice"]) # Apache 2.0
|
||||
|
||||
package(default_visibility = ["//visibility:public"])
|
||||
|
||||
cc_library(
|
||||
name = "renderer_calculators",
|
||||
deps = [
|
||||
"//mediapipe/calculators/core:split_normalized_landmark_list_calculator",
|
||||
"//mediapipe/calculators/util:annotation_overlay_calculator",
|
||||
"//mediapipe/calculators/util:detections_to_render_data_calculator",
|
||||
"//mediapipe/calculators/util:landmarks_to_render_data_calculator",
|
||||
"//mediapipe/calculators/util:rect_to_render_data_calculator",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "face_renderer_gpu",
|
||||
graph = "face_renderer_gpu.pbtxt",
|
||||
register_as = "FaceRendererGpu",
|
||||
deps = [
|
||||
":renderer_calculators",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "face_renderer_cpu",
|
||||
graph = "face_renderer_cpu.pbtxt",
|
||||
register_as = "FaceRendererCpu",
|
||||
deps = [
|
||||
":renderer_calculators",
|
||||
],
|
||||
)
|
350
mediapipe/graphs/face_mesh/subgraphs/face_renderer_cpu.pbtxt
Normal file
|
@ -0,0 +1,350 @@
|
|||
# MediaPipe face mesh rendering subgraph.
|
||||
|
||||
type: "FaceRendererCpu"
|
||||
|
||||
# CPU image. (ImageFrame)
|
||||
input_stream: "IMAGE:input_image"
|
||||
# Collection of detected/predicted faces, each represented as a list of
|
||||
# landmarks. (std::vector<NormalizedLandmarkList>)
|
||||
input_stream: "LANDMARKS:multi_face_landmarks"
|
||||
# Regions of interest calculated based on face landmarks.
|
||||
# (std::vector<NormalizedRect>)
|
||||
input_stream: "NORM_RECTS:rects"
|
||||
# Detected faces. (std::vector<Detection>)
|
||||
input_stream: "DETECTIONS:detections"
|
||||
|
||||
# CPU image with rendered data. (ImageFrame)
|
||||
output_stream: "IMAGE:output_image"
|
||||
|
||||
node {
|
||||
calculator: "ImagePropertiesCalculator"
|
||||
input_stream: "IMAGE:input_image"
|
||||
output_stream: "SIZE:image_size"
|
||||
}
|
||||
|
||||
# Converts detections to drawing primitives for annotation overlay.
|
||||
node {
|
||||
calculator: "DetectionsToRenderDataCalculator"
|
||||
input_stream: "DETECTIONS:detections"
|
||||
output_stream: "RENDER_DATA:detections_render_data"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
|
||||
thickness: 4.0
|
||||
color { r: 0 g: 255 b: 0 }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Outputs each element of multi_face_landmarks at a fake timestamp for the rest
|
||||
# of the graph to process. At the end of the loop, outputs the BATCH_END
|
||||
# timestamp for downstream calculators to inform them that all elements in the
|
||||
# vector have been processed.
|
||||
node {
|
||||
calculator: "BeginLoopNormalizedLandmarkListVectorCalculator"
|
||||
input_stream: "ITERABLE:multi_face_landmarks"
|
||||
output_stream: "ITEM:face_landmarks"
|
||||
output_stream: "BATCH_END:landmark_timestamp"
|
||||
}
|
||||
|
||||
# Converts landmarks to drawing primitives for annotation overlay.
|
||||
node {
|
||||
calculator: "LandmarksToRenderDataCalculator"
|
||||
input_stream: "NORM_LANDMARKS:face_landmarks"
|
||||
output_stream: "RENDER_DATA:landmark_render_data"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
|
||||
# Lips.
|
||||
      landmark_connections: 61
      landmark_connections: 146
      landmark_connections: 146
      landmark_connections: 91
      landmark_connections: 91
      landmark_connections: 181
      landmark_connections: 181
      landmark_connections: 84
      landmark_connections: 84
      landmark_connections: 17
      landmark_connections: 17
      landmark_connections: 314
      landmark_connections: 314
      landmark_connections: 405
      landmark_connections: 405
      landmark_connections: 321
      landmark_connections: 321
      landmark_connections: 375
      landmark_connections: 375
      landmark_connections: 291
      landmark_connections: 61
      landmark_connections: 185
      landmark_connections: 185
      landmark_connections: 40
      landmark_connections: 40
      landmark_connections: 39
      landmark_connections: 39
      landmark_connections: 37
      landmark_connections: 37
      landmark_connections: 0
      landmark_connections: 0
      landmark_connections: 267
      landmark_connections: 267
      landmark_connections: 269
      landmark_connections: 269
      landmark_connections: 270
      landmark_connections: 270
      landmark_connections: 409
      landmark_connections: 409
      landmark_connections: 291
      landmark_connections: 78
      landmark_connections: 95
      landmark_connections: 95
      landmark_connections: 88
      landmark_connections: 88
      landmark_connections: 178
      landmark_connections: 178
      landmark_connections: 87
      landmark_connections: 87
      landmark_connections: 14
      landmark_connections: 14
      landmark_connections: 317
      landmark_connections: 317
      landmark_connections: 402
      landmark_connections: 402
      landmark_connections: 318
      landmark_connections: 318
      landmark_connections: 324
      landmark_connections: 324
      landmark_connections: 308
      landmark_connections: 78
      landmark_connections: 191
      landmark_connections: 191
      landmark_connections: 80
      landmark_connections: 80
      landmark_connections: 81
      landmark_connections: 81
      landmark_connections: 82
      landmark_connections: 82
      landmark_connections: 13
      landmark_connections: 13
      landmark_connections: 312
      landmark_connections: 312
      landmark_connections: 311
      landmark_connections: 311
      landmark_connections: 310
      landmark_connections: 310
      landmark_connections: 415
      landmark_connections: 415
      landmark_connections: 308
      # Left eye.
      landmark_connections: 33
      landmark_connections: 7
      landmark_connections: 7
      landmark_connections: 163
      landmark_connections: 163
      landmark_connections: 144
      landmark_connections: 144
      landmark_connections: 145
      landmark_connections: 145
      landmark_connections: 153
      landmark_connections: 153
      landmark_connections: 154
      landmark_connections: 154
      landmark_connections: 155
      landmark_connections: 155
      landmark_connections: 133
      landmark_connections: 33
      landmark_connections: 246
      landmark_connections: 246
      landmark_connections: 161
      landmark_connections: 161
      landmark_connections: 160
      landmark_connections: 160
      landmark_connections: 159
      landmark_connections: 159
      landmark_connections: 158
      landmark_connections: 158
      landmark_connections: 157
      landmark_connections: 157
      landmark_connections: 173
      landmark_connections: 173
      landmark_connections: 133
      # Left eyebrow.
      landmark_connections: 46
      landmark_connections: 53
      landmark_connections: 53
      landmark_connections: 52
      landmark_connections: 52
      landmark_connections: 65
      landmark_connections: 65
      landmark_connections: 55
      landmark_connections: 70
      landmark_connections: 63
      landmark_connections: 63
      landmark_connections: 105
      landmark_connections: 105
      landmark_connections: 66
      landmark_connections: 66
      landmark_connections: 107
      # Right eye.
      landmark_connections: 263
      landmark_connections: 249
      landmark_connections: 249
      landmark_connections: 390
      landmark_connections: 390
      landmark_connections: 373
      landmark_connections: 373
      landmark_connections: 374
      landmark_connections: 374
      landmark_connections: 380
      landmark_connections: 380
      landmark_connections: 381
      landmark_connections: 381
      landmark_connections: 382
      landmark_connections: 382
      landmark_connections: 362
      landmark_connections: 263
      landmark_connections: 466
      landmark_connections: 466
      landmark_connections: 388
      landmark_connections: 388
      landmark_connections: 387
      landmark_connections: 387
      landmark_connections: 386
      landmark_connections: 386
      landmark_connections: 385
      landmark_connections: 385
      landmark_connections: 384
      landmark_connections: 384
      landmark_connections: 398
      landmark_connections: 398
      landmark_connections: 362
      # Right eyebrow.
      landmark_connections: 276
      landmark_connections: 283
      landmark_connections: 283
      landmark_connections: 282
      landmark_connections: 282
      landmark_connections: 295
      landmark_connections: 295
      landmark_connections: 285
      landmark_connections: 300
      landmark_connections: 293
      landmark_connections: 293
      landmark_connections: 334
      landmark_connections: 334
      landmark_connections: 296
      landmark_connections: 296
      landmark_connections: 336
      # Face oval.
      landmark_connections: 10
      landmark_connections: 338
      landmark_connections: 338
      landmark_connections: 297
      landmark_connections: 297
      landmark_connections: 332
      landmark_connections: 332
      landmark_connections: 284
      landmark_connections: 284
      landmark_connections: 251
      landmark_connections: 251
      landmark_connections: 389
      landmark_connections: 389
      landmark_connections: 356
      landmark_connections: 356
      landmark_connections: 454
      landmark_connections: 454
      landmark_connections: 323
      landmark_connections: 323
      landmark_connections: 361
      landmark_connections: 361
      landmark_connections: 288
      landmark_connections: 288
      landmark_connections: 397
      landmark_connections: 397
      landmark_connections: 365
      landmark_connections: 365
      landmark_connections: 379
      landmark_connections: 379
      landmark_connections: 378
      landmark_connections: 378
      landmark_connections: 400
      landmark_connections: 400
      landmark_connections: 377
      landmark_connections: 377
      landmark_connections: 152
      landmark_connections: 152
      landmark_connections: 148
      landmark_connections: 148
      landmark_connections: 176
      landmark_connections: 176
      landmark_connections: 149
      landmark_connections: 149
      landmark_connections: 150
      landmark_connections: 150
      landmark_connections: 136
      landmark_connections: 136
      landmark_connections: 172
      landmark_connections: 172
      landmark_connections: 58
      landmark_connections: 58
      landmark_connections: 132
      landmark_connections: 132
      landmark_connections: 93
      landmark_connections: 93
      landmark_connections: 234
      landmark_connections: 234
      landmark_connections: 127
      landmark_connections: 127
      landmark_connections: 162
      landmark_connections: 162
      landmark_connections: 21
      landmark_connections: 21
      landmark_connections: 54
      landmark_connections: 54
      landmark_connections: 103
      landmark_connections: 103
      landmark_connections: 67
      landmark_connections: 67
      landmark_connections: 109
      landmark_connections: 109
      landmark_connections: 10
      landmark_color { r: 255 g: 0 b: 0 }
      connection_color { r: 0 g: 255 b: 0 }
      thickness: 1.5
      visualize_landmark_depth: false
    }
  }
}

# Collects a RenderData object for each face into a vector. Upon receiving the
# BATCH_END timestamp, outputs the vector of RenderData at the BATCH_END
# timestamp.
node {
  calculator: "EndLoopRenderDataCalculator"
  input_stream: "ITEM:landmark_render_data"
  input_stream: "BATCH_END:landmark_timestamp"
  output_stream: "ITERABLE:multi_face_landmarks_render_data"
}

# Converts normalized rects to drawing primitives for annotation overlay.
node {
  calculator: "RectToRenderDataCalculator"
  input_stream: "NORM_RECTS:rects"
  output_stream: "RENDER_DATA:rects_render_data"
  node_options: {
    [type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] {
      filled: false
      color { r: 255 g: 0 b: 0 }
      thickness: 4.0
    }
  }
}

# Draws annotations and overlays them on top of the input images.
node {
  calculator: "AnnotationOverlayCalculator"
  input_stream: "IMAGE:input_image"
  input_stream: "detections_render_data"
  input_stream: "VECTOR:0:multi_face_landmarks_render_data"
  input_stream: "rects_render_data"
  output_stream: "IMAGE:output_image"
}

350
mediapipe/graphs/face_mesh/subgraphs/face_renderer_gpu.pbtxt
Normal file
|
@ -0,0 +1,350 @@
|
|||
# MediaPipe face mesh rendering subgraph.

type: "FaceRendererGpu"

# GPU image. (GpuBuffer)
input_stream: "IMAGE:input_image"
# Collection of detected/predicted faces, each represented as a list of
# landmarks. (std::vector<NormalizedLandmarkList>)
input_stream: "LANDMARKS:multi_face_landmarks"
# Regions of interest calculated based on face detections.
# (std::vector<NormalizedRect>)
input_stream: "NORM_RECTS:rects"
# Detected faces. (std::vector<Detection>)
input_stream: "DETECTIONS:detections"

# GPU image with rendered data. (GpuBuffer)
output_stream: "IMAGE:output_image"

node {
  calculator: "ImagePropertiesCalculator"
  input_stream: "IMAGE_GPU:input_image"
  output_stream: "SIZE:image_size"
}

# Converts detections to drawing primitives for annotation overlay.
node {
  calculator: "DetectionsToRenderDataCalculator"
  input_stream: "DETECTIONS:detections"
  output_stream: "RENDER_DATA:detections_render_data"
  node_options: {
    [type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
      thickness: 4.0
      color { r: 0 g: 255 b: 0 }
    }
  }
}

# Outputs each element of multi_face_landmarks at a fake timestamp for the rest
# of the graph to process. At the end of the loop, outputs the BATCH_END
# timestamp for downstream calculators to inform them that all elements in the
# vector have been processed.
node {
  calculator: "BeginLoopNormalizedLandmarkListVectorCalculator"
  input_stream: "ITERABLE:multi_face_landmarks"
  output_stream: "ITEM:face_landmarks"
  output_stream: "BATCH_END:end_timestamp"
}

# Converts landmarks to drawing primitives for annotation overlay.
node {
  calculator: "LandmarksToRenderDataCalculator"
  input_stream: "NORM_LANDMARKS:face_landmarks"
  output_stream: "RENDER_DATA:landmarks_render_data"
  node_options: {
    [type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
      # Lips.
      landmark_connections: 61
      landmark_connections: 146
      landmark_connections: 146
      landmark_connections: 91
      landmark_connections: 91
      landmark_connections: 181
      landmark_connections: 181
      landmark_connections: 84
      landmark_connections: 84
      landmark_connections: 17
      landmark_connections: 17
      landmark_connections: 314
      landmark_connections: 314
      landmark_connections: 405
      landmark_connections: 405
      landmark_connections: 321
      landmark_connections: 321
      landmark_connections: 375
      landmark_connections: 375
      landmark_connections: 291
      landmark_connections: 61
      landmark_connections: 185
      landmark_connections: 185
      landmark_connections: 40
      landmark_connections: 40
      landmark_connections: 39
      landmark_connections: 39
      landmark_connections: 37
      landmark_connections: 37
      landmark_connections: 0
      landmark_connections: 0
      landmark_connections: 267
      landmark_connections: 267
      landmark_connections: 269
      landmark_connections: 269
      landmark_connections: 270
      landmark_connections: 270
      landmark_connections: 409
      landmark_connections: 409
      landmark_connections: 291
      landmark_connections: 78
      landmark_connections: 95
      landmark_connections: 95
      landmark_connections: 88
      landmark_connections: 88
      landmark_connections: 178
      landmark_connections: 178
      landmark_connections: 87
      landmark_connections: 87
      landmark_connections: 14
      landmark_connections: 14
      landmark_connections: 317
      landmark_connections: 317
      landmark_connections: 402
      landmark_connections: 402
      landmark_connections: 318
      landmark_connections: 318
      landmark_connections: 324
      landmark_connections: 324
      landmark_connections: 308
      landmark_connections: 78
      landmark_connections: 191
      landmark_connections: 191
      landmark_connections: 80
      landmark_connections: 80
      landmark_connections: 81
      landmark_connections: 81
      landmark_connections: 82
      landmark_connections: 82
      landmark_connections: 13
      landmark_connections: 13
      landmark_connections: 312
      landmark_connections: 312
      landmark_connections: 311
      landmark_connections: 311
      landmark_connections: 310
      landmark_connections: 310
      landmark_connections: 415
      landmark_connections: 415
      landmark_connections: 308
      # Left eye.
      landmark_connections: 33
      landmark_connections: 7
      landmark_connections: 7
      landmark_connections: 163
      landmark_connections: 163
      landmark_connections: 144
      landmark_connections: 144
      landmark_connections: 145
      landmark_connections: 145
      landmark_connections: 153
      landmark_connections: 153
      landmark_connections: 154
      landmark_connections: 154
      landmark_connections: 155
      landmark_connections: 155
      landmark_connections: 133
      landmark_connections: 33
      landmark_connections: 246
      landmark_connections: 246
      landmark_connections: 161
      landmark_connections: 161
      landmark_connections: 160
      landmark_connections: 160
      landmark_connections: 159
      landmark_connections: 159
      landmark_connections: 158
      landmark_connections: 158
      landmark_connections: 157
      landmark_connections: 157
      landmark_connections: 173
      landmark_connections: 173
      landmark_connections: 133
      # Left eyebrow.
      landmark_connections: 46
      landmark_connections: 53
      landmark_connections: 53
      landmark_connections: 52
      landmark_connections: 52
      landmark_connections: 65
      landmark_connections: 65
      landmark_connections: 55
      landmark_connections: 70
      landmark_connections: 63
      landmark_connections: 63
      landmark_connections: 105
      landmark_connections: 105
      landmark_connections: 66
      landmark_connections: 66
      landmark_connections: 107
      # Right eye.
      landmark_connections: 263
      landmark_connections: 249
      landmark_connections: 249
      landmark_connections: 390
      landmark_connections: 390
      landmark_connections: 373
      landmark_connections: 373
      landmark_connections: 374
      landmark_connections: 374
      landmark_connections: 380
      landmark_connections: 380
      landmark_connections: 381
      landmark_connections: 381
      landmark_connections: 382
      landmark_connections: 382
      landmark_connections: 362
      landmark_connections: 263
      landmark_connections: 466
      landmark_connections: 466
      landmark_connections: 388
      landmark_connections: 388
      landmark_connections: 387
      landmark_connections: 387
      landmark_connections: 386
      landmark_connections: 386
      landmark_connections: 385
      landmark_connections: 385
      landmark_connections: 384
      landmark_connections: 384
      landmark_connections: 398
      landmark_connections: 398
      landmark_connections: 362
      # Right eyebrow.
      landmark_connections: 276
      landmark_connections: 283
      landmark_connections: 283
      landmark_connections: 282
      landmark_connections: 282
      landmark_connections: 295
      landmark_connections: 295
      landmark_connections: 285
      landmark_connections: 300
      landmark_connections: 293
      landmark_connections: 293
      landmark_connections: 334
      landmark_connections: 334
      landmark_connections: 296
      landmark_connections: 296
      landmark_connections: 336
      # Face oval.
      landmark_connections: 10
      landmark_connections: 338
      landmark_connections: 338
      landmark_connections: 297
      landmark_connections: 297
      landmark_connections: 332
      landmark_connections: 332
      landmark_connections: 284
      landmark_connections: 284
      landmark_connections: 251
      landmark_connections: 251
      landmark_connections: 389
      landmark_connections: 389
      landmark_connections: 356
      landmark_connections: 356
      landmark_connections: 454
      landmark_connections: 454
      landmark_connections: 323
      landmark_connections: 323
      landmark_connections: 361
      landmark_connections: 361
      landmark_connections: 288
      landmark_connections: 288
      landmark_connections: 397
      landmark_connections: 397
      landmark_connections: 365
      landmark_connections: 365
      landmark_connections: 379
      landmark_connections: 379
      landmark_connections: 378
      landmark_connections: 378
      landmark_connections: 400
      landmark_connections: 400
      landmark_connections: 377
      landmark_connections: 377
      landmark_connections: 152
      landmark_connections: 152
      landmark_connections: 148
      landmark_connections: 148
      landmark_connections: 176
      landmark_connections: 176
      landmark_connections: 149
      landmark_connections: 149
      landmark_connections: 150
      landmark_connections: 150
      landmark_connections: 136
      landmark_connections: 136
      landmark_connections: 172
      landmark_connections: 172
      landmark_connections: 58
      landmark_connections: 58
      landmark_connections: 132
      landmark_connections: 132
      landmark_connections: 93
      landmark_connections: 93
      landmark_connections: 234
      landmark_connections: 234
      landmark_connections: 127
      landmark_connections: 127
      landmark_connections: 162
      landmark_connections: 162
      landmark_connections: 21
      landmark_connections: 21
      landmark_connections: 54
      landmark_connections: 54
      landmark_connections: 103
      landmark_connections: 103
      landmark_connections: 67
      landmark_connections: 67
      landmark_connections: 109
      landmark_connections: 109
      landmark_connections: 10
      landmark_color { r: 255 g: 0 b: 0 }
      connection_color { r: 0 g: 255 b: 0 }
      thickness: 2
      visualize_landmark_depth: false
    }
  }
}

# Collects a RenderData object for each face into a vector. Upon receiving the
# BATCH_END timestamp, outputs the vector of RenderData at the BATCH_END
# timestamp.
node {
  calculator: "EndLoopRenderDataCalculator"
  input_stream: "ITEM:landmarks_render_data"
  input_stream: "BATCH_END:end_timestamp"
  output_stream: "ITERABLE:multi_face_landmarks_render_data"
}

# Converts normalized rects to drawing primitives for annotation overlay.
node {
  calculator: "RectToRenderDataCalculator"
  input_stream: "NORM_RECTS:rects"
  output_stream: "RENDER_DATA:rects_render_data"
  node_options: {
    [type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] {
      filled: false
      color { r: 255 g: 0 b: 0 }
      thickness: 4.0
    }
  }
}

# Draws annotations and overlays them on top of the input images.
node {
  calculator: "AnnotationOverlayCalculator"
  input_stream: "IMAGE_GPU:input_image"
  input_stream: "detections_render_data"
  input_stream: "VECTOR:0:multi_face_landmarks_render_data"
  input_stream: "rects_render_data"
  output_stream: "IMAGE_GPU:output_image"
}

|
@ -1,6 +1,6 @@
|
|||
## MediaPipe Models

Here are descriptions of the models used in the [example applications](../docs/examples.md).
Here are the descriptions of the models used in the [example applications](../docs/examples.md).

### Object Detection
* [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/models/ssdlite_object_detection.tflite)
|
@ -8,24 +8,29 @@ Here are descriptions of the models used in the [example applications](../docs/e
|
|||

### Face Detection
* [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/models/face_detection_front.tflite)
* [Model page](https://sites.google.com/corp/view/perception-cv4arvr/blazeface)
* Paper: ["BlazeFace: Sub-millisecond Neural Face Detection on Mobile GPUs"](https://arxiv.org/abs/1907.05047)
* [Model card](https://sites.google.com/corp/view/perception-cv4arvr/blazeface#h.p_21ojPZDx3cqq)

### Face Mesh
* [TF.js model](https://tfhub.dev/mediapipe/facemesh/1)
* Face detection: [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/models/face_detection_front.tflite) (see above)
* 3D face landmarks: [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/models/face_landmark.tflite), [TF.js model](https://tfhub.dev/mediapipe/facemesh/1)
* [Model page](https://sites.google.com/corp/view/perception-cv4arvr/facemesh)
* Paper: ["Real-time Facial Surface Geometry from Monocular Video on Mobile GPUs"](https://arxiv.org/abs/1907.06724)
* [Google AI Blog post](https://ai.googleblog.com/2019/03/real-time-ar-self-expression-with.html)
* [TensorFlow Blog post](https://blog.tensorflow.org/2020/03/face-and-hand-tracking-in-browser-with-mediapipe-and-tensorflowjs.html)
* [Model card](https://drive.google.com/file/d/1VFC_wIpw4O7xBOiTgUldl79d9LA-LsnA/view)

### Hand Detection and Tracking
* Palm detection: [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/models/palm_detection.tflite), [TF.js model](https://tfhub.dev/mediapipe/handdetector/1)
* 2D hand landmark: [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/models/hand_landmark.tflite)
* 3D hand landmark: [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/models/hand_landmark_3d.tflite), [TF.js model](https://tfhub.dev/mediapipe/handskeleton/1)
* 2D hand landmarks: [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/models/hand_landmark.tflite)
* 3D hand landmarks: [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/models/hand_landmark_3d.tflite), [TF.js model](https://tfhub.dev/mediapipe/handskeleton/1)
* [Google AI Blog post](https://mediapipe.page.link/handgoogleaiblog)
* [TensorFlow Blog post](https://blog.tensorflow.org/2020/03/face-and-hand-tracking-in-browser-with-mediapipe-and-tensorflowjs.html)
* [Model card](https://mediapipe.page.link/handmc)

### Hair Segmentation
* [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/models/hair_segmentation.tflite)
* [Model page](https://sites.google.com/corp/view/perception-cv4arvr/hair-segmentation)
* Paper: ["Real-time Hair segmentation and recoloring on Mobile GPUs"](https://arxiv.org/abs/1907.06740)
* [Model card](https://sites.google.com/corp/view/perception-cv4arvr/hair-segmentation#h.p_NimuO7PgHxlY)
* [Model card](https://drive.google.com/file/d/1lPwJ8BD_-3UUor4LayQ0xpa_RIC_hoRh/view)

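A graph consumes one of the TFLite models above through `TfLiteInferenceCalculator`, which takes the model location in its options. A minimal sketch (stream names are illustrative; the `model_path` mirrors the face detection module added later in this import):

```
node {
  calculator: "TfLiteInferenceCalculator"
  input_stream: "TENSORS:input_tensors"
  output_stream: "TENSORS:detection_tensors"
  options: {
    [mediapipe.TfLiteInferenceCalculatorOptions.ext] {
      model_path: "mediapipe/modules/face_detection/face_detection_front.tflite"
    }
  }
}
```
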
BIN
mediapipe/models/face_landmark.tflite
Normal file
11
mediapipe/modules/README.md
Normal file
|
@ -0,0 +1,11 @@
|
|||
# Modules

Each module (represented as a subfolder) provides subgraphs and corresponding resources (e.g. tflite models) to perform domain-specific tasks (e.g. detect faces, detect face landmarks).

*The modules listed below are already used in some of the graphs in `mediapipe/graphs`, and more graphs are being migrated to use existing and upcoming modules.*

| Module | Description |
| :--- | :--- |
| [`face_detection`](face_detection/README.md) | Subgraphs to detect faces. |
| [`face_landmark`](face_landmark/README.md) | Subgraphs to detect and track face landmarks. |

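As a usage sketch, a graph pulls in a module's subgraph by its registered name; the node below mirrors the EXAMPLE comment in the face_detection module added further down (stream names are illustrative):

```
node {
  calculator: "FaceDetectionFrontCpu"
  input_stream: "IMAGE:image"
  output_stream: "DETECTIONS:face_detections"
}
```
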
58
mediapipe/modules/face_detection/BUILD
Normal file
|
@ -0,0 +1,58 @@
|
|||
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

load(
    "//mediapipe/framework/tool:mediapipe_graph.bzl",
    "mediapipe_simple_subgraph",
)

licenses(["notice"])  # Apache 2.0

package(default_visibility = ["//visibility:public"])

mediapipe_simple_subgraph(
    name = "face_detection_front_cpu",
    graph = "face_detection_front_cpu.pbtxt",
    register_as = "FaceDetectionFrontCpu",
    deps = [
        "//mediapipe/calculators/image:image_transformation_calculator",
        "//mediapipe/calculators/tflite:ssd_anchors_calculator",
        "//mediapipe/calculators/tflite:tflite_converter_calculator",
        "//mediapipe/calculators/tflite:tflite_inference_calculator",
        "//mediapipe/calculators/tflite:tflite_tensors_to_detections_calculator",
        "//mediapipe/calculators/util:detection_letterbox_removal_calculator",
        "//mediapipe/calculators/util:non_max_suppression_calculator",
    ],
)

mediapipe_simple_subgraph(
    name = "face_detection_front_gpu",
    graph = "face_detection_front_gpu.pbtxt",
    register_as = "FaceDetectionFrontGpu",
    deps = [
        "//mediapipe/calculators/image:image_transformation_calculator",
        "//mediapipe/calculators/tflite:ssd_anchors_calculator",
        "//mediapipe/calculators/tflite:tflite_converter_calculator",
        "//mediapipe/calculators/tflite:tflite_inference_calculator",
        "//mediapipe/calculators/tflite:tflite_tensors_to_detections_calculator",
        "//mediapipe/calculators/util:detection_letterbox_removal_calculator",
        "//mediapipe/calculators/util:non_max_suppression_calculator",
    ],
)

exports_files(
    srcs = [
        "face_detection_front.tflite",
    ],
)
7
mediapipe/modules/face_detection/README.md
Normal file
|
@ -0,0 +1,7 @@
|
|||
# face_detection

| Subgraphs | Details |
| :--- | :--- |
| [`FaceDetectionFrontCpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection/face_detection_front_cpu.pbtxt) | Detects faces. Works best for images from front-facing cameras (i.e. selfie images). (CPU input, and inference is executed on CPU.) |
| [`FaceDetectionFrontGpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection/face_detection_front_gpu.pbtxt) | Detects faces. Works best for images from front-facing cameras (i.e. selfie images). (GPU input, and inference is executed on GPU.) |

BIN
mediapipe/modules/face_detection/face_detection_front.tflite
Executable file
143
mediapipe/modules/face_detection/face_detection_front_cpu.pbtxt
Normal file
|
@ -0,0 +1,143 @@
|
|||
# MediaPipe graph to detect faces. (CPU input, and inference is executed on
# CPU.)
#
# It is required that "face_detection_front.tflite" is available at
# "mediapipe/modules/face_detection/face_detection_front.tflite"
# path during execution.
#
# EXAMPLE:
#   node {
#     calculator: "FaceDetectionFrontCpu"
#     input_stream: "IMAGE:image"
#     output_stream: "DETECTIONS:face_detections"
#   }

type: "FaceDetectionFrontCpu"

# CPU image. (ImageFrame)
input_stream: "IMAGE:image"

# Detected faces. (std::vector<Detection>)
# NOTE: there will not be an output packet in the DETECTIONS stream for this
# particular timestamp if no faces are detected. However, the MediaPipe
# framework will internally inform the downstream calculators of the absence of
# this packet so that they don't wait for it unnecessarily.
output_stream: "DETECTIONS:detections"
|
||||
|
||||
# Transforms the input image on CPU to a 128x128 image. To scale the input
|
||||
# image, the scale_mode option is set to FIT to preserve the aspect ratio
|
||||
# (what is expected by the corresponding face detection model), resulting in
|
||||
# potential letterboxing in the transformed image.
|
||||
node: {
|
||||
calculator: "ImageTransformationCalculator"
|
||||
input_stream: "IMAGE:image"
|
||||
output_stream: "IMAGE:transformed_image"
|
||||
output_stream: "LETTERBOX_PADDING:letterbox_padding"
|
||||
options: {
|
||||
[mediapipe.ImageTransformationCalculatorOptions.ext] {
|
||||
output_width: 128
|
||||
output_height: 128
|
||||
scale_mode: FIT
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Converts the transformed input image on CPU into an image tensor stored as a
|
||||
# TfLiteTensor.
|
||||
node {
|
||||
calculator: "TfLiteConverterCalculator"
|
||||
input_stream: "IMAGE:transformed_image"
|
||||
output_stream: "TENSORS:input_tensors"
|
||||
}
|
||||
|
||||
# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a
|
||||
# vector of tensors representing, for instance, detection boxes/keypoints and
|
||||
# scores.
|
||||
node {
|
||||
calculator: "TfLiteInferenceCalculator"
|
||||
input_stream: "TENSORS:input_tensors"
|
||||
output_stream: "TENSORS:detection_tensors"
|
||||
options: {
|
||||
[mediapipe.TfLiteInferenceCalculatorOptions.ext] {
|
||||
model_path: "mediapipe/modules/face_detection/face_detection_front.tflite"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Generates a single side packet containing a vector of SSD anchors based on
|
||||
# the specification in the options.
|
||||
node {
|
||||
calculator: "SsdAnchorsCalculator"
|
||||
output_side_packet: "anchors"
|
||||
options: {
|
||||
[mediapipe.SsdAnchorsCalculatorOptions.ext] {
|
||||
num_layers: 4
|
||||
min_scale: 0.1484375
|
||||
max_scale: 0.75
|
||||
input_size_height: 128
|
||||
input_size_width: 128
|
||||
anchor_offset_x: 0.5
|
||||
anchor_offset_y: 0.5
|
||||
strides: 8
|
||||
strides: 16
|
||||
strides: 16
|
||||
strides: 16
|
||||
aspect_ratios: 1.0
|
||||
fixed_anchor_size: true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Decodes the detection tensors generated by the TensorFlow Lite model, based on
|
||||
# the SSD anchors and the specification in the options, into a vector of
|
||||
# detections. Each detection describes a detected object.
|
||||
node {
|
||||
calculator: "TfLiteTensorsToDetectionsCalculator"
|
||||
input_stream: "TENSORS:detection_tensors"
|
||||
input_side_packet: "ANCHORS:anchors"
|
||||
output_stream: "DETECTIONS:unfiltered_detections"
|
||||
options: {
|
||||
[mediapipe.TfLiteTensorsToDetectionsCalculatorOptions.ext] {
|
||||
num_classes: 1
|
||||
num_boxes: 896
|
||||
num_coords: 16
|
||||
box_coord_offset: 0
|
||||
keypoint_coord_offset: 4
|
||||
num_keypoints: 6
|
||||
num_values_per_keypoint: 2
|
||||
sigmoid_score: true
|
||||
score_clipping_thresh: 100.0
|
||||
reverse_output_order: true
|
||||
x_scale: 128.0
|
||||
y_scale: 128.0
|
||||
h_scale: 128.0
|
||||
w_scale: 128.0
|
||||
min_score_thresh: 0.75
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Performs non-max suppression to remove excessive detections.
|
||||
node {
|
||||
calculator: "NonMaxSuppressionCalculator"
|
||||
input_stream: "unfiltered_detections"
|
||||
output_stream: "filtered_detections"
|
||||
options: {
|
||||
[mediapipe.NonMaxSuppressionCalculatorOptions.ext] {
|
||||
min_suppression_threshold: 0.3
|
||||
overlap_type: INTERSECTION_OVER_UNION
|
||||
algorithm: WEIGHTED
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Adjusts detection locations (already normalized to [0.f, 1.f]) on the
|
||||
# letterboxed image (after image transformation with the FIT scale mode) to the
|
||||
# corresponding locations on the same image with the letterbox removed (the
|
||||
# input image to the graph before image transformation).
|
||||
node {
|
||||
calculator: "DetectionLetterboxRemovalCalculator"
|
||||
input_stream: "DETECTIONS:filtered_detections"
|
||||
input_stream: "LETTERBOX_PADDING:letterbox_padding"
|
||||
output_stream: "DETECTIONS:detections"
|
||||
}
|
143
mediapipe/modules/face_detection/face_detection_front_gpu.pbtxt
Normal file
|
@ -0,0 +1,143 @@
|
|||
# MediaPipe graph to detect faces. (GPU input, and inference is executed on
# GPU.)
#
# It is required that "face_detection_front.tflite" is available at
# "mediapipe/modules/face_detection/face_detection_front.tflite"
# path during execution.
#
# EXAMPLE:
#   node {
#     calculator: "FaceDetectionFrontGpu"
#     input_stream: "IMAGE:image"
#     output_stream: "DETECTIONS:face_detections"
#   }

type: "FaceDetectionFrontGpu"

# GPU image. (GpuBuffer)
input_stream: "IMAGE:image"

# Detected faces. (std::vector<Detection>)
# NOTE: there will not be an output packet in the DETECTIONS stream for this
# particular timestamp if no faces are detected. However, the MediaPipe
# framework will internally inform the downstream calculators of the absence of
# this packet so that they don't wait for it unnecessarily.
output_stream: "DETECTIONS:detections"
|
||||
|
||||
# Transforms the input image on GPU to a 128x128 image. To scale the input
|
||||
# image, the scale_mode option is set to FIT to preserve the aspect ratio
|
||||
# (what is expected by the corresponding face detection model), resulting in
|
||||
# potential letterboxing in the transformed image.
|
||||
node: {
|
||||
calculator: "ImageTransformationCalculator"
|
||||
input_stream: "IMAGE_GPU:image"
|
||||
output_stream: "IMAGE_GPU:transformed_image"
|
||||
output_stream: "LETTERBOX_PADDING:letterbox_padding"
|
||||
options: {
|
||||
[mediapipe.ImageTransformationCalculatorOptions.ext] {
|
||||
output_width: 128
|
||||
output_height: 128
|
||||
scale_mode: FIT
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Converts the transformed input image on GPU into an image tensor stored as a
|
||||
# TfLiteTensor.
|
||||
node {
|
||||
calculator: "TfLiteConverterCalculator"
|
||||
input_stream: "IMAGE_GPU:transformed_image"
|
||||
output_stream: "TENSORS_GPU:input_tensors"
|
||||
}
|
||||
|
||||
# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a
|
||||
# vector of tensors representing, for instance, detection boxes/keypoints and
|
||||
# scores.
|
||||
node {
|
||||
calculator: "TfLiteInferenceCalculator"
|
||||
input_stream: "TENSORS_GPU:input_tensors"
|
||||
output_stream: "TENSORS_GPU:detection_tensors"
|
||||
options: {
|
||||
[mediapipe.TfLiteInferenceCalculatorOptions.ext] {
|
||||
model_path: "mediapipe/modules/face_detection/face_detection_front.tflite"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Generates a single side packet containing a vector of SSD anchors based on
|
||||
# the specification in the options.
|
||||
node {
|
||||
calculator: "SsdAnchorsCalculator"
|
||||
output_side_packet: "anchors"
|
||||
options: {
|
||||
[mediapipe.SsdAnchorsCalculatorOptions.ext] {
|
||||
num_layers: 4
|
||||
min_scale: 0.1484375
|
||||
max_scale: 0.75
|
||||
input_size_height: 128
|
||||
input_size_width: 128
|
||||
anchor_offset_x: 0.5
|
||||
anchor_offset_y: 0.5
|
||||
strides: 8
|
||||
strides: 16
|
||||
strides: 16
|
||||
strides: 16
|
||||
aspect_ratios: 1.0
|
||||
fixed_anchor_size: true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Decodes the detection tensors generated by the TensorFlow Lite model, based on
|
||||
# the SSD anchors and the specification in the options, into a vector of
|
||||
# detections. Each detection describes a detected object.
|
||||
node {
|
||||
calculator: "TfLiteTensorsToDetectionsCalculator"
|
||||
input_stream: "TENSORS_GPU:detection_tensors"
|
||||
input_side_packet: "ANCHORS:anchors"
|
||||
output_stream: "DETECTIONS:unfiltered_detections"
|
||||
options: {
|
||||
[mediapipe.TfLiteTensorsToDetectionsCalculatorOptions.ext] {
|
||||
num_classes: 1
|
||||
num_boxes: 896
|
||||
num_coords: 16
|
||||
box_coord_offset: 0
|
||||
keypoint_coord_offset: 4
|
||||
num_keypoints: 6
|
||||
num_values_per_keypoint: 2
|
||||
sigmoid_score: true
|
||||
score_clipping_thresh: 100.0
|
||||
reverse_output_order: true
|
||||
x_scale: 128.0
|
||||
y_scale: 128.0
|
||||
h_scale: 128.0
|
||||
w_scale: 128.0
|
||||
min_score_thresh: 0.75
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Performs non-max suppression to remove excessive detections.
|
||||
node {
|
||||
calculator: "NonMaxSuppressionCalculator"
|
||||
input_stream: "unfiltered_detections"
|
||||
output_stream: "filtered_detections"
|
||||
options: {
|
||||
[mediapipe.NonMaxSuppressionCalculatorOptions.ext] {
|
||||
min_suppression_threshold: 0.3
|
||||
overlap_type: INTERSECTION_OVER_UNION
|
||||
algorithm: WEIGHTED
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Adjusts detection locations (already normalized to [0.f, 1.f]) on the
|
||||
# letterboxed image (after image transformation with the FIT scale mode) to the
|
||||
# corresponding locations on the same image with the letterbox removed (the
|
||||
# input image to the graph before image transformation).
|
||||
node {
|
||||
calculator: "DetectionLetterboxRemovalCalculator"
|
||||
input_stream: "DETECTIONS:filtered_detections"
|
||||
input_stream: "LETTERBOX_PADDING:letterbox_padding"
|
||||
output_stream: "DETECTIONS:detections"
|
||||
}
|