Divided calculators

mslight 2022-06-22 18:42:35 +04:00
parent dbeea8069e
commit 009449a93b
21 changed files with 3881 additions and 0 deletions


@@ -0,0 +1,128 @@
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library")
licenses(["notice"])
package(default_visibility = ["//visibility:public"])
cc_library(
name = "draw_lipstick_calculator",
srcs = ["draw_lipstick_calculator.cc"],
visibility = ["//visibility:public"],
deps = [
"//mediapipe/framework:calculator_options_cc_proto",
"//mediapipe/framework/formats:image_format_cc_proto",
"//mediapipe/util:color_cc_proto",
"@com_google_absl//absl/strings",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/formats:image_frame",
"//mediapipe/framework/formats:image_frame_opencv",
"//mediapipe/framework/formats:video_stream_header",
"//mediapipe/framework/port:logging",
"//mediapipe/framework/port:opencv_core",
"//mediapipe/framework/port:opencv_imgproc",
"//mediapipe/framework/port:opencv_highgui",
"//mediapipe/framework/port:status",
"//mediapipe/framework/port:vector",
"//mediapipe/util:annotation_renderer",
"//mediapipe/util:render_data_cc_proto",
],
alwayslink = 1,
)
cc_library(
name = "form_face_mask_calculator",
srcs = ["form_face_mask_calculator.cc"],
visibility = ["//visibility:public"],
deps = [
"//mediapipe/framework:calculator_options_cc_proto",
"//mediapipe/framework/formats:image_format_cc_proto",
"//mediapipe/util:color_cc_proto",
"@com_google_absl//absl/strings",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/formats:image_frame",
"//mediapipe/framework/formats:image_frame_opencv",
"//mediapipe/framework/formats:video_stream_header",
"//mediapipe/framework/port:logging",
"//mediapipe/framework/port:opencv_core",
"//mediapipe/framework/port:opencv_imgproc",
"//mediapipe/framework/port:opencv_highgui",
"//mediapipe/framework/port:status",
"//mediapipe/framework/port:vector",
"//mediapipe/util:annotation_renderer",
"//mediapipe/util:render_data_cc_proto",
],
alwayslink = 1,
)
cc_library(
name = "smooth_face_calculator",
srcs = ["smooth_face_calculator.cc"],
visibility = ["//visibility:public"],
deps = [
"//mediapipe/framework:calculator_options_cc_proto",
"//mediapipe/framework/formats:image_format_cc_proto",
"//mediapipe/util:color_cc_proto",
"@com_google_absl//absl/strings",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/formats:image_frame",
"//mediapipe/framework/formats:image_frame_opencv",
"//mediapipe/framework/formats:video_stream_header",
"//mediapipe/framework/port:logging",
"//mediapipe/framework/port:opencv_core",
"//mediapipe/framework/port:opencv_imgproc",
"//mediapipe/framework/port:opencv_highgui",
"//mediapipe/framework/port:status",
"//mediapipe/framework/port:vector",
"//mediapipe/util:annotation_renderer",
"//mediapipe/util:render_data_cc_proto",
],
alwayslink = 1,
)
cc_library(
name = "whiten_teeth_calculator",
srcs = ["whiten_teeth_calculator.cc"],
visibility = ["//visibility:public"],
deps = [
"//mediapipe/framework:calculator_options_cc_proto",
"//mediapipe/framework/formats:image_format_cc_proto",
"//mediapipe/util:color_cc_proto",
"@com_google_absl//absl/strings",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/formats:image_frame",
"//mediapipe/framework/formats:image_frame_opencv",
"//mediapipe/framework/formats:video_stream_header",
"//mediapipe/framework/port:logging",
"//mediapipe/framework/port:opencv_core",
"//mediapipe/framework/port:opencv_imgproc",
"//mediapipe/framework/port:opencv_highgui",
"//mediapipe/framework/port:status",
"//mediapipe/framework/port:vector",
"//mediapipe/util:annotation_renderer",
"//mediapipe/util:render_data_cc_proto",
],
alwayslink = 1,
)
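Together these four targets form a CPU beauty-filter pipeline: FormFaceMaskCalculator converts face-landmark render data into named part masks plus a face box, and the other three calculators consume those masks to retouch the frame. The sketch below shows one possible way to chain them in a graph; the IMAGE, MASK, and FACEBOX tags come from the calculators' contracts in the files below, but the stream names and the retouch order are illustrative assumptions, not taken from this commit.

node {
  calculator: "FormFaceMaskCalculator"
  input_stream: "IMAGE:throttled_input_video"
  input_stream: "face_landmarks_render_data"
  output_stream: "MASK:face_part_masks"
  output_stream: "FACEBOX:face_box"
}
node {
  calculator: "DrawLipstickCalculator"
  input_stream: "IMAGE:throttled_input_video"
  input_stream: "MASK:face_part_masks"
  output_stream: "IMAGE:lipstick_video"
}
node {
  calculator: "WhitenTeethCalculator"
  input_stream: "IMAGE:lipstick_video"
  input_stream: "MASK:face_part_masks"
  output_stream: "IMAGE:whitened_video"
}
node {
  calculator: "SmoothFaceCalculator"
  input_stream: "IMAGE:whitened_video"
  input_stream: "MASK:face_part_masks"
  input_stream: "FACEBOX:face_box"
  output_stream: "IMAGE:output_video"
}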


@@ -0,0 +1,394 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <math.h>
#include <algorithm>
#include <cmath>
#include <map>
#include <string>
//#include <android/log.h>
#include <memory>
#include "absl/strings/str_cat.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_options.pb.h"
#include "mediapipe/framework/formats/image_format.pb.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/image_frame_opencv.h"
#include "mediapipe/framework/formats/video_stream_header.h"
#include "mediapipe/framework/port/logging.h"
#include "mediapipe/framework/port/opencv_core_inc.h"
#include "mediapipe/framework/port/opencv_imgproc_inc.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/util/annotation_renderer.h"
#include "mediapipe/util/render_data.pb.h"
#include "mediapipe/framework/port/logging.h"
#include "mediapipe/framework/port/vector.h"
#include "mediapipe/util/color.pb.h"
namespace mediapipe
{
namespace
{
constexpr char kMaskTag[] = "MASK";
constexpr char kImageFrameTag[] = "IMAGE";
enum
{
ATTRIB_VERTEX,
ATTRIB_TEXTURE_POSITION,
NUM_ATTRIBUTES
};
// Round up n to next multiple of m.
size_t RoundUp(size_t n, size_t m) { return ((n + m - 1) / m) * m; } // NOLINT
inline bool HasImageTag(mediapipe::CalculatorContext *cc) { return false; }
using Point = RenderAnnotation::Point;
} // namespace
class DrawLipstickCalculator : public CalculatorBase
{
public:
DrawLipstickCalculator() = default;
~DrawLipstickCalculator() override = default;
static absl::Status GetContract(CalculatorContract *cc);
// From Calculator.
absl::Status Open(CalculatorContext *cc) override;
absl::Status Process(CalculatorContext *cc) override;
absl::Status Close(CalculatorContext *cc) override;
private:
absl::Status CreateRenderTargetCpu(CalculatorContext *cc,
std::unique_ptr<cv::Mat> &image_mat,
ImageFormat::Format *target_format);
absl::Status RenderToCpu(
CalculatorContext *cc, const ImageFormat::Format &target_format,
uchar *data_image, std::unique_ptr<cv::Mat> &image_mat);
absl::Status DrawLipstick(CalculatorContext *cc, std::unique_ptr<cv::Mat> &image_mat,
ImageFormat::Format *target_format,
const std::unordered_map<std::string, cv::Mat> &mask_vec);
// Indicates if image frame is available as input.
bool image_frame_available_ = false;
std::unordered_map<std::string, cv::Mat> all_masks;
int width_ = 0;
int height_ = 0;
int width_canvas_ = 0; // Size of overlay drawing texture canvas.
int height_canvas_ = 0;
};
REGISTER_CALCULATOR(DrawLipstickCalculator);
absl::Status DrawLipstickCalculator::GetContract(CalculatorContract *cc)
{
CHECK_GE(cc->Inputs().NumEntries(), 1);
if (cc->Inputs().HasTag(kImageFrameTag))
{
cc->Inputs().Tag(kImageFrameTag).Set<ImageFrame>();
CHECK(cc->Outputs().HasTag(kImageFrameTag));
}
// Data streams to render.
for (CollectionItemId id = cc->Inputs().BeginId(); id < cc->Inputs().EndId();
++id)
{
auto tag_and_index = cc->Inputs().TagAndIndexFromId(id);
std::string tag = tag_and_index.first;
if (tag == kMaskTag)
{
cc->Inputs().Get(id).Set<std::unordered_map<std::string, cv::Mat>>();
}
else if (tag.empty())
{
// Empty tag defaults to accepting a single object of Mat type.
cc->Inputs().Get(id).Set<cv::Mat>();
}
}
if (cc->Outputs().HasTag(kImageFrameTag))
{
cc->Outputs().Tag(kImageFrameTag).Set<ImageFrame>();
}
return absl::OkStatus();
}
absl::Status DrawLipstickCalculator::Open(CalculatorContext *cc)
{
cc->SetOffset(TimestampDiff(0));
if (cc->Inputs().HasTag(kImageFrameTag) || HasImageTag(cc))
{
image_frame_available_ = true;
}
// Set the output header based on the input header (if present).
const char *tag = kImageFrameTag;
if (image_frame_available_ && !cc->Inputs().Tag(tag).Header().IsEmpty())
{
const auto &input_header =
cc->Inputs().Tag(tag).Header().Get<VideoHeader>();
auto *output_video_header = new VideoHeader(input_header);
cc->Outputs().Tag(tag).SetHeader(Adopt(output_video_header));
}
return absl::OkStatus();
}
absl::Status DrawLipstickCalculator::Process(CalculatorContext *cc)
{
if (cc->Inputs().HasTag(kImageFrameTag) &&
cc->Inputs().Tag(kImageFrameTag).IsEmpty())
{
return absl::OkStatus();
}
if (cc->Inputs().HasTag(kMaskTag) &&
cc->Inputs().Tag(kMaskTag).IsEmpty())
{
return absl::OkStatus();
}
// Initialize render target, drawn with OpenCV.
std::unique_ptr<cv::Mat> image_mat;
ImageFormat::Format target_format;
if (cc->Outputs().HasTag(kImageFrameTag))
{
MP_RETURN_IF_ERROR(CreateRenderTargetCpu(cc, image_mat, &target_format));
}
// Render streams onto render target.
for (CollectionItemId id = cc->Inputs().BeginId(); id < cc->Inputs().EndId();
++id)
{
auto tag_and_index = cc->Inputs().TagAndIndexFromId(id);
std::string tag = tag_and_index.first;
if (!tag.empty() && tag != kMaskTag)
{
continue;
}
if (cc->Inputs().Get(id).IsEmpty())
{
continue;
}
RET_CHECK_EQ(kMaskTag, tag);
const std::unordered_map<std::string, cv::Mat> &mask_vec =
cc->Inputs().Get(id).Get<std::unordered_map<std::string, cv::Mat>>();
if (mask_vec.size() > 1)
MP_RETURN_IF_ERROR(DrawLipstick(cc, image_mat, &target_format, mask_vec));
}
// Copy the rendered image to output.
uchar *image_mat_ptr = image_mat->data;
MP_RETURN_IF_ERROR(RenderToCpu(cc, target_format, image_mat_ptr, image_mat));
return absl::OkStatus();
}
absl::Status DrawLipstickCalculator::Close(CalculatorContext *cc)
{
return absl::OkStatus();
}
absl::Status DrawLipstickCalculator::RenderToCpu(
CalculatorContext *cc, const ImageFormat::Format &target_format,
uchar *data_image, std::unique_ptr<cv::Mat> &image_mat)
{
cv::Mat mat_image_ = *image_mat.get();
auto output_frame = absl::make_unique<ImageFrame>(
target_format, mat_image_.cols, mat_image_.rows);
output_frame->CopyPixelData(target_format, mat_image_.cols, mat_image_.rows, data_image,
ImageFrame::kDefaultAlignmentBoundary);
if (cc->Outputs().HasTag(kImageFrameTag))
{
cc->Outputs()
.Tag(kImageFrameTag)
.Add(output_frame.release(), cc->InputTimestamp());
}
return absl::OkStatus();
}
absl::Status DrawLipstickCalculator::CreateRenderTargetCpu(
CalculatorContext *cc, std::unique_ptr<cv::Mat> &image_mat,
ImageFormat::Format *target_format)
{
if (image_frame_available_)
{
const auto &input_frame =
cc->Inputs().Tag(kImageFrameTag).Get<ImageFrame>();
int target_mat_type;
switch (input_frame.Format())
{
case ImageFormat::SRGBA:
*target_format = ImageFormat::SRGBA;
target_mat_type = CV_8UC4;
break;
case ImageFormat::SRGB:
*target_format = ImageFormat::SRGB;
target_mat_type = CV_8UC3;
break;
case ImageFormat::GRAY8:
*target_format = ImageFormat::SRGB;
target_mat_type = CV_8UC3;
break;
default:
return absl::UnknownError("Unexpected image frame format.");
break;
}
image_mat = absl::make_unique<cv::Mat>(
input_frame.Height(), input_frame.Width(), target_mat_type);
auto input_mat = formats::MatView(&input_frame);
if (input_frame.Format() == ImageFormat::GRAY8)
{
cv::Mat rgb_mat;
cv::cvtColor(input_mat, rgb_mat, CV_GRAY2RGB);
rgb_mat.copyTo(*image_mat);
}
else
{
input_mat.copyTo(*image_mat);
}
}
else
{
image_mat = absl::make_unique<cv::Mat>(
150, 150, CV_8UC4,
cv::Scalar(255, 255,
255));
*target_format = ImageFormat::SRGBA;
}
return absl::OkStatus();
}
absl::Status DrawLipstickCalculator::DrawLipstick(CalculatorContext *cc,
std::unique_ptr<cv::Mat> &image_mat,
ImageFormat::Format *target_format,
const std::unordered_map<std::string, cv::Mat> &mask_vec)
{
cv::Mat mat_image__ = *image_mat.get();
  // Combine the upper- and lower-lip masks from the incoming MASK map.
  //__android_log_print(ANDROID_LOG_ERROR, "OVERSEAS", "%d ", mask_vec[1].size().height);
  cv::Mat upper_lips_mask = mask_vec.find("UPPER_LIP")->second;
  cv::Mat lower_lips_mask = mask_vec.find("LOWER_LIP")->second;
  cv::Mat spec_lips_mask = upper_lips_mask + lower_lips_mask;
spec_lips_mask.convertTo(spec_lips_mask, CV_8U);
  cv::resize(spec_lips_mask, spec_lips_mask, mat_image__.size(), 0, 0, cv::INTER_LINEAR);
std::vector<int> x, y;
std::vector<cv::Point> location;
cv::findNonZero(spec_lips_mask, location);
for (auto &i : location)
{
x.push_back(i.x);
y.push_back(i.y);
}
if (!(x.empty()) && !(y.empty()))
{
double min_y, max_y, max_x, min_x;
cv::minMaxLoc(y, &min_y, &max_y);
cv::minMaxLoc(x, &min_x, &max_x);
cv::Mat lips_crop_mask = spec_lips_mask(cv::Range(min_y, max_y), cv::Range(min_x, max_x));
lips_crop_mask.convertTo(lips_crop_mask, CV_32F, 1.0 / 255);
cv::Mat lips_crop = cv::Mat(mat_image__(cv::Range(min_y, max_y), cv::Range(min_x, max_x)));
cv::Mat lips_blend = cv::Mat(lips_crop.size().height, lips_crop.size().width, CV_32FC4, cv::Scalar(255.0, 0, 0, 0));
std::vector<cv::Mat> channels(4);
cv::split(lips_blend, channels);
channels[3] = lips_crop_mask * 20;
cv::merge(channels, lips_blend);
cv::Mat tmp_lip_mask;
channels[3].convertTo(tmp_lip_mask, CV_32FC1, 1.0 / 255);
cv::split(lips_blend, channels);
for (auto &ch : channels)
{
cv::multiply(ch, tmp_lip_mask, ch, 1.0, CV_32F);
}
cv::merge(channels, lips_blend);
cv::subtract(1.0, tmp_lip_mask, tmp_lip_mask, cv::noArray(), CV_32F);
cv::split(lips_crop, channels);
for (auto &ch : channels)
{
cv::multiply(ch, tmp_lip_mask, ch, 1.0, CV_8U);
}
cv::merge(channels, lips_crop);
cv::add(lips_blend, lips_crop, lips_crop, cv::noArray(), CV_8U);
lips_crop = cv::abs(lips_crop);
cvtColor(lips_crop, lips_crop, cv::COLOR_RGBA2RGB);
cv::Mat slice = mat_image__(cv::Range(min_y, max_y), cv::Range(min_x, max_x));
lips_crop_mask.convertTo(lips_crop_mask, slice.type());
slice.copyTo(slice, lips_crop_mask);
cv::Mat masked_lips_crop, slice_gray;
lips_crop.copyTo(masked_lips_crop, lips_crop_mask);
cv::cvtColor(masked_lips_crop, slice_gray, cv::COLOR_RGB2GRAY);
masked_lips_crop.copyTo(slice, slice_gray);
}
return absl::OkStatus();
}
} // namespace mediapipe
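For reference, the compositing in DrawLipstick is a plain alpha blend: the lip mask is scaled to [0, 1] and multiplied by 20/255, so each pixel ends up as roughly out = a * (255, 0, 0) + (1 - a) * src with a <= 20/255, a subtle red tint of at most about 8% opacity rather than an opaque overlay.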


@@ -0,0 +1,445 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <math.h>
#include <algorithm>
#include <cmath>
#include <string>
#include <map>
//#include <android/log.h>
#include <memory>
#include "absl/strings/str_cat.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_options.pb.h"
#include "mediapipe/framework/formats/image_format.pb.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/image_frame_opencv.h"
#include "mediapipe/framework/formats/video_stream_header.h"
#include "mediapipe/framework/port/logging.h"
#include "mediapipe/framework/port/opencv_core_inc.h"
#include "mediapipe/framework/port/opencv_imgproc_inc.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/util/annotation_renderer.h"
#include "mediapipe/util/render_data.pb.h"
#include "mediapipe/framework/port/logging.h"
#include "mediapipe/framework/port/vector.h"
#include "mediapipe/util/color.pb.h"
namespace mediapipe
{
namespace
{
constexpr char kVectorTag[] = "VECTOR";
constexpr char kMaskTag[] = "MASK";
constexpr char kFaceBoxTag[] = "FACEBOX";
constexpr char kImageFrameTag[] = "IMAGE";
enum
{
ATTRIB_VERTEX,
ATTRIB_TEXTURE_POSITION,
NUM_ATTRIBUTES
};
// Round up n to next multiple of m.
size_t RoundUp(size_t n, size_t m) { return ((n + m - 1) / m) * m; } // NOLINT
// When using GPU, this color will become transparent when the calculator
// merges the annotation overlay with the image frame. As a result, drawing in
// this color is not supported and it should be set to something unlikely used.
constexpr uchar kAnnotationBackgroundColor = 2; // Grayscale value.
// Future Image type.
inline bool HasImageTag(mediapipe::CalculatorContext *cc) { return false; }
static const std::vector<int> UPPER_LIP = {61, 185, 40, 39, 37, 0, 267, 269, 270, 409, 291, 308, 415, 310, 311, 312, 13, 82, 81, 80, 191, 78};
static const std::vector<int> LOWER_LIP = {61, 78, 95, 88, 178, 87, 14, 317, 402, 318, 324, 308, 291, 375, 321, 405, 314, 17, 84, 181, 91, 146};
static const std::vector<int> FACE_OVAL = {10, 338, 338, 297, 297, 332, 332, 284, 284, 251, 251, 389, 389, 356, 356,
454, 454, 323, 323, 361, 361, 288, 288, 397, 397, 365, 365, 379, 379, 378,
378, 400, 400, 377, 377, 152, 152, 148, 148, 176, 176, 149, 149, 150, 150,
136, 136, 172, 172, 58, 58, 132, 132, 93, 93, 234, 234, 127, 127, 162, 162,
21, 21, 54, 54, 103, 103, 67, 67, 109, 109, 10};
static const std::vector<int> MOUTH_INSIDE = {78, 191, 80, 81, 13, 312, 311, 310, 415, 308, 324, 318, 402, 317, 14, 87, 178, 88, 95};
static const std::vector<int> PART_FOREHEAD_B = {21, 54, 103, 67, 109, 10, 338, 297, 332, 284, 251, 301, 293, 334, 296, 336, 9, 107, 66, 105, 63, 71};
static const std::vector<int> LEFT_EYE = {130, 33, 246, 161, 160, 159, 157, 173, 133, 155, 154, 153, 145, 144, 163, 7};
static const std::vector<int> RIGHT_EYE = {362, 398, 384, 385, 386, 387, 388, 466, 263, 249, 390, 373, 374, 380, 381, 382};
static const std::vector<int> LIPS = {61, 185, 40, 39, 37, 0, 267, 269, 270, 409, 291, 375, 321, 405, 314, 17, 84, 181, 91, 146};
static const std::vector<int> LEFT_BROW = {70, 63, 105, 66, 107, 55, 65, 52, 53, 46};
static const std::vector<int> RIGHT_BROW = {336, 296, 334, 293, 301, 300, 283, 282, 295, 285};
bool NormalizedtoPixelCoordinates(double normalized_x, double normalized_y,
int image_width, int image_height, int *x_px,
int *y_px)
{
CHECK(x_px != nullptr);
CHECK(y_px != nullptr);
CHECK_GT(image_width, 0);
CHECK_GT(image_height, 0);
if (normalized_x < 0 || normalized_x > 1.0 || normalized_y < 0 ||
normalized_y > 1.0)
{
VLOG(1) << "Normalized coordinates must be between 0.0 and 1.0";
}
*x_px = static_cast<int32>(round(normalized_x * image_width));
*y_px = static_cast<int32>(round(normalized_y * image_height));
return true;
}
} // namespace
class FormFaceMaskCalculator : public CalculatorBase
{
public:
FormFaceMaskCalculator() = default;
~FormFaceMaskCalculator() override = default;
static absl::Status GetContract(CalculatorContract *cc);
// From Calculator.
absl::Status Open(CalculatorContext *cc) override;
absl::Status Process(CalculatorContext *cc) override;
absl::Status Close(CalculatorContext *cc) override;
private:
absl::Status CreateRenderTargetCpu(CalculatorContext *cc,
std::unique_ptr<cv::Mat> &image_mat,
ImageFormat::Format *target_format);
absl::Status RenderToCpu(CalculatorContext *cc, std::unordered_map<std::string, cv::Mat> &all_masks);
absl::Status FormFacePartMask(CalculatorContext *cc, std::unique_ptr<cv::Mat> &image_mat,
ImageFormat::Format *target_format,
const RenderData &render_data,
std::unordered_map<std::string, cv::Mat> &all_masks);
absl::Status GetFaceBox(std::unique_ptr<cv::Mat> &image_mat,
const RenderData &render_data);
// Indicates if image frame is available as input.
bool image_frame_available_ = false;
int width_ = 0;
int height_ = 0;
int width_canvas_ = 0; // Size of overlay drawing texture canvas.
int height_canvas_ = 0;
float scale_factor_ = 1.0;
std::tuple<double, double, double, double> face_box;
};
REGISTER_CALCULATOR(FormFaceMaskCalculator);
absl::Status FormFaceMaskCalculator::GetContract(CalculatorContract *cc)
{
CHECK_GE(cc->Inputs().NumEntries(), 1);
if (cc->Inputs().HasTag(kImageFrameTag))
{
cc->Inputs().Tag(kImageFrameTag).Set<ImageFrame>();
CHECK(cc->Outputs().HasTag(kMaskTag));
}
// Data streams to render.
for (CollectionItemId id = cc->Inputs().BeginId(); id < cc->Inputs().EndId();
++id)
{
auto tag_and_index = cc->Inputs().TagAndIndexFromId(id);
std::string tag = tag_and_index.first;
if (tag == kVectorTag)
{
cc->Inputs().Get(id).Set<std::vector<RenderData>>();
}
else if (tag.empty())
{
// Empty tag defaults to accepting a single object of RenderData type.
cc->Inputs().Get(id).Set<RenderData>();
}
}
if (cc->Outputs().HasTag(kMaskTag))
{
cc->Outputs().Tag(kMaskTag).Set<std::unordered_map<std::string, cv::Mat>>();
}
if (cc->Outputs().HasTag(kFaceBoxTag))
{
cc->Outputs().Tag(kFaceBoxTag).Set<std::tuple<double, double, double, double>>();
}
return absl::OkStatus();
}
absl::Status FormFaceMaskCalculator::Open(CalculatorContext *cc)
{
  cc->SetOffset(TimestampDiff(0));
  // Record whether an input image stream is present; CreateRenderTargetCpu
  // relies on this flag to size the render target from the incoming frame
  // instead of falling back to a dummy 150x150 canvas.
  if (cc->Inputs().HasTag(kImageFrameTag) || HasImageTag(cc))
  {
    image_frame_available_ = true;
  }
  return absl::OkStatus();
}
absl::Status FormFaceMaskCalculator::Process(CalculatorContext *cc)
{
if (cc->Inputs().HasTag(kImageFrameTag) &&
cc->Inputs().Tag(kImageFrameTag).IsEmpty())
{
return absl::OkStatus();
}
// Initialize render target, drawn with OpenCV.
std::unique_ptr<cv::Mat> image_mat;
ImageFormat::Format target_format;
std::unordered_map<std::string, cv::Mat> all_masks;
if (cc->Outputs().HasTag(kMaskTag))
{
MP_RETURN_IF_ERROR(CreateRenderTargetCpu(cc, image_mat, &target_format));
}
// Render streams onto render target.
for (CollectionItemId id = cc->Inputs().BeginId(); id < cc->Inputs().EndId();
++id)
{
auto tag_and_index = cc->Inputs().TagAndIndexFromId(id);
std::string tag = tag_and_index.first;
if (!tag.empty() && tag != kVectorTag)
{
continue;
}
if (cc->Inputs().Get(id).IsEmpty())
{
continue;
}
if (tag.empty())
{
// Empty tag defaults to accepting a single object of RenderData type.
const RenderData &render_data = cc->Inputs().Get(id).Get<RenderData>();
MP_RETURN_IF_ERROR(FormFacePartMask(cc, image_mat, &target_format, render_data, all_masks));
if (cc->Outputs().HasTag(kFaceBoxTag))
{
MP_RETURN_IF_ERROR(GetFaceBox(image_mat, render_data));
}
}
else
{
RET_CHECK_EQ(kVectorTag, tag);
const std::vector<RenderData> &render_data_vec =
cc->Inputs().Get(id).Get<std::vector<RenderData>>();
for (const RenderData &render_data : render_data_vec)
{
MP_RETURN_IF_ERROR(FormFacePartMask(cc, image_mat, &target_format, render_data, all_masks));
}
}
}
  // Send the collected masks (and the face box) to the output streams.
  MP_RETURN_IF_ERROR(RenderToCpu(cc, all_masks));
return absl::OkStatus();
}
absl::Status FormFaceMaskCalculator::Close(CalculatorContext *cc)
{
return absl::OkStatus();
}
absl::Status FormFaceMaskCalculator::RenderToCpu(CalculatorContext *cc,
std::unordered_map<std::string, cv::Mat> &all_masks)
{
auto output_frame = absl::make_unique<std::unordered_map<std::string, cv::Mat>>(all_masks, all_masks.get_allocator());
if (cc->Outputs().HasTag(kMaskTag))
{
cc->Outputs()
.Tag(kMaskTag)
.Add(output_frame.release(), cc->InputTimestamp());
}
auto output_frame2 = absl::make_unique<std::tuple<double, double, double, double>>(face_box);
if (cc->Outputs().HasTag(kFaceBoxTag))
{
cc->Outputs()
.Tag(kFaceBoxTag)
.Add(output_frame2.release(), cc->InputTimestamp());
}
all_masks.clear();
return absl::OkStatus();
}
absl::Status FormFaceMaskCalculator::CreateRenderTargetCpu(
CalculatorContext *cc, std::unique_ptr<cv::Mat> &image_mat,
ImageFormat::Format *target_format)
{
if (image_frame_available_)
{
const auto &input_frame =
cc->Inputs().Tag(kImageFrameTag).Get<ImageFrame>();
int target_mat_type;
switch (input_frame.Format())
{
case ImageFormat::SRGBA:
*target_format = ImageFormat::SRGBA;
target_mat_type = CV_8UC4;
break;
case ImageFormat::SRGB:
*target_format = ImageFormat::SRGB;
target_mat_type = CV_8UC3;
break;
case ImageFormat::GRAY8:
*target_format = ImageFormat::SRGB;
target_mat_type = CV_8UC3;
break;
default:
return absl::UnknownError("Unexpected image frame format.");
break;
}
image_mat = absl::make_unique<cv::Mat>(
input_frame.Height(), input_frame.Width(), target_mat_type);
auto input_mat = formats::MatView(&input_frame);
if (input_frame.Format() == ImageFormat::GRAY8)
{
cv::Mat rgb_mat;
cv::cvtColor(input_mat, rgb_mat, CV_GRAY2RGB);
rgb_mat.copyTo(*image_mat);
}
else
{
input_mat.copyTo(*image_mat);
}
}
else
{
image_mat = absl::make_unique<cv::Mat>(
150, 150, CV_8UC3,
cv::Scalar(255, 255,
255));
*target_format = ImageFormat::SRGB;
}
return absl::OkStatus();
}
absl::Status FormFaceMaskCalculator::GetFaceBox(std::unique_ptr<cv::Mat> &image_mat,
const RenderData &render_data)
{
cv::Mat mat_image_ = *image_mat.get();
int image_width_ = image_mat->cols;
int image_height_ = image_mat->rows;
std::vector<int> x_s, y_s;
double box_min_y, box_max_y, box_max_x, box_min_x;
for (auto &annotation : render_data.render_annotations())
{
if (annotation.data_case() == RenderAnnotation::kPoint)
{
const auto &point = annotation.point();
int x = -1;
int y = -1;
if (point.normalized())
{
CHECK(NormalizedtoPixelCoordinates(point.x(), point.y(), image_width_,
image_height_, &x, &y));
}
else
{
x = static_cast<int>(point.x() * scale_factor_);
y = static_cast<int>(point.y() * scale_factor_);
}
      x_s.push_back(x);
      y_s.push_back(y);
}
}
cv::minMaxLoc(y_s, &box_min_y, &box_max_y);
cv::minMaxLoc(x_s, &box_min_x, &box_max_x);
box_min_y = box_min_y * 0.9;
face_box = std::make_tuple(box_min_x, box_min_y, box_max_x, box_max_y);
return absl::OkStatus();
}
absl::Status FormFaceMaskCalculator::FormFacePartMask(CalculatorContext *cc,
std::unique_ptr<cv::Mat> &image_mat,
ImageFormat::Format *target_format,
const RenderData &render_data,
std::unordered_map<std::string, cv::Mat> &all_masks)
{
cv::Mat mat_image_ = *image_mat.get();
int image_width_ = image_mat->cols;
int image_height_ = image_mat->rows;
std::unordered_map<std::string, const std::vector<int>> orderList;
orderList.insert(make_pair("UPPER_LIP", UPPER_LIP));
orderList.insert(make_pair("LOWER_LIP", LOWER_LIP));
orderList.insert(make_pair("FACE_OVAL", FACE_OVAL));
orderList.insert(make_pair("MOUTH_INSIDE", MOUTH_INSIDE));
orderList.insert(make_pair("LEFT_EYE", LEFT_EYE));
orderList.insert(make_pair("RIGHT_EYE", RIGHT_EYE));
orderList.insert(make_pair("LEFT_BROW", LEFT_BROW));
orderList.insert(make_pair("RIGHT_BROW", RIGHT_BROW));
orderList.insert(make_pair("LIPS", LIPS));
orderList.insert(make_pair("PART_FOREHEAD_B", PART_FOREHEAD_B));
cv::Mat mask;
std::vector<cv::Point> point_array;
int c = 0;
for (const auto &[key, value] : orderList)
{
for (auto order : value)
{
c = 0;
for (auto &annotation : render_data.render_annotations())
{
if (annotation.data_case() == RenderAnnotation::kPoint)
{
if (order == c)
{
const auto &point = annotation.point();
int x = -1;
int y = -1;
CHECK(NormalizedtoPixelCoordinates(point.x(), point.y(), image_width_,
image_height_, &x, &y));
point_array.push_back(cv::Point(x, y));
}
c += 1;
}
}
}
std::vector<std::vector<cv::Point>> point_vec;
point_vec.push_back(point_array);
mask = cv::Mat::zeros(mat_image_.size(), CV_32FC1);
cv::fillPoly(mask, point_vec, cv::Scalar::all(255), cv::LINE_AA);
mask.convertTo(mask, CV_8U);
all_masks.insert(make_pair(key, mask));
point_vec.clear();
point_array.clear();
}
return absl::OkStatus();
}
} // namespace mediapipe
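Each entry of the MASK map built above is an 8-bit, single-channel image at the render-target size: a landmark index list such as UPPER_LIP or FACE_OVAL selects points of the MediaPipe face mesh in drawing order, the normalized landmarks are converted to pixel coordinates, and cv::fillPoly rasterizes the resulting polygon with value 255. The FACEBOX output is the landmark bounding box as a (min_x, min_y, max_x, max_y) tuple, with min_y scaled by 0.9 so the box reaches above the detected landmarks, which the forehead estimate in the smoothing calculator later relies on.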


@@ -0,0 +1,456 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <math.h>
#include <algorithm>
#include <cmath>
//#include <android/log.h>
#include <memory>
#include "absl/strings/str_cat.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_options.pb.h"
#include "mediapipe/framework/formats/image_format.pb.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/image_frame_opencv.h"
#include "mediapipe/framework/formats/video_stream_header.h"
#include "mediapipe/framework/port/logging.h"
#include "mediapipe/framework/port/opencv_core_inc.h"
#include "mediapipe/framework/port/opencv_imgproc_inc.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/util/annotation_renderer.h"
#include "mediapipe/util/render_data.pb.h"
#include "mediapipe/framework/port/logging.h"
#include "mediapipe/framework/port/vector.h"
#include "mediapipe/util/color.pb.h"
namespace mediapipe
{
namespace
{
constexpr char kMaskTag[] = "MASK";
constexpr char kFaceBoxTag[] = "FACEBOX";
constexpr char kImageFrameTag[] = "IMAGE";
enum
{
ATTRIB_VERTEX,
ATTRIB_TEXTURE_POSITION,
NUM_ATTRIBUTES
};
// Round up n to next multiple of m.
size_t RoundUp(size_t n, size_t m) { return ((n + m - 1) / m) * m; } // NOLINT
inline bool HasImageTag(mediapipe::CalculatorContext *cc) { return false; }
using Point = RenderAnnotation::Point;
} // namespace
class SmoothFaceCalculator : public CalculatorBase
{
public:
SmoothFaceCalculator() = default;
~SmoothFaceCalculator() override = default;
static absl::Status GetContract(CalculatorContract *cc);
// From Calculator.
absl::Status Open(CalculatorContext *cc) override;
absl::Status Process(CalculatorContext *cc) override;
absl::Status Close(CalculatorContext *cc) override;
private:
absl::Status CreateRenderTargetCpu(CalculatorContext *cc,
std::unique_ptr<cv::Mat> &image_mat,
ImageFormat::Format *target_format);
absl::Status RenderToCpu(
CalculatorContext *cc, const ImageFormat::Format &target_format,
uchar *data_image, std::unique_ptr<cv::Mat> &image_mat);
absl::Status SmoothFace(CalculatorContext *cc, std::unique_ptr<cv::Mat> &image_mat,
ImageFormat::Format *target_format,
const std::unordered_map<std::string, cv::Mat> &mask_vec,
const std::tuple<double, double, double, double> &face_box);
cv::Mat predict_forehead_mask(std::unique_ptr<cv::Mat> &image_mat,
const std::unordered_map<std::string, cv::Mat> &mask_vec, double face_box_min_y);
// Indicates if image frame is available as input.
bool image_frame_available_ = false;
std::unordered_map<std::string, cv::Mat> all_masks;
int width_ = 0;
int height_ = 0;
int width_canvas_ = 0; // Size of overlay drawing texture canvas.
int height_canvas_ = 0;
};
REGISTER_CALCULATOR(SmoothFaceCalculator);
absl::Status SmoothFaceCalculator::GetContract(CalculatorContract *cc)
{
CHECK_GE(cc->Inputs().NumEntries(), 1);
if (cc->Inputs().HasTag(kImageFrameTag))
{
cc->Inputs().Tag(kImageFrameTag).Set<ImageFrame>();
CHECK(cc->Outputs().HasTag(kImageFrameTag));
}
// Data streams to render.
for (CollectionItemId id = cc->Inputs().BeginId(); id < cc->Inputs().EndId();
++id)
{
auto tag_and_index = cc->Inputs().TagAndIndexFromId(id);
std::string tag = tag_and_index.first;
if (tag == kMaskTag)
{
cc->Inputs().Get(id).Set<std::unordered_map<std::string, cv::Mat>>();
}
else if (tag.empty())
{
// Empty tag defaults to accepting a single object of Mat type.
cc->Inputs().Get(id).Set<cv::Mat>();
}
if (tag == kFaceBoxTag)
{
cc->Inputs().Get(id).Set<std::tuple<double, double, double, double>>();
}
}
if (cc->Outputs().HasTag(kImageFrameTag))
{
cc->Outputs().Tag(kImageFrameTag).Set<ImageFrame>();
}
return absl::OkStatus();
}
absl::Status SmoothFaceCalculator::Open(CalculatorContext *cc)
{
cc->SetOffset(TimestampDiff(0));
if (cc->Inputs().HasTag(kImageFrameTag) || HasImageTag(cc))
{
image_frame_available_ = true;
}
// Set the output header based on the input header (if present).
const char *tag = kImageFrameTag;
if (image_frame_available_ && !cc->Inputs().Tag(tag).Header().IsEmpty())
{
const auto &input_header =
cc->Inputs().Tag(tag).Header().Get<VideoHeader>();
auto *output_video_header = new VideoHeader(input_header);
cc->Outputs().Tag(tag).SetHeader(Adopt(output_video_header));
}
return absl::OkStatus();
}
absl::Status SmoothFaceCalculator::Process(CalculatorContext *cc)
{
if (cc->Inputs().HasTag(kImageFrameTag) &&
cc->Inputs().Tag(kImageFrameTag).IsEmpty())
{
return absl::OkStatus();
}
if (cc->Inputs().HasTag(kMaskTag) &&
cc->Inputs().Tag(kMaskTag).IsEmpty())
{
return absl::OkStatus();
}
if (cc->Inputs().HasTag(kFaceBoxTag) &&
cc->Inputs().Tag(kFaceBoxTag).IsEmpty())
{
return absl::OkStatus();
}
// Initialize render target, drawn with OpenCV.
std::unique_ptr<cv::Mat> image_mat;
ImageFormat::Format target_format;
if (cc->Outputs().HasTag(kImageFrameTag))
{
MP_RETURN_IF_ERROR(CreateRenderTargetCpu(cc, image_mat, &target_format));
}
  // Render streams onto render target.
  // The MASK and FACEBOX packets arrive on separate input streams, so collect
  // each one as its stream id comes up and render once both are available.
  const std::unordered_map<std::string, cv::Mat> *mask_vec = nullptr;
  const std::tuple<double, double, double, double> *face_box = nullptr;
  for (CollectionItemId id = cc->Inputs().BeginId(); id < cc->Inputs().EndId();
       ++id)
  {
    auto tag_and_index = cc->Inputs().TagAndIndexFromId(id);
    std::string tag = tag_and_index.first;
    if (tag != kMaskTag && tag != kFaceBoxTag)
    {
      continue;
    }
    if (cc->Inputs().Get(id).IsEmpty())
    {
      continue;
    }
    if (tag == kMaskTag)
    {
      mask_vec =
          &cc->Inputs().Get(id).Get<std::unordered_map<std::string, cv::Mat>>();
    }
    else
    {
      face_box =
          &cc->Inputs().Get(id).Get<std::tuple<double, double, double, double>>();
    }
  }
  if (mask_vec != nullptr && face_box != nullptr && mask_vec->size() > 1)
    MP_RETURN_IF_ERROR(SmoothFace(cc, image_mat, &target_format, *mask_vec, *face_box));
// Copy the rendered image to output.
uchar *image_mat_ptr = image_mat->data;
MP_RETURN_IF_ERROR(RenderToCpu(cc, target_format, image_mat_ptr, image_mat));
return absl::OkStatus();
}
absl::Status SmoothFaceCalculator::Close(CalculatorContext *cc)
{
return absl::OkStatus();
}
absl::Status SmoothFaceCalculator::RenderToCpu(
CalculatorContext *cc, const ImageFormat::Format &target_format,
uchar *data_image, std::unique_ptr<cv::Mat> &image_mat)
{
cv::Mat mat_image__ = *image_mat.get();
auto output_frame = absl::make_unique<ImageFrame>(
target_format, mat_image__.cols, mat_image__.rows);
output_frame->CopyPixelData(target_format, mat_image__.cols, mat_image__.rows, data_image,
ImageFrame::kDefaultAlignmentBoundary);
if (cc->Outputs().HasTag(kImageFrameTag))
{
cc->Outputs()
.Tag(kImageFrameTag)
.Add(output_frame.release(), cc->InputTimestamp());
}
return absl::OkStatus();
}
absl::Status SmoothFaceCalculator::CreateRenderTargetCpu(
CalculatorContext *cc, std::unique_ptr<cv::Mat> &image_mat,
ImageFormat::Format *target_format)
{
if (image_frame_available_)
{
const auto &input_frame =
cc->Inputs().Tag(kImageFrameTag).Get<ImageFrame>();
int target_mat_type;
switch (input_frame.Format())
{
case ImageFormat::SRGBA:
*target_format = ImageFormat::SRGBA;
target_mat_type = CV_8UC4;
break;
case ImageFormat::SRGB:
*target_format = ImageFormat::SRGB;
target_mat_type = CV_8UC3;
break;
case ImageFormat::GRAY8:
*target_format = ImageFormat::SRGB;
target_mat_type = CV_8UC3;
break;
default:
return absl::UnknownError("Unexpected image frame format.");
break;
}
image_mat = absl::make_unique<cv::Mat>(
input_frame.Height(), input_frame.Width(), target_mat_type);
auto input_mat = formats::MatView(&input_frame);
if (input_frame.Format() == ImageFormat::GRAY8)
{
cv::Mat rgb_mat;
cv::cvtColor(input_mat, rgb_mat, CV_GRAY2RGB);
rgb_mat.copyTo(*image_mat);
}
else
{
input_mat.copyTo(*image_mat);
}
}
else
{
image_mat = absl::make_unique<cv::Mat>(
150, 150, CV_8UC4,
cv::Scalar(255, 255,
255));
*target_format = ImageFormat::SRGBA;
}
return absl::OkStatus();
}
cv::Mat SmoothFaceCalculator::predict_forehead_mask(std::unique_ptr<cv::Mat> &image_mat,
const std::unordered_map<std::string, cv::Mat> &mask_vec, double face_box_min_y)
{
cv::Mat mat_image__ = *image_mat.get();
int image_width_ = image_mat->cols;
int image_height_ = image_mat->rows;
cv::Mat part_forehead_mask = mask_vec.find("PART_FOREHEAD_B")->second.clone();
part_forehead_mask.convertTo(part_forehead_mask, CV_32F, 1.0 / 255);
part_forehead_mask.convertTo(part_forehead_mask, CV_8U);
cv::Mat image_sm, image_sm_hsv, skinMask;
cv::resize(mat_image__, image_sm, cv::Size(mat_image__.size().width, mat_image__.size().height));
cv::cvtColor(image_sm, image_sm_hsv, cv::COLOR_BGR2HSV);
std::vector<int> x, y;
std::vector<cv::Point> location;
cv::Vec3d hsv_min, hsv_max;
std::vector<cv::Mat> channels(3);
cv::split(image_sm_hsv, channels);
std::vector<std::vector<double>> minx(3), maxx(3);
int c = 0;
for (auto ch : channels)
{
cv::Mat row, mask_row;
double min, max;
for (int i = 0; i < ch.rows; i++)
{
row = ch.row(i);
mask_row = part_forehead_mask.row(i);
cv::minMaxLoc(row, &min, &max, 0, 0, mask_row);
minx[c].push_back(min);
maxx[c].push_back(max);
}
c++;
}
for (int i = 0; i < 3; i++)
{
hsv_min[i] = *std::min_element(minx[i].begin(), minx[i].end());
}
for (int i = 0; i < 3; i++)
{
hsv_max[i] = *std::max_element(maxx[i].begin(), maxx[i].end());
}
cv::Mat _forehead_kernel = cv::getStructuringElement(cv::MORPH_ELLIPSE, cv::Size(1, 1));
cv::inRange(image_sm_hsv, hsv_min, hsv_max, skinMask);
cv::erode(skinMask, skinMask, _forehead_kernel, cv::Point(-1, -1), 2);
cv::dilate(skinMask, skinMask, _forehead_kernel, cv::Point(-1, -1), 2);
skinMask.convertTo(skinMask, CV_8U, 1.0 / 255);
cv::findNonZero(skinMask, location);
double max_part_f, x_min_part, x_max_part;
for (auto &i : location)
{
x.push_back(i.x);
y.push_back(i.y);
}
cv::minMaxLoc(y, NULL, &max_part_f);
cv::minMaxLoc(x, &x_min_part, &x_max_part);
cv::Mat new_skin_mask = cv::Mat::zeros(skinMask.size(), CV_8U);
new_skin_mask(cv::Range(face_box_min_y, max_part_f), cv::Range(x_min_part, x_max_part)) =
skinMask(cv::Range(face_box_min_y, max_part_f), cv::Range(x_min_part, x_max_part));
return new_skin_mask;
}
absl::Status SmoothFaceCalculator::SmoothFace(CalculatorContext *cc,
std::unique_ptr<cv::Mat> &image_mat,
ImageFormat::Format *target_format,
const std::unordered_map<std::string, cv::Mat> &mask_vec,
const std::tuple<double, double, double, double> &face_box)
{
cv::Mat mat_image__ = *image_mat.get();
cv::Mat mouth_mask, mouth;
int image_width_ = image_mat->cols;
int image_height_ = image_mat->rows;
cv::Mat not_full_face = mask_vec.find("FACE_OVAL")->second.clone() +
predict_forehead_mask(image_mat, mask_vec, std::get<1>(face_box)) -
mask_vec.find("LEFT_EYE")->second.clone() -
mask_vec.find("RIGHT_EYE")->second.clone() -
mask_vec.find("LEFT_BROW")->second.clone() -
mask_vec.find("RIGHT_BROW")->second.clone() -
mask_vec.find("LIPS")->second.clone();
cv::resize(not_full_face,
not_full_face,
mat_image__.size(), 0, 0,
cv::INTER_LINEAR);
std::vector<int> x, y;
std::vector<cv::Point> location;
cv::findNonZero(not_full_face, location);
double min_y, min_x, max_x, max_y;
for (auto &i : location)
{
x.push_back(i.x);
y.push_back(i.y);
}
cv::minMaxLoc(x, &min_x, &max_x);
cv::minMaxLoc(y, &min_y, &max_y);
cv::Mat patch_face = mat_image__(cv::Range(min_y, max_y), cv::Range(min_x, max_x));
cv::Mat patch_nff = not_full_face(cv::Range(min_y, max_y), cv::Range(min_x, max_x));
cv::Mat patch_new, patch_wow;
cv::cvtColor(patch_face, patch_wow, cv::COLOR_RGBA2RGB);
cv::bilateralFilter(patch_wow, patch_new, 12, 50, 50);
cv::Mat patch_new_nff, patch_new_mask, patch, patch_face_nff;
patch_new.copyTo(patch_new_nff, patch_nff);
patch_face.copyTo(patch_face_nff, patch_nff);
cv::cvtColor(patch_face_nff, patch_face_nff, cv::COLOR_RGBA2RGB);
patch_new_mask = 0.85 * patch_new_nff + 0.15 * patch_face_nff;
patch = cv::min(255, patch_new_mask);
cv::cvtColor(patch, patch, cv::COLOR_RGB2RGBA);
patch.copyTo(patch_face, patch_nff);
return absl::OkStatus();
}
} // namespace mediapipe
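The smoothing itself is a masked bilateral filter. The work region is the face oval plus an HSV skin-tone estimate of the forehead (sampled from the PART_FOREHEAD_B polygon), minus eyes, brows, and lips; within that region the result is blended as 0.85 * bilateralFilter(patch, d = 12, sigmaColor = 50, sigmaSpace = 50) + 0.15 * patch and clamped to 255, so skin texture is softened while edges and everything outside the mask keep their original pixels.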


@@ -0,0 +1,382 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <math.h>
#include <algorithm>
#include <cmath>
//#include <android/log.h>
#include <memory>
#include "absl/strings/str_cat.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_options.pb.h"
#include "mediapipe/framework/formats/image_format.pb.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/image_frame_opencv.h"
#include "mediapipe/framework/formats/video_stream_header.h"
#include "mediapipe/framework/port/logging.h"
#include "mediapipe/framework/port/opencv_core_inc.h"
#include "mediapipe/framework/port/opencv_imgproc_inc.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/util/annotation_renderer.h"
#include "mediapipe/util/render_data.pb.h"
#include "mediapipe/framework/port/logging.h"
#include "mediapipe/framework/port/vector.h"
#include "mediapipe/util/color.pb.h"
namespace mediapipe
{
namespace
{
constexpr char kMaskTag[] = "MASK";
constexpr char kImageFrameTag[] = "IMAGE";
enum
{
ATTRIB_VERTEX,
ATTRIB_TEXTURE_POSITION,
NUM_ATTRIBUTES
};
// Round up n to next multiple of m.
size_t RoundUp(size_t n, size_t m) { return ((n + m - 1) / m) * m; } // NOLINT
inline bool HasImageTag(mediapipe::CalculatorContext *cc) { return false; }
using Point = RenderAnnotation::Point;
} // namespace
class WhitenTeethCalculator : public CalculatorBase
{
public:
WhitenTeethCalculator() = default;
~WhitenTeethCalculator() override = default;
static absl::Status GetContract(CalculatorContract *cc);
// From Calculator.
absl::Status Open(CalculatorContext *cc) override;
absl::Status Process(CalculatorContext *cc) override;
absl::Status Close(CalculatorContext *cc) override;
private:
absl::Status CreateRenderTargetCpu(CalculatorContext *cc,
std::unique_ptr<cv::Mat> &image_mat,
ImageFormat::Format *target_format);
absl::Status RenderToCpu(
CalculatorContext *cc, const ImageFormat::Format &target_format,
uchar *data_image, std::unique_ptr<cv::Mat> &image_mat);
absl::Status WhitenTeeth(CalculatorContext *cc, std::unique_ptr<cv::Mat> &image_mat,
ImageFormat::Format *target_format,
const std::unordered_map<std::string, cv::Mat> &mask_vec);
// Indicates if image frame is available as input.
bool image_frame_available_ = false;
std::unordered_map<std::string, cv::Mat> all_masks;
int width_ = 0;
int height_ = 0;
int width_canvas_ = 0; // Size of overlay drawing texture canvas.
int height_canvas_ = 0;
};
REGISTER_CALCULATOR(WhitenTeethCalculator);
absl::Status WhitenTeethCalculator::GetContract(CalculatorContract *cc)
{
CHECK_GE(cc->Inputs().NumEntries(), 1);
if (cc->Inputs().HasTag(kImageFrameTag))
{
cc->Inputs().Tag(kImageFrameTag).Set<ImageFrame>();
CHECK(cc->Outputs().HasTag(kImageFrameTag));
}
// Data streams to render.
for (CollectionItemId id = cc->Inputs().BeginId(); id < cc->Inputs().EndId();
++id)
{
auto tag_and_index = cc->Inputs().TagAndIndexFromId(id);
std::string tag = tag_and_index.first;
if (tag == kMaskTag)
{
cc->Inputs().Get(id).Set<std::unordered_map<std::string, cv::Mat>>();
}
else if (tag.empty())
{
// Empty tag defaults to accepting a single object of Mat type.
cc->Inputs().Get(id).Set<cv::Mat>();
}
}
if (cc->Outputs().HasTag(kImageFrameTag))
{
cc->Outputs().Tag(kImageFrameTag).Set<ImageFrame>();
}
return absl::OkStatus();
}
absl::Status WhitenTeethCalculator::Open(CalculatorContext *cc)
{
cc->SetOffset(TimestampDiff(0));
if (cc->Inputs().HasTag(kImageFrameTag) || HasImageTag(cc))
{
image_frame_available_ = true;
}
// Set the output header based on the input header (if present).
const char *tag = kImageFrameTag;
if (image_frame_available_ && !cc->Inputs().Tag(tag).Header().IsEmpty())
{
const auto &input_header =
cc->Inputs().Tag(tag).Header().Get<VideoHeader>();
auto *output_video_header = new VideoHeader(input_header);
cc->Outputs().Tag(tag).SetHeader(Adopt(output_video_header));
}
return absl::OkStatus();
}
absl::Status WhitenTeethCalculator::Process(CalculatorContext *cc)
{
if (cc->Inputs().HasTag(kImageFrameTag) &&
cc->Inputs().Tag(kImageFrameTag).IsEmpty())
{
return absl::OkStatus();
}
if (cc->Inputs().HasTag(kMaskTag) &&
cc->Inputs().Tag(kMaskTag).IsEmpty())
{
return absl::OkStatus();
}
// Initialize render target, drawn with OpenCV.
std::unique_ptr<cv::Mat> image_mat;
ImageFormat::Format target_format;
if (cc->Outputs().HasTag(kImageFrameTag))
{
MP_RETURN_IF_ERROR(CreateRenderTargetCpu(cc, image_mat, &target_format));
}
// Render streams onto render target.
for (CollectionItemId id = cc->Inputs().BeginId(); id < cc->Inputs().EndId();
++id)
{
auto tag_and_index = cc->Inputs().TagAndIndexFromId(id);
std::string tag = tag_and_index.first;
if (!tag.empty() && tag != kMaskTag)
{
continue;
}
if (cc->Inputs().Get(id).IsEmpty())
{
continue;
}
RET_CHECK_EQ(kMaskTag, tag);
const std::unordered_map<std::string, cv::Mat> &mask_vec =
cc->Inputs().Get(id).Get<std::unordered_map<std::string, cv::Mat>>();
if (mask_vec.size() > 1)
MP_RETURN_IF_ERROR(WhitenTeeth(cc, image_mat, &target_format, mask_vec));
}
// Copy the rendered image to output.
uchar *image_mat_ptr = image_mat->data;
MP_RETURN_IF_ERROR(RenderToCpu(cc, target_format, image_mat_ptr, image_mat));
return absl::OkStatus();
}
absl::Status WhitenTeethCalculator::Close(CalculatorContext *cc)
{
return absl::OkStatus();
}
absl::Status WhitenTeethCalculator::RenderToCpu(
CalculatorContext *cc, const ImageFormat::Format &target_format,
uchar *data_image, std::unique_ptr<cv::Mat> &image_mat)
{
cv::Mat mat_image_ = *image_mat.get();
auto output_frame = absl::make_unique<ImageFrame>(
target_format, mat_image_.cols, mat_image_.rows);
output_frame->CopyPixelData(target_format, mat_image_.cols, mat_image_.rows, data_image,
ImageFrame::kDefaultAlignmentBoundary);
if (cc->Outputs().HasTag(kImageFrameTag))
{
cc->Outputs()
.Tag(kImageFrameTag)
.Add(output_frame.release(), cc->InputTimestamp());
}
return absl::OkStatus();
}
absl::Status WhitenTeethCalculator::CreateRenderTargetCpu(
CalculatorContext *cc, std::unique_ptr<cv::Mat> &image_mat,
ImageFormat::Format *target_format)
{
if (image_frame_available_)
{
const auto &input_frame =
cc->Inputs().Tag(kImageFrameTag).Get<ImageFrame>();
int target_mat_type;
switch (input_frame.Format())
{
case ImageFormat::SRGBA:
*target_format = ImageFormat::SRGBA;
target_mat_type = CV_8UC4;
break;
case ImageFormat::SRGB:
*target_format = ImageFormat::SRGB;
target_mat_type = CV_8UC3;
break;
case ImageFormat::GRAY8:
*target_format = ImageFormat::SRGB;
target_mat_type = CV_8UC3;
break;
default:
return absl::UnknownError("Unexpected image frame format.");
break;
}
image_mat = absl::make_unique<cv::Mat>(
input_frame.Height(), input_frame.Width(), target_mat_type);
auto input_mat = formats::MatView(&input_frame);
if (input_frame.Format() == ImageFormat::GRAY8)
{
cv::Mat rgb_mat;
cv::cvtColor(input_mat, rgb_mat, CV_GRAY2RGB);
rgb_mat.copyTo(*image_mat);
}
else
{
input_mat.copyTo(*image_mat);
}
}
else
{
image_mat = absl::make_unique<cv::Mat>(
150, 150, CV_8UC4,
cv::Scalar(255, 255,
255));
*target_format = ImageFormat::SRGBA;
}
return absl::OkStatus();
}
absl::Status WhitenTeethCalculator::WhitenTeeth(CalculatorContext *cc,
std::unique_ptr<cv::Mat> &image_mat,
ImageFormat::Format *target_format,
const std::unordered_map<std::string, cv::Mat> &mask_vec)
{
cv::Mat mat_image__ = *image_mat.get();
cv::Mat mouth_mask, mouth;
int image_width_ = image_mat->cols;
int image_height_ = image_mat->rows;
  // Take the mouth-interior mask from the incoming MASK map.
  mouth_mask = mask_vec.find("MOUTH_INSIDE")->second.clone();
  cv::resize(mouth_mask, mouth, mat_image__.size(), 0, 0, cv::INTER_LINEAR);
std::vector<int> x, y;
std::vector<cv::Point> location;
cv::findNonZero(mouth, location);
for (auto &i : location)
{
x.push_back(i.x);
y.push_back(i.y);
}
if (!(x.empty()) && !(y.empty()))
{
double mouth_min_y, mouth_max_y, mouth_max_x, mouth_min_x;
cv::minMaxLoc(y, &mouth_min_y, &mouth_max_y);
cv::minMaxLoc(x, &mouth_min_x, &mouth_max_x);
double mh = mouth_max_y - mouth_min_y;
double mw = mouth_max_x - mouth_min_x;
cv::Mat mouth_crop_mask;
    // Scale the mask to [0, 1].
    mouth.convertTo(mouth, CV_32F, 1.0 / 255);
if (mh / mw > 0.17)
{
mouth_min_y = static_cast<int>(std::max(mouth_min_y - mh * 0.1, 0.0));
mouth_max_y = static_cast<int>(std::min(mouth_max_y + mh * 0.1, (double)image_height_));
mouth_min_x = static_cast<int>(std::max(mouth_min_x - mw * 0.1, 0.0));
mouth_max_x = static_cast<int>(std::min(mouth_max_x + mw * 0.1, (double)image_width_));
mouth_crop_mask = mouth(cv::Range(mouth_min_y, mouth_max_y), cv::Range(mouth_min_x, mouth_max_x));
cv::Mat img_hsv, tmp_mask, img_hls;
cv::cvtColor(mat_image__(cv::Range(mouth_min_y, mouth_max_y), cv::Range(mouth_min_x, mouth_max_x)), img_hsv,
cv::COLOR_RGBA2RGB);
cv::cvtColor(img_hsv, img_hsv,
cv::COLOR_RGB2HSV);
cv::Mat _mouth_erode_kernel = cv::getStructuringElement(
cv::MORPH_ELLIPSE, cv::Size(7, 7));
      cv::erode(mouth_crop_mask, tmp_mask, _mouth_erode_kernel, cv::Point(-1, -1), 3);
cv::GaussianBlur(tmp_mask, tmp_mask, cv::Size(51, 51), 0);
img_hsv.convertTo(img_hsv, CV_8U);
std::vector<cv::Mat> channels(3);
cv::split(img_hsv, channels);
cv::Mat tmp;
cv::multiply(channels[1], tmp_mask, tmp, 0.3, CV_8U);
cv::subtract(channels[1], tmp, channels[1], cv::noArray(), CV_8U);
channels[1] = cv::min(255, channels[1]);
cv::merge(channels, img_hsv);
cv::cvtColor(img_hsv, img_hsv, cv::COLOR_HSV2RGB);
cv::cvtColor(img_hsv, img_hls, cv::COLOR_RGB2HLS);
cv::split(img_hls, channels);
cv::multiply(channels[1], tmp_mask, tmp, 0.3, CV_8U);
cv::add(channels[1], tmp, channels[1], cv::noArray(), CV_8U);
channels[1] = cv::min(255, channels[1]);
cv::merge(channels, img_hls);
cv::cvtColor(img_hls, img_hls, cv::COLOR_HLS2RGB);
cv::cvtColor(img_hls, img_hls, cv::COLOR_RGB2RGBA);
cv::Mat slice = mat_image__(cv::Range(mouth_min_y, mouth_max_y), cv::Range(mouth_min_x, mouth_max_x));
img_hls.copyTo(slice);
}
}
return absl::OkStatus();
}
} // namespace mediapipe
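The whitening step operates in two color spaces under the same soft mask: tmp_mask is the mouth-interior mask scaled to [0, 1], eroded, and Gaussian-blurred, and the calculator then reduces HSV saturation as S' = S - 0.3 * S * tmp_mask (pulling yellow out of the teeth) and raises HLS lightness as L' = L + 0.3 * L * tmp_mask. The whole step is skipped when the mouth is nearly closed (bounding-box height/width <= 0.17), presumably because no teeth would be visible.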


@@ -0,0 +1,63 @@
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
licenses(["notice"])
package(default_visibility = ["//visibility:private"])
cc_binary(
name = "libmediapipe_jni.so",
linkshared = 1,
linkstatic = 1,
deps = [
"//mediapipe/graphs/beauty:mobile_calculators",
"//mediapipe/java/com/google/mediapipe/framework/jni:mediapipe_framework_jni",
],
)
cc_library(
name = "mediapipe_jni_lib",
srcs = [":libmediapipe_jni.so"],
alwayslink = 1,
)
android_binary(
name = "beautygpu",
srcs = glob(["*.java"]),
assets = [
"//mediapipe/graphs/beauty:beauty_mobile_gpu.binarypb",
"//mediapipe/modules/face_landmark:face_landmark_with_attention.tflite",
"//mediapipe/modules/face_detection:face_detection_short_range.tflite",
],
assets_dir = "",
manifest = "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:AndroidManifest.xml",
manifest_values = {
"applicationId": "com.google.mediapipe.apps.beautygpu",
"appName": "Beauty",
"mainActivity": ".MainActivity",
"cameraFacingFront": "True",
"binaryGraphName": "beauty_mobile_gpu.binarypb",
"inputVideoStreamName": "input_video",
"outputVideoStreamName": "output_video",
"flipFramesVertically": "True",
"converterNumBuffers": "2",
},
multidex = "native",
deps = [
":mediapipe_jni_lib",
"//mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:basic_lib",
"//mediapipe/framework/formats:landmark_java_proto_lite",
"//mediapipe/java/com/google/mediapipe/framework:android_framework",
],
)


@@ -0,0 +1,93 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.mediapipe.apps.beautygpu;
import android.os.Bundle;
import android.util.Log;
import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark;
import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmarkList;
import com.google.mediapipe.framework.AndroidPacketCreator;
import com.google.mediapipe.framework.Packet;
import com.google.mediapipe.framework.PacketGetter;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/** Main activity of MediaPipe face mesh app. */
public class MainActivity extends com.google.mediapipe.apps.basic.MainActivity {
private static final String TAG = "MainActivity";
private static final String INPUT_NUM_FACES_SIDE_PACKET_NAME = "num_faces";
private static final String OUTPUT_LANDMARKS_STREAM_NAME = "multi_face_landmarks";
// Max number of faces to detect/process.
private static final int NUM_FACES = 1;
@Override
protected void onCreate(Bundle savedInstanceState) {
super.onCreate(savedInstanceState);
AndroidPacketCreator packetCreator = processor.getPacketCreator();
Map<String, Packet> inputSidePackets = new HashMap<>();
inputSidePackets.put(INPUT_NUM_FACES_SIDE_PACKET_NAME, packetCreator.createInt32(NUM_FACES));
processor.setInputSidePackets(inputSidePackets);
// To show verbose logging, run:
// adb shell setprop log.tag.MainActivity VERBOSE
if (Log.isLoggable(TAG, Log.VERBOSE)) {
processor.addPacketCallback(
OUTPUT_LANDMARKS_STREAM_NAME,
(packet) -> {
Log.v(TAG, "Received multi face landmarks packet.");
List<NormalizedLandmarkList> multiFaceLandmarks =
PacketGetter.getProtoVector(packet, NormalizedLandmarkList.parser());
Log.v(
TAG,
"[TS:"
+ packet.getTimestamp()
+ "] "
+ getMultiFaceLandmarksDebugString(multiFaceLandmarks));
});
}
}
private static String getMultiFaceLandmarksDebugString(
List<NormalizedLandmarkList> multiFaceLandmarks) {
if (multiFaceLandmarks.isEmpty()) {
return "No face landmarks";
}
String multiFaceLandmarksStr = "Number of faces detected: " + multiFaceLandmarks.size() + "\n";
int faceIndex = 0;
for (NormalizedLandmarkList landmarks : multiFaceLandmarks) {
multiFaceLandmarksStr +=
"\t#Face landmarks for face[" + faceIndex + "]: " + landmarks.getLandmarkCount() + "\n";
int landmarkIndex = 0;
for (NormalizedLandmark landmark : landmarks.getLandmarkList()) {
multiFaceLandmarksStr +=
"\t\tLandmark ["
+ landmarkIndex
+ "]: ("
+ landmark.getX()
+ ", "
+ landmark.getY()
+ ", "
+ landmark.getZ()
+ ")\n";
++landmarkIndex;
}
++faceIndex;
}
return multiFaceLandmarksStr;
}
}


@@ -0,0 +1,33 @@
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
licenses(["notice"])
package(default_visibility = ["//mediapipe/examples:__subpackages__"])
cc_binary(
name = "face_mesh_tflite",
deps = [
"//mediapipe/examples/desktop:simple_run_graph_main",
"//mediapipe/graphs/beauty:desktop_calculators",
],
)
cc_binary(
name = "face_mesh_cpu",
deps = [
"//mediapipe/examples/desktop:demo_run_graph_main",
"//mediapipe/graphs/beauty:desktop_live_calculators",
],
)


@ -0,0 +1,71 @@
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
load(
"//mediapipe/framework/tool:mediapipe_graph.bzl",
"mediapipe_binary_graph",
)
licenses(["notice"])
package(default_visibility = ["//visibility:public"])
cc_library(
name = "desktop_calculators",
deps = [
"//mediapipe/calculators/core:constant_side_packet_calculator",
"//mediapipe/calculators/video:opencv_video_decoder_calculator",
"//mediapipe/calculators/video:opencv_video_encoder_calculator",
"//mediapipe/graphs/beauty/subgraphs:face_renderer_cpu",
"//mediapipe/modules/face_landmark:face_landmark_front_cpu",
],
)
cc_library(
name = "desktop_live_calculators",
deps = [
"//mediapipe/calculators/core:constant_side_packet_calculator",
"//mediapipe/calculators/core:flow_limiter_calculator",
"//mediapipe/graphs/beauty/subgraphs:face_renderer_cpu",
"//mediapipe/modules/face_landmark:face_landmark_front_cpu",
],
)
cc_library(
name = "desktop_live_gpu_calculators",
deps = [
"//mediapipe/calculators/core:constant_side_packet_calculator",
"//mediapipe/calculators/core:flow_limiter_calculator",
"//mediapipe/graphs/beauty/subgraphs:face_renderer_gpu",
"//mediapipe/modules/face_landmark:face_landmark_front_gpu",
],
)
cc_library(
name = "mobile_calculators",
deps = [
"//mediapipe/gpu:gpu_buffer_to_image_frame_calculator",
"//mediapipe/gpu:image_frame_to_gpu_buffer_calculator",
"//mediapipe/calculators/core:flow_limiter_calculator",
"//mediapipe/graphs/beauty/subgraphs:face_renderer_cpu",
"//mediapipe/modules/face_landmark:face_landmark_front_gpu",
],
)
mediapipe_binary_graph(
name = "beauty_mobile_gpu_binary_graph",
graph = "beauty_mobile.pbtxt",
output_name = "beauty_mobile_gpu.binarypb",
deps = [":mobile_calculators"],
)

View File

@ -0,0 +1,66 @@
# MediaPipe graph that performs face mesh with TensorFlow Lite on CPU.
# Input image. (ImageFrame)
input_stream: "input_video"
# Output image with rendered results. (ImageFrame)
output_stream: "output_video"
# Collection of detected/processed faces, each represented as a list of
# landmarks. (std::vector<NormalizedLandmarkList>)
output_stream: "multi_face_landmarks"
# Throttles the images flowing downstream for flow control. It passes through
# the very first incoming image unaltered, and waits for downstream nodes
# (calculators and subgraphs) in the graph to finish their tasks before it
# passes through another image. All images that come in while waiting are
# dropped, limiting the number of in-flight images in most parts of the graph
# to 1. This prevents the downstream nodes from queuing up incoming images and
# data excessively, which leads to increased latency and memory usage, unwanted
# in real-time mobile applications. It also eliminates unnecessary computation,
# e.g., the output produced by a node may get dropped downstream if the
# subsequent nodes are still busy processing previous inputs.
node {
calculator: "FlowLimiterCalculator"
input_stream: "input_video"
input_stream: "FINISHED:output_video"
input_stream_info: {
tag_index: "FINISHED"
back_edge: true
}
output_stream: "throttled_input_video"
}
# Defines side packets for further use in the graph.
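# The PACKET:0/PACKET:1 indices map positionally onto the packets listed in the
# options below: num_faces receives int_value: 1 and with_attention receives
# bool_value: true.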
node {
calculator: "ConstantSidePacketCalculator"
output_side_packet: "PACKET:0:num_faces"
output_side_packet: "PACKET:1:with_attention"
node_options: {
[type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
packet { int_value: 1 }
packet { bool_value: true }
}
}
}
# Subgraph that detects faces and corresponding landmarks.
node {
calculator: "FaceLandmarkFrontCpu"
input_stream: "IMAGE:throttled_input_video"
input_side_packet: "NUM_FACES:num_faces"
input_side_packet: "WITH_ATTENTION:with_attention"
output_stream: "LANDMARKS:multi_face_landmarks"
output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
output_stream: "DETECTIONS:face_detections"
output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
}
# Subgraph that renders face-landmark annotation onto the input image.
node {
calculator: "FaceRendererCpu"
input_stream: "IMAGE:throttled_input_video"
input_stream: "LANDMARKS:multi_face_landmarks"
input_stream: "NORM_RECTS:face_rects_from_landmarks"
input_stream: "DETECTIONS:face_detections"
output_stream: "IMAGE:output_video"
}

View File

@ -0,0 +1,82 @@
# MediaPipe graph that performs face mesh with TensorFlow Lite on GPU.
# GPU buffer. (GpuBuffer)
input_stream: "input_video"
# Max number of faces to detect/process. (int)
input_side_packet: "num_faces"
# Output image with rendered results. (GpuBuffer)
output_stream: "output_video"
# Collection of detected/processed faces, each represented as a list of
# landmarks. (std::vector<NormalizedLandmarkList>)
output_stream: "multi_face_landmarks"
# Throttles the images flowing downstream for flow control. It passes through
# the very first incoming image unaltered, and waits for downstream nodes
# (calculators and subgraphs) in the graph to finish their tasks before it
# passes through another image. All images that come in while waiting are
# dropped, limiting the number of in-flight images in most parts of the graph
# to 1. This prevents the downstream nodes from queuing up incoming images and
# data excessively, which leads to increased latency and memory usage, unwanted
# in real-time mobile applications. It also eliminates unnecessary computation,
# e.g., the output produced by a node may get dropped downstream if the
# subsequent nodes are still busy processing previous inputs.
node {
calculator: "FlowLimiterCalculator"
input_stream: "input_video"
input_stream: "FINISHED:output_video"
input_stream_info: {
tag_index: "FINISHED"
back_edge: true
}
output_stream: "throttled_input_video"
}
# Defines side packets for further use in the graph.
node {
calculator: "ConstantSidePacketCalculator"
output_side_packet: "PACKET:with_attention"
node_options: {
[type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
packet { bool_value: true }
}
}
}
# Subgraph that detects faces and corresponding landmarks.
node {
calculator: "FaceLandmarkFrontGpu"
input_stream: "IMAGE:throttled_input_video"
input_side_packet: "NUM_FACES:num_faces"
input_side_packet: "WITH_ATTENTION:with_attention"
output_stream: "LANDMARKS:multi_face_landmarks"
output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
output_stream: "DETECTIONS:face_detections"
output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
}
# Converts the throttled GPU buffer into a CPU ImageFrame so the CPU renderer
# can process it.
node {
calculator: "GpuBufferToImageFrameCalculator"
input_stream: "throttled_input_video"
output_stream: "throttled_input_video_cpu"
}
# Subgraph that renders face-landmark annotation onto the input image.
node {
calculator: "FaceRendererCpu"
input_stream: "IMAGE:throttled_input_video_cpu"
input_stream: "LANDMARKS:multi_face_landmarks"
input_stream: "NORM_RECTS:face_rects_from_landmarks"
input_stream: "DETECTIONS:face_detections"
output_stream: "IMAGE:output_video_cpu"
}
# Converts the rendered CPU ImageFrame back into a GPU buffer for output.
node {
calculator: "ImageFrameToGpuBufferCalculator"
input_stream: "output_video_cpu"
output_stream: "output_video"
}

View File

@ -0,0 +1,37 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
licenses(["notice"])
package(default_visibility = ["//visibility:public"])
cc_library(
name = "face_landmarks_to_render_data_calculator",
srcs = ["face_landmarks_to_render_data_calculator.cc"],
visibility = ["//visibility:public"],
deps = [
"//mediapipe/calculators/util:landmarks_to_render_data_calculator",
"//mediapipe/calculators/util:landmarks_to_render_data_calculator_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework:calculator_options_cc_proto",
"//mediapipe/framework/formats:landmark_cc_proto",
"//mediapipe/framework/formats:location_data_cc_proto",
"//mediapipe/framework/port:ret_check",
"//mediapipe/util:color_cc_proto",
"//mediapipe/util:render_data_cc_proto",
"@com_google_absl//absl/memory",
"@com_google_absl//absl/strings",
],
alwayslink = 1,
)

View File

@ -0,0 +1,953 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/util/annotation_renderer.h"
#include <math.h>
#include <algorithm>
#include <cmath>
#include "mediapipe/framework/port/logging.h"
#include "mediapipe/framework/port/vector.h"
#include "mediapipe/util/color.pb.h"
namespace mediapipe {
namespace {
using Arrow = RenderAnnotation::Arrow;
using FilledOval = RenderAnnotation::FilledOval;
using FilledRectangle = RenderAnnotation::FilledRectangle;
using FilledRoundedRectangle = RenderAnnotation::FilledRoundedRectangle;
using Point = RenderAnnotation::Point;
using Line = RenderAnnotation::Line;
using GradientLine = RenderAnnotation::GradientLine;
using Oval = RenderAnnotation::Oval;
using Rectangle = RenderAnnotation::Rectangle;
using RoundedRectangle = RenderAnnotation::RoundedRectangle;
using Text = RenderAnnotation::Text;
static const std::vector<int> UPPER_LIP = {61, 185, 40, 39, 37, 0, 267, 269, 270, 409, 291, 308, 415, 310, 311, 312, 13, 82, 81, 80, 191, 78};
static const std::vector<int> LOWER_LIP = {61, 78, 95, 88, 178, 87, 14, 317, 402, 318, 324, 308, 291, 375, 321, 405, 314, 17, 84, 181, 91, 146};
static const std::vector<int> FACE_OVAL = {10, 338, 338, 297, 297, 332, 332, 284, 284, 251, 251, 389, 389, 356, 356,
454, 454, 323, 323, 361, 361, 288, 288, 397, 397, 365, 365, 379, 379, 378,
378, 400, 400, 377, 377, 152, 152, 148, 148, 176, 176, 149, 149, 150, 150,
136, 136, 172, 172, 58, 58, 132, 132, 93, 93, 234, 234, 127, 127, 162, 162,
21, 21, 54, 54, 103, 103, 67, 67, 109, 109, 10};
static const std::vector<int> MOUTH_INSIDE = {78, 191, 80, 81, 13, 312, 311, 310, 415, 308, 324, 318, 402, 317, 14, 87, 178, 88, 95};
static const std::vector<int> PART_FOREHEAD_B = {21, 54, 103, 67, 109, 10, 338, 297, 332, 284, 251, 301, 293, 334, 296, 336, 9, 107, 66, 105, 63, 71};
static const std::vector<int> LEFT_EYE = {130, 33, 246, 161, 160, 159, 157, 173, 133, 155, 154, 153, 145, 144, 163, 7};
static const std::vector<int> RIGHT_EYE = {362, 398, 384, 385, 386, 387, 388, 466, 263, 249, 390, 373, 374, 380, 381, 382};
static const std::vector<int> LIPS = {61, 185, 40, 39, 37, 0, 267, 269, 270, 409, 291, 375, 321, 405, 314, 17, 84, 181, 91, 146};
static const std::vector<int> LEFT_BROW = {70, 63, 105, 66, 107, 55, 65, 52, 53, 46};
static const std::vector<int> RIGHT_BROW = {336, 296, 334, 293, 301, 300, 283, 282, 295, 285};
int ClampThickness(int thickness) {
constexpr int kMaxThickness = 32767; // OpenCV MAX_THICKNESS
return std::clamp(thickness, 1, kMaxThickness);
}
bool NormalizedtoPixelCoordinates(double normalized_x, double normalized_y,
int image_width, int image_height, int* x_px,
int* y_px) {
CHECK(x_px != nullptr);
CHECK(y_px != nullptr);
CHECK_GT(image_width, 0);
CHECK_GT(image_height, 0);
if (normalized_x < 0 || normalized_x > 1.0 || normalized_y < 0 ||
normalized_y > 1.0) {
VLOG(1) << "Normalized coordinates must be between 0.0 and 1.0";
}
*x_px = static_cast<int32>(round(normalized_x * image_width));
*y_px = static_cast<int32>(round(normalized_y * image_height));
return true;
}
cv::Scalar MediapipeColorToOpenCVColor(const Color& color) {
return cv::Scalar(color.r(), color.g(), color.b());
}
cv::RotatedRect RectangleToOpenCVRotatedRect(int left, int top, int right,
int bottom, double rotation) {
return cv::RotatedRect(
cv::Point2f((left + right) / 2.f, (top + bottom) / 2.f),
cv::Size2f(right - left, bottom - top), rotation / M_PI * 180.f);
}
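// Draws a straight line whose color interpolates linearly from color1 to
// color2 by stamping a small filled square at every pixel produced by a
// 4-connected cv::LineIterator.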
void cv_line2(cv::Mat& img, const cv::Point& start, const cv::Point& end,
const cv::Scalar& color1, const cv::Scalar& color2,
int thickness) {
cv::LineIterator iter(img, start, end, /*cv::LINE_4=*/4);
for (int i = 0; i < iter.count; i++, iter++) {
const double alpha = static_cast<double>(i) / iter.count;
const cv::Scalar new_color(color1 * (1.0 - alpha) + color2 * alpha);
const cv::Rect rect(iter.pos(), cv::Size(thickness, thickness));
cv::rectangle(img, rect, new_color, /*cv::FILLED=*/-1, /*cv::LINE_4=*/4);
}
}
} // namespace
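// Entry point for the beautification pipeline. When the render data carries
// annotations (face landmarks rendered as points), the effects are applied in
// a fixed order: lipstick, teeth whitening, then face smoothing.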
void AnnotationRenderer::RenderDataOnImage(const RenderData &render_data)
{
if (render_data.render_annotations().size()){
DrawLipstick(render_data);
WhitenTeeth(render_data);
smooth_face(render_data);
}
else
{
LOG(FATAL) << "Render data contains no annotations.";
}
}
void AnnotationRenderer::AdoptImage(cv::Mat* input_image) {
image_width_ = input_image->cols;
image_height_ = input_image->rows;
// No pixel data copy here, only headers are copied.
mat_image_ = *input_image;
}
int AnnotationRenderer::GetImageWidth() const { return mat_image_.cols; }
int AnnotationRenderer::GetImageHeight() const { return mat_image_.rows; }
void AnnotationRenderer::SetFlipTextVertically(bool flip) {
flip_text_vertically_ = flip;
}
void AnnotationRenderer::SetScaleFactor(float scale_factor) {
if (scale_factor > 0.0f) scale_factor_ = std::min(scale_factor, 1.0f);
}
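// Builds a binary CV_8U mask (0/255) for one face part: for every landmark
// index in orderList, scans the render annotations for the point with that
// index, converts it to pixel coordinates, and fills the resulting polygon.
// Note that the nested scan is O(|orderList| * |annotations|).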
cv::Mat AnnotationRenderer::FormFacePartMask(std::vector<int> orderList, const RenderData &render_data)
{
int c = 0;
std::vector<cv::Point> point_array;
for (auto order : orderList)
{
c = 0;
for (auto &annotation : render_data.render_annotations())
{
if (annotation.data_case() == RenderAnnotation::kPoint)
{
if (order == c)
{
const auto &point = annotation.point();
int x = -1;
int y = -1;
if (point.normalized())
{
CHECK(NormalizedtoPixelCoordinates(point.x(), point.y(), image_width_,
image_height_, &x, &y));
}
else
{
x = static_cast<int>(point.x() * scale_factor_);
y = static_cast<int>(point.y() * scale_factor_);
}
point_array.push_back(cv::Point(x, y));
}
c += 1;
}
}
}
cv::Mat mask;
std::vector<std::vector<cv::Point>> point;
point.push_back(point_array);
mask = cv::Mat::zeros(mat_image_.size(), CV_32F);
cv::fillPoly(mask, point, cv::Scalar::all(255), cv::LINE_AA);
mask.convertTo(mask, CV_8U);
return mask;
}
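// Returns the face bounding box as (min_x, min_y, max_x, max_y) in pixel
// coordinates, with the top edge raised by 10% (min_y * 0.9) so that the box
// reaches into the forehead region.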
std::tuple<double, double, double, double> AnnotationRenderer::GetFaceBox(const RenderData &render_data)
{
std::vector<int> x_s, y_s;
double box_min_y, box_max_y, box_max_x, box_min_x;
for (auto &annotation : render_data.render_annotations())
{
if (annotation.data_case() == RenderAnnotation::kPoint)
{
const auto &point = annotation.point();
int x = -1;
int y = -1;
if (point.normalized())
{
CHECK(NormalizedtoPixelCoordinates(point.x(), point.y(), image_width_,
image_height_, &x, &y));
}
else
{
x = static_cast<int>(point.x() * scale_factor_);
y = static_cast<int>(point.y() * scale_factor_);
}
x_s.push_back(x);
y_s.push_back(y);
}
}
cv::minMaxLoc(y_s, &box_min_y, &box_max_y);
cv::minMaxLoc(x_s, &box_min_x, &box_max_x);
box_min_y = box_min_y * 0.9;
return std::make_tuple(box_min_x, box_min_y, box_max_x, box_max_y);
}
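// Estimates a forehead skin mask: samples the per-channel HSV range inside the
// PART_FOREHEAD_B polygon, thresholds the whole frame with cv::inRange on that
// range, cleans the result with erode/dilate, and keeps only the band between
// the expanded face-box top (face_box_min_y) and the lowest detected skin
// pixel.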
cv::Mat AnnotationRenderer::predict_forehead_mask(const RenderData &render_data, double face_box_min_y)
{
cv::Mat part_forehead_mask = AnnotationRenderer::FormFacePartMask(PART_FOREHEAD_B, render_data);
part_forehead_mask.convertTo(part_forehead_mask, CV_32F, 1.0 / 255);
part_forehead_mask.convertTo(part_forehead_mask, CV_8U);
cv::Mat image_sm, image_sm_hsv, skinMask;
cv::resize(mat_image_, image_sm, mat_image_.size());
cv::cvtColor(image_sm, image_sm_hsv, cv::COLOR_BGR2HSV);
std::vector<int> x, y;
std::vector<cv::Point> location;
cv::Vec3d hsv_min, hsv_max;
std::vector<cv::Mat> channels(3);
cv::split(image_sm_hsv, channels);
std::vector<std::vector<double>> minx(3), maxx(3);
int c = 0;
for (auto ch : channels)
{
cv::Mat row, mask_row;
double min, max;
for (int i = 0; i < ch.rows; i++)
{
row = ch.row(i);
mask_row = part_forehead_mask.row(i);
cv::minMaxLoc(row, &min, &max, 0, 0, mask_row);
minx[c].push_back(min);
maxx[c].push_back(max);
}
c++;
}
for (int i = 0; i < 3; i++)
{
hsv_min[i] = *std::min_element(minx[i].begin(), minx[i].end());
}
for (int i = 0; i < 3; i++)
{
hsv_max[i] = *std::max_element(maxx[i].begin(), maxx[i].end());
}
cv::Mat _forehead_kernel = cv::getStructuringElement(cv::MORPH_ELLIPSE, cv::Size(1, 1));
cv::inRange(image_sm_hsv, hsv_min, hsv_max, skinMask);
cv::erode(skinMask, skinMask, _forehead_kernel, cv::Point(-1, -1), 2);
cv::dilate(skinMask, skinMask, _forehead_kernel, cv::Point(-1, -1), 2);
skinMask.convertTo(skinMask, CV_8U, 1.0 / 255);
cv::findNonZero(skinMask, location);
double max_part_f, x_min_part, x_max_part;
for (auto &i : location)
{
x.push_back(i.x);
y.push_back(i.y);
}
cv::minMaxLoc(y, NULL, &max_part_f);
cv::minMaxLoc(x, &x_min_part, &x_max_part);
cv::Mat new_skin_mask = cv::Mat::zeros(skinMask.size(), CV_8U);
new_skin_mask(cv::Range(face_box_min_y, max_part_f), cv::Range(x_min_part, x_max_part)) =
skinMask(cv::Range(face_box_min_y, max_part_f), cv::Range(x_min_part, x_max_part));
return new_skin_mask;
}
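// Smooths facial skin in place: composes a skin-only mask (face oval plus the
// predicted forehead, minus eyes, brows, and lips), bilateral-filters the
// masked crop, and blends 85% filtered with 15% original pixels back into
// mat_image_.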
void AnnotationRenderer::smooth_face(const RenderData &render_data)
{
cv::Mat not_full_face = cv::Mat(FormFacePartMask(FACE_OVAL, render_data)) +
cv::Mat(predict_forehead_mask(render_data, std::get<1>(GetFaceBox(render_data)))) -
cv::Mat(FormFacePartMask(LEFT_EYE, render_data)) -
cv::Mat(FormFacePartMask(RIGHT_EYE, render_data)) -
cv::Mat(FormFacePartMask(LEFT_BROW, render_data)) -
cv::Mat(FormFacePartMask(RIGHT_BROW, render_data)) -
cv::Mat(FormFacePartMask(LIPS, render_data));
cv::resize(not_full_face,
not_full_face,
mat_image_.size(), 0, 0,
cv::INTER_LINEAR);
std::vector<int> x, y;
std::vector<cv::Point> location;
cv::findNonZero(not_full_face, location);
double min_y, min_x, max_x, max_y;
for (auto &i : location)
{
x.push_back(i.x);
y.push_back(i.y);
}
cv::minMaxLoc(x, &min_x, &max_x);
cv::minMaxLoc(y, &min_y, &max_y);
cv::Mat patch_face = mat_image_(cv::Range(min_y, max_y), cv::Range(min_x, max_x));
cv::Mat patch_nff = not_full_face(cv::Range(min_y, max_y), cv::Range(min_x, max_x));
cv::Mat patch_new;
cv::bilateralFilter(patch_face, patch_new, 12, 50, 50);
cv::Mat patch_new_nff, patch_new_mask, patch, patch_face_nff;
patch_new.copyTo(patch_new_nff, patch_nff);
patch_face.copyTo(patch_face_nff, patch_nff);
patch_new_mask = 0.85 * patch_new_nff + 0.15 * patch_face_nff;
patch = cv::min(255, patch_new_mask);
patch.copyTo(patch_face, patch_nff);
}
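// Element-wise product of a CV_32FC3 image and a CV_32FC1 mask: every channel
// of each pixel is multiplied by the corresponding mask value.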
cv::Mat matmul32F(cv::Mat& bgr, cv::Mat& mask)
{
assert(bgr.type() == CV_32FC3 && mask.type() == CV_32FC1 && bgr.size() == mask.size());
int H = bgr.rows;
int W = bgr.cols;
cv::Mat dst(bgr.size(), bgr.type());
if (bgr.isContinuous() && mask.isContinuous())
{
W *= H;
H = 1;
}
for( int i = 0; i < H; ++i)
{
float* pdst = ((float*)dst.data)+i*W*3;
float* pbgr = ((float*)bgr.data)+i*W*3;
float* pmask = ((float*)mask.data) + i*W;
for ( int j = 0; j < W; ++j)
{
(*pdst++) = (*pbgr++) *(*pmask);
(*pdst++) = (*pbgr++) *(*pmask);
(*pdst++) = (*pbgr++) *(*pmask);
pmask+=1;
}
}
return dst;
}
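// Applies lipstick in place: merges the UPPER_LIP and LOWER_LIP polygon masks,
// crops the lip bounding box, and alpha-blends a constant color over the crop
// at roughly 20% opacity (the mask is scaled by 50/255).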
void AnnotationRenderer::DrawLipstick(const RenderData &render_data)
{
cv::Mat spec_lips_mask, upper_lips_mask, lower_lips_mask;
spec_lips_mask = cv::Mat::zeros(mat_image_.size(), CV_32F);
upper_lips_mask = cv::Mat::zeros(mat_image_.size(), CV_32F);
lower_lips_mask = cv::Mat::zeros(mat_image_.size(), CV_32F);
upper_lips_mask = AnnotationRenderer::FormFacePartMask(UPPER_LIP, render_data);
lower_lips_mask = AnnotationRenderer::FormFacePartMask(LOWER_LIP, render_data);
spec_lips_mask = upper_lips_mask + lower_lips_mask;
spec_lips_mask.convertTo(spec_lips_mask, CV_8U);
cv::resize(spec_lips_mask, spec_lips_mask, mat_image_.size(), 0, 0, cv::INTER_LINEAR);
std::vector<int> x, y;
std::vector<cv::Point> location;
cv::findNonZero(spec_lips_mask, location);
for (auto &i : location)
{
x.push_back(i.x);
y.push_back(i.y);
}
if (!(x.empty()) && !(y.empty()))
{
double min_y, max_y, max_x, min_x;
cv::minMaxLoc(y, &min_y, &max_y);
cv::minMaxLoc(x, &min_x, &max_x);
cv::Mat lips_crop_mask = spec_lips_mask(cv::Range(min_y, max_y), cv::Range(min_x, max_x));
lips_crop_mask.convertTo(lips_crop_mask, CV_32F, 1.0 / 255);
cv::Mat lips_crop = cv::Mat(mat_image_(cv::Range(min_y, max_y), cv::Range(min_x, max_x)).size(), CV_8UC3);
mat_image_(cv::Range(min_y, max_y), cv::Range(min_x, max_x)).copyTo(lips_crop);
lips_crop.convertTo(lips_crop, CV_32FC3);
cv::Mat lips_blend;
lips_blend = cv::Mat(lips_crop.size().height, lips_crop.size().width, CV_32FC3, cv::Scalar(255.0, 0, 0));
lips_crop_mask *= 50;
lips_crop_mask.convertTo(lips_crop_mask, CV_32F, 1.0 / 255);
lips_blend = matmul32F(lips_blend, lips_crop_mask);
cv::Mat tmp_crop_mask = 1.0 - lips_crop_mask;
cv::Mat slice = mat_image_(cv::Range(min_y, max_y), cv::Range(min_x, max_x));
lips_crop = matmul32F(lips_crop, tmp_crop_mask);
cv::add(lips_blend, lips_crop, slice, cv::noArray(), CV_8U);
}
}
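// Whitens teeth in place: builds the MOUTH_INSIDE mask and, only when the
// mouth is open wide enough (height/width > 0.17), erodes and blurs the mask,
// then lowers HSV saturation by up to 30% and raises HLS lightness by up to
// 30% inside it.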
void AnnotationRenderer::WhitenTeeth(const RenderData &render_data)
{
cv::Mat mouth_mask, mouth;
mouth_mask = cv::Mat::zeros(mat_image_.size(), CV_32F);
mouth_mask = AnnotationRenderer::FormFacePartMask(MOUTH_INSIDE, render_data);
cv::resize(mouth_mask, mouth, mat_image_.size(), 0, 0, cv::INTER_LINEAR);
std::vector<int> x, y;
std::vector<cv::Point> location;
cv::findNonZero(mouth, location);
for (auto &i : location)
{
x.push_back(i.x);
y.push_back(i.y);
}
if (!(x.empty()) && !(y.empty()))
{
double mouth_min_y, mouth_max_y, mouth_max_x, mouth_min_x;
cv::minMaxLoc(y, &mouth_min_y, &mouth_max_y);
cv::minMaxLoc(x, &mouth_min_x, &mouth_max_x);
double mh = mouth_max_y - mouth_min_y;
double mw = mouth_max_x - mouth_min_x;
cv::Mat mouth_crop_mask;
mouth.convertTo(mouth, CV_32F, 1.0 / 255);
mouth.convertTo(mouth, CV_32F, 1.0 / 255);
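// The mask is scaled down twice (net factor 1/65025), so mouth_crop_mask * 255
// below lands in the ~1.0 range used by the channel blends.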
if (mh / mw > 0.17)
{
mouth_min_y = static_cast<int>(std::max(mouth_min_y - mh * 0.1, 0.0));
mouth_max_y = static_cast<int>(std::min(mouth_max_y + mh * 0.1, (double)image_height_));
mouth_min_x = static_cast<int>(std::max(mouth_min_x - mw * 0.1, 0.0));
mouth_max_x = static_cast<int>(std::min(mouth_max_x + mw * 0.1, (double)image_width_));
mouth_crop_mask = mouth(cv::Range(mouth_min_y, mouth_max_y), cv::Range(mouth_min_x, mouth_max_x));
cv::Mat img_hsv, tmp_mask, img_hls;
cv::cvtColor(mat_image_(cv::Range(mouth_min_y, mouth_max_y), cv::Range(mouth_min_x, mouth_max_x)), img_hsv,
cv::COLOR_RGB2HSV);
cv::Mat _mouth_erode_kernel = cv::getStructuringElement(
cv::MORPH_ELLIPSE, cv::Size(7, 7));
cv::erode(mouth_crop_mask * 255, tmp_mask, _mouth_erode_kernel, cv::Point(-1, -1), 3);
cv::GaussianBlur(tmp_mask, tmp_mask, cv::Size(51, 51), 0);
img_hsv.convertTo(img_hsv, CV_8U);
std::vector<cv::Mat> channels(3);
cv::split(img_hsv, channels);
cv::Mat tmp;
cv::multiply(channels[1], tmp_mask, tmp, 0.3, CV_8U);
cv::subtract(channels[1], tmp, channels[1], cv::noArray(), CV_8U);
channels[1] = cv::min(255, channels[1]);
cv::merge(channels, img_hsv);
cv::cvtColor(img_hsv, img_hsv, cv::COLOR_HSV2RGB);
cv::cvtColor(img_hsv, img_hls, cv::COLOR_RGB2HLS);
cv::split(img_hls, channels);
cv::multiply(channels[1], tmp_mask, tmp, 0.3, CV_8U);
cv::add(channels[1], tmp, channels[1], cv::noArray(), CV_8U);
channels[1] = cv::min(255, channels[1]);
cv::merge(channels, img_hls);
cv::cvtColor(img_hls, img_hls, cv::COLOR_HLS2RGB);
cv::Mat slice = mat_image_(cv::Range(mouth_min_y, mouth_max_y), cv::Range(mouth_min_x, mouth_max_x));
img_hls.copyTo(slice);
}
}
}
void AnnotationRenderer::DrawRectangle(const RenderAnnotation& annotation) {
int left = -1;
int top = -1;
int right = -1;
int bottom = -1;
const auto& rectangle = annotation.rectangle();
if (rectangle.normalized()) {
CHECK(NormalizedtoPixelCoordinates(rectangle.left(), rectangle.top(),
image_width_, image_height_, &left,
&top));
CHECK(NormalizedtoPixelCoordinates(rectangle.right(), rectangle.bottom(),
image_width_, image_height_, &right,
&bottom));
} else {
left = static_cast<int>(rectangle.left() * scale_factor_);
top = static_cast<int>(rectangle.top() * scale_factor_);
right = static_cast<int>(rectangle.right() * scale_factor_);
bottom = static_cast<int>(rectangle.bottom() * scale_factor_);
}
const cv::Scalar color = MediapipeColorToOpenCVColor(annotation.color());
const int thickness =
ClampThickness(round(annotation.thickness() * scale_factor_));
if (rectangle.rotation() != 0.0) {
const auto& rect = RectangleToOpenCVRotatedRect(left, top, right, bottom,
rectangle.rotation());
const int kNumVertices = 4;
cv::Point2f vertices[kNumVertices];
rect.points(vertices);
for (int i = 0; i < kNumVertices; i++) {
cv::line(mat_image_, vertices[i], vertices[(i + 1) % kNumVertices], color,
thickness);
}
} else {
cv::Rect rect(left, top, right - left, bottom - top);
cv::rectangle(mat_image_, rect, color, thickness);
}
if (rectangle.has_top_left_thickness()) {
const auto& rect = RectangleToOpenCVRotatedRect(left, top, right, bottom,
rectangle.rotation());
const int kNumVertices = 4;
cv::Point2f vertices[kNumVertices];
rect.points(vertices);
const int top_left_thickness =
ClampThickness(round(rectangle.top_left_thickness() * scale_factor_));
cv::ellipse(mat_image_, vertices[1],
cv::Size(top_left_thickness, top_left_thickness), 0.0, 0, 360,
color, -1);
}
}
void AnnotationRenderer::DrawFilledRectangle(
const RenderAnnotation& annotation) {
int left = -1;
int top = -1;
int right = -1;
int bottom = -1;
const auto& rectangle = annotation.filled_rectangle().rectangle();
if (rectangle.normalized()) {
CHECK(NormalizedtoPixelCoordinates(rectangle.left(), rectangle.top(),
image_width_, image_height_, &left,
&top));
CHECK(NormalizedtoPixelCoordinates(rectangle.right(), rectangle.bottom(),
image_width_, image_height_, &right,
&bottom));
} else {
left = static_cast<int>(rectangle.left() * scale_factor_);
top = static_cast<int>(rectangle.top() * scale_factor_);
right = static_cast<int>(rectangle.right() * scale_factor_);
bottom = static_cast<int>(rectangle.bottom() * scale_factor_);
}
const cv::Scalar color = MediapipeColorToOpenCVColor(annotation.color());
if (rectangle.rotation() != 0.0) {
const auto& rect = RectangleToOpenCVRotatedRect(left, top, right, bottom,
rectangle.rotation());
const int kNumVertices = 4;
cv::Point2f vertices2f[kNumVertices];
rect.points(vertices2f);
// Convert cv::Point2f[] to cv::Point[].
cv::Point vertices[kNumVertices];
for (int i = 0; i < kNumVertices; ++i) {
vertices[i] = vertices2f[i];
}
cv::fillConvexPoly(mat_image_, vertices, kNumVertices, color);
} else {
cv::Rect rect(left, top, right - left, bottom - top);
cv::rectangle(mat_image_, rect, color, -1);
}
}
void AnnotationRenderer::DrawRoundedRectangle(
const RenderAnnotation& annotation) {
int left = -1;
int top = -1;
int right = -1;
int bottom = -1;
const auto& rectangle = annotation.rounded_rectangle().rectangle();
if (rectangle.normalized()) {
CHECK(NormalizedtoPixelCoordinates(rectangle.left(), rectangle.top(),
image_width_, image_height_, &left,
&top));
CHECK(NormalizedtoPixelCoordinates(rectangle.right(), rectangle.bottom(),
image_width_, image_height_, &right,
&bottom));
} else {
left = static_cast<int>(rectangle.left() * scale_factor_);
top = static_cast<int>(rectangle.top() * scale_factor_);
right = static_cast<int>(rectangle.right() * scale_factor_);
bottom = static_cast<int>(rectangle.bottom() * scale_factor_);
}
const cv::Scalar color = MediapipeColorToOpenCVColor(annotation.color());
const int thickness =
ClampThickness(round(annotation.thickness() * scale_factor_));
const int corner_radius =
round(annotation.rounded_rectangle().corner_radius() * scale_factor_);
const int line_type = annotation.rounded_rectangle().line_type();
DrawRoundedRectangle(mat_image_, cv::Point(left, top),
cv::Point(right, bottom), color, thickness, line_type,
corner_radius);
}
void AnnotationRenderer::DrawFilledRoundedRectangle(
const RenderAnnotation& annotation) {
int left = -1;
int top = -1;
int right = -1;
int bottom = -1;
const auto& rectangle =
annotation.filled_rounded_rectangle().rounded_rectangle().rectangle();
if (rectangle.normalized()) {
CHECK(NormalizedtoPixelCoordinates(rectangle.left(), rectangle.top(),
image_width_, image_height_, &left,
&top));
CHECK(NormalizedtoPixelCoordinates(rectangle.right(), rectangle.bottom(),
image_width_, image_height_, &right,
&bottom));
} else {
left = static_cast<int>(rectangle.left() * scale_factor_);
top = static_cast<int>(rectangle.top() * scale_factor_);
right = static_cast<int>(rectangle.right() * scale_factor_);
bottom = static_cast<int>(rectangle.bottom() * scale_factor_);
}
const cv::Scalar color = MediapipeColorToOpenCVColor(annotation.color());
const int corner_radius =
annotation.rounded_rectangle().corner_radius() * scale_factor_;
const int line_type = annotation.rounded_rectangle().line_type();
DrawRoundedRectangle(mat_image_, cv::Point(left, top),
cv::Point(right, bottom), color, -1, line_type,
corner_radius);
}
void AnnotationRenderer::DrawRoundedRectangle(cv::Mat src, cv::Point top_left,
cv::Point bottom_right,
const cv::Scalar& line_color,
int thickness, int line_type,
int corner_radius) {
// Corners:
// p1 - p2
// | |
// p4 - p3
cv::Point p1 = top_left;
cv::Point p2 = cv::Point(bottom_right.x, top_left.y);
cv::Point p3 = bottom_right;
cv::Point p4 = cv::Point(top_left.x, bottom_right.y);
// Draw edges of the rectangle
cv::line(src, cv::Point(p1.x + corner_radius, p1.y),
cv::Point(p2.x - corner_radius, p2.y), line_color, thickness,
line_type);
cv::line(src, cv::Point(p2.x, p2.y + corner_radius),
cv::Point(p3.x, p3.y - corner_radius), line_color, thickness,
line_type);
cv::line(src, cv::Point(p4.x + corner_radius, p4.y),
cv::Point(p3.x - corner_radius, p3.y), line_color, thickness,
line_type);
cv::line(src, cv::Point(p1.x, p1.y + corner_radius),
cv::Point(p4.x, p4.y - corner_radius), line_color, thickness,
line_type);
// Draw arcs at corners.
cv::ellipse(src, p1 + cv::Point(corner_radius, corner_radius),
cv::Size(corner_radius, corner_radius), 180.0, 0, 90, line_color,
thickness, line_type);
cv::ellipse(src, p2 + cv::Point(-corner_radius, corner_radius),
cv::Size(corner_radius, corner_radius), 270.0, 0, 90, line_color,
thickness, line_type);
cv::ellipse(src, p3 + cv::Point(-corner_radius, -corner_radius),
cv::Size(corner_radius, corner_radius), 0.0, 0, 90, line_color,
thickness, line_type);
cv::ellipse(src, p4 + cv::Point(corner_radius, -corner_radius),
cv::Size(corner_radius, corner_radius), 90.0, 0, 90, line_color,
thickness, line_type);
}
void AnnotationRenderer::DrawOval(const RenderAnnotation& annotation) {
int left = -1;
int top = -1;
int right = -1;
int bottom = -1;
const auto& enclosing_rectangle = annotation.oval().rectangle();
if (enclosing_rectangle.normalized()) {
CHECK(NormalizedtoPixelCoordinates(enclosing_rectangle.left(),
enclosing_rectangle.top(), image_width_,
image_height_, &left, &top));
CHECK(NormalizedtoPixelCoordinates(
enclosing_rectangle.right(), enclosing_rectangle.bottom(), image_width_,
image_height_, &right, &bottom));
} else {
left = static_cast<int>(enclosing_rectangle.left() * scale_factor_);
top = static_cast<int>(enclosing_rectangle.top() * scale_factor_);
right = static_cast<int>(enclosing_rectangle.right() * scale_factor_);
bottom = static_cast<int>(enclosing_rectangle.bottom() * scale_factor_);
}
cv::Point center((left + right) / 2, (top + bottom) / 2);
cv::Size size((right - left) / 2, (bottom - top) / 2);
const double rotation = enclosing_rectangle.rotation() / M_PI * 180.f;
const cv::Scalar color = MediapipeColorToOpenCVColor(annotation.color());
const int thickness =
ClampThickness(round(annotation.thickness() * scale_factor_));
cv::ellipse(mat_image_, center, size, rotation, 0, 360, color, thickness);
}
void AnnotationRenderer::DrawFilledOval(const RenderAnnotation& annotation) {
int left = -1;
int top = -1;
int right = -1;
int bottom = -1;
const auto& enclosing_rectangle = annotation.filled_oval().oval().rectangle();
if (enclosing_rectangle.normalized()) {
CHECK(NormalizedtoPixelCoordinates(enclosing_rectangle.left(),
enclosing_rectangle.top(), image_width_,
image_height_, &left, &top));
CHECK(NormalizedtoPixelCoordinates(
enclosing_rectangle.right(), enclosing_rectangle.bottom(), image_width_,
image_height_, &right, &bottom));
} else {
left = static_cast<int>(enclosing_rectangle.left() * scale_factor_);
top = static_cast<int>(enclosing_rectangle.top() * scale_factor_);
right = static_cast<int>(enclosing_rectangle.right() * scale_factor_);
bottom = static_cast<int>(enclosing_rectangle.bottom() * scale_factor_);
}
cv::Point center((left + right) / 2, (top + bottom) / 2);
cv::Size size(std::max(0, (right - left) / 2),
std::max(0, (bottom - top) / 2));
const double rotation = enclosing_rectangle.rotation() / M_PI * 180.f;
const cv::Scalar color = MediapipeColorToOpenCVColor(annotation.color());
cv::ellipse(mat_image_, center, size, rotation, 0, 360, color, -1);
}
void AnnotationRenderer::DrawArrow(const RenderAnnotation& annotation) {
int x_start = -1;
int y_start = -1;
int x_end = -1;
int y_end = -1;
const auto& arrow = annotation.arrow();
if (arrow.normalized()) {
CHECK(NormalizedtoPixelCoordinates(arrow.x_start(), arrow.y_start(),
image_width_, image_height_, &x_start,
&y_start));
CHECK(NormalizedtoPixelCoordinates(arrow.x_end(), arrow.y_end(),
image_width_, image_height_, &x_end,
&y_end));
} else {
x_start = static_cast<int>(arrow.x_start() * scale_factor_);
y_start = static_cast<int>(arrow.y_start() * scale_factor_);
x_end = static_cast<int>(arrow.x_end() * scale_factor_);
y_end = static_cast<int>(arrow.y_end() * scale_factor_);
}
cv::Point arrow_start(x_start, y_start);
cv::Point arrow_end(x_end, y_end);
const cv::Scalar color = MediapipeColorToOpenCVColor(annotation.color());
const int thickness =
ClampThickness(round(annotation.thickness() * scale_factor_));
// Draw the main arrow line.
cv::line(mat_image_, arrow_start, arrow_end, color, thickness);
// Compute the arrowtip left and right vectors.
Vector2_d L_start(static_cast<double>(x_start), static_cast<double>(y_start));
Vector2_d L_end(static_cast<double>(x_end), static_cast<double>(y_end));
Vector2_d U = (L_end - L_start).Normalize();
Vector2_d V = U.Ortho();
double line_length = (L_end - L_start).Norm();
constexpr double kArrowTipLengthProportion = 0.2;
double arrowtip_length = kArrowTipLengthProportion * line_length;
Vector2_d arrowtip_left = L_end - arrowtip_length * U + arrowtip_length * V;
Vector2_d arrowtip_right = L_end - arrowtip_length * U - arrowtip_length * V;
// Draw the arrowtip left and right lines.
cv::Point arrowtip_left_start(static_cast<int>(round(arrowtip_left[0])),
static_cast<int>(round(arrowtip_left[1])));
cv::Point arrowtip_right_start(static_cast<int>(round(arrowtip_right[0])),
static_cast<int>(round(arrowtip_right[1])));
cv::line(mat_image_, arrowtip_left_start, arrow_end, color, thickness);
cv::line(mat_image_, arrowtip_right_start, arrow_end, color, thickness);
}
void AnnotationRenderer::DrawPoint(const RenderAnnotation& annotation) {
const auto& point = annotation.point();
int x = -1;
int y = -1;
if (point.normalized()) {
CHECK(NormalizedtoPixelCoordinates(point.x(), point.y(), image_width_,
image_height_, &x, &y));
} else {
x = static_cast<int>(point.x() * scale_factor_);
y = static_cast<int>(point.y() * scale_factor_);
}
cv::Point point_to_draw(x, y);
const cv::Scalar color = MediapipeColorToOpenCVColor(annotation.color());
const int thickness =
ClampThickness(round(annotation.thickness() * scale_factor_));
cv::circle(mat_image_, point_to_draw, thickness, color, -1);
}
void AnnotationRenderer::DrawLine(const RenderAnnotation& annotation) {
int x_start = -1;
int y_start = -1;
int x_end = -1;
int y_end = -1;
const auto& line = annotation.line();
if (line.normalized()) {
CHECK(NormalizedtoPixelCoordinates(line.x_start(), line.y_start(),
image_width_, image_height_, &x_start,
&y_start));
CHECK(NormalizedtoPixelCoordinates(line.x_end(), line.y_end(), image_width_,
image_height_, &x_end, &y_end));
} else {
x_start = static_cast<int>(line.x_start() * scale_factor_);
y_start = static_cast<int>(line.y_start() * scale_factor_);
x_end = static_cast<int>(line.x_end() * scale_factor_);
y_end = static_cast<int>(line.y_end() * scale_factor_);
}
cv::Point start(x_start, y_start);
cv::Point end(x_end, y_end);
const cv::Scalar color = MediapipeColorToOpenCVColor(annotation.color());
const int thickness =
ClampThickness(round(annotation.thickness() * scale_factor_));
cv::line(mat_image_, start, end, color, thickness);
}
void AnnotationRenderer::DrawGradientLine(const RenderAnnotation& annotation) {
int x_start = -1;
int y_start = -1;
int x_end = -1;
int y_end = -1;
const auto& line = annotation.gradient_line();
if (line.normalized()) {
CHECK(NormalizedtoPixelCoordinates(line.x_start(), line.y_start(),
image_width_, image_height_, &x_start,
&y_start));
CHECK(NormalizedtoPixelCoordinates(line.x_end(), line.y_end(), image_width_,
image_height_, &x_end, &y_end));
} else {
x_start = static_cast<int>(line.x_start() * scale_factor_);
y_start = static_cast<int>(line.y_start() * scale_factor_);
x_end = static_cast<int>(line.x_end() * scale_factor_);
y_end = static_cast<int>(line.y_end() * scale_factor_);
}
const cv::Point start(x_start, y_start);
const cv::Point end(x_end, y_end);
const int thickness =
ClampThickness(round(annotation.thickness() * scale_factor_));
const cv::Scalar color1 = MediapipeColorToOpenCVColor(line.color1());
const cv::Scalar color2 = MediapipeColorToOpenCVColor(line.color2());
cv_line2(mat_image_, start, end, color1, color2, thickness);
}
void AnnotationRenderer::DrawText(const RenderAnnotation& annotation) {
int left = -1;
int baseline = -1;
int font_size = -1;
const auto& text = annotation.text();
if (text.normalized()) {
CHECK(NormalizedtoPixelCoordinates(text.left(), text.baseline(),
image_width_, image_height_, &left,
&baseline));
font_size = static_cast<int>(round(text.font_height() * image_height_));
} else {
left = static_cast<int>(text.left() * scale_factor_);
baseline = static_cast<int>(text.baseline() * scale_factor_);
font_size = static_cast<int>(text.font_height() * scale_factor_);
}
cv::Point origin(left, baseline);
const cv::Scalar color = MediapipeColorToOpenCVColor(annotation.color());
const int thickness =
ClampThickness(round(annotation.thickness() * scale_factor_));
const int font_face = text.font_face();
const double font_scale = ComputeFontScale(font_face, font_size, thickness);
int text_baseline = 0;
cv::Size text_size = cv::getTextSize(text.display_text(), font_face,
font_scale, thickness, &text_baseline);
if (text.center_horizontally()) {
origin.x -= text_size.width / 2;
}
if (text.center_vertically()) {
origin.y += text_size.height / 2;
}
cv::putText(mat_image_, text.display_text(), origin, font_face, font_scale,
color, thickness, /*lineType=*/8,
/*bottomLeftOrigin=*/flip_text_vertically_);
}
double AnnotationRenderer::ComputeFontScale(int font_face, int font_size,
int thickness) {
double base_line;
double cap_line;
// The details below of how to compute the font scale from font face,
// thickness, and size were inferred from the OpenCV implementation.
switch (font_face) {
case cv::FONT_HERSHEY_SIMPLEX:
case cv::FONT_HERSHEY_DUPLEX:
case cv::FONT_HERSHEY_COMPLEX:
case cv::FONT_HERSHEY_TRIPLEX:
case cv::FONT_HERSHEY_SCRIPT_SIMPLEX:
case cv::FONT_HERSHEY_SCRIPT_COMPLEX:
base_line = 9;
cap_line = 12;
break;
case cv::FONT_HERSHEY_PLAIN:
base_line = 5;
cap_line = 4;
break;
case cv::FONT_HERSHEY_COMPLEX_SMALL:
base_line = 6;
cap_line = 7;
break;
default:
return -1;
}
const double thick = static_cast<double>(thickness + 1);
return (static_cast<double>(font_size) - (thick / 2.0F)) /
(cap_line + base_line);
}
} // namespace mediapipe

View File

@ -0,0 +1,158 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MEDIAPIPE_UTIL_ANNOTATION_RENDERER_H_
#define MEDIAPIPE_UTIL_ANNOTATION_RENDERER_H_
#include <string>
#include "mediapipe/framework/port/opencv_core_inc.h"
#include "mediapipe/framework/port/opencv_imgproc_inc.h"
#include "mediapipe/framework/port/opencv_highgui_inc.h"
#include "mediapipe/util/render_data.pb.h"
namespace mediapipe {
// The renderer library for rendering data on images.
//
// Example usage:
//
// AnnotationRenderer renderer;
//
// std::unique_ptr<cv::Mat> mat_image(new cv::Mat(kImageHeight, kImageWidth,
// CV_8UC3));
//
// renderer.AdoptImage(mat_image.get());
//
// RenderData render_data_0;
// <FILL RENDER_DATA_0 WITH ANNOTATIONS>
//
// renderer.RenderDataOnImage(render_data_0);
//
// RenderData render_data_1;
// <FILL RENDER_DATA_1 WITH ANNOTATIONS>
//
// renderer.RenderDataOnImage(render_data_1);
//
// UseRenderedImage(mat_image.get());
class AnnotationRenderer {
public:
explicit AnnotationRenderer() {}
explicit AnnotationRenderer(const cv::Mat& mat_image)
: image_width_(mat_image.cols),
image_height_(mat_image.rows),
mat_image_(mat_image.clone()) {}
// Renders the image with the input render data.
void RenderDataOnImage(const RenderData& render_data);
// Resets the renderer with a new image. Does not own input_image. input_image
// must not be modified by caller during rendering.
void AdoptImage(cv::Mat* input_image);
// Gets image dimensions.
int GetImageWidth() const;
int GetImageHeight() const;
// Sets whether text should be rendered upside down. This defaults to false,
// and text is rendered assuming the underlying image has its origin at the
// top-left corner. Set it to true if the image origin is at the bottom-left
// corner.
void SetFlipTextVertically(bool flip);
// For GPU rendering optimization in AnnotationOverlayCalculator.
// Scales all incoming coordinates, sizes, thicknesses, etc. by this amount.
// Should be in the range (0-1].
// See 'gpu_scale_factor' in annotation_overlay_calculator.proto
void SetScaleFactor(float scale_factor);
float GetScaleFactor() { return scale_factor_; }
private:
// Draws a rectangle on the image as described in the annotation.
void DrawRectangle(const RenderAnnotation& annotation);
// Draws a filled rectangle on the image as described in the annotation.
void DrawFilledRectangle(const RenderAnnotation& annotation);
// Draws an oval on the image as described in the annotation.
void DrawOval(const RenderAnnotation& annotation);
// Draws a filled oval on the image as described in the annotation.
void DrawFilledOval(const RenderAnnotation& annotation);
// Draws an arrow on the image as described in the annotation.
void DrawArrow(const RenderAnnotation& annotation);
// Draws a point on the image as described in the annotation.
void DrawPoint(const RenderAnnotation& annotation);
// Draws lipstick on the face.
void DrawLipstick(const RenderData& render_data);
// Whitens teeth.
void WhitenTeeth(const RenderData& render_data);
// Draws a line segment on the image as described in the annotation.
void DrawLine(const RenderAnnotation& annotation);
// Draws a 2-tone line segment on the image as described in the annotation.
void DrawGradientLine(const RenderAnnotation& annotation);
// Draws a text on the image as described in the annotation.
void DrawText(const RenderAnnotation& annotation);
// Draws a rounded rectangle on the image as described in the annotation.
void DrawRoundedRectangle(const RenderAnnotation& annotation);
// Draws a filled rounded rectangle on the image as described in the
// annotation.
void DrawFilledRoundedRectangle(const RenderAnnotation& annotation);
// Helper function for drawing a rectangle with rounded corners. The
// parameters are the same as in the OpenCV function rectangle().
// corner_radius: A positive int value defining the radius of the round
// corners.
void DrawRoundedRectangle(cv::Mat src, cv::Point top_left,
cv::Point bottom_right,
const cv::Scalar& line_color, int thickness = 1,
int line_type = 8, int corner_radius = 0);
// Computes the font scale from font_face, size and thickness.
double ComputeFontScale(int font_face, int font_size, int thickness);
cv::Mat FormFacePartMask(std::vector<int> orderList, const RenderData &render_data);
cv::Mat predict_forehead_mask(const RenderData &render_data, double face_box_min_y);
void smooth_face(const RenderData &render_data);
std::tuple<double, double, double, double> GetFaceBox(const RenderData &render_data);
// Width and Height of the image (in pixels).
int image_width_ = -1;
int image_height_ = -1;
// The image for rendering.
cv::Mat mat_image_;
// See SetFlipTextVertically(bool).
bool flip_text_vertically_ = false;
// See SetScaleFactor(float)
float scale_factor_ = 1.0;
};
} // namespace mediapipe
#endif // MEDIAPIPE_UTIL_ANNOTATION_RENDERER_H_

View File

@ -0,0 +1,104 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/memory/memory.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_join.h"
#include "mediapipe/calculators/util/landmarks_to_render_data_calculator.h"
#include "mediapipe/calculators/util/landmarks_to_render_data_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_options.pb.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/formats/location_data.pb.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/util/color.pb.h"
#include "mediapipe/util/render_data.pb.h"
namespace mediapipe {
namespace {
constexpr int kNumFaceLandmarkConnections = 132;
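// Each connection consumes a pair of consecutive entries, so the array below
// holds 2 * kNumFaceLandmarkConnections indices.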
// Pairs of landmark indices to be rendered with connections.
constexpr int kFaceLandmarkConnections[] = {
// Lips.
61, 146, 146, 91, 91, 181, 181, 84, 84, 17, 17, 314, 314, 405, 405, 321,
321, 375, 375, 291, 61, 185, 185, 40, 40, 39, 39, 37, 37, 0, 0, 267, 267,
269, 269, 270, 270, 409, 409, 291, 78, 95, 95, 88, 88, 178, 178, 87, 87, 14,
14, 317, 317, 402, 402, 318, 318, 324, 324, 308, 78, 191, 191, 80, 80, 81,
81, 82, 82, 13, 13, 312, 312, 311, 311, 310, 310, 415, 415, 308,
// Left eye.
33, 7, 7, 163, 163, 144, 144, 145, 145, 153, 153, 154, 154, 155, 155, 133,
33, 246, 246, 161, 161, 160, 160, 159, 159, 158, 158, 157, 157, 173, 173,
133,
// Left eyebrow.
46, 53, 53, 52, 52, 65, 65, 55, 70, 63, 63, 105, 105, 66, 66, 107,
// Left iris.
474, 475, 475, 476, 476, 477, 477, 474,
// Right eye.
263, 249, 249, 390, 390, 373, 373, 374, 374, 380, 380, 381, 381, 382, 382,
362, 263, 466, 466, 388, 388, 387, 387, 386, 386, 385, 385, 384, 384, 398,
398, 362,
// Right eyebrow.
276, 283, 283, 282, 282, 295, 295, 285, 300, 293, 293, 334, 334, 296, 296,
336,
// Right iris.
469, 470, 470, 471, 471, 472, 472, 469,
// Face oval.
10, 338, 338, 297, 297, 332, 332, 284, 284, 251, 251, 389, 389, 356, 356,
454, 454, 323, 323, 361, 361, 288, 288, 397, 397, 365, 365, 379, 379, 378,
378, 400, 400, 377, 377, 152, 152, 148, 148, 176, 176, 149, 149, 150, 150,
136, 136, 172, 172, 58, 58, 132, 132, 93, 93, 234, 234, 127, 127, 162, 162,
21, 21, 54, 54, 103, 103, 67, 67, 109, 109, 10};
} // namespace
// A calculator that converts face landmarks to RenderData proto for
// visualization. Ignores landmark_connections specified in
// LandmarksToRenderDataCalculatorOptions, if any, and always uses a fixed set
// of landmark connections specific to face landmark (defined in
// kFaceLandmarkConnections[] above).
//
// Example config:
// node {
// calculator: "FaceLandmarksToRenderDataCalculator"
// input_stream: "NORM_LANDMARKS:landmarks"
// output_stream: "RENDER_DATA:render_data"
// options {
// [LandmarksToRenderDataCalculatorOptions.ext] {
// landmark_color { r: 0 g: 255 b: 0 }
// connection_color { r: 0 g: 255 b: 0 }
// thickness: 4.0
// }
// }
// }
class FaceLandmarksToRenderDataCalculator
: public LandmarksToRenderDataCalculator {
public:
absl::Status Open(CalculatorContext* cc) override;
};
REGISTER_CALCULATOR(FaceLandmarksToRenderDataCalculator);
absl::Status FaceLandmarksToRenderDataCalculator::Open(CalculatorContext* cc) {
cc->SetOffset(TimestampDiff(0));
options_ = cc->Options<mediapipe::LandmarksToRenderDataCalculatorOptions>();
for (int i = 0; i < kNumFaceLandmarkConnections; ++i) {
landmark_connections_.push_back(kFaceLandmarkConnections[i * 2]);
landmark_connections_.push_back(kFaceLandmarkConnections[i * 2 + 1]);
}
return absl::OkStatus();
}
} // namespace mediapipe

View File

@ -0,0 +1,70 @@
# MediaPipe graph that performs face mesh on desktop with TensorFlow Lite
# on CPU.
# Path to the input video file. (string)
input_side_packet: "input_video_path"
# Path to the output video file. (string)
input_side_packet: "output_video_path"
# max_queue_size limits the number of packets enqueued on any input stream
# by throttling inputs to the graph. This makes the graph process only one
# frame at a time.
max_queue_size: 1
# Decodes an input video file into images and a video header.
node {
calculator: "OpenCvVideoDecoderCalculator"
input_side_packet: "INPUT_FILE_PATH:input_video_path"
output_stream: "VIDEO:input_video"
output_stream: "VIDEO_PRESTREAM:input_video_header"
}
# Defines side packets for further use in the graph.
node {
calculator: "ConstantSidePacketCalculator"
output_side_packet: "PACKET:0:num_faces"
output_side_packet: "PACKET:1:with_attention"
node_options: {
[type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
packet { int_value: 1 }
packet { bool_value: true }
}
}
}
# Subgraph that detects faces and corresponding landmarks.
node {
calculator: "FaceLandmarkFrontCpu"
input_stream: "IMAGE:input_video"
input_side_packet: "NUM_FACES:num_faces"
input_side_packet: "WITH_ATTENTION:with_attention"
output_stream: "LANDMARKS:multi_face_landmarks"
output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
output_stream: "DETECTIONS:face_detections"
output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
}
# Subgraph that renders face-landmark annotation onto the input video.
node {
calculator: "FaceRendererCpu"
input_stream: "IMAGE:input_video"
input_stream: "LANDMARKS:multi_face_landmarks"
input_stream: "NORM_RECTS:face_rects_from_landmarks"
input_stream: "DETECTIONS:face_detections"
output_stream: "IMAGE:output_video"
}
# Encodes the annotated images into a video file, adopting properties specified
# in the input video header, e.g., video framerate.
node {
calculator: "OpenCvVideoEncoderCalculator"
input_stream: "VIDEO:output_video"
input_stream: "VIDEO_PRESTREAM:input_video_header"
input_side_packet: "OUTPUT_FILE_PATH:output_video_path"
node_options: {
[type.googleapis.com/mediapipe.OpenCvVideoEncoderCalculatorOptions]: {
codec: "avc1"
video_format: "mp4"
}
}
}

View File

@ -0,0 +1,66 @@
# MediaPipe graph that performs face mesh with TensorFlow Lite on GPU.
# Input image. (GpuBuffer)
input_stream: "input_video"
# Output image with rendered results. (GpuBuffer)
output_stream: "output_video"
# Collection of detected/processed faces, each represented as a list of
# landmarks. (std::vector<NormalizedLandmarkList>)
output_stream: "multi_face_landmarks"
# Throttles the images flowing downstream for flow control. It passes through
# the very first incoming image unaltered, and waits for downstream nodes
# (calculators and subgraphs) in the graph to finish their tasks before it
# passes through another image. All images that come in while waiting are
# dropped, limiting the number of in-flight images in most parts of the graph
# to 1. This prevents the downstream nodes from queuing up incoming images and
# data excessively, which leads to increased latency and memory usage, unwanted
# in real-time mobile applications. It also eliminates unnecessary computation,
# e.g., the output produced by a node may get dropped downstream if the
# subsequent nodes are still busy processing previous inputs.
node {
calculator: "FlowLimiterCalculator"
input_stream: "input_video"
input_stream: "FINISHED:output_video"
input_stream_info: {
tag_index: "FINISHED"
back_edge: true
}
output_stream: "throttled_input_video"
}
# Defines side packets for further use in the graph.
node {
calculator: "ConstantSidePacketCalculator"
output_side_packet: "PACKET:0:num_faces"
output_side_packet: "PACKET:1:with_attention"
node_options: {
[type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
packet { int_value: 1 }
packet { bool_value: true }
}
}
}
# Subgraph that detects faces and corresponding landmarks.
node {
calculator: "FaceLandmarkFrontGpu"
input_stream: "IMAGE:throttled_input_video"
input_side_packet: "NUM_FACES:num_faces"
input_side_packet: "WITH_ATTENTION:with_attention"
output_stream: "LANDMARKS:multi_face_landmarks"
output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
output_stream: "DETECTIONS:face_detections"
output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
}
# Subgraph that renders face-landmark annotation onto the input image.
node {
calculator: "FaceRendererGpu"
input_stream: "IMAGE:throttled_input_video"
input_stream: "LANDMARKS:multi_face_landmarks"
input_stream: "NORM_RECTS:face_rects_from_landmarks"
input_stream: "DETECTIONS:face_detections"
output_stream: "IMAGE:output_video"
}

View File

@ -0,0 +1,57 @@
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
load(
"//mediapipe/framework/tool:mediapipe_graph.bzl",
"mediapipe_simple_subgraph",
)
licenses(["notice"])
package(default_visibility = ["//visibility:public"])
cc_library(
name = "renderer_calculators",
deps = [
"//mediapipe/calculators/core:split_proto_list_calculator",
"//mediapipe/util:annotation_renderer",
"//mediapipe/calculators/util:annotation_overlay_calculator",
"//mediapipe/calculators/beauty:form_face_mask_calculator",
"//mediapipe/calculators/beauty:smooth_face_calculator",
"//mediapipe/calculators/beauty:draw_lipstick_calculator",
"//mediapipe/calculators/beauty:whiten_teeth_calculator",
"//mediapipe/calculators/util:detections_to_render_data_calculator",
"//mediapipe/calculators/util:landmarks_to_render_data_calculator",
"//mediapipe/calculators/util:rect_to_render_data_calculator",
"//mediapipe/graphs/beauty/calculators:face_landmarks_to_render_data_calculator",
],
)
mediapipe_simple_subgraph(
name = "face_renderer_gpu",
graph = "face_renderer_gpu.pbtxt",
register_as = "FaceRendererGpu",
deps = [
":renderer_calculators",
],
)
mediapipe_simple_subgraph(
name = "face_renderer_cpu",
graph = "face_renderer_cpu.pbtxt",
register_as = "FaceRendererCpu",
deps = [
":renderer_calculators",
],
)

View File

@ -0,0 +1,127 @@
# MediaPipe face mesh rendering subgraph.
type: "FaceRendererCpu"
# CPU image. (ImageFrame)
input_stream: "IMAGE:input_image"
# Collection of detected/predicted faces, each represented as a list of
# landmarks. (std::vector<NormalizedLandmarkList>)
input_stream: "LANDMARKS:multi_face_landmarks"
# Regions of interest calculated based on face detections.
# (std::vector<NormalizedRect>)
input_stream: "NORM_RECTS:rects"
# Detected faces. (std::vector<Detection>)
input_stream: "DETECTIONS:detections"
# CPU image with rendered data. (ImageFrame)
output_stream: "IMAGE:output_image"
node {
calculator: "ImagePropertiesCalculator"
input_stream: "IMAGE:input_image"
output_stream: "SIZE:image_size"
}
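# NOTE: image_size does not appear to be consumed by any downstream node in
# this subgraph as currently written.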
# Converts detections to drawing primitives for annotation overlay.
node {
calculator: "DetectionsToRenderDataCalculator"
input_stream: "DETECTIONS:detections"
output_stream: "RENDER_DATA:detections_render_data"
node_options: {
[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
thickness: 4.0
color { r: 0 g: 255 b: 0 }
}
}
}
# Outputs each element of multi_face_landmarks at a fake timestamp for the rest
# of the graph to process. At the end of the loop, outputs the BATCH_END
# timestamp for downstream calculators to inform them that all elements in the
# vector have been processed.
node {
calculator: "BeginLoopNormalizedLandmarkListVectorCalculator"
input_stream: "ITERABLE:multi_face_landmarks"
output_stream: "ITEM:face_landmarks"
output_stream: "BATCH_END:landmark_timestamp"
}
# Converts landmarks to drawing primitives for annotation overlay.
node {
calculator: "FaceLandmarksToRenderDataCalculator"
input_stream: "NORM_LANDMARKS:face_landmarks"
output_stream: "RENDER_DATA:landmarks_render_data"
node_options: {
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
landmark_color { r: 255 g: 0 b: 0 }
connection_color { r: 0 g: 255 b: 0 }
thickness: 2
visualize_landmark_depth: false
}
}
}
# Collects a RenderData object for each face into a vector. Upon receiving the
# BATCH_END timestamp, outputs the vector of RenderData at the BATCH_END
# timestamp.
node {
calculator: "EndLoopRenderDataCalculator"
input_stream: "ITEM:landmarks_render_data"
input_stream: "BATCH_END:landmark_timestamp"
output_stream: "ITERABLE:multi_face_landmarks_render_data"
}
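# NOTE: the BeginLoop/EndLoop pair is tied together by the shared BATCH_END
# stream (landmark_timestamp), which lets the per-face RenderData items
# emitted inside the loop be collected back into one vector per input frame.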
# Converts normalized rects to drawing primitives for annotation overlay.
#node {
# calculator: "RectToRenderDataCalculator"
# input_stream: "NORM_RECTS:rects"
# output_stream: "RENDER_DATA:rects_render_data"
# node_options: {
# [type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] {
# filled: false
# color { r: 255 g: 0 b: 0 }
# thickness: 4.0
# }
# }
#}
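# Builds per-face masks (MASK) and a face bounding box (FACEBOX) from the
# landmark render data; both are reused by the beautification calculators
# below.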
node {
calculator: "FormFaceMaskCalculator"
input_stream: "IMAGE:input_image"
input_stream: "VECTOR:0:multi_face_landmarks_render_data"
output_stream: "FACEBOX:face_box"
output_stream: "MASK:multi_mask"
}
node {
calculator: "DrawLipstickCalculator"
input_stream: "IMAGE:input_image"
input_stream: "MASK:0:multi_mask"
output_stream: "IMAGE:input_image_1"
}
node {
calculator: "WhitenTeethCalculator"
input_stream: "IMAGE:input_image_1"
input_stream: "MASK:0:multi_mask"
output_stream: "IMAGE:input_image_2"
}
node {
calculator: "SmoothFaceCalculator"
input_stream: "IMAGE:input_image_2"
input_stream: "MASK:0:multi_mask"
input_stream: "FACEBOX:face_box"
output_stream: "IMAGE:output_image"
}
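# The beautification stages are chained through intermediate image streams
# (input_image -> input_image_1 -> input_image_2 -> output_image), while all
# three stages reuse the multi_mask computed once by FormFaceMaskCalculator.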
# Draws annotations and overlays them on top of the input image (disabled
# here in favor of the beautification chain above).
#node {
# calculator: "AnnotationOverlayCalculator"
# input_stream: "IMAGE:input_image"
# input_stream: "VECTOR:0:multi_face_landmarks_render_data"
# output_stream: "IMAGE:output_image"
#}

View File

@ -0,0 +1,96 @@
# MediaPipe face mesh rendering subgraph.
type: "FaceRendererGpu"
# GPU image. (GpuBuffer)
input_stream: "IMAGE:input_image"
# Collection of detected/predicted faces, each represented as a list of
# landmarks. (std::vector<NormalizedLandmarkList>)
input_stream: "LANDMARKS:multi_face_landmarks"
# Regions of interest calculated based on face detections.
# (std::vector<NormalizedRect>)
input_stream: "NORM_RECTS:rects"
# Detected faces. (std::vector<Detection>)
input_stream: "DETECTIONS:detections"
# GPU image with rendered data. (GpuBuffer)
output_stream: "IMAGE:output_image"
node {
calculator: "ImagePropertiesCalculator"
input_stream: "IMAGE_GPU:input_image"
output_stream: "SIZE:image_size"
}
# Converts detections to drawing primitives for annotation overlay.
node {
calculator: "DetectionsToRenderDataCalculator"
input_stream: "DETECTIONS:detections"
output_stream: "RENDER_DATA:detections_render_data"
node_options: {
[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
thickness: 4.0
color { r: 0 g: 255 b: 0 }
}
}
}
# Outputs each element of multi_face_landmarks at a fake timestamp for the rest
# of the graph to process. At the end of the loop, outputs the BATCH_END
# timestamp for downstream calculators to inform them that all elements in the
# vector have been processed.
node {
calculator: "BeginLoopNormalizedLandmarkListVectorCalculator"
input_stream: "ITERABLE:multi_face_landmarks"
output_stream: "ITEM:face_landmarks"
output_stream: "BATCH_END:end_timestamp"
}
# Converts landmarks to drawing primitives for annotation overlay.
node {
calculator: "FaceLandmarksToRenderDataCalculator"
input_stream: "NORM_LANDMARKS:face_landmarks"
output_stream: "RENDER_DATA:landmarks_render_data"
node_options: {
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
landmark_color { r: 255 g: 0 b: 0 }
connection_color { r: 0 g: 255 b: 0 }
thickness: 2
visualize_landmark_depth: false
}
}
}
# Collects a RenderData object for each face into a vector. Upon receiving the
# BATCH_END timestamp, outputs the vector of RenderData at the BATCH_END
# timestamp.
node {
calculator: "EndLoopRenderDataCalculator"
input_stream: "ITEM:landmarks_render_data"
input_stream: "BATCH_END:end_timestamp"
output_stream: "ITERABLE:multi_face_landmarks_render_data"
}
# Converts normalized rects to drawing primitives for annotation overlay.
node {
calculator: "RectToRenderDataCalculator"
input_stream: "NORM_RECTS:rects"
output_stream: "RENDER_DATA:rects_render_data"
node_options: {
[type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] {
filled: false
color { r: 255 g: 0 b: 0 }
thickness: 4.0
}
}
}
# Draws annotations and overlays them on top of the input images.
node {
calculator: "AnnotationOverlayCalculator"
input_stream: "IMAGE_GPU:input_image"
#input_stream: "detections_render_data"
input_stream: "VECTOR:0:multi_face_landmarks_render_data"
#input_stream: "rects_render_data"
output_stream: "IMAGE_GPU:output_image"
}
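# NOTE: the commented-out inputs above (detections_render_data,
# rects_render_data) can presumably be re-enabled to also overlay detection
# boxes and ROI rectangles.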

Binary file not shown.