// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <math.h>

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <iostream>
#include <memory>
#include <string>
#include <tuple>
#include <unordered_map>
#include <vector>

#include "absl/strings/str_cat.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_options.pb.h"
#include "mediapipe/framework/formats/image_format.pb.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/image_frame_opencv.h"
#include "mediapipe/framework/formats/video_stream_header.h"
#include "mediapipe/framework/port/logging.h"
#include "mediapipe/framework/port/opencv_core_inc.h"
#include "mediapipe/framework/port/opencv_imgproc_inc.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/framework/port/vector.h"

namespace mediapipe
{
namespace
|
|
{
|
|
|
|
constexpr char kMaskTag[] = "MASK";
|
|
constexpr char kFaceBoxTag[] = "FACEBOX";
|
|
constexpr char kImageFrameTag[] = "IMAGE";
|
|
|
|
enum
|
|
{
|
|
ATTRIB_VERTEX,
|
|
ATTRIB_TEXTURE_POSITION,
|
|
NUM_ATTRIBUTES
|
|
};
|
|
|
|
inline bool HasImageTag(mediapipe::CalculatorContext *cc) { return false; }
|
|
} // namespace
|
|
|
|
class SmoothFaceCalculator1 : public CalculatorBase
|
|
{
|
|
public:
|
|
SmoothFaceCalculator1() = default;
|
|
~SmoothFaceCalculator1() override = default;
|
|
|
|
static absl::Status GetContract(CalculatorContract *cc);
|
|
|
|
// From Calculator.
|
|
absl::Status Open(CalculatorContext *cc) override;
|
|
absl::Status Process(CalculatorContext *cc) override;
|
|
absl::Status Close(CalculatorContext *cc) override;
|
|
|
|
private:
|
|
absl::Status CreateRenderTargetCpu(CalculatorContext *cc,
|
|
std::unique_ptr<cv::Mat> &image_mat,
|
|
ImageFormat::Format *target_format);
|
|
|
|
absl::Status RenderToCpu(
|
|
CalculatorContext *cc, const ImageFormat::Format &target_format,
|
|
uchar *data_image);
|
|
|
|
absl::Status SmoothFace(CalculatorContext *cc,
|
|
const std::unordered_map<std::string, cv::Mat> &mask_vec,
|
|
const std::tuple<double, double, double, double> &face_box);
|
|
|
|
cv::Mat predict_forehead_mask(const std::unordered_map<std::string, cv::Mat> &mask_vec, double face_box_min_y);
|
|
|
|
// Indicates if image frame is available as input.
|
|
bool image_frame_available_ = false;
|
|
|
|
int image_width_;
|
|
int image_height_;
|
|
cv::Mat mat_image_;
|
|
cv::Mat not_full_face;
|
|
std::vector<double> face_box;
|
|
std::unique_ptr<cv::Mat> image_mat;
|
|
};
|
|
REGISTER_CALCULATOR(SmoothFaceCalculator1);
|
|
|
|
// Declares the packet types of every input and output stream.
//
// Inputs:
//   IMAGE    ImageFrame. When connected, the IMAGE and MASK outputs must be
//            connected too.
//   MASK     std::vector<std::unordered_map<std::string, cv::Mat>>.
//   FACEBOX  std::vector<std::tuple<double, double, double, double>>.
//   (no tag) a single cv::Mat.
// Outputs (all optional): IMAGE ImageFrame, MASK cv::Mat,
//   FACEBOX std::vector<double>.
//
// Returns InvalidArgument for an unusable stream configuration so that the
// graph reports the problem instead of aborting the process.
absl::Status SmoothFaceCalculator1::GetContract(CalculatorContract *cc)
{
    if (cc->Inputs().NumEntries() < 1)
    {
        return absl::InvalidArgumentError(
            "SmoothFaceCalculator1 requires at least one input stream.");
    }

    if (cc->Inputs().HasTag(kImageFrameTag))
    {
        cc->Inputs().Tag(kImageFrameTag).Set<ImageFrame>();
        if (!cc->Outputs().HasTag(kImageFrameTag))
        {
            return absl::InvalidArgumentError(
                "An IMAGE input requires an IMAGE output stream.");
        }
        if (!cc->Outputs().HasTag(kMaskTag))
        {
            return absl::InvalidArgumentError(
                "An IMAGE input requires a MASK output stream.");
        }
    }

    // Data streams to render.
    for (CollectionItemId id = cc->Inputs().BeginId(); id < cc->Inputs().EndId();
         ++id)
    {
        const auto tag_and_index = cc->Inputs().TagAndIndexFromId(id);
        const std::string &tag = tag_and_index.first;

        // The three cases are mutually exclusive, so one chain is enough.
        if (tag == kMaskTag)
        {
            cc->Inputs().Get(id).Set<std::vector<std::unordered_map<std::string, cv::Mat>>>();
        }
        else if (tag == kFaceBoxTag)
        {
            cc->Inputs().Get(id).Set<std::vector<std::tuple<double, double, double, double>>>();
        }
        else if (tag.empty())
        {
            // Empty tag defaults to accepting a single object of Mat type.
            cc->Inputs().Get(id).Set<cv::Mat>();
        }
    }

    if (cc->Outputs().HasTag(kImageFrameTag))
    {
        cc->Outputs().Tag(kImageFrameTag).Set<ImageFrame>();
    }
    if (cc->Outputs().HasTag(kMaskTag))
    {
        cc->Outputs().Tag(kMaskTag).Set<cv::Mat>();
    }
    if (cc->Outputs().HasTag(kFaceBoxTag))
    {
        cc->Outputs().Tag(kFaceBoxTag).Set<std::vector<double>>();
    }

    return absl::OkStatus();
}

// Sets a zero timestamp offset, records whether an IMAGE input is connected
// and, if that input carries a header, copies its VideoHeader to the IMAGE
// output.
absl::Status SmoothFaceCalculator1::Open(CalculatorContext *cc)
{
    cc->SetOffset(TimestampDiff(0));

    const bool has_image_input =
        cc->Inputs().HasTag(kImageFrameTag) || HasImageTag(cc);
    if (has_image_input)
    {
        image_frame_available_ = true;
    }

    // Without an image input there is no header to forward.
    if (!image_frame_available_)
    {
        return absl::OkStatus();
    }

    // Set the output header based on the input header (if present).
    const auto &incoming_header = cc->Inputs().Tag(kImageFrameTag).Header();
    if (!incoming_header.IsEmpty())
    {
        // Adopt() takes ownership of the freshly allocated copy.
        cc->Outputs()
            .Tag(kImageFrameTag)
            .SetHeader(Adopt(new VideoHeader(incoming_header.Get<VideoHeader>())));
    }

    return absl::OkStatus();
}

// Handles one input timestamp:
//   1. skips the timestamp when the IMAGE input is connected but empty;
//   2. builds the CPU render target from the input frame;
//   3. runs SmoothFace() for every face that has both a mask map and a box;
//   4. emits the connected outputs through RenderToCpu().
//
// Returns InvalidArgument when no render target exists (IMAGE output not
// connected), and forwards any error from the helpers.
absl::Status SmoothFaceCalculator1::Process(CalculatorContext *cc)
{
    if (cc->Inputs().HasTag(kImageFrameTag) &&
        cc->Inputs().Tag(kImageFrameTag).IsEmpty())
    {
        return absl::OkStatus();
    }

    // Initialize render target, drawn with OpenCV.
    ImageFormat::Format target_format = ImageFormat::UNKNOWN;
    if (cc->Outputs().HasTag(kImageFrameTag))
    {
        MP_RETURN_IF_ERROR(CreateRenderTargetCpu(cc, image_mat, &target_format));
    }

    // The render target is only created when the IMAGE output is connected;
    // everything below dereferences it, so fail cleanly instead of crashing.
    if (!image_mat)
    {
        return absl::InvalidArgumentError(
            "SmoothFaceCalculator1 requires the IMAGE output stream to be "
            "connected.");
    }

    mat_image_ = *image_mat;
    image_width_ = image_mat->cols;
    image_height_ = image_mat->rows;

    // face_box is a member that SmoothFace() appends to; reset it so the
    // FACEBOX output describes this frame only and does not grow without bound.
    face_box.clear();

    if (cc->Inputs().HasTag(kMaskTag) &&
        !cc->Inputs().Tag(kMaskTag).IsEmpty() &&
        cc->Inputs().HasTag(kFaceBoxTag) &&
        !cc->Inputs().Tag(kFaceBoxTag).IsEmpty())
    {
        const std::vector<std::unordered_map<std::string, cv::Mat>> &mask_vec =
            cc->Inputs().Tag(kMaskTag).Get<std::vector<std::unordered_map<std::string, cv::Mat>>>();

        const std::vector<std::tuple<double, double, double, double>> &face_boxes =
            cc->Inputs().Tag(kFaceBoxTag).Get<std::vector<std::tuple<double, double, double, double>>>();

        // Only faces that have both a mask map and a box can be processed;
        // bounding the loop by the shorter vector avoids reading past the end.
        const std::size_t face_count = std::min(mask_vec.size(), face_boxes.size());
        for (std::size_t i = 0; i < face_count; ++i)
        {
            MP_RETURN_IF_ERROR(SmoothFace(cc, mask_vec[i], face_boxes[i]));
        }
    }

    // Copy the rendered image to output.
    MP_RETURN_IF_ERROR(RenderToCpu(cc, target_format, image_mat->data));

    return absl::OkStatus();
}

// Nothing to release explicitly; always reports success.
absl::Status SmoothFaceCalculator1::Close(CalculatorContext * /*cc*/)
{
    return absl::OkStatus();
}

// Sends the results of the current timestamp on every connected output.
//
//   cc            calculator context providing the output streams.
//   target_format pixel format of the emitted ImageFrame.
//   data_image    source pixels handed to ImageFrame::CopyPixelData together
//                 with image_width_ x image_height_.
//                 NOTE(review): presumably tightly packed rows - confirm.
//
// Each payload is built only when its stream is connected, so absent outputs
// cost no allocation or copy. Returns InvalidArgument when an IMAGE output is
// requested but data_image is null.
absl::Status SmoothFaceCalculator1::RenderToCpu(
    CalculatorContext *cc, const ImageFormat::Format &target_format,
    uchar *data_image)
{
    const auto timestamp = cc->InputTimestamp();

    if (cc->Outputs().HasTag(kImageFrameTag))
    {
        if (data_image == nullptr)
        {
            return absl::InvalidArgumentError(
                "RenderToCpu received a null pixel buffer.");
        }

        auto image_frame = absl::make_unique<ImageFrame>(
            target_format, image_width_, image_height_);
        image_frame->CopyPixelData(target_format, image_width_, image_height_,
                                   data_image,
                                   ImageFrame::kDefaultAlignmentBoundary);
        // Add() takes ownership of the released pointer.
        cc->Outputs()
            .Tag(kImageFrameTag)
            .Add(image_frame.release(), timestamp);
    }

    if (cc->Outputs().HasTag(kMaskTag))
    {
        auto mask = absl::make_unique<cv::Mat>(not_full_face);
        cc->Outputs()
            .Tag(kMaskTag)
            .Add(mask.release(), timestamp);
    }

    if (cc->Outputs().HasTag(kFaceBoxTag))
    {
        auto boxes = absl::make_unique<std::vector<double>>(face_box);
        cc->Outputs()
            .Tag(kFaceBoxTag)
            .Add(boxes.release(), timestamp);
    }

    return absl::OkStatus();
}

// Creates the CPU render target for the current timestamp.
//
//   cc            calculator context; the IMAGE input is read when available.
//   image_mat     receives the newly allocated target matrix.
//   target_format receives the ImageFormat the target should be emitted as.
//
// With an image input the frame is copied (GRAY8 is expanded to 3-channel
// RGB; SRGB and SRGBA keep their layout). Without one, a 150x150 RGBA canvas
// filled with 255 is created.
//
// Returns UnknownError for any other input format.
absl::Status SmoothFaceCalculator1::CreateRenderTargetCpu(
    CalculatorContext *cc, std::unique_ptr<cv::Mat> &image_mat,
    ImageFormat::Format *target_format)
{
    if (!image_frame_available_)
    {
        image_mat = absl::make_unique<cv::Mat>(
            150, 150, CV_8UC4,
            cv::Scalar::all(255));
        *target_format = ImageFormat::SRGBA;
        return absl::OkStatus();
    }

    const auto &input_frame =
        cc->Inputs().Tag(kImageFrameTag).Get<ImageFrame>();

    int target_mat_type = 0;
    switch (input_frame.Format())
    {
    case ImageFormat::SRGBA:
        *target_format = ImageFormat::SRGBA;
        target_mat_type = CV_8UC4;
        break;
    case ImageFormat::SRGB:
    case ImageFormat::GRAY8: // Grayscale input is rendered as RGB.
        *target_format = ImageFormat::SRGB;
        target_mat_type = CV_8UC3;
        break;
    default:
        return absl::UnknownError("Unexpected image frame format.");
    }

    image_mat = absl::make_unique<cv::Mat>(
        input_frame.Height(), input_frame.Width(), target_mat_type);

    const cv::Mat input_mat = formats::MatView(&input_frame);
    if (input_frame.Format() == ImageFormat::GRAY8)
    {
        // Convert straight into the target; cv::COLOR_* matches the enum
        // style used for the other colour conversions in this file.
        cv::cvtColor(input_mat, *image_mat, cv::COLOR_GRAY2RGB);
    }
    else
    {
        input_mat.copyTo(*image_mat);
    }

    return absl::OkStatus();
}

// Estimates a skin mask from the colour range found under the
// "PART_FOREHEAD_B" mask.
//
//   mask_vec        named masks of one face; "PART_FOREHEAD_B" is used.
//   face_box_min_y  first image row (truncated to int, clamped at 0) that may
//                   be part of the result.
//                   NOTE(review): presumably a pixel coordinate - confirm.
//
// Steps: binarise the forehead mask, take the per-channel HSV minimum and
// maximum of the pixels under it, threshold the whole image with that range,
// then keep only the part of the thresholded mask inside
// rows [face_box_min_y, max_y) x cols [min_x, max_x) of its non-zero pixels
// (end-exclusive ranges).
//
// Returns a CV_8U matrix of the image size holding 0/1 values. It is all
// zero when the forehead mask is missing or empty, when nothing passes the
// threshold, or when the row/column range is empty. An empty cv::Mat is
// returned when no image has been set yet.
//
// NOTE(review): the image is converted with COLOR_BGR2HSV although the
// render target is created from SRGB/SRGBA frames - confirm channel order.
cv::Mat SmoothFaceCalculator1::predict_forehead_mask(const std::unordered_map<std::string, cv::Mat> &mask_vec, double face_box_min_y)
{
    if (mat_image_.empty())
    {
        return cv::Mat();
    }

    cv::Mat new_skin_mask = cv::Mat::zeros(mat_image_.size(), CV_8U);

    const auto forehead_it = mask_vec.find("PART_FOREHEAD_B");
    if (forehead_it == mask_vec.end() || forehead_it->second.empty())
    {
        return new_skin_mask;
    }

    // Scale to [0, 1] and round back to 8 bit.
    cv::Mat part_forehead_mask;
    forehead_it->second.convertTo(part_forehead_mask, CV_32F, 1.0 / 255);
    part_forehead_mask.convertTo(part_forehead_mask, CV_8U);

    // The mask has to line up with the image pixel for pixel.
    if (part_forehead_mask.size() != mat_image_.size())
    {
        cv::resize(part_forehead_mask, part_forehead_mask, mat_image_.size(),
                   0, 0, cv::INTER_NEAREST);
    }
    if (cv::countNonZero(part_forehead_mask) == 0)
    {
        return new_skin_mask;
    }

    // COLOR_BGR2HSV needs three channels; drop a fourth channel first.
    cv::Mat color_image;
    if (mat_image_.channels() == 4)
    {
        cv::cvtColor(mat_image_, color_image, cv::COLOR_BGRA2BGR);
    }
    else
    {
        color_image = mat_image_;
    }
    cv::Mat image_hsv;
    cv::cvtColor(color_image, image_hsv, cv::COLOR_BGR2HSV);

    // Per-channel extrema restricted to the forehead pixels. Evaluating the
    // whole channel at once keeps rows without any masked pixel from
    // contributing to the bounds.
    std::vector<cv::Mat> channels;
    cv::split(image_hsv, channels);
    cv::Vec3d hsv_min, hsv_max;
    for (int c = 0; c < 3; ++c)
    {
        cv::minMaxLoc(channels[c], &hsv_min[c], &hsv_max[c], nullptr, nullptr,
                      part_forehead_mask);
    }

    cv::Mat skin_mask;
    const cv::Mat forehead_kernel =
        cv::getStructuringElement(cv::MORPH_ELLIPSE, cv::Size(1, 1));
    cv::inRange(image_hsv, hsv_min, hsv_max, skin_mask);
    cv::erode(skin_mask, skin_mask, forehead_kernel, cv::Point(-1, -1), 2);
    cv::dilate(skin_mask, skin_mask, forehead_kernel, cv::Point(-1, -1), 2);
    skin_mask.convertTo(skin_mask, CV_8U, 1.0 / 255); // 255 -> 1

    std::vector<cv::Point> location;
    cv::findNonZero(skin_mask, location);
    if (location.empty())
    {
        return new_skin_mask;
    }

    int x_min = location.front().x;
    int x_max = x_min;
    int y_max = location.front().y;
    for (const cv::Point &point : location)
    {
        x_min = std::min(x_min, point.x);
        x_max = std::max(x_max, point.x);
        y_max = std::max(y_max, point.y);
    }

    const cv::Range rows(std::max(0, static_cast<int>(face_box_min_y)), y_max);
    const cv::Range cols(x_min, x_max);
    if (rows.size() <= 0 || cols.size() <= 0)
    {
        return new_skin_mask;
    }

    // Assigning one ROI header to another would only rebind the temporary
    // header; copyTo() is what actually transfers the pixels.
    skin_mask(rows, cols).copyTo(new_skin_mask(rows, cols));

    return new_skin_mask;
}

// Builds the mask of one face without eyes, brows and lips, and records its
// bounding box.
//
//   cc         unused.
//   mask_vec   named masks of one face; must contain "FACE_OVAL", "LEFT_EYE",
//              "RIGHT_EYE", "LEFT_BROW", "RIGHT_BROW" and "LIPS".
//   face_boxx  unused (only needed by the disabled forehead prediction).
//
// Side effects: not_full_face is replaced by FACE_OVAL minus the other five
// masks, resized to the size of mat_image_; four values
// (min_x, min_y, max_x, max_y of its non-zero pixels) are appended to
// face_box. When the mask has no non-zero pixel an all-zero box is appended,
// so every processed face contributes exactly four values.
//
// Returns InvalidArgument when a required mask is missing or FACE_OVAL is
// empty, or when no image has been set.
absl::Status SmoothFaceCalculator1::SmoothFace(CalculatorContext *cc,
                                               const std::unordered_map<std::string, cv::Mat> &mask_vec,
                                               const std::tuple<double, double, double, double> &face_boxx)
{
    (void)cc;
    (void)face_boxx;

    const auto find_mask = [&mask_vec](const char *key) -> const cv::Mat *
    {
        const auto it = mask_vec.find(key);
        return it == mask_vec.end() ? nullptr : &it->second;
    };

    const cv::Mat *face_oval = find_mask("FACE_OVAL");
    if (face_oval == nullptr || face_oval->empty())
    {
        return absl::InvalidArgumentError(
            "Missing or empty required face mask: FACE_OVAL");
    }
    if (mat_image_.empty())
    {
        return absl::InvalidArgumentError(
            "SmoothFace called before an image was set.");
    }

    // Work on a private copy so the caller's masks stay untouched.
    cv::Mat region = face_oval->clone();
    const char *const kSubtractedMasks[] = {
        "LEFT_EYE", "RIGHT_EYE", "LEFT_BROW", "RIGHT_BROW", "LIPS"};
    for (const char *key : kSubtractedMasks)
    {
        const cv::Mat *part = find_mask(key);
        if (part == nullptr)
        {
            return absl::InvalidArgumentError(
                absl::StrCat("Missing required face mask: ", key));
        }
        cv::subtract(region, *part, region);
    }

    // Resize into a fresh matrix: cv::Mat copies share their pixel buffer, so
    // writing into the old not_full_face storage could alter a mask that was
    // already handed out.
    cv::Mat resized;
    cv::resize(region, resized, mat_image_.size(), 0, 0, cv::INTER_LINEAR);
    not_full_face = resized;

    std::vector<cv::Point> location;
    cv::findNonZero(not_full_face, location);

    double min_x = 0, min_y = 0, max_x = 0, max_y = 0;
    if (!location.empty())
    {
        int lo_x = location.front().x, hi_x = lo_x;
        int lo_y = location.front().y, hi_y = lo_y;
        for (const cv::Point &point : location)
        {
            lo_x = std::min(lo_x, point.x);
            hi_x = std::max(hi_x, point.x);
            lo_y = std::min(lo_y, point.y);
            hi_y = std::max(hi_y, point.y);
        }
        min_x = lo_x;
        max_x = hi_x;
        min_y = lo_y;
        max_y = hi_y;
    }

    face_box.push_back(min_x);
    face_box.push_back(min_y);
    face_box.push_back(max_x);
    face_box.push_back(max_y);

    return absl::OkStatus();
}

} // namespace mediapipe