face align ready

mslight 2022-07-02 01:10:59 +04:00
parent fcfe31a67b
commit 0c20ce50bd
11 changed files with 573 additions and 378 deletions

View File

@@ -116,7 +116,7 @@ namespace mediapipe
std::string tag = tag_and_index.first;
if (tag == kMaskTag)
{
cc->Inputs().Get(id).Set<std::unordered_map<std::string, cv::Mat>>();
cc->Inputs().Get(id).Set<std::vector<std::unordered_map<std::string, cv::Mat>>>();
}
else if (tag.empty())
{
@@ -180,26 +180,12 @@ namespace mediapipe
MP_RETURN_IF_ERROR(CreateRenderTargetCpu(cc, image_mat, &target_format));
}
// Render streams onto render target.
for (CollectionItemId id = cc->Inputs().BeginId(); id < cc->Inputs().EndId();
++id)
const std::vector<std::unordered_map<std::string, cv::Mat>> &mask_vec =
cc->Inputs().Tag(kMaskTag).Get<std::vector<std::unordered_map<std::string, cv::Mat>>>();
if (mask_vec.size() > 0)
{
auto tag_and_index = cc->Inputs().TagAndIndexFromId(id);
std::string tag = tag_and_index.first;
if (!tag.empty() && tag != kMaskTag)
{
continue;
}
if (cc->Inputs().Get(id).IsEmpty())
{
continue;
}
RET_CHECK_EQ(kMaskTag, tag);
const std::unordered_map<std::string, cv::Mat> &mask_vec =
cc->Inputs().Get(id).Get<std::unordered_map<std::string, cv::Mat>>();
if (mask_vec.size() > 1)
MP_RETURN_IF_ERROR(DrawLipstick(cc, image_mat, &target_format, mask_vec));
for (auto mask : mask_vec)
MP_RETURN_IF_ERROR(DrawLipstick(cc, image_mat, &target_format, mask));
}
// Copy the rendered image to output.
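A minimal hedged sketch (not in the commit), assuming the MASK stream can deliver empty packets: the deleted per-id loop skipped them, while the new code above calls Get<>() unconditionally, so a guard before the read preserves the old behavior.
if (cc->Inputs().Tag(kMaskTag).IsEmpty())
{
return absl::OkStatus(); // nothing to render at this timestamp
}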
@@ -308,8 +294,8 @@ namespace mediapipe
//__android_log_print(ANDROID_LOG_ERROR, "OVERSEAS", "%d ", mask_vec[1].size().height);
upper_lips_mask=mask_vec.find("UPPER_LIP")->second;
lower_lips_mask= mask_vec.find("LOWER_LIP")->second;
upper_lips_mask = mask_vec.find("UPPER_LIP")->second;
lower_lips_mask = mask_vec.find("LOWER_LIP")->second;
spec_lips_mask = upper_lips_mask + lower_lips_mask;
@@ -385,7 +371,6 @@ namespace mediapipe
cv::cvtColor(masked_lips_crop, slice_gray, cv::COLOR_RGB2GRAY);
masked_lips_crop.copyTo(slice, slice_gray);
}
return absl::OkStatus();

View File

@@ -16,6 +16,7 @@
#include <algorithm>
#include <cmath>
#include <iostream>
//#include <android/log.h>
#include <memory>
@@ -119,7 +120,7 @@ namespace mediapipe
std::string tag = tag_and_index.first;
if (tag == kMaskTag)
{
cc->Inputs().Get(id).Set<std::unordered_map<std::string, cv::Mat>>();
cc->Inputs().Get(id).Set<std::vector<std::unordered_map<std::string, cv::Mat>>>();
}
else if (tag.empty())
{
@@ -129,7 +130,7 @@ namespace mediapipe
if (tag == kFaceBoxTag)
{
cc->Inputs().Get(id).Set<std::tuple<double, double, double, double>>();
cc->Inputs().Get(id).Set<std::vector<std::tuple<double, double, double, double>>>();
}
}
@@ -178,7 +179,6 @@ namespace mediapipe
{
return absl::OkStatus();
}
if (cc->Inputs().HasTag(kFaceBoxTag) &&
cc->Inputs().Tag(kFaceBoxTag).IsEmpty())
{
@@ -194,32 +194,18 @@ namespace mediapipe
MP_RETURN_IF_ERROR(CreateRenderTargetCpu(cc, image_mat, &target_format));
}
// Render streams onto render target.
for (CollectionItemId id = cc->Inputs().BeginId(); id < cc->Inputs().EndId();
++id)
const std::vector<std::unordered_map<std::string, cv::Mat>> &mask_vec =
cc->Inputs().Tag(kMaskTag).Get<std::vector<std::unordered_map<std::string, cv::Mat>>>();
const std::vector<std::tuple<double, double, double, double>> &face_box =
cc->Inputs().Tag(kFaceBoxTag).Get<std::vector<std::tuple<double, double, double, double>>>();
if (mask_vec.size() > 0 && face_box.size() > 0)
{
auto tag_and_index = cc->Inputs().TagAndIndexFromId(id);
std::string tag = tag_and_index.first;
if (!tag.empty() && (tag != kMaskTag || tag != kFaceBoxTag))
{
continue;
}
if (cc->Inputs().Get(id).IsEmpty())
{
continue;
for (int i = 0; i < mask_vec.size(); i++)
MP_RETURN_IF_ERROR(SmoothFace(cc, image_mat, &target_format, mask_vec[i], face_box[i]));
}
RET_CHECK_EQ(kMaskTag, tag);
const std::unordered_map<std::string, cv::Mat> &mask_vec =
cc->Inputs().Get(id).Get<std::unordered_map<std::string, cv::Mat>>();
RET_CHECK_EQ(kFaceBoxTag, tag);
const std::tuple<double, double, double, double> &face_box =
cc->Inputs().Get(id).Get<std::tuple<double, double, double, double>>();
if (mask_vec.size() > 1)
MP_RETURN_IF_ERROR(SmoothFace(cc, image_mat, &target_format, mask_vec, face_box));
}
// Copy the rendered image to output.
uchar *image_mat_ptr = image_mat->data;
MP_RETURN_IF_ERROR(RenderToCpu(cc, target_format, image_mat_ptr, image_mat));
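The zipped indexing above (mask_vec[i] with face_box[i]) assumes one FACEBOX tuple per MASK map; nothing in the hunk enforces it. A one-line sketch of a guard before the loop, using the framework's own macro:
RET_CHECK_EQ(mask_vec.size(), face_box.size())
<< "Expected one FACEBOX tuple per MASK map.";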
@@ -315,6 +301,7 @@ namespace mediapipe
cv::Mat SmoothFaceCalculator::predict_forehead_mask(std::unique_ptr<cv::Mat> &image_mat,
const std::unordered_map<std::string, cv::Mat> &mask_vec, double face_box_min_y)
{
cv::Mat mat_image__ = *image_mat.get();
int image_width_ = image_mat->cols;
int image_height_ = image_mat->rows;

View File

@@ -114,7 +114,7 @@ namespace mediapipe
std::string tag = tag_and_index.first;
if (tag == kMaskTag)
{
cc->Inputs().Get(id).Set<std::unordered_map<std::string, cv::Mat>>();
cc->Inputs().Get(id).Set<std::vector<std::unordered_map<std::string, cv::Mat>>>();
}
else if (tag.empty())
{
@@ -178,26 +178,12 @@ namespace mediapipe
MP_RETURN_IF_ERROR(CreateRenderTargetCpu(cc, image_mat, &target_format));
}
// Render streams onto render target.
for (CollectionItemId id = cc->Inputs().BeginId(); id < cc->Inputs().EndId();
++id)
const std::vector<std::unordered_map<std::string, cv::Mat>> &mask_vec =
cc->Inputs().Tag(kMaskTag).Get<std::vector<std::unordered_map<std::string, cv::Mat>>>();
if (mask_vec.size() > 0)
{
auto tag_and_index = cc->Inputs().TagAndIndexFromId(id);
std::string tag = tag_and_index.first;
if (!tag.empty() && tag != kMaskTag)
{
continue;
}
if (cc->Inputs().Get(id).IsEmpty())
{
continue;
}
RET_CHECK_EQ(kMaskTag, tag);
const std::unordered_map<std::string, cv::Mat> &mask_vec =
cc->Inputs().Get(id).Get<std::unordered_map<std::string, cv::Mat>>();
if (mask_vec.size() > 1)
MP_RETURN_IF_ERROR(WhitenTeeth(cc, image_mat, &target_format, mask_vec));
for (auto mask : mask_vec)
MP_RETURN_IF_ERROR(WhitenTeeth(cc, image_mat, &target_format, mask));
}
// Copy the rendered image to output.

View File

@@ -31,6 +31,7 @@ cc_library(
"//mediapipe/framework/formats:image_frame",
"//mediapipe/framework/formats:image_frame_opencv",
"//mediapipe/framework/formats:video_stream_header",
"//mediapipe/framework/formats:landmark_cc_proto",
"//mediapipe/framework/port:logging",
"//mediapipe/framework/port:opencv_core",
"//mediapipe/framework/port:opencv_imgproc",

View File

@@ -16,7 +16,6 @@
#include <algorithm>
#include <cmath>
#include <map>
#include <string>
//#include <android/log.h>
@@ -29,21 +28,20 @@
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/image_frame_opencv.h"
#include "mediapipe/framework/formats/video_stream_header.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/port/logging.h"
#include "mediapipe/framework/port/opencv_core_inc.h"
#include "mediapipe/framework/port/opencv_imgproc_inc.h"
#include "mediapipe/framework/port/opencv_highgui_inc.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/util/annotation_renderer.h"
#include "mediapipe/util/render_data.pb.h"
#include "mediapipe/framework/port/logging.h"
#include "mediapipe/framework/port/vector.h"
#include "mediapipe/util/color.pb.h"
namespace mediapipe
{
namespace
{
static const std::vector<cv::Point> FFHQ_NORM_LM = {
static const std::vector<cv::Point2f> FFHQ_NORM_LM = {
{638.68525475 / 1024, 486.24604922 / 1024},
{389.31496114 / 1024, 485.8921848 / 1024},
{513.67979275 / 1024, 620.8915371 / 1024},
@@ -52,6 +50,8 @@ namespace mediapipe
constexpr char kImageFrameTag[] = "IMAGE";
constexpr char kVectorTag[] = "VECTOR";
constexpr char kLandmarksTag[] = "LANDMARKS";
constexpr char kNormLandmarksTag[] = "NORM_LANDMARKS";
std::tuple<int, int> _normalized_to_pixel_coordinates(float normalized_x,
float normalized_y, int image_width, int image_height)
@@ -63,8 +63,8 @@ namespace mediapipe
return {x_px, y_px};
};
static const std::unordered_set<cv::Point> FACEMESH_FACE_OVAL =
{{10, 338}, {338, 297}, {297, 332}, {332, 284}, {284, 251}, {251, 389}, {389, 356}, {356, 454}, {454, 323}, {323, 361}, {361, 288}, {288, 397}, {397, 365}, {365, 379}, {379, 378}, {378, 400}, {400, 377}, {377, 152}, {152, 148}, {148, 176}, {176, 149}, {149, 150}, {150, 136}, {136, 172}, {172, 58}, {58, 132}, {132, 93}, {93, 234}, {234, 127}, {127, 162}, {162, 21}, {21, 54}, {54, 103}, {103, 67}, {67, 109}, {109, 10}};
static const std::vector<cv::Point> FACEMESH_FACE_OVAL{
{10, 338}, {338, 297}, {297, 332}, {332, 284}, {284, 251}, {251, 389}, {389, 356}, {356, 454}, {454, 323}, {323, 361}, {361, 288}, {288, 397}, {397, 365}, {365, 379}, {379, 378}, {378, 400}, {400, 377}, {377, 152}, {152, 148}, {148, 176}, {176, 149}, {149, 150}, {150, 136}, {136, 172}, {172, 58}, {58, 132}, {132, 93}, {93, 234}, {234, 127}, {127, 162}, {162, 21}, {21, 54}, {54, 103}, {103, 67}, {67, 109}, {109, 10}};
enum
{
@@ -77,8 +77,6 @@ namespace mediapipe
size_t RoundUp(size_t n, size_t m) { return ((n + m - 1) / m) * m; } // NOLINT
inline bool HasImageTag(mediapipe::CalculatorContext *cc) { return false; }
using Point = RenderAnnotation::Point;
bool NormalizedtoPixelCoordinates(double normalized_x, double normalized_y,
int image_width, int image_height, int *x_px,
int *y_px)
@@ -99,6 +97,115 @@ namespace mediapipe
return true;
}
template <class LandmarkType>
bool IsLandmarkVisibleAndPresent(const LandmarkType &landmark,
bool utilize_visibility,
float visibility_threshold,
bool utilize_presence,
float presence_threshold)
{
if (utilize_visibility && landmark.has_visibility() &&
landmark.visibility() < visibility_threshold)
{
return false;
}
if (utilize_presence && landmark.has_presence() &&
landmark.presence() < presence_threshold)
{
return false;
}
return true;
}
std::tuple<float, cv::Mat, cv::Mat> LandmarkTransform(
cv::Mat &source,
cv::Mat &target, float eps = 1e-7)
{
cv::Mat source_mean_mat, target_mean_mat, source1ch, target1ch;
cv::reduce(source, source_mean_mat, 0, CV_REDUCE_AVG, CV_32F);
cv::reduce(target, target_mean_mat, 0, CV_REDUCE_AVG, CV_32F);
source -= {source_mean_mat.at<float>(0, 0), source_mean_mat.at<float>(0, 1)};
target -= {target_mean_mat.at<float>(0, 0), target_mean_mat.at<float>(0, 1)};
source1ch = source.reshape(1, 5);
target1ch = target.reshape(1, 5);
cv::Mat source_std_mat, target_std_mat;
cv::meanStdDev(source1ch, cv::noArray(), source_std_mat);
cv::meanStdDev(target1ch, cv::noArray(), target_std_mat);
source_std_mat.convertTo(source_std_mat, CV_32F);
target_std_mat.convertTo(target_std_mat, CV_32F);
float source_std = source_std_mat.at<float>(0, 0);
float target_std = target_std_mat.at<float>(0, 0);
source /= source_std + eps;
target /= target_std + eps;
cv::Mat u, vt, rotation, w;
source1ch = source.reshape(1, 5);
target1ch = target.reshape(1, 5);
//std::cout << "R (numpy) = " << std::endl << cv::format(source, cv::Formatter::FMT_NUMPY) << std::endl << std::endl;
cv::SVD::compute(source1ch.t() * target1ch, w, u, vt);
rotation = (u * vt).t();
float scale = target_std / (source_std + eps);
cv::Mat translation;
cv::subtract(target_mean_mat.reshape(1, 2), scale * rotation * source_mean_mat.reshape(1, 2), translation);
return std::make_tuple(scale, rotation, translation);
}
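LandmarkTransform is a Umeyama-style similarity estimate: demean both point sets, normalize by their standard deviations, and take the rotation from the SVD of the cross-covariance; scale is the ratio of standard deviations and the translation re-centers the means. A standalone sanity check (a sketch assuming only OpenCV; the points and transform parameters are invented):
#include <opencv2/core.hpp>
#include <cmath>
#include <iostream>

int main() {
  // Five source landmarks, one per row (5x2, CV_32F).
  cv::Mat src = (cv::Mat_<float>(5, 2) <<
      200, 150, 300, 150, 250, 220, 210, 280, 290, 280);
  const float s = 1.5f, a = 0.2f;
  cv::Mat R = (cv::Mat_<float>(2, 2) <<
      std::cos(a), -std::sin(a),
      std::sin(a),  std::cos(a));
  cv::Mat dst = s * (src * R.t());  // each row p becomes s * R * p
  dst.col(0) += 40.0f;              // known translation in x
  dst.col(1) -= 10.0f;              // known translation in y
  // Demean both sets, as LandmarkTransform does with cv::reduce.
  cv::Mat src_mean, dst_mean;
  cv::reduce(src, src_mean, 0, cv::REDUCE_AVG);
  cv::reduce(dst, dst_mean, 0, cv::REDUCE_AVG);
  cv::Mat src0 = src - cv::repeat(src_mean, 5, 1);
  cv::Mat dst0 = dst - cv::repeat(dst_mean, 5, 1);
  // Scale is the std ratio; rotation comes from the SVD of src0^T * dst0.
  cv::Scalar m1, std1, m2, std2;
  cv::meanStdDev(src0, m1, std1);
  cv::meanStdDev(dst0, m2, std2);
  cv::Mat w, u, vt;
  cv::SVD::compute(src0.t() * dst0, w, u, vt);
  cv::Mat rot = (u * vt).t();
  std::cout << "scale ~ " << std2[0] / std1[0]  // expect ~1.5
            << "\nR =\n" << rot << std::endl;   // expect the R above
  return 0;
}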
std::tuple<float, float, float, float> Crop(
std::unique_ptr<cv::Mat> &image_mat,
std::tuple<float, float, float, float> roi, float extend = 1.0,
bool square = false, float shift_x = 0.0, float shift_y = 0.0)
{
cv::Mat image = *image_mat.get();
int width = image_mat->cols;
int height = image_mat->rows;
auto &[left, top, right, bottom] = roi;
int y = static_cast<int>((bottom + top) / 2);
int x = static_cast<int>((right + left) / 2);
int size_y = static_cast<int>(extend * (bottom - top) / 2);
int size_x = static_cast<int>(extend * (right - left) / 2);
if (square)
size_x = size_y = std::max(size_x, size_y);
x += static_cast<int>(shift_x * size_x);
y += static_cast<int>(shift_y * size_y);
roi = std::make_tuple(
std::max(0, x - size_x),
std::max(0, y - size_y),
std::min(x + size_x, width),
std::min(y + size_y, height));
image = image(cv::Range(top, bottom), cv::Range(left, right));
if (square)
cv::copyMakeBorder(
image, image, std::abs(std::min(0, y - size_y)),
std::abs(std::min(0, height - y - size_y)),
std::abs(std::min(0, x - size_x)),
std::abs(std::min(0, width - x - size_x)),
cv::BORDER_CONSTANT);
return roi;
}
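A hypothetical Crop call (values invented) for orientation: extend the box by 20%, square it, and read back the clamped ROI that Crop writes into its argument:
std::tuple<float, float, float, float> roi{120.0f, 80.0f, 360.0f, 340.0f};
roi = Crop(image_mat, roi, /*extend=*/1.2f, /*square=*/true);
auto [left, top, right, bottom] = roi; // clamped to the image bounds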
} // namespace
class FastUtilsCalculator : public CalculatorBase
@@ -125,30 +232,26 @@ namespace mediapipe
absl::Status Call(CalculatorContext *cc,
std::unique_ptr<cv::Mat> &image_mat,
ImageFormat::Format *target_format,
const RenderData &render_data,
std::unordered_map<std::string, cv::Mat> &all_masks);
ImageFormat::Format &target_format,
std::vector<std::vector<cv::Point2f>> &lms_out);
absl::Status Align(std::unique_ptr<cv::Mat> &image_mat,
cv::Mat source_lm,
cv::Mat target_lm = cv::Mat(FFHQ_NORM_LM), cv::Size size = cv::Size(256, 256),
float extend = 0.0f, std::tuple<float, float, float, float> roi = {0.0f, 0.0f, 0.0f, 0.0f});
// Indicates if image frame is available as input.
bool image_frame_available_ = false;
std::unordered_map<std::string, const std::vector<int>> index_dict = {
std::vector<std::pair<std::string, const std::vector<int>>> index_dict = {
{"leftEye", {384, 385, 386, 387, 388, 390, 263, 362, 398, 466, 373, 374, 249, 380, 381, 382}},
{"rightEye", {160, 33, 161, 163, 133, 7, 173, 144, 145, 246, 153, 154, 155, 157, 158, 159}},
{"nose", {4}},
{"lips", {0, 13, 14, 17, 84}},
//{"lips", {0, 13, 14, 17, 84}},
{"leftLips", {61, 146}},
{"rightLips", {291, 375}},
};
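The move from std::unordered_map to std::vector<std::pair<...>> here looks deliberate: the five group means are consumed positionally against the five FFHQ_NORM_LM rows, so iteration must follow declaration order, which a hash map does not guarantee. A one-line sketch (not in the commit) to make that coupling explicit:
CHECK_EQ(index_dict.size(), FFHQ_NORM_LM.size())
<< "One landmark group per FFHQ template row, in declaration order.";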
int width_ = 0;
int height_ = 0;
int width_canvas_ = 0; // Size of overlay drawing texture canvas.
int height_canvas_ = 0;
int max_num_faces = 1;
bool refine_landmarks = true;
double min_detection_confidence = 0.5;
double min_tracking_confidence = 0.5;
std::unique_ptr<cv::Mat> image_mat;
};
REGISTER_CALCULATOR(FastUtilsCalculator);
@@ -162,6 +265,23 @@ namespace mediapipe
CHECK(cc->Outputs().HasTag(kImageFrameTag));
}
RET_CHECK(cc->Inputs().HasTag(kLandmarksTag) ||
cc->Inputs().HasTag(kNormLandmarksTag))
<< "None of the input streams are provided.";
RET_CHECK(!(cc->Inputs().HasTag(kLandmarksTag) &&
cc->Inputs().HasTag(kNormLandmarksTag)))
<< "Can only one type of landmark can be taken. Either absolute or "
"normalized landmarks.";
if (cc->Inputs().HasTag(kLandmarksTag))
{
cc->Inputs().Tag(kLandmarksTag).Set<std::vector<LandmarkList>>();
}
if (cc->Inputs().HasTag(kNormLandmarksTag))
{
cc->Inputs().Tag(kNormLandmarksTag).Set<std::vector<NormalizedLandmarkList>>();
}
if (cc->Outputs().HasTag(kImageFrameTag))
{
cc->Outputs().Tag(kImageFrameTag).Set<ImageFrame>();
@@ -202,50 +322,29 @@ namespace mediapipe
{
return absl::OkStatus();
}
if (cc->Inputs().HasTag(kLandmarksTag) &&
cc->Inputs().Tag(kLandmarksTag).IsEmpty())
{
return absl::OkStatus();
}
if (cc->Inputs().HasTag(kNormLandmarksTag) &&
cc->Inputs().Tag(kNormLandmarksTag).IsEmpty())
{
return absl::OkStatus();
}
// Initialize render target, drawn with OpenCV.
std::unique_ptr<cv::Mat> image_mat;
ImageFormat::Format target_format;
std::unordered_map<std::string, cv::Mat> all_masks;
std::vector<std::vector<cv::Point2f>> lms_out;
if (cc->Outputs().HasTag(kImageFrameTag))
{
MP_RETURN_IF_ERROR(CreateRenderTargetCpu(cc, image_mat, &target_format));
}
// Render streams onto render target.
for (CollectionItemId id = cc->Inputs().BeginId(); id < cc->Inputs().EndId();
++id)
{
auto tag_and_index = cc->Inputs().TagAndIndexFromId(id);
std::string tag = tag_and_index.first;
if (!tag.empty() && tag != kVectorTag)
{
continue;
}
if (cc->Inputs().Get(id).IsEmpty())
{
continue;
}
if (tag.empty())
{
// Empty tag defaults to accepting a single object of RenderData type.
const RenderData &render_data = cc->Inputs().Get(id).Get<RenderData>();
MP_RETURN_IF_ERROR(Call(cc, image_mat, &target_format, render_data, all_masks));
}
else
{
RET_CHECK_EQ(kVectorTag, tag);
const std::vector<RenderData> &render_data_vec =
cc->Inputs().Get(id).Get<std::vector<RenderData>>();
for (const RenderData &render_data : render_data_vec)
{
MP_RETURN_IF_ERROR(Call(cc, image_mat, &target_format, render_data, all_masks));
}
}
}
MP_RETURN_IF_ERROR(Call(cc, image_mat, target_format, lms_out));
if (!lms_out.empty())
{
cv::Mat source_lm = cv::Mat(lms_out[0]);
MP_RETURN_IF_ERROR(Align(image_mat, source_lm));
}
// Copy the rendered image to output.
uchar *image_mat_ptr = image_mat->data;
MP_RETURN_IF_ERROR(RenderToCpu(cc, target_format, image_mat_ptr, image_mat));
@@ -339,61 +438,99 @@ namespace mediapipe
absl::Status FastUtilsCalculator::Call(CalculatorContext *cc,
std::unique_ptr<cv::Mat> &image_mat,
ImageFormat::Format *target_format,
const RenderData &render_data,
std::unordered_map<std::string, cv::Mat> &all_masks)
ImageFormat::Format &target_format,
std::vector<std::vector<cv::Point2f>> &lms_out)
{
cv::Mat mat_image_ = *image_mat.get();
int image_width_ = image_mat->cols;
int image_height_ = image_mat->rows;
cv::Mat mask;
std::vector<cv::Point> kps, landmarks;
std::vector<std::vector<cv::Point>> lms_out;
std::vector<cv::Point2f> kps, landmarks;
int c = 0;
if (cc->Inputs().HasTag(kNormLandmarksTag))
{
const std::vector<NormalizedLandmarkList> &landmarkslist =
cc->Inputs().Tag(kNormLandmarksTag).Get<std::vector<NormalizedLandmarkList>>();
std::vector<cv::Point2f> point_array;
for (const auto &face : landmarkslist)
{
for (const auto &[key, value] : index_dict)
{
for (auto order : value)
{
c = 0;
for (auto &annotation : render_data.render_annotations())
const NormalizedLandmark &landmark = face.landmark(order);
if (!IsLandmarkVisibleAndPresent<NormalizedLandmark>(
landmark, false,
0.0, false,
0.0))
{
if (annotation.data_case() == RenderAnnotation::kPoint)
{
if (order == c)
{
const auto &point = annotation.point();
continue;
}
const auto &point = landmark;
int x = -1;
int y = -1;
CHECK(NormalizedtoPixelCoordinates(point.x(), point.y(), image_width_,
image_height_, &x, &y));
kps.push_back(cv::Point(x, y));
kps.push_back(cv::Point2f(x, y));
}
c += 1;
}
}
}
double sumx = 0, sumy = 0, meanx, meany;
for (auto p : kps)
{
sumx += p.x;
sumy += p.y;
}
meanx = sumx / kps.size();
meany = sumy / kps.size();
cv::Mat mean;
cv::reduce(cv::Mat(kps).reshape(1), mean, 0, CV_REDUCE_AVG, CV_32F);
landmarks.push_back({meanx, meany});
landmarks.push_back({mean.at<float>(0, 0), mean.at<float>(0, 1)});
kps.clear();
}
lms_out.push_back(landmarks);
landmarks.clear();
}
}
return absl::OkStatus();
}
absl::Status FastUtilsCalculator::Align(std::unique_ptr<cv::Mat> &image_mat,
cv::Mat source_lm,
cv::Mat target_lm, cv::Size size,
float extend, std::tuple<float, float, float, float> roi)
{
cv::Mat mat_image_ = *image_mat.get();
cv::Mat source, target;
source_lm.convertTo(source, CV_32F);
target_lm.convertTo(target, CV_32F);
if (target.at<float>(0, 0) < 1)
{
target *= size.width;
}
if (std::get<0>(roi) != 0.0f)
{
roi = Crop(image_mat, roi, extend);
auto [left, top, right, bottom] = roi;
source -= cv::Scalar(left, top); // shift landmark x/y into the cropped frame
}
auto [scale, rotation, translation] = LandmarkTransform(source, target);
std::vector<cv::Mat> vec_mat;
vec_mat.push_back(scale * rotation);
vec_mat.push_back(translation.reshape(1, {2, 1}));
cv::Mat transform, image;
cv::hconcat(vec_mat, transform);
cv::warpAffine(mat_image_, *image_mat, transform, size, cv::INTER_LINEAR, cv::BORDER_CONSTANT);
return absl::OkStatus();
}
} // namespace mediapipe

View File

@@ -55,20 +55,18 @@ namespace mediapipe
constexpr char kFaceBoxTag[] = "FACEBOX";
constexpr char kImageFrameTag[] = "IMAGE";
static const std::vector<int> UPPER_LIP = {61, 185, 40, 39, 37, 0, 267, 269, 270, 409, 291, 308, 415, 310, 311, 312, 13, 82, 81, 80, 191, 78};
static const std::vector<int> LOWER_LIP = {61, 78, 95, 88, 178, 87, 14, 317, 402, 318, 324, 308, 291, 375, 321, 405, 314, 17, 84, 181, 91, 146};
static const std::vector<int> FACE_OVAL = {10, 338, 338, 297, 297, 332, 332, 284, 284, 251, 251, 389, 389, 356, 356,
454, 454, 323, 323, 361, 361, 288, 288, 397, 397, 365, 365, 379, 379, 378,
378, 400, 400, 377, 377, 152, 152, 148, 148, 176, 176, 149, 149, 150, 150,
136, 136, 172, 172, 58, 58, 132, 132, 93, 93, 234, 234, 127, 127, 162, 162,
21, 21, 54, 54, 103, 103, 67, 67, 109, 109, 10};
static const std::vector<int> MOUTH_INSIDE = {78, 191, 80, 81, 13, 312, 311, 310, 415, 308, 324, 318, 402, 317, 14, 87, 178, 88, 95};
static const std::vector<int> PART_FOREHEAD_B = {21, 54, 103, 67, 109, 10, 338, 297, 332, 284, 251, 301, 293, 334, 296, 336, 9, 107, 66, 105, 63, 71};
static const std::vector<int> LEFT_EYE = {130, 33, 246, 161, 160, 159, 157, 173, 133, 155, 154, 153, 145, 144, 163, 7};
static const std::vector<int> RIGHT_EYE = {362, 398, 384, 385, 386, 387, 388, 466, 263, 249, 390, 373, 374, 380, 381, 382};
static const std::vector<int> LIPS = {61, 185, 40, 39, 37, 0, 267, 269, 270, 409, 291, 375, 321, 405, 314, 17, 84, 181, 91, 146};
static const std::vector<int> LEFT_BROW = {70, 63, 105, 66, 107, 55, 65, 52, 53, 46};
static const std::vector<int> RIGHT_BROW = {336, 296, 334, 293, 301, 300, 283, 282, 295, 285};
std::unordered_map<std::string, const std::vector<int>> orderList = {
{"UPPER_LIP", {61, 185, 40, 39, 37, 0, 267, 269, 270, 409, 291, 308, 415, 310, 311, 312, 13, 82, 81, 80, 191, 78}},
{"LOWER_LIP", {61, 78, 95, 88, 178, 87, 14, 317, 402, 318, 324, 308, 291, 375, 321, 405, 314, 17, 84, 181, 91, 146}},
{"FACE_OVAL", {10, 338, 338, 297, 297, 332, 332, 284, 284, 251, 251, 389, 389, 356, 356, 454, 454, 323, 323, 361, 361, 288, 288, 397, 397, 365, 365, 379, 379, 378, 378, 400, 400, 377, 377, 152, 152, 148, 148, 176, 176, 149, 149, 150, 150, 136, 136, 172, 172, 58, 58, 132, 132, 93, 93, 234, 234, 127, 127, 162, 162, 21, 21, 54, 54, 103, 103, 67, 67, 109, 109, 10}},
{"MOUTH_INSIDE", {78, 191, 80, 81, 13, 312, 311, 310, 415, 308, 324, 318, 402, 317, 14, 87, 178, 88, 95}},
{"LEFT_EYE", {130, 33, 246, 161, 160, 159, 157, 173, 133, 155, 154, 153, 145, 144, 163, 7}},
{"RIGHT_EYE", {362, 398, 384, 385, 386, 387, 388, 466, 263, 249, 390, 373, 374, 380, 381, 382}},
{"LEFT_BROW", {70, 63, 105, 66, 107, 55, 65, 52, 53, 46}},
{"RIGHT_BROW", {336, 296, 334, 293, 301, 300, 283, 282, 295, 285}},
{"LIPS", {61, 185, 40, 39, 37, 0, 267, 269, 270, 409, 291, 375, 321, 405, 314, 17, 84, 181, 91, 146}},
{"PART_FOREHEAD_B", {21, 54, 103, 67, 109, 10, 338, 297, 332, 284, 251, 301, 293, 334, 296, 336, 9, 107, 66, 105, 63, 71}},
};
template <class LandmarkType>
bool IsLandmarkVisibleAndPresent(const LandmarkType &landmark,
@@ -198,116 +196,9 @@ namespace mediapipe
MP_RETURN_IF_ERROR(CreateRenderTargetCpu(cc, image_mat, &target_format));
int image_width_ = image_mat->cols;
int image_height_ = image_mat->rows;
MP_RETURN_IF_ERROR(GetMasks(cc, all_masks, image_mat));
std::unordered_map<std::string, const std::vector<int>> orderList;
orderList.insert(make_pair("UPPER_LIP", UPPER_LIP));
orderList.insert(make_pair("LOWER_LIP", LOWER_LIP));
orderList.insert(make_pair("FACE_OVAL", FACE_OVAL));
orderList.insert(make_pair("MOUTH_INSIDE", MOUTH_INSIDE));
orderList.insert(make_pair("LEFT_EYE", LEFT_EYE));
orderList.insert(make_pair("RIGHT_EYE", RIGHT_EYE));
orderList.insert(make_pair("LEFT_BROW", LEFT_BROW));
orderList.insert(make_pair("RIGHT_BROW", RIGHT_BROW));
orderList.insert(make_pair("LIPS", LIPS));
orderList.insert(make_pair("PART_FOREHEAD_B", PART_FOREHEAD_B));
if (cc->Inputs().HasTag(kLandmarksTag))
{
const LandmarkList &landmarks =
cc->Inputs().Tag(kLandmarksTag).Get<LandmarkList>();
cv::Mat mask;
std::vector<cv::Point> point_array;
int c = 0;
for (const auto &[key, value] : orderList)
{
for (auto order : value)
{
c = 0;
for (int i = 0; i < landmarks.landmark_size(); ++i)
{
const Landmark &landmark = landmarks.landmark(i);
if (!IsLandmarkVisibleAndPresent<Landmark>(
landmark, false,
0.0, false,
0.0))
{
continue;
}
if (order == c)
{
const auto &point = landmark;
int x = -1;
int y = -1;
CHECK(NormalizedtoPixelCoordinates(point.x(), point.y(), image_width_,
image_height_, &x, &y));
point_array.push_back(cv::Point(x, y));
}
c += 1;
}
}
std::vector<std::vector<cv::Point>> point_vec;
point_vec.push_back(point_array);
mask = cv::Mat::zeros(image_mat->size(), CV_32FC1);
cv::fillPoly(mask, point_vec, cv::Scalar::all(255), cv::LINE_AA);
mask.convertTo(mask, CV_8U);
all_masks.insert(make_pair(key, mask));
point_vec.clear();
point_array.clear();
}
}
if (cc->Inputs().HasTag(kNormLandmarksTag))
{
const NormalizedLandmarkList &landmarks =
cc->Inputs().Tag(kNormLandmarksTag).Get<NormalizedLandmarkList>();
cv::Mat mask;
std::vector<cv::Point> point_array;
int c = 0;
for (const auto &[key, value] : orderList)
{
for (auto order : value)
{
c = 0;
for (int i = 0; i < landmarks.landmark_size(); ++i)
{
const NormalizedLandmark &landmark = landmarks.landmark(i);
if (!IsLandmarkVisibleAndPresent<NormalizedLandmark>(
landmark, false,
0.0, false,
0.0))
{
continue;
}
if (order == c)
{
const auto &point = landmark;
int x = -1;
int y = -1;
CHECK(NormalizedtoPixelCoordinates(point.x(), point.y(), image_width_,
image_height_, &x, &y));
point_array.push_back(cv::Point(x, y));
}
c += 1;
}
}
std::vector<std::vector<cv::Point>> point_vec;
point_vec.push_back(point_array);
mask = cv::Mat::zeros(image_mat->size(), CV_32FC1);
cv::fillPoly(mask, point_vec, cv::Scalar::all(255), cv::LINE_AA);
mask.convertTo(mask, CV_8U);
all_masks.insert(make_pair(key, mask));
point_vec.clear();
point_array.clear();
}
}
MP_RETURN_IF_ERROR(GetFaceBox(cc, image_mat));
MP_RETURN_IF_ERROR(RenderToCpu(cc, all_masks));
@@ -397,8 +288,96 @@ namespace mediapipe
return absl::OkStatus();
}
/* absl::Status LandmarksToMaskCalculator::GetFaceBox(std::unique_ptr<cv::Mat> &image_mat,
const RenderData &render_data)
absl::Status LandmarksToMaskCalculator::GetMasks(CalculatorContext *cc,
std::unordered_map<std::string, cv::Mat> &all_masks, std::unique_ptr<cv::Mat> &image_mat)
{
int image_width_ = image_mat->cols;
int image_height_ = image_mat->rows;
if (cc->Inputs().HasTag(kLandmarksTag))
{
const LandmarkList &landmarks =
cc->Inputs().Tag(kLandmarksTag).Get<LandmarkList>();
cv::Mat mask;
std::vector<cv::Point> point_array;
for (const auto &[key, value] : orderList)
{
for (auto order : value)
{
const Landmark &landmark = landmarks.landmark(order);
if (!IsLandmarkVisibleAndPresent<Landmark>(
landmark, false,
0.0, false,
0.0))
{
continue;
}
const auto &point = landmark;
int x = -1;
int y = -1;
CHECK(NormalizedtoPixelCoordinates(point.x(), point.y(), image_width_,
image_height_, &x, &y));
point_array.push_back(cv::Point(x, y));
}
std::vector<std::vector<cv::Point>> point_vec;
point_vec.push_back(point_array);
mask = cv::Mat::zeros(image_mat->size(), CV_32FC1);
cv::fillPoly(mask, point_vec, cv::Scalar::all(255), cv::LINE_AA);
mask.convertTo(mask, CV_8U);
all_masks.insert(make_pair(key, mask));
point_vec.clear();
point_array.clear();
}
}
if (cc->Inputs().HasTag(kNormLandmarksTag))
{
const NormalizedLandmarkList &landmarks =
cc->Inputs().Tag(kNormLandmarksTag).Get<NormalizedLandmarkList>();
cv::Mat mask;
std::vector<cv::Point> point_array;
for (const auto &[key, value] : orderList)
{
for (auto order : value)
{
const NormalizedLandmark &landmark = landmarks.landmark(order);
if (!IsLandmarkVisibleAndPresent<NormalizedLandmark>(
landmark, false,
0.0, false,
0.0))
{
continue;
}
const auto &point = landmark;
int x = -1;
int y = -1;
CHECK(NormalizedtoPixelCoordinates(point.x(), point.y(), image_width_,
image_height_, &x, &y));
point_array.push_back(cv::Point(x, y));
}
std::vector<std::vector<cv::Point>> point_vec;
point_vec.push_back(point_array);
mask = cv::Mat::zeros(image_mat->size(), CV_32FC1);
cv::fillPoly(mask, point_vec, cv::Scalar::all(255), cv::LINE_AA);
mask.convertTo(mask, CV_8U);
all_masks.insert(make_pair(key, mask));
point_vec.clear();
point_array.clear();
}
}
return absl::OkStatus();
}
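GetMasks repeats one small rasterization pattern per group: collect pixel-space points, fill the polygon, convert to 8-bit. As a standalone illustration (a sketch, not code from this commit):
#include <opencv2/imgproc.hpp>
#include <vector>

// Rasterize one landmark polygon into a single-channel 8-bit mask,
// mirroring the fillPoly/convertTo steps above.
cv::Mat PolygonMask(const std::vector<cv::Point> &points, cv::Size size) {
  cv::Mat mask = cv::Mat::zeros(size, CV_32FC1);
  std::vector<std::vector<cv::Point>> polygons{points};
  cv::fillPoly(mask, polygons, cv::Scalar::all(255), cv::LINE_AA);
  mask.convertTo(mask, CV_8U);
  return mask;
}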
absl::Status LandmarksToMaskCalculator::GetFaceBox(CalculatorContext *cc, std::unique_ptr<cv::Mat> &image_mat)
{
cv::Mat mat_image_ = *image_mat.get();
@@ -407,6 +386,10 @@ namespace mediapipe
std::vector<int> x_s, y_s;
double box_min_y, box_max_y, box_max_x, box_min_x;
if (cc->Inputs().HasTag(kLandmarksTag))
{
const LandmarkList &landmarks =
cc->Inputs().Tag(kLandmarksTag).Get<LandmarkList>();
for (int i = 0; i < landmarks.landmark_size(); ++i)
{
@@ -420,31 +403,53 @@ namespace mediapipe
continue;
}
const auto &point = landmark.point();
const auto &point = landmark;
int x = -1;
int y = -1;
if (point.normalized())
{
CHECK(NormalizedtoPixelCoordinates(point.x(), point.y(), image_width_,
image_height_, &x, &y));
}
else
{
x = static_cast<int>(point.x() * scale_factor_);
y = static_cast<int>(point.y() * scale_factor_);
}
x_s.push_back(point.x());
y_s.push_back(point.y());
}
}
cv::minMaxLoc(y_s, &box_min_y, &box_max_y);
cv::minMaxLoc(x_s, &box_min_x, &box_max_x);
box_min_y = box_min_y * 0.9;
face_box = std::make_tuple(box_min_x, box_min_y, box_max_x, box_max_y);
}
if (cc->Inputs().HasTag(kNormLandmarksTag))
{
const NormalizedLandmarkList &landmarks =
cc->Inputs().Tag(kNormLandmarksTag).Get<NormalizedLandmarkList>();
for (int i = 0; i < landmarks.landmark_size(); ++i)
{
const NormalizedLandmark &landmark = landmarks.landmark(i);
if (!IsLandmarkVisibleAndPresent<NormalizedLandmark>(
landmark, false,
0.0, false,
0.0))
{
continue;
}
const auto &point = landmark;
int x = -1;
int y = -1;
CHECK(NormalizedtoPixelCoordinates(point.x(), point.y(), image_width_,
image_height_, &x, &y));
x_s.push_back(x);
y_s.push_back(y);
}
cv::minMaxLoc(y_s, &box_min_y, &box_max_y);
cv::minMaxLoc(x_s, &box_min_x, &box_max_x);
box_min_y = box_min_y * 0.9;
face_box = std::make_tuple(box_min_x, box_min_y, box_max_x, box_max_y);
}
return absl::OkStatus();
} */
}
REGISTER_CALCULATOR(LandmarksToMaskCalculator);
} // namespace mediapipe

View File

@@ -24,22 +24,11 @@
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/util/color.pb.h"
#include "mediapipe/util/render_data.pb.h"
#include "absl/memory/memory.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_join.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_options.pb.h"
#include "mediapipe/framework/formats/image_format.pb.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/image_frame_opencv.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/port/opencv_core_inc.h"
#include "mediapipe/framework/port/opencv_imgproc_inc.h"
#include "mediapipe/framework/formats/location_data.pb.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/util/color.pb.h"
#include "mediapipe/util/render_data.pb.h"
#include "absl/strings/str_cat.h"
#include "mediapipe/framework/port/logging.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/framework/port/vector.h"
@@ -85,8 +74,10 @@ namespace mediapipe
absl::Status RenderToCpu(CalculatorContext *cc,
std::unordered_map<std::string, cv::Mat> &all_masks);
absl::Status GetFaceBox(std::unique_ptr<cv::Mat> &image_mat,
const RenderData &render_data);
absl::Status GetFaceBox(CalculatorContext *cc, std::unique_ptr<cv::Mat> &image_mat);
absl::Status GetMasks(CalculatorContext *cc, std::unordered_map<std::string, cv::Mat> &all_masks, std::unique_ptr<cv::Mat> &image_mat);
absl::Status CreateRenderTargetCpu(
CalculatorContext *cc, std::unique_ptr<cv::Mat> &image_mat,
ImageFormat::Format *target_format);

View File

@@ -38,6 +38,8 @@ android_binary(
assets = [
"//mediapipe/graphs/image_style:mobile_gpu.binarypb",
"//mediapipe/models:model_float32.tflite",
"//mediapipe/modules/face_landmark:face_landmark_with_attention.tflite",
"//mediapipe/modules/face_detection:face_detection_short_range.tflite",
],
assets_dir = "",
manifest = "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:AndroidManifest.xml",
@@ -56,5 +58,7 @@ android_binary(
deps = [
":mediapipe_jni_lib",
"//mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:basic_lib",
"//mediapipe/framework/formats:landmark_java_proto_lite",
"//mediapipe/java/com/google/mediapipe/framework:android_framework",
],
)

View File

@@ -26,12 +26,17 @@ cc_library(
deps = [
"//mediapipe/calculators/core:flow_limiter_calculator",
"//mediapipe/calculators/tensor:image_to_tensor_calculator",
"//mediapipe/calculators/tensor:tensor_converter_calculator",
"//mediapipe/calculators/tensor:inference_calculator",
"//mediapipe/calculators/image:image_transformation_calculator",
"//mediapipe/calculators/tensor:tensors_to_segmentation_calculator",
"//mediapipe/calculators/util:to_image_calculator",
"//mediapipe/calculators/util:from_image_calculator",
"//mediapipe/calculators/image:image_properties_calculator",
"//mediapipe/calculators/tflite:tflite_custom_op_resolver_calculator",
"//mediapipe/modules/face_landmark:face_landmark_front_gpu",
"//mediapipe/calculators/image_style:fast_utils_calculator",
"//mediapipe/gpu:gpu_buffer_to_image_frame_calculator",
"//mediapipe/calculators/core:constant_side_packet_calculator",
],
)
@@ -40,10 +45,16 @@ cc_library(
deps = [
"//mediapipe/calculators/core:flow_limiter_calculator",
"//mediapipe/calculators/tensor:image_to_tensor_calculator",
"//mediapipe/calculators/image:image_transformation_calculator",
"//mediapipe/calculators/tensor:inference_calculator",
"//mediapipe/calculators/tensor:tensor_converter_calculator",
"//mediapipe/calculators/tensor:tensors_to_segmentation_calculator",
"//mediapipe/calculators/util:to_image_calculator",
"//mediapipe/calculators/util:from_image_calculator",
"//mediapipe/modules/face_landmark:face_landmark_front_cpu",
"//mediapipe/calculators/image_style:fast_utils_calculator",
"//mediapipe/calculators/image:image_properties_calculator",
"//mediapipe/calculators/core:constant_side_packet_calculator",
],
)

View File

@@ -6,7 +6,6 @@ input_stream: "input_video"
# Output image with rendered results. (ImageFrame)
output_stream: "output_video"
node {
calculator: "FlowLimiterCalculator"
input_stream: "input_video"
@@ -19,30 +18,57 @@ node {
}
node: {
calculator: "ToImageCalculator"
input_stream: "IMAGE_CPU:throttled_input_video"
output_stream: "IMAGE:image_input_video"
# Defines side packets for further use in the graph.
node {
calculator: "ConstantSidePacketCalculator"
output_side_packet: "PACKET:0:num_faces"
output_side_packet: "PACKET:1:with_attention"
node_options: {
[type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
packet { int_value: 1 }
packet { bool_value: true }
}
}
}
# Subgraph that detects faces and corresponding landmarks.
node {
calculator: "FaceLandmarkFrontCpu"
input_stream: "IMAGE:throttled_input_video"
input_side_packet: "NUM_FACES:num_faces"
input_side_packet: "WITH_ATTENTION:with_attention"
output_stream: "LANDMARKS:multi_face_landmarks"
}
node {
calculator: "ImageToTensorCalculator"
input_stream: "IMAGE:image_input_video"
output_stream: "TENSORS:input_tensor"
options: {
[mediapipe.ImageToTensorCalculatorOptions.ext] {
output_tensor_width: 256
output_tensor_height: 256
keep_aspect_ratio: true
output_tensor_float_range {
min: -1.0
max: 1.0
}
border_mode: BORDER_ZERO
calculator: "FastUtilsCalculator"
input_stream: "NORM_LANDMARKS:multi_face_landmarks"
input_stream: "IMAGE:throttled_input_video"
output_stream: "IMAGE:out_image_frame"
}
node: {
calculator: "ImageTransformationCalculator"
input_stream: "IMAGE:out_image_frame"
output_stream: "IMAGE:transformed_input_video"
node_options: {
[type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
output_width: 256
output_height: 256
}
}
}
node {
calculator: "TensorConverterCalculator"
input_stream: "IMAGE:transformed_input_video"
output_stream: "TENSORS:input_tensor"
options: {
[mediapipe.TensorConverterCalculatorOptions.ext] {
zero_center: true
}
}
}
node {
calculator: "InferenceCalculator"
@@ -56,10 +82,16 @@ node {
}
}
node {
calculator: "ImagePropertiesCalculator"
input_stream: "IMAGE:transformed_input_video"
output_stream: "SIZE:input_size"
}
node {
calculator: "TensorsToSegmentationCalculator"
input_stream: "TENSORS:output_tensor"
input_stream: "OUTPUT_SIZE:input_size"
output_stream: "MASK:output"
options: {
[mediapipe.TensorsToSegmentationCalculatorOptions.ext] {

View File

@@ -18,25 +18,81 @@ node {
output_stream: "throttled_input_video"
}
node: {
calculator: "ImageToTensorCalculator"
input_stream: "IMAGE_GPU:throttled_input_video"
output_stream: "TENSORS:input_tensors"
options {
[mediapipe.ImageToTensorCalculatorOptions.ext] {
output_tensor_width: 256
output_tensor_height: 256
keep_aspect_ratio: false
output_tensor_float_range {
min: -1.0
max: 1.0
}
gpu_origin: TOP_LEFT
border_mode: BORDER_REPLICATE
# Defines side packets for further use in the graph.
node {
calculator: "ConstantSidePacketCalculator"
output_side_packet: "PACKET:with_attention"
node_options: {
[type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
packet { bool_value: true }
}
}
}
node: {
calculator: "ImageTransformationCalculator"
input_stream: "IMAGE_GPU:throttled_input_video"
output_stream: "IMAGE_GPU:transformed_input_video"
node_options: {
[type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
output_width: 256
output_height: 256
}
}
}
# Converts the transformed GPU buffer into a CPU ImageFrame.
node {
calculator: "GpuBufferToImageFrameCalculator"
input_stream: "transformed_input_video"
output_stream: "throttled_input_video_cpu"
}
# Subgraph that detects faces and corresponding landmarks.
node {
calculator: "FaceLandmarkFrontGpu"
input_stream: "IMAGE:throttled_input_video"
input_side_packet: "WITH_ATTENTION:with_attention"
output_stream: "LANDMARKS:multi_face_landmarks"
}
node {
calculator: "FastUtilsCalculator"
input_stream: "NORM_LANDMARKS:multi_face_landmarks"
input_stream: "IMAGE:throttled_input_video_cpu"
output_stream: "IMAGE:out_image_frame"
}
node {
calculator: "TensorConverterCalculator"
input_stream: "IMAGE:out_image_frame"
output_stream: "TENSORS:input_tensors"
options: {
[mediapipe.TensorConverterCalculatorOptions.ext] {
zero_center: true
}
}
}
#node: {
# calculator: "ImageToTensorCalculator"
# input_stream: "IMAGE_GPU:throttled_input_video"
# output_stream: "TENSORS:input_tensors"
# options {
# [mediapipe.ImageToTensorCalculatorOptions.ext] {
# output_tensor_width: 256
# output_tensor_height: 256
# keep_aspect_ratio: false
# output_tensor_float_range {
# min: -1.0
# max: 1.0
# }
# gpu_origin: TOP_LEFT
# border_mode: BORDER_REPLICATE
# }
# }
#}
node {
@@ -45,8 +101,8 @@ node {
output_stream: "TENSORS:output_tensors"
options: {
[mediapipe.InferenceCalculatorOptions.ext] {
model_path: "mediapipe/models/model_float32.tflite"
delegate { xnnpack {} }
model_path:"mediapipe/models/model_float32.tflite"
delegate { gpu {} }
}
}
}