overall improvements
This commit is contained in:
parent
5fbb1988f5
commit
c980f0432c
|
@ -14,7 +14,6 @@
|
|||
|
||||
#include <math.h>
|
||||
#include <string>
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "absl/strings/str_cat.h"
|
||||
|
@ -23,7 +22,6 @@
|
|||
#include "mediapipe/framework/formats/image_frame.h"
|
||||
#include "mediapipe/framework/formats/image_frame_opencv.h"
|
||||
#include "mediapipe/framework/formats/video_stream_header.h"
|
||||
#include "mediapipe/framework/formats/landmark.pb.h"
|
||||
#include "mediapipe/framework/port/logging.h"
|
||||
#include "mediapipe/framework/port/opencv_core_inc.h"
|
||||
#include "mediapipe/framework/port/opencv_imgproc_inc.h"
|
||||
|
@ -37,20 +35,13 @@ namespace mediapipe
|
|||
{
|
||||
namespace
|
||||
{
|
||||
static const std::vector<cv::Point2f> FFHQ_NORM_LM = {
|
||||
{638.68525475 / 1024, 486.24604922 / 1024},
|
||||
{389.31496114 / 1024, 485.8921848 / 1024},
|
||||
{513.67979275 / 1024, 620.8915371 / 1024},
|
||||
{405.50932642 / 1024, 756.52797927 / 1024},
|
||||
{622.55630397 / 1024, 756.15509499 / 1024}};
|
||||
|
||||
constexpr char kImageFrameTag[] = "IMAGE";
|
||||
constexpr char kFakeBgTag[] = "FAKE_BG";
|
||||
constexpr char kLmMaskTag[] = "LM_MASK";
|
||||
|
||||
inline bool HasImageTag(mediapipe::CalculatorContext *cc) { return false; }
|
||||
|
||||
cv::Mat blend_mask(cv::Mat mask_face, cv::Mat mask_bbox, int kernel_size = 33, int reduce_size = 128)
|
||||
absl::StatusOr<cv::Mat> blend_mask(cv::Mat mask_face, cv::Mat mask_bbox, int kernel_size = 33, int reduce_size = 128)
|
||||
{
|
||||
int k_sz = kernel_size;
|
||||
auto [width, height] = mask_face.size();
|
||||
|
@ -71,20 +62,21 @@ namespace mediapipe
|
|||
mask_bbox.convertTo(mask_bbox, CV_32F);
|
||||
cv::GaussianBlur(mask_bbox, mask_bbox, {k_sz, k_sz}, 0);
|
||||
|
||||
cv::Mat mask_bbox_3ch;
|
||||
cv::merge(std::vector{mask_bbox, mask_bbox, mask_bbox}, mask_bbox_3ch);
|
||||
|
||||
cv::Mat mask = mask_bbox_3ch.mul(mask_face);
|
||||
cv::Mat mask = mask_bbox.mul(mask_face);
|
||||
|
||||
cv::Mat img_out;
|
||||
cv::resize(mask, img_out, {width, height});
|
||||
|
||||
for (int i = 1; i < mask_face_0.rows; i++)
|
||||
for (int i = 0; i < mask_face_0.rows; i++)
|
||||
{
|
||||
for (int j = 1; j < mask_face_0.cols; j++)
|
||||
const uchar *ptr_mask_face = mask_face_0.ptr<uchar>(i);
|
||||
float *ptr_img_out = img_out.ptr<float>(i);
|
||||
for (int j = 0; j < mask_face_0.cols; j++)
|
||||
{
|
||||
if (mask_face_0.at<uchar>(i, j) > 0)
|
||||
img_out.at<cv::Vec3b>(i, j) = 1;
|
||||
if (ptr_mask_face[j] > 0)
|
||||
{
|
||||
ptr_img_out[j] = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -138,7 +130,7 @@ namespace mediapipe
|
|||
}
|
||||
if (cc->Inputs().HasTag(kLmMaskTag))
|
||||
{
|
||||
cc->Inputs().Tag(kLmMaskTag).Set<ImageFrame>();
|
||||
cc->Inputs().Tag(kLmMaskTag).Set<cv::Mat>();
|
||||
}
|
||||
if (cc->Outputs().HasTag(kImageFrameTag))
|
||||
{
|
||||
|
@ -181,23 +173,19 @@ namespace mediapipe
|
|||
ImageFormat::Format target_format;
|
||||
std::unique_ptr<cv::Mat> image_mat;
|
||||
MP_RETURN_IF_ERROR(CreateRenderTargetCpu(cc, image_mat, kImageFrameTag, &target_format));
|
||||
|
||||
|
||||
if (((cc->Inputs().HasTag(kFakeBgTag) &&
|
||||
!cc->Inputs().Tag(kFakeBgTag).IsEmpty())) &&
|
||||
((cc->Inputs().HasTag(kLmMaskTag) &&
|
||||
!cc->Inputs().Tag(kLmMaskTag).IsEmpty())))
|
||||
{
|
||||
// Initialize render target, drawn with OpenCV.
|
||||
std::unique_ptr<cv::Mat> fake_bg;
|
||||
std::unique_ptr<cv::Mat> lm_mask_ptr;
|
||||
const auto &input_fake_bg = cc->Inputs().Tag(kFakeBgTag).Get<ImageFrame>();
|
||||
auto mat_fake_bg_ = formats::MatView(&input_fake_bg);
|
||||
|
||||
MP_RETURN_IF_ERROR(CreateRenderTargetCpu(cc, fake_bg, kFakeBgTag, &target_format));
|
||||
MP_RETURN_IF_ERROR(CreateRenderTargetCpu(cc, lm_mask_ptr, kLmMaskTag, &target_format));
|
||||
|
||||
cv::Mat mat_fake_bg_ = *fake_bg.get();
|
||||
cv::Mat lm_mask = cc->Inputs().Tag(kLmMaskTag).Get<cv::Mat>();
|
||||
|
||||
cv::Mat mat_image_ = *image_mat.get();
|
||||
cv::Mat lm_mask = *lm_mask_ptr.get();
|
||||
|
||||
image_width_ = image_mat->cols;
|
||||
image_height_ = image_mat->rows;
|
||||
|
||||
|
@ -206,11 +194,12 @@ namespace mediapipe
|
|||
cv::transform(roi_mask, roi_mask, cv::Matx13f(1, 1, 1));
|
||||
cv::threshold(roi_mask, roi_mask, 1, 255, CV_THRESH_TRUNC);
|
||||
|
||||
cv::Mat mask = blend_mask(lm_mask, roi_mask, 33);
|
||||
ASSIGN_OR_RETURN(auto mask, blend_mask(lm_mask, roi_mask, 33));
|
||||
|
||||
mat_image_.convertTo(mat_image_, CV_32F);
|
||||
mat_fake_bg_.convertTo(mat_fake_bg_, CV_32F);
|
||||
cv::resize(mat_fake_bg_, mat_fake_bg_, {image_width_, image_height_});
|
||||
cv::merge(std::vector{mask, mask, mask}, mask);
|
||||
|
||||
cv::Mat im_out = mat_fake_bg_.mul(cv::Scalar::all(1) - mask) + mat_image_.mul(mask);
|
||||
|
||||
|
|
|
@ -49,6 +49,14 @@ namespace mediapipe
|
|||
{405.50932642 / 1024, 756.52797927 / 1024},
|
||||
{622.55630397 / 1024, 756.15509499 / 1024}};
|
||||
|
||||
const std::vector<std::pair<std::string, std::vector<int>>> index_dict = {
|
||||
{"leftEye", {384, 385, 386, 387, 388, 390, 263, 362, 398, 466, 373, 374, 249, 380, 381, 382}},
|
||||
{"rightEye", {160, 33, 161, 163, 133, 7, 173, 144, 145, 246, 153, 154, 155, 157, 158, 159}},
|
||||
{"nose", {4}},
|
||||
{"leftLips", {61, 146}},
|
||||
{"rightLips", {291, 375}},
|
||||
};
|
||||
|
||||
constexpr char kImageFrameTag[] = "IMAGE";
|
||||
constexpr char kVectorTag[] = "VECTOR";
|
||||
constexpr char kLandmarksTag[] = "LANDMARKS";
|
||||
|
@ -202,9 +210,6 @@ namespace mediapipe
|
|||
absl::Status Process(CalculatorContext *cc) override;
|
||||
absl::Status Close(CalculatorContext *cc) override;
|
||||
|
||||
protected:
|
||||
mediapipe::FastUtilsCalculatorOptions options_;
|
||||
|
||||
private:
|
||||
absl::Status CreateRenderTargetCpu(CalculatorContext *cc,
|
||||
std::unique_ptr<cv::Mat> &image_mat,
|
||||
|
@ -224,22 +229,18 @@ namespace mediapipe
|
|||
cv::Mat target_lm = cv::Mat(FFHQ_NORM_LM), cv::Size size = cv::Size(256, 256),
|
||||
float extend = NULL, std::tuple<float, float, float, float> roi = {NULL, NULL, NULL, NULL});
|
||||
|
||||
absl::Status LoadOptions(CalculatorContext *cc);
|
||||
// Indicates if image frame is available as input.
|
||||
bool image_frame_available_ = false;
|
||||
|
||||
const std::vector<std::pair<std::string, std::vector<int>>> index_dict = {
|
||||
{"leftEye", {384, 385, 386, 387, 388, 390, 263, 362, 398, 466, 373, 374, 249, 380, 381, 382}},
|
||||
{"rightEye", {160, 33, 161, 163, 133, 7, 173, 144, 145, 246, 153, 154, 155, 157, 158, 159}},
|
||||
{"nose", {4}},
|
||||
{"leftLips", {61, 146}},
|
||||
{"rightLips", {291, 375}},
|
||||
};
|
||||
|
||||
cv::Mat mat_image_;
|
||||
cv::Mat lm_mask;
|
||||
int image_width_;
|
||||
int image_height_;
|
||||
int orig_width;
|
||||
int orig_height;
|
||||
bool back_to_im;
|
||||
|
||||
::mediapipe::FastUtilsCalculatorOptions options_;
|
||||
};
|
||||
REGISTER_CALCULATOR(FastUtilsCalculator);
|
||||
|
||||
|
@ -281,7 +282,7 @@ namespace mediapipe
|
|||
|
||||
if (cc->Outputs().HasTag(kLmMaskTag))
|
||||
{
|
||||
cc->Outputs().Tag(kLmMaskTag).Set<ImageFrame>();
|
||||
cc->Outputs().Tag(kLmMaskTag).Set<cv::Mat>();
|
||||
}
|
||||
|
||||
return absl::OkStatus();
|
||||
|
@ -290,8 +291,8 @@ namespace mediapipe
|
|||
absl::Status FastUtilsCalculator::Open(CalculatorContext *cc)
|
||||
{
|
||||
cc->SetOffset(TimestampDiff(0));
|
||||
options_ = cc->Options<mediapipe::FastUtilsCalculatorOptions>();
|
||||
back_to_im = options_.back_to_image();
|
||||
|
||||
MP_RETURN_IF_ERROR(LoadOptions(cc));
|
||||
|
||||
if (cc->Inputs().HasTag(kImageFrameTag) || HasImageTag(cc))
|
||||
{
|
||||
|
@ -322,9 +323,14 @@ namespace mediapipe
|
|||
// Initialize render target, drawn with OpenCV.
|
||||
std::unique_ptr<cv::Mat> image_mat;
|
||||
ImageFormat::Format target_format;
|
||||
ImageFormat::Format target_format2;
|
||||
std::vector<std::vector<cv::Point2f>> lms_out;
|
||||
|
||||
const auto size = cc->Inputs().Tag(kSizeTag).Get<std::pair<int, int>>();
|
||||
orig_width = size.first;
|
||||
orig_height = size.second;
|
||||
CHECK_GT(size.first, 0);
|
||||
CHECK_GT(orig_height, 0);
|
||||
|
||||
MP_RETURN_IF_ERROR(CreateRenderTargetCpu(cc, image_mat, &target_format));
|
||||
mat_image_ = *image_mat.get();
|
||||
image_width_ = image_mat->cols;
|
||||
|
@ -335,33 +341,16 @@ namespace mediapipe
|
|||
{
|
||||
MP_RETURN_IF_ERROR(Call(cc, image_mat, target_format, lms_out));
|
||||
|
||||
if (cc->Outputs().HasTag(kLmMaskTag))
|
||||
{
|
||||
lm_mask.convertTo(lm_mask, CV_8U);
|
||||
|
||||
std::unique_ptr<cv::Mat> lm_mask_ptr = absl::make_unique<cv::Mat>(
|
||||
mat_image_.size(), lm_mask.type());
|
||||
|
||||
lm_mask.copyTo(*lm_mask_ptr);
|
||||
|
||||
target_format2 = ImageFormat::GRAY8;
|
||||
uchar *lm_mask_pt = lm_mask_ptr->data;
|
||||
|
||||
MP_RETURN_IF_ERROR(RenderToCpu(cc, target_format2, lm_mask_pt, lm_mask_ptr, kLmMaskTag));
|
||||
}
|
||||
|
||||
if (!back_to_im)
|
||||
{
|
||||
MP_RETURN_IF_ERROR(Align(image_mat, cv::Mat(lms_out[0])));
|
||||
}
|
||||
else
|
||||
{
|
||||
const auto &size =
|
||||
cc->Inputs().Tag(kSizeTag).Get<std::pair<int, int>>();
|
||||
cv::Mat tar = cv::Mat(FFHQ_NORM_LM) * 256;
|
||||
|
||||
MP_RETURN_IF_ERROR(Align(image_mat, tar,
|
||||
cv::Mat(lms_out[0]), {size.first, size.second}));
|
||||
cv::Mat(lms_out[0]), {orig_width, orig_height}));
|
||||
}
|
||||
uchar *image_mat_ptr = image_mat->data;
|
||||
MP_RETURN_IF_ERROR(RenderToCpu(cc, target_format, image_mat_ptr, image_mat, kImageFrameTag));
|
||||
|
@ -396,6 +385,17 @@ namespace mediapipe
|
|||
.Add(output_frame.release(), cc->InputTimestamp());
|
||||
}
|
||||
|
||||
if (cc->Outputs().HasTag(kLmMaskTag) && !lm_mask.empty())
|
||||
{
|
||||
auto output_lmmask = absl::make_unique<cv::Mat>(lm_mask);
|
||||
|
||||
if (cc->Outputs().HasTag(kLmMaskTag))
|
||||
{
|
||||
cc->Outputs()
|
||||
.Tag(kLmMaskTag)
|
||||
.Add(output_lmmask.release(), cc->InputTimestamp());
|
||||
}
|
||||
}
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
|
@ -484,13 +484,11 @@ namespace mediapipe
|
|||
continue;
|
||||
}
|
||||
|
||||
const auto &size =
|
||||
cc->Inputs().Tag(kSizeTag).Get<std::pair<int, int>>();
|
||||
const auto &point = landmark;
|
||||
int x = -1;
|
||||
int y = -1;
|
||||
CHECK(NormalizedtoPixelCoordinates(point.x(), point.y(), size.first,
|
||||
size.second, &x, &y));
|
||||
CHECK(NormalizedtoPixelCoordinates(point.x(), point.y(), orig_width,
|
||||
orig_height, &x, &y));
|
||||
kps.push_back(cv::Point2f(x, y));
|
||||
}
|
||||
|
||||
|
@ -519,8 +517,9 @@ namespace mediapipe
|
|||
}
|
||||
std::vector<std::vector<cv::Point>> pts;
|
||||
pts.push_back(kpsint);
|
||||
lm_mask = cv::Mat::zeros(image_mat->size(), CV_32FC1);
|
||||
lm_mask = cv::Mat::zeros({orig_width, orig_height}, CV_32FC1);
|
||||
cv::fillPoly(lm_mask, pts, cv::Scalar::all(1), cv::LINE_AA);
|
||||
lm_mask.convertTo(lm_mask, CV_8U);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -565,4 +564,14 @@ namespace mediapipe
|
|||
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status FastUtilsCalculator::LoadOptions(CalculatorContext *cc)
|
||||
{
|
||||
// Get calculator options specified in the graph.
|
||||
options_ = cc->Options<::mediapipe::FastUtilsCalculatorOptions>();
|
||||
RET_CHECK(options_.has_back_to_image());
|
||||
back_to_im = options_.back_to_image();
|
||||
|
||||
return absl::OkStatus();
|
||||
}
|
||||
} // namespace mediapipe
|
||||
|
|
|
@ -97,8 +97,7 @@ namespace mediapipe
|
|||
|
||||
private:
|
||||
absl::Status ProcessCpu(CalculatorContext *cc);
|
||||
|
||||
};
|
||||
};
|
||||
REGISTER_CALCULATOR(TensorsToImageCalculator);
|
||||
|
||||
// static
|
||||
|
@ -116,7 +115,7 @@ namespace mediapipe
|
|||
}
|
||||
|
||||
// Outputs.
|
||||
cc->Outputs().Tag(kImageTag).Set<Image>();
|
||||
cc->Outputs().Tag(kImageTag).Set<ImageFrame>();
|
||||
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
@ -137,6 +136,7 @@ namespace mediapipe
|
|||
|
||||
const auto &input_tensors =
|
||||
cc->Inputs().Tag(kTensorsTag).Get<std::vector<Tensor>>();
|
||||
RET_CHECK(!input_tensors.empty());
|
||||
|
||||
MP_RETURN_IF_ERROR(ProcessCpu(cc));
|
||||
|
||||
|
@ -184,15 +184,18 @@ namespace mediapipe
|
|||
cv::merge(channels, tensor_mat);
|
||||
|
||||
cv::convertScaleAbs(tensor_mat, tensor_mat);
|
||||
cv::resize(tensor_mat, tensor_mat,
|
||||
cv::Size(output_width, output_height));
|
||||
|
||||
// Send out image as CPU packet.
|
||||
std::shared_ptr<ImageFrame> image_frame = std::make_shared<ImageFrame>(
|
||||
auto output_image = absl::make_unique<ImageFrame>(
|
||||
ImageFormat::SRGB, output_width, output_height);
|
||||
std::unique_ptr<Image> output_image = absl::make_unique<Image>(image_frame);
|
||||
auto output_mat = formats::MatView(output_image.get());
|
||||
// Upsample image into output.
|
||||
cv::resize(tensor_mat, *output_mat,
|
||||
cv::Size(output_width, output_height));
|
||||
|
||||
uchar *data_image = tensor_mat.data;
|
||||
|
||||
output_image->CopyPixelData(ImageFormat::SRGB, tensor_mat.cols, tensor_mat.rows, data_image,
|
||||
ImageFrame::kDefaultAlignmentBoundary);
|
||||
|
||||
cc->Outputs().Tag(kImageTag).Add(output_image.release(), cc->InputTimestamp());
|
||||
|
||||
return absl::OkStatus();
|
||||
|
|
|
@ -25,12 +25,9 @@ cc_library(
|
|||
name = "mobile_calculators",
|
||||
deps = [
|
||||
"//mediapipe/calculators/core:flow_limiter_calculator",
|
||||
"//mediapipe/calculators/tensor:image_to_tensor_calculator",
|
||||
"//mediapipe/calculators/tensor:tensor_converter_calculator",
|
||||
"//mediapipe/calculators/tensor:inference_calculator",
|
||||
"//mediapipe/calculators/image:image_transformation_calculator",
|
||||
"//mediapipe/calculators/util:to_image_calculator",
|
||||
"//mediapipe/calculators/util:from_image_calculator",
|
||||
"//mediapipe/calculators/image:image_properties_calculator",
|
||||
"//mediapipe/modules/face_landmark:face_landmark_front_gpu",
|
||||
"//mediapipe/calculators/image_style:apply_mask_calculator",
|
||||
|
@ -46,13 +43,10 @@ cc_library(
|
|||
name = "desktop_calculators",
|
||||
deps = [
|
||||
"//mediapipe/calculators/core:flow_limiter_calculator",
|
||||
"//mediapipe/calculators/tensor:image_to_tensor_calculator",
|
||||
"//mediapipe/calculators/image:image_transformation_calculator",
|
||||
"//mediapipe/calculators/tensor:inference_calculator",
|
||||
"//mediapipe/calculators/tensor:tensor_converter_calculator",
|
||||
"//mediapipe/calculators/tensor:tensors_to_image_calculator",
|
||||
"//mediapipe/calculators/util:to_image_calculator",
|
||||
"//mediapipe/calculators/util:from_image_calculator",
|
||||
"//mediapipe/modules/face_landmark:face_landmark_front_cpu",
|
||||
"//mediapipe/calculators/image_style:fast_utils_calculator",
|
||||
"//mediapipe/calculators/image_style:apply_mask_calculator",
|
||||
|
|
|
@ -101,14 +101,6 @@ node {
|
|||
output_stream: "IMAGE:fake_image"
|
||||
}
|
||||
|
||||
|
||||
node{
|
||||
calculator: "FromImageCalculator"
|
||||
input_stream: "IMAGE:fake_image"
|
||||
output_stream: "IMAGE_CPU:fake_image2"
|
||||
}
|
||||
|
||||
|
||||
node: {
|
||||
calculator: "ImageTransformationCalculator"
|
||||
input_stream: "IMAGE:input_video"
|
||||
|
@ -145,29 +137,17 @@ node {
|
|||
}
|
||||
|
||||
|
||||
node {
|
||||
calculator: "ImagePropertiesCalculator"
|
||||
input_stream: "IMAGE:transformed_input_img"
|
||||
output_stream: "SIZE:input_size_img"
|
||||
}
|
||||
|
||||
node {
|
||||
calculator: "TensorsToImageCalculator"
|
||||
input_stream: "TENSORS:output_tensor_img"
|
||||
input_stream: "OUTPUT_SIZE:input_size_img"
|
||||
output_stream: "IMAGE:fake_bg2"
|
||||
output_stream: "IMAGE:fake_bg"
|
||||
}
|
||||
|
||||
node{
|
||||
calculator: "FromImageCalculator"
|
||||
input_stream: "IMAGE:fake_bg2"
|
||||
output_stream: "IMAGE_CPU:fake_bg"
|
||||
}
|
||||
|
||||
node {
|
||||
calculator: "FastUtilsCalculator"
|
||||
input_stream: "NORM_LANDMARKS:multi_face_landmarks"
|
||||
input_stream: "IMAGE:fake_image2"
|
||||
input_stream: "IMAGE:fake_image"
|
||||
input_stream: "SIZE:original_size"
|
||||
output_stream: "IMAGE:back_image"
|
||||
options {
|
||||
|
|
|
@ -103,13 +103,7 @@ node {
|
|||
node {
|
||||
calculator: "TensorsToImageCalculator"
|
||||
input_stream: "TENSORS:output_tensor"
|
||||
output_stream: "IMAGE:fake_image"
|
||||
}
|
||||
|
||||
node: {
|
||||
calculator: "FromImageCalculator"
|
||||
input_stream: "IMAGE:fake_image"
|
||||
output_stream: "IMAGE_CPU:cpu_fake_image"
|
||||
output_stream: "IMAGE:cpu_fake_image"
|
||||
}
|
||||
|
||||
node: {
|
||||
|
@ -142,30 +136,17 @@ node {
|
|||
options: {
|
||||
[mediapipe.InferenceCalculatorOptions.ext] {
|
||||
model_path: "mediapipe/models/model_float32.tflite"
|
||||
delegate { xnnpack {} }
|
||||
delegate { gpu {} }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
node {
|
||||
calculator: "ImagePropertiesCalculator"
|
||||
input_stream: "IMAGE_CPU:transformed_input_img"
|
||||
output_stream: "SIZE:input_size_img"
|
||||
}
|
||||
|
||||
node {
|
||||
calculator: "TensorsToImageCalculator"
|
||||
input_stream: "TENSORS:output_tensor_img"
|
||||
input_stream: "OUTPUT_SIZE:input_size_img"
|
||||
output_stream: "IMAGE:fake_bg2"
|
||||
output_stream: "IMAGE:fake_bg"
|
||||
}
|
||||
|
||||
node{
|
||||
calculator: "FromImageCalculator"
|
||||
input_stream: "IMAGE:fake_bg2"
|
||||
output_stream: "IMAGE_CPU:fake_bg"
|
||||
}
|
||||
|
||||
node {
|
||||
calculator: "FastUtilsCalculator"
|
||||
|
|
Loading…
Reference in New Issue
Block a user