diff --git a/mediapipe/calculators/image_style/apply_mask_calculator.cc b/mediapipe/calculators/image_style/apply_mask_calculator.cc index 35f96f3b7..0fa969dfe 100644 --- a/mediapipe/calculators/image_style/apply_mask_calculator.cc +++ b/mediapipe/calculators/image_style/apply_mask_calculator.cc @@ -14,7 +14,6 @@ #include #include - #include #include "absl/strings/str_cat.h" @@ -23,7 +22,6 @@ #include "mediapipe/framework/formats/image_frame.h" #include "mediapipe/framework/formats/image_frame_opencv.h" #include "mediapipe/framework/formats/video_stream_header.h" -#include "mediapipe/framework/formats/landmark.pb.h" #include "mediapipe/framework/port/logging.h" #include "mediapipe/framework/port/opencv_core_inc.h" #include "mediapipe/framework/port/opencv_imgproc_inc.h" @@ -37,20 +35,13 @@ namespace mediapipe { namespace { - static const std::vector FFHQ_NORM_LM = { - {638.68525475 / 1024, 486.24604922 / 1024}, - {389.31496114 / 1024, 485.8921848 / 1024}, - {513.67979275 / 1024, 620.8915371 / 1024}, - {405.50932642 / 1024, 756.52797927 / 1024}, - {622.55630397 / 1024, 756.15509499 / 1024}}; - constexpr char kImageFrameTag[] = "IMAGE"; constexpr char kFakeBgTag[] = "FAKE_BG"; constexpr char kLmMaskTag[] = "LM_MASK"; inline bool HasImageTag(mediapipe::CalculatorContext *cc) { return false; } - cv::Mat blend_mask(cv::Mat mask_face, cv::Mat mask_bbox, int kernel_size = 33, int reduce_size = 128) + absl::StatusOr blend_mask(cv::Mat mask_face, cv::Mat mask_bbox, int kernel_size = 33, int reduce_size = 128) { int k_sz = kernel_size; auto [width, height] = mask_face.size(); @@ -71,20 +62,21 @@ namespace mediapipe mask_bbox.convertTo(mask_bbox, CV_32F); cv::GaussianBlur(mask_bbox, mask_bbox, {k_sz, k_sz}, 0); - cv::Mat mask_bbox_3ch; - cv::merge(std::vector{mask_bbox, mask_bbox, mask_bbox}, mask_bbox_3ch); - - cv::Mat mask = mask_bbox_3ch.mul(mask_face); + cv::Mat mask = mask_bbox.mul(mask_face); cv::Mat img_out; cv::resize(mask, img_out, {width, height}); - for (int i = 1; i < mask_face_0.rows; i++) + for (int i = 0; i < mask_face_0.rows; i++) { - for (int j = 1; j < mask_face_0.cols; j++) + const uchar *ptr_mask_face = mask_face_0.ptr(i); + float *ptr_img_out = img_out.ptr(i); + for (int j = 0; j < mask_face_0.cols; j++) { - if (mask_face_0.at(i, j) > 0) - img_out.at(i, j) = 1; + if (ptr_mask_face[j] > 0) + { + ptr_img_out[j] = 1; + } } } @@ -138,7 +130,7 @@ namespace mediapipe } if (cc->Inputs().HasTag(kLmMaskTag)) { - cc->Inputs().Tag(kLmMaskTag).Set(); + cc->Inputs().Tag(kLmMaskTag).Set(); } if (cc->Outputs().HasTag(kImageFrameTag)) { @@ -181,23 +173,19 @@ namespace mediapipe ImageFormat::Format target_format; std::unique_ptr image_mat; MP_RETURN_IF_ERROR(CreateRenderTargetCpu(cc, image_mat, kImageFrameTag, &target_format)); - + if (((cc->Inputs().HasTag(kFakeBgTag) && !cc->Inputs().Tag(kFakeBgTag).IsEmpty())) && ((cc->Inputs().HasTag(kLmMaskTag) && !cc->Inputs().Tag(kLmMaskTag).IsEmpty()))) { // Initialize render target, drawn with OpenCV. - std::unique_ptr fake_bg; - std::unique_ptr lm_mask_ptr; + const auto &input_fake_bg = cc->Inputs().Tag(kFakeBgTag).Get(); + auto mat_fake_bg_ = formats::MatView(&input_fake_bg); - MP_RETURN_IF_ERROR(CreateRenderTargetCpu(cc, fake_bg, kFakeBgTag, &target_format)); - MP_RETURN_IF_ERROR(CreateRenderTargetCpu(cc, lm_mask_ptr, kLmMaskTag, &target_format)); - - cv::Mat mat_fake_bg_ = *fake_bg.get(); + cv::Mat lm_mask = cc->Inputs().Tag(kLmMaskTag).Get(); + cv::Mat mat_image_ = *image_mat.get(); - cv::Mat lm_mask = *lm_mask_ptr.get(); - image_width_ = image_mat->cols; image_height_ = image_mat->rows; @@ -206,11 +194,12 @@ namespace mediapipe cv::transform(roi_mask, roi_mask, cv::Matx13f(1, 1, 1)); cv::threshold(roi_mask, roi_mask, 1, 255, CV_THRESH_TRUNC); - cv::Mat mask = blend_mask(lm_mask, roi_mask, 33); + ASSIGN_OR_RETURN(auto mask, blend_mask(lm_mask, roi_mask, 33)); mat_image_.convertTo(mat_image_, CV_32F); mat_fake_bg_.convertTo(mat_fake_bg_, CV_32F); cv::resize(mat_fake_bg_, mat_fake_bg_, {image_width_, image_height_}); + cv::merge(std::vector{mask, mask, mask}, mask); cv::Mat im_out = mat_fake_bg_.mul(cv::Scalar::all(1) - mask) + mat_image_.mul(mask); diff --git a/mediapipe/calculators/image_style/fast_utils_calculator.cc b/mediapipe/calculators/image_style/fast_utils_calculator.cc index 8039c43f6..721107bc9 100644 --- a/mediapipe/calculators/image_style/fast_utils_calculator.cc +++ b/mediapipe/calculators/image_style/fast_utils_calculator.cc @@ -49,6 +49,14 @@ namespace mediapipe {405.50932642 / 1024, 756.52797927 / 1024}, {622.55630397 / 1024, 756.15509499 / 1024}}; + const std::vector>> index_dict = { + {"leftEye", {384, 385, 386, 387, 388, 390, 263, 362, 398, 466, 373, 374, 249, 380, 381, 382}}, + {"rightEye", {160, 33, 161, 163, 133, 7, 173, 144, 145, 246, 153, 154, 155, 157, 158, 159}}, + {"nose", {4}}, + {"leftLips", {61, 146}}, + {"rightLips", {291, 375}}, + }; + constexpr char kImageFrameTag[] = "IMAGE"; constexpr char kVectorTag[] = "VECTOR"; constexpr char kLandmarksTag[] = "LANDMARKS"; @@ -202,9 +210,6 @@ namespace mediapipe absl::Status Process(CalculatorContext *cc) override; absl::Status Close(CalculatorContext *cc) override; - protected: - mediapipe::FastUtilsCalculatorOptions options_; - private: absl::Status CreateRenderTargetCpu(CalculatorContext *cc, std::unique_ptr &image_mat, @@ -224,22 +229,18 @@ namespace mediapipe cv::Mat target_lm = cv::Mat(FFHQ_NORM_LM), cv::Size size = cv::Size(256, 256), float extend = NULL, std::tuple roi = {NULL, NULL, NULL, NULL}); + absl::Status LoadOptions(CalculatorContext *cc); // Indicates if image frame is available as input. bool image_frame_available_ = false; - - const std::vector>> index_dict = { - {"leftEye", {384, 385, 386, 387, 388, 390, 263, 362, 398, 466, 373, 374, 249, 380, 381, 382}}, - {"rightEye", {160, 33, 161, 163, 133, 7, 173, 144, 145, 246, 153, 154, 155, 157, 158, 159}}, - {"nose", {4}}, - {"leftLips", {61, 146}}, - {"rightLips", {291, 375}}, - }; - cv::Mat mat_image_; cv::Mat lm_mask; int image_width_; int image_height_; + int orig_width; + int orig_height; bool back_to_im; + + ::mediapipe::FastUtilsCalculatorOptions options_; }; REGISTER_CALCULATOR(FastUtilsCalculator); @@ -281,7 +282,7 @@ namespace mediapipe if (cc->Outputs().HasTag(kLmMaskTag)) { - cc->Outputs().Tag(kLmMaskTag).Set(); + cc->Outputs().Tag(kLmMaskTag).Set(); } return absl::OkStatus(); @@ -290,8 +291,8 @@ namespace mediapipe absl::Status FastUtilsCalculator::Open(CalculatorContext *cc) { cc->SetOffset(TimestampDiff(0)); - options_ = cc->Options(); - back_to_im = options_.back_to_image(); + + MP_RETURN_IF_ERROR(LoadOptions(cc)); if (cc->Inputs().HasTag(kImageFrameTag) || HasImageTag(cc)) { @@ -322,9 +323,14 @@ namespace mediapipe // Initialize render target, drawn with OpenCV. std::unique_ptr image_mat; ImageFormat::Format target_format; - ImageFormat::Format target_format2; std::vector> lms_out; + const auto size = cc->Inputs().Tag(kSizeTag).Get>(); + orig_width = size.first; + orig_height = size.second; + CHECK_GT(size.first, 0); + CHECK_GT(orig_height, 0); + MP_RETURN_IF_ERROR(CreateRenderTargetCpu(cc, image_mat, &target_format)); mat_image_ = *image_mat.get(); image_width_ = image_mat->cols; @@ -335,33 +341,16 @@ namespace mediapipe { MP_RETURN_IF_ERROR(Call(cc, image_mat, target_format, lms_out)); - if (cc->Outputs().HasTag(kLmMaskTag)) - { - lm_mask.convertTo(lm_mask, CV_8U); - - std::unique_ptr lm_mask_ptr = absl::make_unique( - mat_image_.size(), lm_mask.type()); - - lm_mask.copyTo(*lm_mask_ptr); - - target_format2 = ImageFormat::GRAY8; - uchar *lm_mask_pt = lm_mask_ptr->data; - - MP_RETURN_IF_ERROR(RenderToCpu(cc, target_format2, lm_mask_pt, lm_mask_ptr, kLmMaskTag)); - } - if (!back_to_im) { MP_RETURN_IF_ERROR(Align(image_mat, cv::Mat(lms_out[0]))); } else { - const auto &size = - cc->Inputs().Tag(kSizeTag).Get>(); cv::Mat tar = cv::Mat(FFHQ_NORM_LM) * 256; MP_RETURN_IF_ERROR(Align(image_mat, tar, - cv::Mat(lms_out[0]), {size.first, size.second})); + cv::Mat(lms_out[0]), {orig_width, orig_height})); } uchar *image_mat_ptr = image_mat->data; MP_RETURN_IF_ERROR(RenderToCpu(cc, target_format, image_mat_ptr, image_mat, kImageFrameTag)); @@ -396,6 +385,17 @@ namespace mediapipe .Add(output_frame.release(), cc->InputTimestamp()); } + if (cc->Outputs().HasTag(kLmMaskTag) && !lm_mask.empty()) + { + auto output_lmmask = absl::make_unique(lm_mask); + + if (cc->Outputs().HasTag(kLmMaskTag)) + { + cc->Outputs() + .Tag(kLmMaskTag) + .Add(output_lmmask.release(), cc->InputTimestamp()); + } + } return absl::OkStatus(); } @@ -484,13 +484,11 @@ namespace mediapipe continue; } - const auto &size = - cc->Inputs().Tag(kSizeTag).Get>(); const auto &point = landmark; int x = -1; int y = -1; - CHECK(NormalizedtoPixelCoordinates(point.x(), point.y(), size.first, - size.second, &x, &y)); + CHECK(NormalizedtoPixelCoordinates(point.x(), point.y(), orig_width, + orig_height, &x, &y)); kps.push_back(cv::Point2f(x, y)); } @@ -519,8 +517,9 @@ namespace mediapipe } std::vector> pts; pts.push_back(kpsint); - lm_mask = cv::Mat::zeros(image_mat->size(), CV_32FC1); + lm_mask = cv::Mat::zeros({orig_width, orig_height}, CV_32FC1); cv::fillPoly(lm_mask, pts, cv::Scalar::all(1), cv::LINE_AA); + lm_mask.convertTo(lm_mask, CV_8U); } } @@ -565,4 +564,14 @@ namespace mediapipe return absl::OkStatus(); } + + absl::Status FastUtilsCalculator::LoadOptions(CalculatorContext *cc) + { + // Get calculator options specified in the graph. + options_ = cc->Options<::mediapipe::FastUtilsCalculatorOptions>(); + RET_CHECK(options_.has_back_to_image()); + back_to_im = options_.back_to_image(); + + return absl::OkStatus(); + } } // namespace mediapipe diff --git a/mediapipe/calculators/tensor/tensors_to_image_calculator.cc b/mediapipe/calculators/tensor/tensors_to_image_calculator.cc index 3221bae9b..b9bfd2f9a 100644 --- a/mediapipe/calculators/tensor/tensors_to_image_calculator.cc +++ b/mediapipe/calculators/tensor/tensors_to_image_calculator.cc @@ -97,8 +97,7 @@ namespace mediapipe private: absl::Status ProcessCpu(CalculatorContext *cc); - - }; + }; REGISTER_CALCULATOR(TensorsToImageCalculator); // static @@ -116,7 +115,7 @@ namespace mediapipe } // Outputs. - cc->Outputs().Tag(kImageTag).Set(); + cc->Outputs().Tag(kImageTag).Set(); return absl::OkStatus(); } @@ -137,6 +136,7 @@ namespace mediapipe const auto &input_tensors = cc->Inputs().Tag(kTensorsTag).Get>(); + RET_CHECK(!input_tensors.empty()); MP_RETURN_IF_ERROR(ProcessCpu(cc)); @@ -184,15 +184,18 @@ namespace mediapipe cv::merge(channels, tensor_mat); cv::convertScaleAbs(tensor_mat, tensor_mat); + cv::resize(tensor_mat, tensor_mat, + cv::Size(output_width, output_height)); // Send out image as CPU packet. - std::shared_ptr image_frame = std::make_shared( + auto output_image = absl::make_unique( ImageFormat::SRGB, output_width, output_height); - std::unique_ptr output_image = absl::make_unique(image_frame); - auto output_mat = formats::MatView(output_image.get()); - // Upsample image into output. - cv::resize(tensor_mat, *output_mat, - cv::Size(output_width, output_height)); + + uchar *data_image = tensor_mat.data; + + output_image->CopyPixelData(ImageFormat::SRGB, tensor_mat.cols, tensor_mat.rows, data_image, + ImageFrame::kDefaultAlignmentBoundary); + cc->Outputs().Tag(kImageTag).Add(output_image.release(), cc->InputTimestamp()); return absl::OkStatus(); diff --git a/mediapipe/graphs/image_style/BUILD b/mediapipe/graphs/image_style/BUILD index c7350c9b3..c9c9f3b0a 100644 --- a/mediapipe/graphs/image_style/BUILD +++ b/mediapipe/graphs/image_style/BUILD @@ -25,12 +25,9 @@ cc_library( name = "mobile_calculators", deps = [ "//mediapipe/calculators/core:flow_limiter_calculator", - "//mediapipe/calculators/tensor:image_to_tensor_calculator", "//mediapipe/calculators/tensor:tensor_converter_calculator", "//mediapipe/calculators/tensor:inference_calculator", "//mediapipe/calculators/image:image_transformation_calculator", - "//mediapipe/calculators/util:to_image_calculator", - "//mediapipe/calculators/util:from_image_calculator", "//mediapipe/calculators/image:image_properties_calculator", "//mediapipe/modules/face_landmark:face_landmark_front_gpu", "//mediapipe/calculators/image_style:apply_mask_calculator", @@ -46,13 +43,10 @@ cc_library( name = "desktop_calculators", deps = [ "//mediapipe/calculators/core:flow_limiter_calculator", - "//mediapipe/calculators/tensor:image_to_tensor_calculator", "//mediapipe/calculators/image:image_transformation_calculator", "//mediapipe/calculators/tensor:inference_calculator", "//mediapipe/calculators/tensor:tensor_converter_calculator", "//mediapipe/calculators/tensor:tensors_to_image_calculator", - "//mediapipe/calculators/util:to_image_calculator", - "//mediapipe/calculators/util:from_image_calculator", "//mediapipe/modules/face_landmark:face_landmark_front_cpu", "//mediapipe/calculators/image_style:fast_utils_calculator", "//mediapipe/calculators/image_style:apply_mask_calculator", diff --git a/mediapipe/graphs/image_style/image_style_cpu.pbtxt b/mediapipe/graphs/image_style/image_style_cpu.pbtxt index e9ec9f806..cad9858d8 100644 --- a/mediapipe/graphs/image_style/image_style_cpu.pbtxt +++ b/mediapipe/graphs/image_style/image_style_cpu.pbtxt @@ -101,14 +101,6 @@ node { output_stream: "IMAGE:fake_image" } - -node{ - calculator: "FromImageCalculator" - input_stream: "IMAGE:fake_image" - output_stream: "IMAGE_CPU:fake_image2" -} - - node: { calculator: "ImageTransformationCalculator" input_stream: "IMAGE:input_video" @@ -145,29 +137,17 @@ node { } -node { - calculator: "ImagePropertiesCalculator" - input_stream: "IMAGE:transformed_input_img" - output_stream: "SIZE:input_size_img" -} - node { calculator: "TensorsToImageCalculator" input_stream: "TENSORS:output_tensor_img" - input_stream: "OUTPUT_SIZE:input_size_img" - output_stream: "IMAGE:fake_bg2" + output_stream: "IMAGE:fake_bg" } -node{ - calculator: "FromImageCalculator" - input_stream: "IMAGE:fake_bg2" - output_stream: "IMAGE_CPU:fake_bg" -} node { calculator: "FastUtilsCalculator" input_stream: "NORM_LANDMARKS:multi_face_landmarks" - input_stream: "IMAGE:fake_image2" + input_stream: "IMAGE:fake_image" input_stream: "SIZE:original_size" output_stream: "IMAGE:back_image" options { diff --git a/mediapipe/graphs/image_style/image_style_gpu.pbtxt b/mediapipe/graphs/image_style/image_style_gpu.pbtxt index 7e5a1fb40..a46acaf67 100644 --- a/mediapipe/graphs/image_style/image_style_gpu.pbtxt +++ b/mediapipe/graphs/image_style/image_style_gpu.pbtxt @@ -103,13 +103,7 @@ node { node { calculator: "TensorsToImageCalculator" input_stream: "TENSORS:output_tensor" - output_stream: "IMAGE:fake_image" -} - -node: { - calculator: "FromImageCalculator" - input_stream: "IMAGE:fake_image" - output_stream: "IMAGE_CPU:cpu_fake_image" + output_stream: "IMAGE:cpu_fake_image" } node: { @@ -142,30 +136,17 @@ node { options: { [mediapipe.InferenceCalculatorOptions.ext] { model_path: "mediapipe/models/model_float32.tflite" - delegate { xnnpack {} } + delegate { gpu {} } } } } - -node { - calculator: "ImagePropertiesCalculator" - input_stream: "IMAGE_CPU:transformed_input_img" - output_stream: "SIZE:input_size_img" -} - node { calculator: "TensorsToImageCalculator" input_stream: "TENSORS:output_tensor_img" - input_stream: "OUTPUT_SIZE:input_size_img" - output_stream: "IMAGE:fake_bg2" + output_stream: "IMAGE:fake_bg" } -node{ - calculator: "FromImageCalculator" - input_stream: "IMAGE:fake_bg2" - output_stream: "IMAGE_CPU:fake_bg" -} node { calculator: "FastUtilsCalculator"