From 51ed94633e28af84cf40800edeeee87adcf9975e Mon Sep 17 00:00:00 2001 From: mslight Date: Mon, 8 Aug 2022 14:17:50 +0400 Subject: [PATCH] Separate color conversion, affine transform --- .../image/color_convert_calculator.cc | 27 ++ .../framework/formats/image_format.proto | 2 + mediapipe/framework/formats/image_frame.cc | 6 + mediapipe/graphs/deformation/BUILD | 35 +- mediapipe/graphs/deformation/README.md | 5 +- .../graphs/deformation/calculators/BUILD | 18 + .../graphs/deformation/calculators/Tensor.h | 4 + .../calculators/face_processor_calculator.cc | 372 +++++------------- .../calculators/warp_affine_calculator.cc | 285 ++++++++++++++ .../graphs/deformation/deformation_cpu.pbtxt | 38 +- .../deformation/deformation_mobile.pbtxt | 33 +- 11 files changed, 520 insertions(+), 305 deletions(-) create mode 100644 mediapipe/graphs/deformation/calculators/warp_affine_calculator.cc diff --git a/mediapipe/calculators/image/color_convert_calculator.cc b/mediapipe/calculators/image/color_convert_calculator.cc index bdac932bb..054350dff 100644 --- a/mediapipe/calculators/image/color_convert_calculator.cc +++ b/mediapipe/calculators/image/color_convert_calculator.cc @@ -39,10 +39,12 @@ void SetColorChannel(int channel, uint8 value, cv::Mat* mat) { constexpr char kRgbaInTag[] = "RGBA_IN"; constexpr char kRgbInTag[] = "RGB_IN"; constexpr char kBgraInTag[] = "BGRA_IN"; +constexpr char kBgrInTag[] = "BGR_IN"; constexpr char kGrayInTag[] = "GRAY_IN"; constexpr char kRgbaOutTag[] = "RGBA_OUT"; constexpr char kRgbOutTag[] = "RGB_OUT"; constexpr char kBgraOutTag[] = "BGRA_OUT"; +constexpr char kBgrOutTag[] = "BGR_OUT"; constexpr char kGrayOutTag[] = "GRAY_OUT"; } // namespace @@ -57,6 +59,7 @@ constexpr char kGrayOutTag[] = "GRAY_OUT"; // RGB -> RGBA // RGBA -> BGRA // BGRA -> RGBA +// BGRA -> RGB // // This calculator only supports a single input stream and output stream at a // time. 
If more than one input stream or output stream is present, the @@ -122,6 +125,10 @@ absl::Status ColorConvertCalculator::GetContract(CalculatorContract* cc) { cc->Inputs().Tag(kBgraInTag).Set(); } + if (cc->Inputs().HasTag(kBgrInTag)) { + cc->Inputs().Tag(kBgrInTag).Set(); + } + if (cc->Outputs().HasTag(kRgbOutTag)) { cc->Outputs().Tag(kRgbOutTag).Set(); } @@ -137,6 +144,10 @@ absl::Status ColorConvertCalculator::GetContract(CalculatorContract* cc) { if (cc->Outputs().HasTag(kBgraOutTag)) { cc->Outputs().Tag(kBgraOutTag).Set(); } + + if (cc->Outputs().HasTag(kBgrOutTag)) { + cc->Outputs().Tag(kBgrOutTag).Set(); + } return absl::OkStatus(); } @@ -157,6 +168,7 @@ absl::Status ColorConvertCalculator::ConvertAndOutput( if (open_cv_convert_code == cv::COLOR_RGB2RGBA) { SetColorChannel(3, 255, &output_mat); } + cc->Outputs() .Tag(output_tag) .Add(output_frame.release(), cc->InputTimestamp()); @@ -194,6 +206,21 @@ absl::Status ColorConvertCalculator::Process(CalculatorContext* cc) { return ConvertAndOutput(kRgbaInTag, kBgraOutTag, ImageFormat::SBGRA, cv::COLOR_RGBA2BGRA, cc); } + // RGBA -> BGR + if (cc->Inputs().HasTag(kRgbaInTag) && cc->Outputs().HasTag(kBgrOutTag)) { + return ConvertAndOutput(kRgbaInTag, kBgrOutTag, ImageFormat::SBGR, + cv::COLOR_RGBA2BGR, cc); + } + // BGR -> RGBA + if (cc->Inputs().HasTag(kBgrInTag) && cc->Outputs().HasTag(kRgbaOutTag)) { + return ConvertAndOutput(kBgrInTag, kRgbaOutTag, ImageFormat::SRGBA, + cv::COLOR_BGR2RGBA, cc); + } + // BGRA -> RGB + if (cc->Inputs().HasTag(kBgraInTag) && cc->Outputs().HasTag(kRgbOutTag)) { + return ConvertAndOutput(kBgraInTag, kRgbOutTag, ImageFormat::SRGB, + cv::COLOR_BGRA2RGB, cc); + } return mediapipe::InvalidArgumentErrorBuilder(MEDIAPIPE_LOC) << "Unsupported image format conversion."; diff --git a/mediapipe/framework/formats/image_format.proto b/mediapipe/framework/formats/image_format.proto index 4bedb8cf0..678f893ae 100644 --- a/mediapipe/framework/formats/image_format.proto +++ 
b/mediapipe/framework/formats/image_format.proto @@ -73,5 +73,7 @@ message ImageFormat { // sBGRA, interleaved: one byte for B, one byte for G, one byte for R, // one byte for alpha or unused. This is the N32 format for Skia. SBGRA = 11; + + SBGR = 13; } } diff --git a/mediapipe/framework/formats/image_frame.cc b/mediapipe/framework/formats/image_frame.cc index 913ffae24..1b266738b 100644 --- a/mediapipe/framework/formats/image_frame.cc +++ b/mediapipe/framework/formats/image_frame.cc @@ -283,6 +283,8 @@ int ImageFrame::NumberOfChannelsForFormat(ImageFormat::Format format) { return 3; case ImageFormat::SBGRA: return 4; + case ImageFormat::SBGR: + return 3; default: LOG(FATAL) << InvalidFormatString(format); } @@ -312,6 +314,8 @@ int ImageFrame::ChannelSizeForFormat(ImageFormat::Format format) { return sizeof(uint8); case ImageFormat::SBGRA: return sizeof(uint8); + case ImageFormat::SBGR: + return sizeof(uint8); default: LOG(FATAL) << InvalidFormatString(format); } @@ -341,6 +345,8 @@ int ImageFrame::ByteDepthForFormat(ImageFormat::Format format) { return 1; case ImageFormat::SBGRA: return 1; + case ImageFormat::SBGR: + return 1; default: LOG(FATAL) << InvalidFormatString(format); } diff --git a/mediapipe/graphs/deformation/BUILD b/mediapipe/graphs/deformation/BUILD index 65dcf9572..233057807 100644 --- a/mediapipe/graphs/deformation/BUILD +++ b/mediapipe/graphs/deformation/BUILD @@ -22,26 +22,33 @@ licenses(["notice"]) package(default_visibility = ["//visibility:public"]) cc_library( - name = "mobile_calculators", + name = "mobile_calculators" , deps = [ - "//mediapipe/graphs/deformation/calculators:face_processor_calculator", - "//mediapipe/calculators/core:flow_limiter_calculator", - "//mediapipe/calculators/image:image_transformation_calculator", - "//mediapipe/modules/face_landmark:face_landmark_front_gpu", - "//mediapipe/calculators/core:constant_side_packet_calculator", - "//mediapipe/gpu:gpu_buffer_to_image_frame_calculator", - 
"//mediapipe/gpu:image_frame_to_gpu_buffer_calculator", + "//mediapipe/graphs/deformation/calculators:face_processor_calculator" , + "//mediapipe/graphs/deformation/calculators:warp_affine_calculator" , + "//mediapipe/calculators/core:flow_limiter_calculator" , + "//mediapipe/calculators/image:image_transformation_calculator" , + "//mediapipe/modules/face_landmark:face_landmark_front_gpu" , + "//mediapipe/calculators/core:constant_side_packet_calculator" , + "//mediapipe/gpu:gpu_buffer_to_image_frame_calculator" , + "//mediapipe/gpu:image_frame_to_gpu_buffer_calculator" , + "//mediapipe/calculators/image:color_convert_calculator", + "//mediapipe/calculators/image:image_properties_calculator", ], ) cc_library( - name = "desktop_calculators", + name = "desktop_calculators" , deps = [ - "//mediapipe/graphs/deformation/calculators:face_processor_calculator", - "//mediapipe/calculators/core:flow_limiter_calculator", - "//mediapipe/calculators/image:image_transformation_calculator", - "//mediapipe/modules/face_landmark:face_landmark_front_cpu", - "//mediapipe/calculators/core:constant_side_packet_calculator", + "//mediapipe/graphs/deformation/calculators:face_processor_calculator" , + "//mediapipe/graphs/deformation/calculators:warp_affine_calculator" , + "//mediapipe/calculators/core:flow_limiter_calculator" , + "//mediapipe/calculators/image:image_transformation_calculator" , + "//mediapipe/modules/face_landmark:face_landmark_front_cpu" , + "//mediapipe/calculators/core:constant_side_packet_calculator" , + "//mediapipe/calculators/image:color_convert_calculator", + "//mediapipe/calculators/image:image_properties_calculator", + ], ) diff --git a/mediapipe/graphs/deformation/README.md b/mediapipe/graphs/deformation/README.md index 7a79faddf..bb9a7964f 100644 --- a/mediapipe/graphs/deformation/README.md +++ b/mediapipe/graphs/deformation/README.md @@ -19,10 +19,7 @@ bazel-bin/mediapipe/examples/desktop/deformation/deformation_cpu ``` Run with (using video): ``` 
-bazel-bin/mediapipe/examples/desktop//deformation/deformation_cpu ---calculator_graph_config_file=mediapipe/graphs/deformation/deformation_cpu.pbtxt ---input_video_path=/path/video.mp4 ---output_video_path=/path/outvideo.mp4 +bazel-bin/mediapipe/examples/desktop/deformation/deformation_cpu --calculator_graph_config_file=mediapipe/graphs/deformation/deformation_cpu.pbtxt --input_video_path=/path/video.mp4 --output_video_path=/path/outvideo.mp4 ``` 2. Mobile (Android) diff --git a/mediapipe/graphs/deformation/calculators/BUILD b/mediapipe/graphs/deformation/calculators/BUILD index 2e8ec4ba0..c462674b6 100644 --- a/mediapipe/graphs/deformation/calculators/BUILD +++ b/mediapipe/graphs/deformation/calculators/BUILD @@ -47,5 +47,23 @@ cc_library( alwayslink = 1, ) +cc_library( + name = "warp_affine_calculator", + srcs = ["warp_affine_calculator.cc"], + hdrs = ["Tensor.h"], + visibility = ["//visibility:public"], + deps = [ + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework/api2:node", + "//mediapipe/framework/formats:image_frame", + "//mediapipe/framework/formats:image_frame_opencv", + "//mediapipe/framework/port:opencv_core", + "//mediapipe/framework/port:opencv_imgproc", + "//mediapipe/framework/port:opencv_highgui", + ], + alwayslink = 1, +) + + diff --git a/mediapipe/graphs/deformation/calculators/Tensor.h b/mediapipe/graphs/deformation/calculators/Tensor.h index be5da18e3..737200d14 100644 --- a/mediapipe/graphs/deformation/calculators/Tensor.h +++ b/mediapipe/graphs/deformation/calculators/Tensor.h @@ -64,6 +64,10 @@ public: return vector(M.ptr(0), M.ptr(0) + dims[1]); } + /* vector get_dims() { + return dims; + } */ + T at(vector _indexes) { return M.at(_indexes.data()); } diff --git a/mediapipe/graphs/deformation/calculators/face_processor_calculator.cc b/mediapipe/graphs/deformation/calculators/face_processor_calculator.cc index 0bc747363..3321933c6 100644 --- a/mediapipe/graphs/deformation/calculators/face_processor_calculator.cc +++ 
b/mediapipe/graphs/deformation/calculators/face_processor_calculator.cc @@ -35,7 +35,6 @@ #include "mediapipe/framework/port/opencv_imgproc_inc.h" #include "mediapipe/framework/port/opencv_highgui_inc.h" #include "mediapipe/framework/port/status.h" -#include "mediapipe/framework/port/logging.h" #include "mediapipe/framework/port/vector.h" #include "mediapipe/framework/port/file_helpers.h" #include "mediapipe/framework/deps/file_path.h" @@ -46,20 +45,12 @@ namespace mediapipe namespace { - constexpr char kImageFrameTag[] = "IMAGE"; + constexpr char kImageSizeTag[] = "SIZE"; constexpr char kVectorTag[] = "VECTOR"; constexpr char kLandmarksTag[] = "LANDMARKS"; constexpr char kNormLandmarksTag[] = "NORM_LANDMARKS"; - - tuple _normalized_to_pixel_coordinates(float normalized_x, - float normalized_y, int image_width, int image_height) - { - // Converts normalized value pair to pixel coordinates - int x_px = min(floor(normalized_x * image_width), image_width - 1); - int y_px = min(floor(normalized_y * image_height), image_height - 1); - - return {x_px, y_px}; - }; + constexpr char kSrcTensorTag[] = "SRC_TENSOR"; + constexpr char kDstTensorTag[] = "DST_TENSOR"; inline bool HasImageTag(mediapipe::CalculatorContext *cc) { return false; } @@ -82,7 +73,6 @@ namespace mediapipe *x_px = static_cast(normalized_x) * image_width; *y_px = static_cast(normalized_y) * image_height; *z_px = static_cast(normalized_z) * image_width; - // 2280 return true; } @@ -123,18 +113,12 @@ namespace mediapipe absl::Status Close(CalculatorContext *cc) override; private: - absl::Status CreateRenderTargetCpu(CalculatorContext *cc, - unique_ptr &image_mat, - ImageFormat::Format *target_format); - absl::Status RenderToCpu( - CalculatorContext *cc, const ImageFormat::Format &target_format, - uchar *data_image, unique_ptr &image_mat); + CalculatorContext *cc); absl::Status SetData(CalculatorContext *cc); - absl::Status ProcessImage(CalculatorContext *cc, - ImageFormat::Format &target_format); + 
absl::Status ProcessImage(CalculatorContext *cc); static absl::StatusOr ReadContentBlobFromFile( const string &unresolved_path) @@ -151,33 +135,16 @@ namespace mediapipe return content_blob; } - // Indicates if image frame is available as input. - bool image_frame_available_ = false; - - unique_ptr image_mat; vector index_names; map> indexes; map> masks; vector> _trianglesIndexes; Tensor __facePts; - - int image_width_; - int image_height_; - - Mat mat_image_; }; absl::Status FaceProcessorCalculator::GetContract(CalculatorContract *cc) { - CHECK_GE(cc->Inputs().NumEntries(), 1); - - if (cc->Inputs().HasTag(kImageFrameTag)) - { - cc->Inputs().Tag(kImageFrameTag).Set(); - CHECK(cc->Outputs().HasTag(kImageFrameTag)); - } - RET_CHECK(cc->Inputs().HasTag(kLandmarksTag) || cc->Inputs().HasTag(kNormLandmarksTag)) << "None of the input streams are provided."; @@ -195,9 +162,17 @@ namespace mediapipe cc->Inputs().Tag(kNormLandmarksTag).Set>(); } - if (cc->Outputs().HasTag(kImageFrameTag)) + if (cc->Inputs().HasTag(kImageSizeTag)) { - cc->Outputs().Tag(kImageFrameTag).Set(); + cc->Inputs().Tag(kImageSizeTag).Set>(); + } + if (cc->Outputs().HasTag(kSrcTensorTag)) + { + cc->Outputs().Tag(kSrcTensorTag).Set>(); + } + if (cc->Outputs().HasTag(kDstTensorTag)) + { + cc->Outputs().Tag(kDstTensorTag).Set>(); } return absl::OkStatus(); @@ -207,52 +182,19 @@ namespace mediapipe { cc->SetOffset(TimestampDiff(0)); - if (cc->Inputs().HasTag(kImageFrameTag) || HasImageTag(cc)) - { - image_frame_available_ = true; - } - - // Set the output header based on the input header (if present). 
- const char *tag = kImageFrameTag; - if (image_frame_available_ && !cc->Inputs().Tag(tag).Header().IsEmpty()) - { - const auto &input_header = - cc->Inputs().Tag(tag).Header().Get(); - auto *output_video_header = new VideoHeader(input_header); - cc->Outputs().Tag(tag).SetHeader(Adopt(output_video_header)); - } - return absl::OkStatus(); } absl::Status FaceProcessorCalculator::Process(CalculatorContext *cc) { - if (cc->Inputs().HasTag(kImageFrameTag) && - cc->Inputs().Tag(kImageFrameTag).IsEmpty()) - { - return absl::OkStatus(); - } - - // Initialize render target, drawn with OpenCV. - ImageFormat::Format target_format; - - MP_RETURN_IF_ERROR(CreateRenderTargetCpu(cc, image_mat, &target_format)); - - mat_image_ = *image_mat.get(); - image_width_ = image_mat->cols; - image_height_ = image_mat->rows; - MP_RETURN_IF_ERROR(SetData(cc)); - + if (cc->Inputs().HasTag(kNormLandmarksTag) && !cc->Inputs().Tag(kNormLandmarksTag).IsEmpty()) { - MP_RETURN_IF_ERROR(ProcessImage(cc, target_format)); + MP_RETURN_IF_ERROR(ProcessImage(cc)); } - uchar *image_mat_ptr = image_mat->data; - MP_RETURN_IF_ERROR(RenderToCpu(cc, target_format, image_mat_ptr, image_mat)); - return absl::OkStatus(); } @@ -261,88 +203,11 @@ namespace mediapipe return absl::OkStatus(); } - absl::Status FaceProcessorCalculator::RenderToCpu( - CalculatorContext *cc, const ImageFormat::Format &target_format, - uchar *data_image, unique_ptr &image_mat) - { - - auto output_frame = absl::make_unique( - target_format, mat_image_.cols, mat_image_.rows); - - output_frame->CopyPixelData(target_format, mat_image_.cols, mat_image_.rows, data_image, - ImageFrame::kDefaultAlignmentBoundary); - - if (cc->Outputs().HasTag(kImageFrameTag)) - { - cc->Outputs() - .Tag(kImageFrameTag) - .Add(output_frame.release(), cc->InputTimestamp()); - } - - return absl::OkStatus(); - } - - absl::Status FaceProcessorCalculator::CreateRenderTargetCpu( - CalculatorContext *cc, unique_ptr &image_mat, - ImageFormat::Format *target_format) - { - if 
(image_frame_available_) - { - const auto &input_frame = - cc->Inputs().Tag(kImageFrameTag).Get(); - - int target_mat_type; - switch (input_frame.Format()) - { - case ImageFormat::SRGBA: - *target_format = ImageFormat::SRGBA; - target_mat_type = CV_8UC4; - break; - case ImageFormat::SRGB: - *target_format = ImageFormat::SRGB; - target_mat_type = CV_8UC3; - break; - case ImageFormat::GRAY8: - *target_format = ImageFormat::SRGB; - target_mat_type = CV_8UC3; - break; - default: - return absl::UnknownError("Unexpected image frame format."); - break; - } - - image_mat = absl::make_unique( - input_frame.Height(), input_frame.Width(), target_mat_type); - - auto input_mat = formats::MatView(&input_frame); - - if (input_frame.Format() == ImageFormat::GRAY8) - { - Mat rgb_mat; - cvtColor(input_mat, rgb_mat, CV_GRAY2RGBA); - rgb_mat.copyTo(*image_mat); - } - else - { - input_mat.copyTo(*image_mat); - } - } - else - { - image_mat = absl::make_unique( - 1920, 1280, CV_8UC4, - Scalar::all(255.0)); - *target_format = ImageFormat::SRGBA; - } - - return absl::OkStatus(); - } - absl::Status FaceProcessorCalculator::SetData(CalculatorContext *cc) { - masks.clear(); - _trianglesIndexes.clear(); - + masks = {}; + _trianglesIndexes = {}; + string filename = "mediapipe/graphs/deformation/config/triangles.txt"; string content_blob; ASSIGN_OR_RETURN(content_blob, @@ -369,7 +234,7 @@ namespace mediapipe ReadContentBlobFromFile(filename), _ << "Failed to read texture blob from file!"); istringstream stream2(content_blob); - + string line; vector idxs; while (getline(stream2, line)) @@ -381,7 +246,7 @@ namespace mediapipe for (int i = 0; i < index_names.size(); i++) { filename = "./mediapipe/graphs/deformation/config/" + index_names[i] + ".txt"; - + ASSIGN_OR_RETURN(content_blob, ReadContentBlobFromFile(filename), _ << "Failed to read texture blob from file!"); @@ -416,15 +281,16 @@ namespace mediapipe return absl::OkStatus(); } - absl::Status 
FaceProcessorCalculator::ProcessImage(CalculatorContext *cc, - ImageFormat::Format &target_format) + absl::Status FaceProcessorCalculator::ProcessImage(CalculatorContext *cc) { - double alfaNose = 0.7; - double alfaLips = 0.2; - double alfaCheekbones = 0.2; + double alfaNose = 2.7; + double alfaLips = 0.7; + double alfaCheekbones = 0.7; if (cc->Inputs().HasTag(kNormLandmarksTag)) { + const auto [image_width_, image_height_] = cc->Inputs().Tag(kImageSizeTag).Get>(); + const vector &landmarks = cc->Inputs().Tag(kNormLandmarksTag).Get>(); @@ -434,11 +300,11 @@ namespace mediapipe double **_points = (double **)new double *[n]; for (int i = 0; i < n; i++) _points[i] = (double *)new double[m]; - + for (int i = 0; i < landmarks[0].landmark_size(); ++i) { const NormalizedLandmark &landmark = landmarks[0].landmark(i); - + if (!IsLandmarkVisibleAndPresent( landmark, false, 0.0, false, @@ -446,7 +312,7 @@ namespace mediapipe { continue; } - + const auto &point = landmark; double x = -1; @@ -460,132 +326,74 @@ namespace mediapipe _points[i][2] = z; } __facePts = Tensor(_points, n, m); - } - cvtColor(mat_image_, mat_image_, COLOR_BGRA2RGB); - Mat clone_image = mat_image_.clone(); + Tensor ___facePts = __facePts - 0; - Tensor ___facePts = __facePts - 0; + Tensor _X = __facePts.index(indexes["mediumNoseIndexes"]).index(Range::all(), Range(0, 1)); + Tensor __YZ = __facePts.index(indexes["mediumNoseIndexes"]).index(Range::all(), Range(1, -1)); + Tensor ___YZ = __YZ.concat(Tensor(Mat::ones(9, 1, CV_64F)), 1); + Tensor _b = ___YZ.transpose().matmul(___YZ).inverse().matmul(___YZ.transpose()).matmul(_X); + Tensor _ort = Tensor(Mat::ones(1, 1, CV_64F)).concat(-_b.index(Range(0, 2), Range::all()), 0); + double _D = _b.at({2, 0}) / _ort.norm(); + _ort = _ort / _ort.norm(); - Tensor _X = __facePts.index(indexes["mediumNoseIndexes"]).index(Range::all(), Range(0, 1)); - Tensor __YZ = __facePts.index(indexes["mediumNoseIndexes"]).index(Range::all(), Range(1, -1)); - Tensor ___YZ = 
__YZ.concat(Tensor(Mat::ones(9, 1, CV_64F)), 1); - Tensor _b = ___YZ.transpose().matmul(___YZ).inverse().matmul(___YZ.transpose()).matmul(_X); - Tensor _ort = Tensor(Mat::ones(1, 1, CV_64F)).concat(-_b.index(Range(0, 2), Range::all()), 0); - double _D = _b.at({2, 0}) / _ort.norm(); - _ort = _ort / _ort.norm(); + Tensor _mask; + Tensor _dsts; + vector _indexes; + _indexes = {"cheekbonesIndexes", "noseAllIndexes", "additionalNoseIndexes1", "additionalNoseIndexes2", "additionalNoseIndexes3"}; - Tensor _mask; - Tensor _dsts; - vector _indexes; - _indexes = {"cheekbonesIndexes", "noseAllIndexes", "additionalNoseIndexes1", "additionalNoseIndexes2", "additionalNoseIndexes3"}; + vector coeffs; + coeffs = {alfaCheekbones * 0.2, alfaNose * 0.2, alfaNose * 0.1, alfaNose * 0.05, alfaNose * 0.025}; - vector coeffs; - coeffs = {alfaCheekbones * 0.2, alfaNose * 0.2, alfaNose * 0.1, alfaNose * 0.05, alfaNose * 0.025}; - - _mask = masks["faceOvalIndexes"]; - _dsts = _mask * (___facePts.matmul(_ort) - _D); - ___facePts = ___facePts + _dsts.matmul(_ort.transpose()) * 0.05; - __facePts = __facePts + _dsts.matmul(_ort.transpose()) * 0.05; - - for (int i = 0; i < 5; i++) - { - _mask = masks[_indexes[i]]; + _mask = masks["faceOvalIndexes"]; _dsts = _mask * (___facePts.matmul(_ort) - _D); - ___facePts = ___facePts - coeffs[i] * _dsts.matmul(_ort.transpose()); + ___facePts = ___facePts + _dsts.matmul(_ort.transpose()) * 0.05; + __facePts = __facePts + _dsts.matmul(_ort.transpose()) * 0.05; + + for (int i = 0; i < 5; i++) + { + _mask = masks[_indexes[i]]; + _dsts = _mask * (___facePts.matmul(_ort) - _D); + ___facePts = ___facePts - coeffs[i] * _dsts.matmul(_ort.transpose()); + } + + _D = -1; + Tensor _lipsSupprotPoint = (___facePts.index(11) + ___facePts.index(16)) / 2; + Tensor _ABC = _lipsSupprotPoint.concat(___facePts.index(291), 0).concat(___facePts.index(61), 0).inverse().matmul(Tensor(Mat::ones(3, 1, CV_64F))) * _D; + _D = _D / _ABC.norm(); + _ort = _ABC / _ABC.norm(); + + _indexes = 
{"upperLipCnt", "lowerLipCnt", "widerUpperLipPts1", "widerLowerLipPts1"}; + coeffs = {alfaLips, alfaLips * 0.5, alfaLips * 0.5, alfaLips * 0.25}; + + for (int i = 0; i < 4; i++) + { + _mask = masks[_indexes[i]]; + _dsts = _mask * (___facePts.matmul(_ort) - _D); + ___facePts = ___facePts + coeffs[i] * _dsts.matmul(_ort.transpose()); + } + + Tensor tmp_order = ___facePts.index(_trianglesIndexes); + tmp_order = -tmp_order.index(Range::all(), 2) - tmp_order.index(Range::all(), 5) - tmp_order.index(Range::all(), 8); + tmp_order = tmp_order.transpose(); + vector __order = tmp_order.get_1d_data(); + vector _order = tmp_order.sort_indexes(__order); + + Tensor _src = __facePts.index(_trianglesIndexes).index(_order); + Tensor _dst = ___facePts.index(_trianglesIndexes).index(_order); + // cout << _src.get_dims().size() << endl; + auto srcPtr = absl::make_unique>(_src); + cc->Outputs().Tag(kSrcTensorTag).Add(srcPtr.release(), cc->InputTimestamp()); + + auto dstPtr = absl::make_unique>(_dst); + cc->Outputs().Tag(kDstTensorTag).Add(dstPtr.release(), cc->InputTimestamp()); + + return absl::OkStatus(); } - - _D = -1; - Tensor _lipsSupprotPoint = (___facePts.index(11) + ___facePts.index(16)) / 2; - Tensor _ABC = _lipsSupprotPoint.concat(___facePts.index(291), 0).concat(___facePts.index(61), 0).inverse().matmul(Tensor(Mat::ones(3, 1, CV_64F))) * _D; - _D = _D / _ABC.norm(); - _ort = _ABC / _ABC.norm(); - - _indexes = {"upperLipCnt", "lowerLipCnt", "widerUpperLipPts1", "widerLowerLipPts1"}; - coeffs = {alfaLips, alfaLips * 0.5, alfaLips * 0.5, alfaLips * 0.25}; - - for (int i = 0; i < 4; i++) + else { - _mask = masks[_indexes[i]]; - _dsts = _mask * (___facePts.matmul(_ort) - _D); - ___facePts = ___facePts + coeffs[i] * _dsts.matmul(_ort.transpose()); + return absl::OkStatus(); } - - Tensor tmp_order = ___facePts.index(_trianglesIndexes); - tmp_order = -tmp_order.index(Range::all(), 2) - tmp_order.index(Range::all(), 5) - tmp_order.index(Range::all(), 8); - tmp_order = 
tmp_order.transpose(); - vector __order = tmp_order.get_1d_data(); - vector _order = tmp_order.sort_indexes(__order); - - Tensor _src = __facePts.index(_trianglesIndexes).index(_order); - Tensor _dst = ___facePts.index(_trianglesIndexes).index(_order); - - Mat outImage = mat_image_.clone(); - - for (int i = 0; i < 854; ++i) - { - if (i == 246) - { - int pointer = 0; - } - - Tensor __t1 = _src.index(vector{i}); - Tensor __t2 = _dst.index(vector{i}); - - vector t1; - vector t2; - - for (int i = 0; i < 3; ++i) - { - t1.push_back(Point( - (int)(__t1.at(vector{0, 3 * i})), - (int)(__t1.at(vector{0, 3 * i + 1})))); - t2.push_back(Point( - (int)(__t2.at(vector{0, 3 * i})), - (int)(__t2.at(vector{0, 3 * i + 1})))); - } - - Rect r1 = boundingRect(t1); - Rect r2 = boundingRect(t2); - Point2f srcTri[3]; - Point2f dstTri[3]; - vector t1Rect; - vector t2Rect; - - for (int i = 0; i < 3; ++i) - { - srcTri[i] = Point2f(t1[i].x - r1.x, t1[i].y - r1.y); - dstTri[i] = Point2f(t2[i].x - r2.x, t2[i].y - r2.y); - t1Rect.push_back(Point(t1[i].x - r1.x, t1[i].y - r1.y)); - t2Rect.push_back(Point(t2[i].x - r2.x, t2[i].y - r2.y)); - } - - Mat _dst; - Mat mask = Mat::zeros(r2.height, r2.width, CV_8U); - fillConvexPoly(mask, t2Rect, Scalar(1.0, 1.0, 1.0), 16, 0); - - if (r1.x + r1.width < clone_image.cols && r1.x >= 0 && r1.x + r1.width >= 0 && r1.y >= 0 && r1.y - < clone_image.rows && r1.y + r1.height < clone_image.rows) - { - Mat imgRect = mat_image_(Range(r1.y, r1.y + r1.height), Range(r1.x, r1.x + r1.width)); - Mat warpMat = getAffineTransform(srcTri, dstTri); - warpAffine(imgRect, _dst, warpMat, mask.size()); - - for (int i = r2.y; i < r2.y + r2.height; ++i) - { - for (int j = r2.x; j < r2.x + r2.width; ++j) - { - if ((int)mask.at(i - r2.y, j - r2.x) > 0) - { - outImage.at(i, j) = _dst.at(i - r2.y, j - r2.x); - } - } - } - } - } - cvtColor(outImage, *image_mat, COLOR_RGB2BGRA); - - return absl::OkStatus(); } REGISTER_CALCULATOR(FaceProcessorCalculator); diff --git 
a/mediapipe/graphs/deformation/calculators/warp_affine_calculator.cc b/mediapipe/graphs/deformation/calculators/warp_affine_calculator.cc new file mode 100644 index 000000000..2762c3461 --- /dev/null +++ b/mediapipe/graphs/deformation/calculators/warp_affine_calculator.cc @@ -0,0 +1,285 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "Tensor.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/formats/image_format.pb.h" +#include "mediapipe/framework/formats/image_frame.h" +#include "mediapipe/framework/formats/image_frame_opencv.h" +#include "mediapipe/framework/port/logging.h" +#include "mediapipe/framework/port/opencv_core_inc.h" +#include "mediapipe/framework/port/opencv_imgproc_inc.h" +#include "mediapipe/framework/port/opencv_highgui_inc.h" +#include "mediapipe/framework/port/status.h" + +using namespace cv; +using namespace std; + +namespace mediapipe +{ + namespace + { + constexpr char kImageFrameTag[] = "IMAGE"; + constexpr char kSrcTag[] = "SRC_TENSOR"; + constexpr char kDstTag[] = "DST_TENSOR"; + + inline bool HasImageTag(mediapipe::CalculatorContext *cc) { return false; } + } // namespace + + class WarpAffineCalculator : public CalculatorBase + { + public: + WarpAffineCalculator() = default; + ~WarpAffineCalculator() override = default; + + static absl::Status GetContract(CalculatorContract *cc); + + absl::Status Open(CalculatorContext *cc) override; + 
absl::Status Process(CalculatorContext *cc) override; + absl::Status Close(CalculatorContext *cc) override; + + private: + absl::Status CreateRenderTargetCpu(CalculatorContext *cc, + std::unique_ptr &image_mat, + ImageFormat::Format *target_format); + + absl::Status RenderToCpu( + CalculatorContext *cc, const ImageFormat::Format &target_format, + uchar *data_image, std::unique_ptr &image_mat); + + absl::Status AffineTransform(CalculatorContext *cc, std::unique_ptr &image_mat, Tensor _src, Tensor _dst); + + bool image_frame_available_ = false; + std::unique_ptr image_mat; + }; + + absl::Status WarpAffineCalculator::GetContract(CalculatorContract *cc) + { + RET_CHECK(cc->Inputs().HasTag(kImageFrameTag)); + + if (cc->Inputs().HasTag(kImageFrameTag)) + { + cc->Inputs().Tag(kImageFrameTag).Set(); + } + if (cc->Inputs().HasTag(kSrcTag)) + { + cc->Inputs().Tag(kSrcTag).Set>(); + } + if (cc->Inputs().HasTag(kDstTag)) + { + cc->Inputs().Tag(kDstTag).Set>(); + } + if (cc->Outputs().HasTag(kImageFrameTag)) + { + cc->Outputs().Tag(kImageFrameTag).Set(); + } + + return absl::OkStatus(); + } + + absl::Status WarpAffineCalculator::Open(CalculatorContext *cc) + { + cc->SetOffset(TimestampDiff(0)); + + if (cc->Inputs().HasTag(kImageFrameTag) || HasImageTag(cc)) + { + image_frame_available_ = true; + } + + return absl::OkStatus(); + } + + absl::Status WarpAffineCalculator::Process(CalculatorContext *cc) + { + if (cc->Inputs().Tag(kImageFrameTag).IsEmpty()) + { + return absl::OkStatus(); + } + + ImageFormat::Format target_format; + + MP_RETURN_IF_ERROR(CreateRenderTargetCpu(cc, image_mat, &target_format)); + + if (!cc->Inputs().Tag(kSrcTag).IsEmpty() && !cc->Inputs().Tag(kDstTag).IsEmpty()) + { + const Tensor _src = cc->Inputs().Tag(kSrcTag).Get>(); + const Tensor _dst = cc->Inputs().Tag(kDstTag).Get>(); + MP_RETURN_IF_ERROR(AffineTransform(cc, image_mat, _src, _dst)); + } + + // Copy the rendered image to output. 
+    uchar *image_mat_ptr = image_mat->data;
+    MP_RETURN_IF_ERROR(RenderToCpu(cc, target_format, image_mat_ptr, image_mat));
+
+    return absl::OkStatus();
+  }
+
+  // Nothing to release here; always reports success.
+  absl::Status WarpAffineCalculator::Close(CalculatorContext *cc)
+  {
+    return absl::OkStatus();
+  }
+
+  // Copies the pixels at `data_image` into a freshly allocated ImageFrame of
+  // `target_format`, sized image_mat->cols x image_mat->rows, and emits it on
+  // the kImageFrameTag output stream at the input timestamp. Nothing is
+  // emitted when that output stream is not connected.
+  //
+  // NOTE(review): CopyPixelData is given no row stride, so `data_image` is
+  // presumably expected to be tightly packed -- confirm for non-continuous Mats.
+  absl::Status WarpAffineCalculator::RenderToCpu(
+      CalculatorContext *cc, const ImageFormat::Format &target_format,
+      uchar *data_image, std::unique_ptr<cv::Mat> &image_mat)
+  {
+    auto output_frame = absl::make_unique<ImageFrame>(
+        target_format, image_mat->cols, image_mat->rows);
+
+    output_frame->CopyPixelData(target_format, image_mat->cols, image_mat->rows, data_image,
+                                ImageFrame::kDefaultAlignmentBoundary);
+
+    if (cc->Outputs().HasTag(kImageFrameTag))
+    {
+      cc->Outputs()
+          .Tag(kImageFrameTag)
+          .Add(output_frame.release(), cc->InputTimestamp());
+    }
+
+    return absl::OkStatus();
+  }
+
+  // Allocates the working cv::Mat in `image_mat` and reports the format of the
+  // frame that will be produced through `target_format`.
+  //
+  // When the member flag image_frame_available_ is set, the input ImageFrame is
+  // copied: SRGBA, SRGB and SBGR keep their format, GRAY8 is expanded to
+  // 3-channel RGB, and any other format yields absl::UnknownError.
+  // Otherwise an all-255 4-channel canvas is created and reported as SRGBA.
+  absl::Status WarpAffineCalculator::CreateRenderTargetCpu(
+      CalculatorContext *cc, std::unique_ptr<cv::Mat> &image_mat,
+      ImageFormat::Format *target_format)
+  {
+    if (image_frame_available_)
+    {
+      const auto &input_frame =
+          cc->Inputs().Tag(kImageFrameTag).Get<ImageFrame>();
+
+      int target_mat_type;
+      switch (input_frame.Format())
+      {
+      case ImageFormat::SRGBA:
+        *target_format = ImageFormat::SRGBA;
+        target_mat_type = CV_8UC4;
+        break;
+      case ImageFormat::SRGB:
+        *target_format = ImageFormat::SRGB;
+        target_mat_type = CV_8UC3;
+        break;
+      case ImageFormat::SBGR:
+        *target_format = ImageFormat::SBGR;
+        target_mat_type = CV_8UC3;
+        break;
+      case ImageFormat::GRAY8:
+        // Grayscale input is promoted to RGB below, so report SRGB.
+        *target_format = ImageFormat::SRGB;
+        target_mat_type = CV_8UC3;
+        break;
+      default:
+        return absl::UnknownError("Unexpected image frame format.");
+        break;
+      }
+
+      image_mat = absl::make_unique<cv::Mat>(
+          input_frame.Height(), input_frame.Width(), target_mat_type);
+
+      auto input_mat = formats::MatView(&input_frame);
+
+      if (input_frame.Format() == ImageFormat::GRAY8)
+      {
+        cv::Mat rgb_mat;
+        cv::cvtColor(input_mat, rgb_mat, CV_GRAY2RGB);
+        rgb_mat.copyTo(*image_mat);
+      }
+      else
+      {
+        input_mat.copyTo(*image_mat);
+      }
+    }
+    else
+    {
+      // cv::Mat takes (rows, cols): this canvas is 1920 rows by 1080 columns.
+      image_mat = absl::make_unique<cv::Mat>(
+          1920, 1080, CV_8UC4,
+          cv::Scalar(cv::Scalar::all(255)));
+      *target_format = ImageFormat::SRGBA;
+    }
+
+    return absl::OkStatus();
+  }
+
+  // Warps `image_mat` in place, one triangle at a time.
+  //
+  // For each triangle index, row `i` of `_src` and `_dst` supplies three
+  // vertices read as (x, y) from columns {0,1}, {3,4} and {6,7}, truncated to
+  // int. The pixels inside the source triangle's bounding box are mapped with
+  // the affine transform that sends the source triangle onto the destination
+  // triangle, and the pixels covered by the destination triangle are written
+  // into a copy of the frame. The copy replaces `image_mat` at the end, so every
+  // triangle samples the unmodified input.
+  //
+  // Triangles whose source bounding box is not fully inside the frame are
+  // skipped. Destination boxes are clipped to the frame, so a deformed triangle
+  // that leaves the image can no longer write out of bounds.
+  //
+  // `cc` is unused. Always returns absl::OkStatus().
+  absl::Status WarpAffineCalculator::AffineTransform(CalculatorContext *cc, std::unique_ptr<cv::Mat> &image_mat, Tensor<double> _src, Tensor<double> _dst)
+  {
+    // Number of tensor rows consumed; value carried over unchanged.
+    // NOTE(review): presumably the triangle count of the face mesh produced
+    // upstream -- confirm against the producer of the SRC/DST tensors.
+    constexpr int kTriangleCount = 854;
+
+    const cv::Mat &source = *image_mat;
+    const cv::Rect frame(0, 0, source.cols, source.rows);
+    cv::Mat out = source.clone();
+
+    for (int tri = 0; tri < kTriangleCount; ++tri)
+    {
+      auto src_row = _src.index(std::vector<int>{tri});
+      auto dst_row = _dst.index(std::vector<int>{tri});
+
+      std::vector<cv::Point> src_pts;
+      std::vector<cv::Point> dst_pts;
+      for (int k = 0; k < 3; ++k)
+      {
+        src_pts.push_back(cv::Point(
+            static_cast<int>(src_row.at(std::vector<int>{0, 3 * k})),
+            static_cast<int>(src_row.at(std::vector<int>{0, 3 * k + 1}))));
+        dst_pts.push_back(cv::Point(
+            static_cast<int>(dst_row.at(std::vector<int>{0, 3 * k})),
+            static_cast<int>(dst_row.at(std::vector<int>{0, 3 * k + 1}))));
+      }
+
+      const cv::Rect r1 = cv::boundingRect(src_pts);
+      const cv::Rect r2 = cv::boundingRect(dst_pts);
+
+      // The source patch must lie entirely inside the frame. A box that ends
+      // exactly on the last row/column is a valid ROI and is accepted.
+      if ((r1 & frame) != r1)
+      {
+        continue;
+      }
+
+      // Only the part of the destination box that is inside the frame can be
+      // written.
+      const cv::Rect dst_roi = r2 & frame;
+      if (dst_roi.area() == 0)
+      {
+        continue;
+      }
+
+      // Triangle vertices relative to their own bounding boxes.
+      cv::Point2f src_tri[3];
+      cv::Point2f dst_tri[3];
+      std::vector<cv::Point> dst_local;
+      for (int k = 0; k < 3; ++k)
+      {
+        src_tri[k] = cv::Point2f(src_pts[k].x - r1.x, src_pts[k].y - r1.y);
+        dst_tri[k] = cv::Point2f(dst_pts[k].x - r2.x, dst_pts[k].y - r2.y);
+        dst_local.push_back(cv::Point(dst_pts[k].x - r2.x, dst_pts[k].y - r2.y));
+      }
+
+      // Non-zero inside the destination triangle; 16 is the anti-aliased line
+      // type (cv::LINE_AA), 0 is the fixed-point shift.
+      cv::Mat mask = cv::Mat::zeros(r2.height, r2.width, CV_8U);
+      cv::fillConvexPoly(mask, dst_local, cv::Scalar(1.0, 1.0, 1.0), 16, 0);
+
+      cv::Mat warped;
+      const cv::Mat warp_mat = cv::getAffineTransform(src_tri, dst_tri);
+      cv::warpAffine(source(r1), warped, warp_mat, mask.size());
+
+      // Same region as dst_roi, expressed in the patch's own coordinates.
+      const cv::Rect patch_roi(dst_roi.x - r2.x, dst_roi.y - r2.y,
+                               dst_roi.width, dst_roi.height);
+      warped(patch_roi).copyTo(out(dst_roi), mask(patch_roi));
+    }
+    out.copyTo(*image_mat);
+
+    return absl::OkStatus();
+  }
+  REGISTER_CALCULATOR(WarpAffineCalculator);
+} // namespace mediapipe
\ No newline at end of file
diff --git a/mediapipe/graphs/deformation/deformation_cpu.pbtxt b/mediapipe/graphs/deformation/deformation_cpu.pbtxt index c4f786da8..11ac2924b 100644 --- a/mediapipe/graphs/deformation/deformation_cpu.pbtxt +++ b/mediapipe/graphs/deformation/deformation_cpu.pbtxt @@ -6,6 +6,13 @@ input_stream: "input_video" # Output image with rendered results. (ImageFrame) output_stream: "output_video" +profiler_config { + trace_enabled: true + enable_profiler: true + trace_log_interval_count: 200 + trace_log_path: "/home/mslight/Work/clone/mediapipe/mediapipe/logs/deformation/" +} + node { calculator: "FlowLimiterCalculator" input_stream: "input_video" @@ -40,9 +47,36 @@ node { output_stream: "LANDMARKS:multi_face_landmarks" } +node { + calculator: "ColorConvertCalculator" + input_stream: "RGBA_IN:throttled_input_video" + output_stream: "BGR_OUT:throttled_input_video_bgr" +} + +node { + calculator: "ImagePropertiesCalculator" + input_stream: "IMAGE:throttled_input_video" + output_stream: "SIZE:size" +} + node { calculator: "FaceProcessorCalculator" input_stream: "NORM_LANDMARKS:multi_face_landmarks" - input_stream: "IMAGE:throttled_input_video" - output_stream: "IMAGE:output_video" + input_stream: "SIZE:size" + output_stream: "SRC_TENSOR:src" + output_stream: "DST_TENSOR:dst" +} + +node { + calculator: "WarpAffineCalculator" + input_stream: "IMAGE:throttled_input_video_bgr" + input_stream: "SRC_TENSOR:src" + input_stream: "DST_TENSOR:dst" + output_stream: "IMAGE:output_video_bgr" +} + +node{ + calculator: "ColorConvertCalculator" + input_stream: "BGR_IN:output_video_bgr" + output_stream: "RGBA_OUT:output_video" } diff --git a/mediapipe/graphs/deformation/deformation_mobile.pbtxt
b/mediapipe/graphs/deformation/deformation_mobile.pbtxt index af3d5bc62..8a8abab6c 100644 --- a/mediapipe/graphs/deformation/deformation_mobile.pbtxt +++ b/mediapipe/graphs/deformation/deformation_mobile.pbtxt @@ -44,16 +44,43 @@ node { output_stream: "LANDMARKS:multi_face_landmarks" } +node { + calculator: "ColorConvertCalculator" + input_stream: "RGBA_IN:throttled_input_video_cpu" + output_stream: "BGR_OUT:throttled_input_video_bgr" +} + +node { + calculator: "ImagePropertiesCalculator" + input_stream: "IMAGE_GPU:throttled_input_video" + output_stream: "SIZE:size" +} + node { calculator: "FaceProcessorCalculator" input_stream: "NORM_LANDMARKS:multi_face_landmarks" - input_stream: "IMAGE:throttled_input_video_cpu" - output_stream: "IMAGE:out_image_frame" + input_stream: "SIZE:size" + output_stream: "SRC_TENSOR:src" + output_stream: "DST_TENSOR:dst" +} + +node { + calculator: "WarpAffineCalculator" + input_stream: "IMAGE:throttled_input_video_bgr" + input_stream: "SRC_TENSOR:src" + input_stream: "DST_TENSOR:dst" + output_stream: "IMAGE:output_video_bgr" +} + +node{ + calculator: "ColorConvertCalculator" + input_stream: "BGR_IN:output_video_bgr" + output_stream: "RGBA_OUT:output_video_cpu" } # Defines side packets for further use in the graph. node { calculator: "ImageFrameToGpuBufferCalculator" - input_stream: "out_image_frame" + input_stream: "output_video_cpu" output_stream: "output_video" }