From 0c20ce50bd4ab449466bf9760c5d0f49dc9de106 Mon Sep 17 00:00:00 2001 From: mslight Date: Sat, 2 Jul 2022 01:10:59 +0400 Subject: [PATCH] face align ready --- .../beauty/draw_lipstick_calculator.cc | 31 +- .../beauty/smooth_face_calculator.cc | 41 +-- .../beauty/whiten_teeth_calculator.cc | 26 +- mediapipe/calculators/image_style/BUILD | 1 + .../image_style/fast_utils_calculator.cc | 339 ++++++++++++------ .../landmarks/landmarks_to_mask_calculator.cc | 311 ++++++++-------- .../landmarks/landmarks_to_mask_calculator.h | 17 +- .../google/mediapipe/apps/imagestylegpu/BUILD | 4 + mediapipe/graphs/image_style/BUILD | 15 +- .../graphs/image_style/image_style_cpu.pbtxt | 72 ++-- .../graphs/image_style/image_style_gpu.pbtxt | 94 ++++- 11 files changed, 573 insertions(+), 378 deletions(-) diff --git a/mediapipe/calculators/beauty/draw_lipstick_calculator.cc b/mediapipe/calculators/beauty/draw_lipstick_calculator.cc index a898e6da3..971babaee 100644 --- a/mediapipe/calculators/beauty/draw_lipstick_calculator.cc +++ b/mediapipe/calculators/beauty/draw_lipstick_calculator.cc @@ -116,7 +116,7 @@ namespace mediapipe std::string tag = tag_and_index.first; if (tag == kMaskTag) { - cc->Inputs().Get(id).Set>(); + cc->Inputs().Get(id).Set>>(); } else if (tag.empty()) { @@ -180,26 +180,12 @@ namespace mediapipe MP_RETURN_IF_ERROR(CreateRenderTargetCpu(cc, image_mat, &target_format)); } - // Render streams onto render target. 
- for (CollectionItemId id = cc->Inputs().BeginId(); id < cc->Inputs().EndId(); - ++id) + const std::vector> &mask_vec = + cc->Inputs().Tag(kMaskTag).Get>>(); + if (mask_vec.size() > 0) { - auto tag_and_index = cc->Inputs().TagAndIndexFromId(id); - std::string tag = tag_and_index.first; - if (!tag.empty() && tag != kMaskTag) - { - continue; - } - if (cc->Inputs().Get(id).IsEmpty()) - { - continue; - } - - RET_CHECK_EQ(kMaskTag, tag); - const std::unordered_map &mask_vec = - cc->Inputs().Get(id).Get>(); - if (mask_vec.size() > 1) - MP_RETURN_IF_ERROR(DrawLipstick(cc, image_mat, &target_format, mask_vec)); + for (auto mask : mask_vec) + MP_RETURN_IF_ERROR(DrawLipstick(cc, image_mat, &target_format, mask)); } // Copy the rendered image to output. @@ -308,8 +294,8 @@ namespace mediapipe //__android_log_print(ANDROID_LOG_ERROR, "OVERSEAS", "%d ", mask_vec[1].size().height); - upper_lips_mask=mask_vec.find("UPPER_LIP")->second; - lower_lips_mask= mask_vec.find("LOWER_LIP")->second; + upper_lips_mask = mask_vec.find("UPPER_LIP")->second; + lower_lips_mask = mask_vec.find("LOWER_LIP")->second; spec_lips_mask = upper_lips_mask + lower_lips_mask; @@ -385,7 +371,6 @@ namespace mediapipe cv::cvtColor(masked_lips_crop, slice_gray, cv::COLOR_RGB2GRAY); masked_lips_crop.copyTo(slice, slice_gray); - } return absl::OkStatus(); diff --git a/mediapipe/calculators/beauty/smooth_face_calculator.cc b/mediapipe/calculators/beauty/smooth_face_calculator.cc index f63abad09..4f96020c6 100644 --- a/mediapipe/calculators/beauty/smooth_face_calculator.cc +++ b/mediapipe/calculators/beauty/smooth_face_calculator.cc @@ -16,6 +16,7 @@ #include #include +#include //#include #include @@ -119,7 +120,7 @@ namespace mediapipe std::string tag = tag_and_index.first; if (tag == kMaskTag) { - cc->Inputs().Get(id).Set>(); + cc->Inputs().Get(id).Set>>(); } else if (tag.empty()) { @@ -129,7 +130,7 @@ namespace mediapipe if (tag == kFaceBoxTag) { - cc->Inputs().Get(id).Set>(); + cc->Inputs().Get(id).Set>>(); 
} } @@ -178,7 +179,6 @@ namespace mediapipe { return absl::OkStatus(); } - if (cc->Inputs().HasTag(kFaceBoxTag) && cc->Inputs().Tag(kFaceBoxTag).IsEmpty()) { @@ -194,32 +194,18 @@ namespace mediapipe MP_RETURN_IF_ERROR(CreateRenderTargetCpu(cc, image_mat, &target_format)); } - // Render streams onto render target. - for (CollectionItemId id = cc->Inputs().BeginId(); id < cc->Inputs().EndId(); - ++id) + const std::vector> &mask_vec = + cc->Inputs().Tag(kMaskTag).Get>>(); + + const std::vector> &face_box = + cc->Inputs().Tag(kFaceBoxTag).Get>>(); + + if (mask_vec.size() > 0 && face_box.size() > 0) { - auto tag_and_index = cc->Inputs().TagAndIndexFromId(id); - std::string tag = tag_and_index.first; - if (!tag.empty() && (tag != kMaskTag || tag != kFaceBoxTag)) - { - continue; - } - if (cc->Inputs().Get(id).IsEmpty()) - { - continue; - } - - RET_CHECK_EQ(kMaskTag, tag); - const std::unordered_map &mask_vec = - cc->Inputs().Get(id).Get>(); - - RET_CHECK_EQ(kFaceBoxTag, tag); - const std::tuple &face_box = - cc->Inputs().Get(id).Get>(); - - if (mask_vec.size() > 1) - MP_RETURN_IF_ERROR(SmoothFace(cc, image_mat, &target_format, mask_vec, face_box)); + for (int i = 0; i < mask_vec.size(); i++) + MP_RETURN_IF_ERROR(SmoothFace(cc, image_mat, &target_format, mask_vec[i], face_box[i])); } + // Copy the rendered image to output. 
uchar *image_mat_ptr = image_mat->data; MP_RETURN_IF_ERROR(RenderToCpu(cc, target_format, image_mat_ptr, image_mat)); @@ -315,6 +301,7 @@ namespace mediapipe cv::Mat SmoothFaceCalculator::predict_forehead_mask(std::unique_ptr &image_mat, const std::unordered_map &mask_vec, double face_box_min_y) { + cv::Mat mat_image__ = *image_mat.get(); int image_width_ = image_mat->cols; int image_height_ = image_mat->rows; diff --git a/mediapipe/calculators/beauty/whiten_teeth_calculator.cc b/mediapipe/calculators/beauty/whiten_teeth_calculator.cc index 34eaafb17..5881ac89b 100644 --- a/mediapipe/calculators/beauty/whiten_teeth_calculator.cc +++ b/mediapipe/calculators/beauty/whiten_teeth_calculator.cc @@ -114,7 +114,7 @@ namespace mediapipe std::string tag = tag_and_index.first; if (tag == kMaskTag) { - cc->Inputs().Get(id).Set>(); + cc->Inputs().Get(id).Set>>(); } else if (tag.empty()) { @@ -178,26 +178,12 @@ namespace mediapipe MP_RETURN_IF_ERROR(CreateRenderTargetCpu(cc, image_mat, &target_format)); } - // Render streams onto render target. - for (CollectionItemId id = cc->Inputs().BeginId(); id < cc->Inputs().EndId(); - ++id) + const std::vector> &mask_vec = + cc->Inputs().Tag(kMaskTag).Get>>(); + if (mask_vec.size() > 0) { - auto tag_and_index = cc->Inputs().TagAndIndexFromId(id); - std::string tag = tag_and_index.first; - if (!tag.empty() && tag != kMaskTag) - { - continue; - } - if (cc->Inputs().Get(id).IsEmpty()) - { - continue; - } - - RET_CHECK_EQ(kMaskTag, tag); - const std::unordered_map &mask_vec = - cc->Inputs().Get(id).Get>(); - if (mask_vec.size() > 1) - MP_RETURN_IF_ERROR(WhitenTeeth(cc, image_mat, &target_format, mask_vec)); + for (auto mask : mask_vec) + MP_RETURN_IF_ERROR(WhitenTeeth(cc, image_mat, &target_format, mask)); } // Copy the rendered image to output. 
diff --git a/mediapipe/calculators/image_style/BUILD b/mediapipe/calculators/image_style/BUILD index 06ff1b455..c4f4325f2 100644 --- a/mediapipe/calculators/image_style/BUILD +++ b/mediapipe/calculators/image_style/BUILD @@ -31,6 +31,7 @@ cc_library( "//mediapipe/framework/formats:image_frame", "//mediapipe/framework/formats:image_frame_opencv", "//mediapipe/framework/formats:video_stream_header", + "//mediapipe/framework/formats:landmark_cc_proto", "//mediapipe/framework/port:logging", "//mediapipe/framework/port:opencv_core", "//mediapipe/framework/port:opencv_imgproc", diff --git a/mediapipe/calculators/image_style/fast_utils_calculator.cc b/mediapipe/calculators/image_style/fast_utils_calculator.cc index 929b87c0f..257612cbe 100644 --- a/mediapipe/calculators/image_style/fast_utils_calculator.cc +++ b/mediapipe/calculators/image_style/fast_utils_calculator.cc @@ -16,7 +16,6 @@ #include #include -#include #include //#include @@ -29,21 +28,20 @@ #include "mediapipe/framework/formats/image_frame.h" #include "mediapipe/framework/formats/image_frame_opencv.h" #include "mediapipe/framework/formats/video_stream_header.h" +#include "mediapipe/framework/formats/landmark.pb.h" #include "mediapipe/framework/port/logging.h" #include "mediapipe/framework/port/opencv_core_inc.h" #include "mediapipe/framework/port/opencv_imgproc_inc.h" +#include "mediapipe/framework/port/opencv_highgui_inc.h" #include "mediapipe/framework/port/status.h" -#include "mediapipe/util/annotation_renderer.h" -#include "mediapipe/util/render_data.pb.h" #include "mediapipe/framework/port/logging.h" #include "mediapipe/framework/port/vector.h" -#include "mediapipe/util/color.pb.h" namespace mediapipe { namespace { - static const std::vector FFHQ_NORM_LM = { + static const std::vector FFHQ_NORM_LM = { {638.68525475 / 1024, 486.24604922 / 1024}, {389.31496114 / 1024, 485.8921848 / 1024}, {513.67979275 / 1024, 620.8915371 / 1024}, @@ -52,6 +50,8 @@ namespace mediapipe constexpr char kImageFrameTag[] = 
"IMAGE"; constexpr char kVectorTag[] = "VECTOR"; + constexpr char kLandmarksTag[] = "LANDMARKS"; + constexpr char kNormLandmarksTag[] = "NORM_LANDMARKS"; std::tuple _normalized_to_pixel_coordinates(float normalized_x, float normalized_y, int image_width, int image_height) @@ -63,8 +63,8 @@ namespace mediapipe return {x_px, y_px}; }; - static const std::unordered_set FACEMESH_FACE_OVAL = - {{10, 338}, {338, 297}, {297, 332}, {332, 284}, {284, 251}, {251, 389}, {389, 356}, {356, 454}, {454, 323}, {323, 361}, {361, 288}, {288, 397}, {397, 365}, {365, 379}, {379, 378}, {378, 400}, {400, 377}, {377, 152}, {152, 148}, {148, 176}, {176, 149}, {149, 150}, {150, 136}, {136, 172}, {172, 58}, {58, 132}, {132, 93}, {93, 234}, {234, 127}, {127, 162}, {162, 21}, {21, 54}, {54, 103}, {103, 67}, {67, 109}, {109, 10}}; + static const std::vector FACEMESH_FACE_OVAL{ + {10, 338}, {338, 297}, {297, 332}, {332, 284}, {284, 251}, {251, 389}, {389, 356}, {356, 454}, {454, 323}, {323, 361}, {361, 288}, {288, 397}, {397, 365}, {365, 379}, {379, 378}, {378, 400}, {400, 377}, {377, 152}, {152, 148}, {148, 176}, {176, 149}, {149, 150}, {150, 136}, {136, 172}, {172, 58}, {58, 132}, {132, 93}, {93, 234}, {234, 127}, {127, 162}, {162, 21}, {21, 54}, {54, 103}, {103, 67}, {67, 109}, {109, 10}}; enum { @@ -77,8 +77,6 @@ namespace mediapipe size_t RoundUp(size_t n, size_t m) { return ((n + m - 1) / m) * m; } // NOLINT inline bool HasImageTag(mediapipe::CalculatorContext *cc) { return false; } - using Point = RenderAnnotation::Point; - bool NormalizedtoPixelCoordinates(double normalized_x, double normalized_y, int image_width, int image_height, int *x_px, int *y_px) @@ -99,6 +97,115 @@ namespace mediapipe return true; } + + template + bool IsLandmarkVisibleAndPresent(const LandmarkType &landmark, + bool utilize_visibility, + float visibility_threshold, + bool utilize_presence, + float presence_threshold) + { + if (utilize_visibility && landmark.has_visibility() && + landmark.visibility() < 
visibility_threshold) + { + return false; + } + if (utilize_presence && landmark.has_presence() && + landmark.presence() < presence_threshold) + { + return false; + } + return true; + } + + std::tuple LandmarkTransform( + cv::Mat &source, + cv::Mat &target, float eps = 1e-7) + { + cv::Mat source_mean_mat, target_mean_mat, source1ch, target1ch; + + cv::reduce(source, source_mean_mat, 0, CV_REDUCE_AVG, CV_32F); + cv::reduce(target, target_mean_mat, 0, CV_REDUCE_AVG, CV_32F); + + source -= {source_mean_mat.at(0, 0), source_mean_mat.at(0, 1)}; + target -= {target_mean_mat.at(0, 0), target_mean_mat.at(0, 1)}; + + source1ch = source.reshape(1, 5); + target1ch = target.reshape(1, 5); + + cv::Mat source_std_mat, target_std_mat; + cv::meanStdDev(source1ch, cv::noArray(), source_std_mat); + cv::meanStdDev(target1ch, cv::noArray(), target_std_mat); + source_std_mat.convertTo(source_std_mat, CV_32F); + target_std_mat.convertTo(target_std_mat, CV_32F); + + float source_std = source_std_mat.at(0, 0); + float target_std = target_std_mat.at(0, 0); + + source /= source_std + eps; + target /= target_std + eps; + + cv::Mat u, vt, rotation, w; + + source1ch = source.reshape(1, 5); + target1ch = target.reshape(1, 5); + + //std::cout << "R (numpy) = " << std::endl << cv::format(source, cv::Formatter::FMT_NUMPY) << std::endl << std::endl; + + cv::SVD::compute(source1ch.t() * target1ch, w, u, vt); + + rotation = (u * vt).t(); + + float scale = target_std / source_std + eps; + cv::Mat translation; + + cv::subtract(target_mean_mat.reshape(1, 2), scale * rotation * source_mean_mat.reshape(1, 2), translation); + + return std::make_tuple(scale, rotation, translation); + } + + std::tuple Crop( + std::unique_ptr &image_mat, + std::tuple roi, float extend = 1.0, + bool square = false, float shift_x = 0.0, float shift_y = 0.0) + { + cv::Mat image = *image_mat.get(); + + int width = image_mat->cols; + int height = image_mat->rows; + + auto &[left, top, right, bottom] = roi; + int y = 
static_cast((bottom + top) / 2); + int x = static_cast((right + left) / 2); + + int size_y = static_cast(extend * (bottom - top) / 2); + int size_x = static_cast(extend * (right - left) / 2); + + if (square) + size_x = size_y = std::max(size_x, size_y); + + x += static_cast(shift_x * size_x); + y += static_cast(shift_y * size_y); + + roi = std::make_tuple( + std::max(0, x - size_x), + std::max(0, y - size_y), + std::min(x + size_x, width), + std::min(y + size_y, height)); + + image = image(cv::Range(bottom, top), cv::Range(left, right)); + + if (square) + cv::copyMakeBorder( + image, image, std::abs(std::min(0, y - size_y)), + std::abs(std::min(0, height - y - size_y)), + std::abs(std::min(0, x - size_x)), + std::abs(std::min(0, width - x - size_x)), + cv::BORDER_CONSTANT); + + return roi; + } + } // namespace class FastUtilsCalculator : public CalculatorBase @@ -125,30 +232,26 @@ namespace mediapipe absl::Status Call(CalculatorContext *cc, std::unique_ptr &image_mat, - ImageFormat::Format *target_format, - const RenderData &render_data, - std::unordered_map &all_masks); + ImageFormat::Format &target_format, + std::vector> &lms_out); + + absl::Status Align(std::unique_ptr &image_mat, + cv::Mat source_lm, + cv::Mat target_lm = cv::Mat(FFHQ_NORM_LM), cv::Size size = cv::Size(256, 256), + float extend = NULL, std::tuple roi = {NULL, NULL, NULL, NULL}); // Indicates if image frame is available as input. bool image_frame_available_ = false; - std::unordered_map> index_dict = { + std::vector>> index_dict = { {"leftEye", {384, 385, 386, 387, 388, 390, 263, 362, 398, 466, 373, 374, 249, 380, 381, 382}}, {"rightEye", {160, 33, 161, 163, 133, 7, 173, 144, 145, 246, 153, 154, 155, 157, 158, 159}}, {"nose", {4}}, - {"lips", {0, 13, 14, 17, 84}}, + //{"lips", {0, 13, 14, 17, 84}}, {"leftLips", {61, 146}}, {"rightLips", {291, 375}}, }; - int width_ = 0; - int height_ = 0; - int width_canvas_ = 0; // Size of overlay drawing texture canvas. 
- int height_canvas_ = 0; - - int max_num_faces = 1; - bool refine_landmarks = True; - double min_detection_confidence = 0.5; - double min_tracking_confidence = 0.5; + std::unique_ptr image_mat; }; REGISTER_CALCULATOR(FastUtilsCalculator); @@ -162,6 +265,23 @@ namespace mediapipe CHECK(cc->Outputs().HasTag(kImageFrameTag)); } + RET_CHECK(cc->Inputs().HasTag(kLandmarksTag) || + cc->Inputs().HasTag(kNormLandmarksTag)) + << "None of the input streams are provided."; + RET_CHECK(!(cc->Inputs().HasTag(kLandmarksTag) && + cc->Inputs().HasTag(kNormLandmarksTag))) + << "Can only one type of landmark can be taken. Either absolute or " + "normalized landmarks."; + + if (cc->Inputs().HasTag(kLandmarksTag)) + { + cc->Inputs().Tag(kLandmarksTag).Set>(); + } + if (cc->Inputs().HasTag(kNormLandmarksTag)) + { + cc->Inputs().Tag(kNormLandmarksTag).Set>(); + } + if (cc->Outputs().HasTag(kImageFrameTag)) { cc->Outputs().Tag(kImageFrameTag).Set(); @@ -202,53 +322,32 @@ namespace mediapipe { return absl::OkStatus(); } + if (cc->Inputs().HasTag(kLandmarksTag) && + cc->Inputs().Tag(kLandmarksTag).IsEmpty()) + { + return absl::OkStatus(); + } + if (cc->Inputs().HasTag(kNormLandmarksTag) && + cc->Inputs().Tag(kNormLandmarksTag).IsEmpty()) + { + return absl::OkStatus(); + } // Initialize render target, drawn with OpenCV. - std::unique_ptr image_mat; ImageFormat::Format target_format; - std::unordered_map all_masks; + std::vector> lms_out; - if (cc->Outputs().HasTag(kImageFrameTag)) - { - MP_RETURN_IF_ERROR(CreateRenderTargetCpu(cc, image_mat, &target_format)); - } + MP_RETURN_IF_ERROR(CreateRenderTargetCpu(cc, image_mat, &target_format)); - // Render streams onto render target. 
- for (CollectionItemId id = cc->Inputs().BeginId(); id < cc->Inputs().EndId(); - ++id) - { - auto tag_and_index = cc->Inputs().TagAndIndexFromId(id); - std::string tag = tag_and_index.first; - if (!tag.empty() && tag != kVectorTag) - { - continue; - } - if (cc->Inputs().Get(id).IsEmpty()) - { - continue; - } - if (tag.empty()) - { - // Empty tag defaults to accepting a single object of RenderData type. - const RenderData &render_data = cc->Inputs().Get(id).Get(); - MP_RETURN_IF_ERROR(Call(cc, image_mat, &target_format, render_data, all_masks)); - } - else - { - RET_CHECK_EQ(kVectorTag, tag); - const std::vector &render_data_vec = - cc->Inputs().Get(id).Get>(); - for (const RenderData &render_data : render_data_vec) - { - MP_RETURN_IF_ERROR(Call(cc, image_mat, &target_format, render_data, all_masks)); - } - } - } + MP_RETURN_IF_ERROR(Call(cc, image_mat, target_format, lms_out)); + + cv::Mat source_lm = cv::Mat(lms_out[0]); + + MP_RETURN_IF_ERROR(Align(image_mat, source_lm)); - // Copy the rendered image to output. 
uchar *image_mat_ptr = image_mat->data; MP_RETURN_IF_ERROR(RenderToCpu(cc, target_format, image_mat_ptr, image_mat)); - + return absl::OkStatus(); } @@ -263,7 +362,7 @@ namespace mediapipe { cv::Mat mat_image_ = *image_mat.get(); - + auto output_frame = absl::make_unique( target_format, mat_image_.cols, mat_image_.rows); @@ -339,61 +438,99 @@ namespace mediapipe absl::Status FastUtilsCalculator::Call(CalculatorContext *cc, std::unique_ptr &image_mat, - ImageFormat::Format *target_format, - const RenderData &render_data, - std::unordered_map &all_masks) + ImageFormat::Format &target_format, + std::vector> &lms_out) { cv::Mat mat_image_ = *image_mat.get(); int image_width_ = image_mat->cols; int image_height_ = image_mat->rows; - cv::Mat mask; - std::vector kps, landmarks; - std::vector> lms_out; - - int c = 0; - - for (const auto &[key, value] : index_dict) + std::vector kps, landmarks; + + if (cc->Inputs().HasTag(kNormLandmarksTag)) { - for (auto order : value) - { - c = 0; - for (auto &annotation : render_data.render_annotations()) + const std::vector &landmarkslist = + cc->Inputs().Tag(kNormLandmarksTag).Get>(); + + std::vector point_array; + for (const auto &face : landmarkslist) + { + for (const auto &[key, value] : index_dict) { - if (annotation.data_case() == RenderAnnotation::kPoint) + for (auto order : value) { - if (order == c) + + const NormalizedLandmark &landmark = face.landmark(order); + + if (!IsLandmarkVisibleAndPresent( + landmark, false, + 0.0, false, + 0.0)) { - const auto &point = annotation.point(); - int x = -1; - int y = -1; - CHECK(NormalizedtoPixelCoordinates(point.x(), point.y(), image_width_, - image_height_, &x, &y)); - kps.push_back(cv::Point(x, y)); + continue; } - c += 1; + + const auto &point = landmark; + int x = -1; + int y = -1; + CHECK(NormalizedtoPixelCoordinates(point.x(), point.y(), image_width_, + image_height_, &x, &y)); + kps.push_back(cv::Point2f(x, y)); } + + cv::Mat mean; + cv::reduce(kps, mean, 1, CV_REDUCE_AVG, CV_32F); 
+ + landmarks.push_back({mean.at(0, 0), mean.at(0, 1)}); + + kps.clear(); } + lms_out.push_back(landmarks); + + landmarks.clear(); } - double sumx = 0, sumy = 0, meanx, meany; - - for (auto p : kps) - { - sumx += p.x; - sumy += p.y; - } - meanx = sumx / kps.size(); - meany = sumy / kps.size(); - - landmarks.push_back({meanx, meany}); - - kps.clear(); } - lms_out.push_back(landmarks); - return absl::OkStatus(); } + absl::Status FastUtilsCalculator::Align(std::unique_ptr &image_mat, + cv::Mat source_lm, + cv::Mat target_lm, cv::Size size, + float extend, std::tuple roi) + { + cv::Mat mat_image_ = *image_mat.get(); + + cv::Mat source, target; + source_lm.convertTo(source, CV_32F); + target_lm.convertTo(target, CV_32F); + + if (target.at(0, 0) < 1) + { + target *= size.width; + } + + if (std::get<0>(roi) != NULL) + { + roi = Crop(image_mat, roi, extend); + + auto [left, top, right, bottom] = roi; + source(cv::Range(cv::Range::all()), cv::Range(0, 1)) -= left; + source(cv::Range(cv::Range::all()), cv::Range(1, 2)) -= top; + } + auto [scale, rotation, translation] = LandmarkTransform(source, target); + + std::vector vec_mat; + + vec_mat.push_back(scale * rotation); + vec_mat.push_back(translation.reshape(1, {2, 1})); + + cv::Mat transform, image; + cv::hconcat(vec_mat, transform); + + cv::warpAffine(mat_image_, *image_mat, transform, size, 1, 0, 0.0); + + return absl::OkStatus(); + } } // namespace mediapipe diff --git a/mediapipe/calculators/landmarks/landmarks_to_mask_calculator.cc b/mediapipe/calculators/landmarks/landmarks_to_mask_calculator.cc index 64f81b481..600cd8ac3 100644 --- a/mediapipe/calculators/landmarks/landmarks_to_mask_calculator.cc +++ b/mediapipe/calculators/landmarks/landmarks_to_mask_calculator.cc @@ -55,20 +55,18 @@ namespace mediapipe constexpr char kFaceBoxTag[] = "FACEBOX"; constexpr char kImageFrameTag[] = "IMAGE"; - static const std::vector UPPER_LIP = {61, 185, 40, 39, 37, 0, 267, 269, 270, 409, 291, 308, 415, 310, 311, 312, 13, 82, 81, 80, 
191, 78}; - static const std::vector LOWER_LIP = {61, 78, 95, 88, 178, 87, 14, 317, 402, 318, 324, 308, 291, 375, 321, 405, 314, 17, 84, 181, 91, 146}; - static const std::vector FACE_OVAL = {10, 338, 338, 297, 297, 332, 332, 284, 284, 251, 251, 389, 389, 356, 356, - 454, 454, 323, 323, 361, 361, 288, 288, 397, 397, 365, 365, 379, 379, 378, - 378, 400, 400, 377, 377, 152, 152, 148, 148, 176, 176, 149, 149, 150, 150, - 136, 136, 172, 172, 58, 58, 132, 132, 93, 93, 234, 234, 127, 127, 162, 162, - 21, 21, 54, 54, 103, 103, 67, 67, 109, 109, 10}; - static const std::vector MOUTH_INSIDE = {78, 191, 80, 81, 13, 312, 311, 310, 415, 308, 324, 318, 402, 317, 14, 87, 178, 88, 95}; - static const std::vector PART_FOREHEAD_B = {21, 54, 103, 67, 109, 10, 338, 297, 332, 284, 251, 301, 293, 334, 296, 336, 9, 107, 66, 105, 63, 71}; - static const std::vector LEFT_EYE = {130, 33, 246, 161, 160, 159, 157, 173, 133, 155, 154, 153, 145, 144, 163, 7}; - static const std::vector RIGHT_EYE = {362, 398, 384, 385, 386, 387, 388, 466, 263, 249, 390, 373, 374, 380, 381, 382}; - static const std::vector LIPS = {61, 185, 40, 39, 37, 0, 267, 269, 270, 409, 291, 375, 321, 405, 314, 17, 84, 181, 91, 146}; - static const std::vector LEFT_BROW = {70, 63, 105, 66, 107, 55, 65, 52, 53, 46}; - static const std::vector RIGHT_BROW = {336, 296, 334, 293, 301, 300, 283, 282, 295, 285}; + std::unordered_map> orderList = { + {"UPPER_LIP", {61, 185, 40, 39, 37, 0, 267, 269, 270, 409, 291, 308, 415, 310, 311, 312, 13, 82, 81, 80, 191, 78}}, + {"LOWER_LIP", {61, 78, 95, 88, 178, 87, 14, 317, 402, 318, 324, 308, 291, 375, 321, 405, 314, 17, 84, 181, 91, 146}}, + {"FACE_OVAL", {10, 338, 338, 297, 297, 332, 332, 284, 284, 251, 251, 389, 389, 356, 356, 454, 454, 323, 323, 361, 361, 288, 288, 397, 397, 365, 365, 379, 379, 378, 378, 400, 400, 377, 377, 152, 152, 148, 148, 176, 176, 149, 149, 150, 150, 136, 136, 172, 172, 58, 58, 132, 132, 93, 93, 234, 234, 127, 127, 162, 162, 21, 21, 54, 54, 103, 103, 67, 67, 109, 
109, 10}}, + {"MOUTH_INSIDE", {78, 191, 80, 81, 13, 312, 311, 310, 415, 308, 324, 318, 402, 317, 14, 87, 178, 88, 95}}, + {"LEFT_EYE", {130, 33, 246, 161, 160, 159, 157, 173, 133, 155, 154, 153, 145, 144, 163, 7}}, + {"RIGHT_EYE", {362, 398, 384, 385, 386, 387, 388, 466, 263, 249, 390, 373, 374, 380, 381, 382}}, + {"LEFT_BROW", {70, 63, 105, 66, 107, 55, 65, 52, 53, 46}}, + {"RIGHT_BROW", {336, 296, 334, 293, 301, 300, 283, 282, 295, 285}}, + {"LIPS", {61, 185, 40, 39, 37, 0, 267, 269, 270, 409, 291, 375, 321, 405, 314, 17, 84, 181, 91, 146}}, + {"PART_FOREHEAD_B", {21, 54, 103, 67, 109, 10, 338, 297, 332, 284, 251, 301, 293, 334, 296, 336, 9, 107, 66, 105, 63, 71}}, + }; template bool IsLandmarkVisibleAndPresent(const LandmarkType &landmark, @@ -195,119 +193,12 @@ namespace mediapipe std::unique_ptr image_mat; ImageFormat::Format target_format; std::unordered_map all_masks; - + MP_RETURN_IF_ERROR(CreateRenderTargetCpu(cc, image_mat, &target_format)); - int image_width_ = image_mat->cols; - int image_height_ = image_mat->rows; + MP_RETURN_IF_ERROR(GetMasks(cc, all_masks, image_mat)); - std::unordered_map> orderList; - orderList.insert(make_pair("UPPER_LIP", UPPER_LIP)); - orderList.insert(make_pair("LOWER_LIP", LOWER_LIP)); - orderList.insert(make_pair("FACE_OVAL", FACE_OVAL)); - orderList.insert(make_pair("MOUTH_INSIDE", MOUTH_INSIDE)); - orderList.insert(make_pair("LEFT_EYE", LEFT_EYE)); - orderList.insert(make_pair("RIGHT_EYE", RIGHT_EYE)); - orderList.insert(make_pair("LEFT_BROW", LEFT_BROW)); - orderList.insert(make_pair("RIGHT_BROW", RIGHT_BROW)); - orderList.insert(make_pair("LIPS", LIPS)); - orderList.insert(make_pair("PART_FOREHEAD_B", PART_FOREHEAD_B)); - - if (cc->Inputs().HasTag(kLandmarksTag)) - { - const LandmarkList &landmarks = - cc->Inputs().Tag(kLandmarksTag).Get(); - - cv::Mat mask; - std::vector point_array; - int c = 0; - for (const auto &[key, value] : orderList) - { - for (auto order : value) - { - c = 0; - for (int i = 0; i < 
landmarks.landmark_size(); ++i) - { - const Landmark &landmark = landmarks.landmark(i); - - if (!IsLandmarkVisibleAndPresent( - landmark, false, - 0.0, false, - 0.0)) - { - continue; - } - - if (order == c) - { - const auto &point = landmark; - int x = -1; - int y = -1; - CHECK(NormalizedtoPixelCoordinates(point.x(), point.y(), image_width_, - image_height_, &x, &y)); - point_array.push_back(cv::Point(x, y)); - } - c += 1; - } - } - std::vector> point_vec; - point_vec.push_back(point_array); - mask = cv::Mat::zeros(image_mat->size(), CV_32FC1); - cv::fillPoly(mask, point_vec, cv::Scalar::all(255), cv::LINE_AA); - mask.convertTo(mask, CV_8U); - all_masks.insert(make_pair(key, mask)); - point_vec.clear(); - point_array.clear(); - } - } - - if (cc->Inputs().HasTag(kNormLandmarksTag)) - { - const NormalizedLandmarkList &landmarks = - cc->Inputs().Tag(kNormLandmarksTag).Get(); - - cv::Mat mask; - std::vector point_array; - int c = 0; - for (const auto &[key, value] : orderList) - { - for (auto order : value) - { - c = 0; - for (int i = 0; i < landmarks.landmark_size(); ++i) - { - const NormalizedLandmark &landmark = landmarks.landmark(i); - - if (!IsLandmarkVisibleAndPresent( - landmark, false, - 0.0, false, - 0.0)) - { - continue; - } - - if (order == c) - { - const auto &point = landmark; - int x = -1; - int y = -1; - CHECK(NormalizedtoPixelCoordinates(point.x(), point.y(), image_width_, - image_height_, &x, &y)); - point_array.push_back(cv::Point(x, y)); - } - c += 1; - } - } - std::vector> point_vec; - point_vec.push_back(point_array); - mask = cv::Mat::zeros(image_mat->size(), CV_32FC1); - cv::fillPoly(mask, point_vec, cv::Scalar::all(255), cv::LINE_AA); - mask.convertTo(mask, CV_8U); - all_masks.insert(make_pair(key, mask)); - point_vec.clear(); - point_array.clear(); - } - } + MP_RETURN_IF_ERROR(GetFaceBox(cc, image_mat)); MP_RETURN_IF_ERROR(RenderToCpu(cc, all_masks)); @@ -397,8 +288,96 @@ namespace mediapipe return absl::OkStatus(); } - /* absl::Status 
LandmarksToMaskCalculator::GetFaceBox(std::unique_ptr &image_mat, - const RenderData &render_data) + absl::Status LandmarksToMaskCalculator::GetMasks(CalculatorContext *cc, + std::unordered_map &all_masks, std::unique_ptr &image_mat) + { + + int image_width_ = image_mat->cols; + int image_height_ = image_mat->rows; + + if (cc->Inputs().HasTag(kLandmarksTag)) + { + const LandmarkList &landmarks = + cc->Inputs().Tag(kNormLandmarksTag).Get(); + + cv::Mat mask; + std::vector point_array; + for (const auto &[key, value] : orderList) + { + for (auto order : value) + { + const Landmark &landmark = landmarks.landmark(order); + + if (!IsLandmarkVisibleAndPresent( + landmark, false, + 0.0, false, + 0.0)) + { + continue; + } + + const auto &point = landmark; + int x = -1; + int y = -1; + CHECK(NormalizedtoPixelCoordinates(point.x(), point.y(), image_width_, + image_height_, &x, &y)); + point_array.push_back(cv::Point(x, y)); + } + + std::vector> point_vec; + point_vec.push_back(point_array); + mask = cv::Mat::zeros(image_mat->size(), CV_32FC1); + cv::fillPoly(mask, point_vec, cv::Scalar::all(255), cv::LINE_AA); + mask.convertTo(mask, CV_8U); + all_masks.insert(make_pair(key, mask)); + point_vec.clear(); + point_array.clear(); + } + } + + if (cc->Inputs().HasTag(kNormLandmarksTag)) + { + const NormalizedLandmarkList &landmarks = + cc->Inputs().Tag(kNormLandmarksTag).Get(); + + cv::Mat mask; + std::vector point_array; + for (const auto &[key, value] : orderList) + { + for (auto order : value) + { + const NormalizedLandmark &landmark = landmarks.landmark(order); + + if (!IsLandmarkVisibleAndPresent( + landmark, false, + 0.0, false, + 0.0)) + { + continue; + } + + const auto &point = landmark; + int x = -1; + int y = -1; + CHECK(NormalizedtoPixelCoordinates(point.x(), point.y(), image_width_, + image_height_, &x, &y)); + point_array.push_back(cv::Point(x, y)); + } + + std::vector> point_vec; + point_vec.push_back(point_array); + mask = cv::Mat::zeros(image_mat->size(), CV_32FC1); 
+ cv::fillPoly(mask, point_vec, cv::Scalar::all(255), cv::LINE_AA); + mask.convertTo(mask, CV_8U); + all_masks.insert(make_pair(key, mask)); + point_vec.clear(); + point_array.clear(); + } + } + return absl::OkStatus(); + } + + absl::Status LandmarksToMaskCalculator::GetFaceBox(CalculatorContext *cc, std::unique_ptr &image_mat) { cv::Mat mat_image_ = *image_mat.get(); @@ -407,44 +386,70 @@ namespace mediapipe std::vector x_s, y_s; double box_min_y, box_max_y, box_max_x, box_min_x; + if (cc->Inputs().HasTag(kLandmarksTag)) + { + const LandmarkList &landmarks = + cc->Inputs().Tag(kLandmarksTag).Get(); - for (int i = 0; i < landmarks.landmark_size(); ++i) - { - const Landmark &landmark = landmarks.landmark(i); + for (int i = 0; i < landmarks.landmark_size(); ++i) + { + const Landmark &landmark = landmarks.landmark(i); - if (!IsLandmarkVisibleAndPresent( - landmark, false, - 0.0, false, - 0.0)) - { - continue; - } + if (!IsLandmarkVisibleAndPresent( + landmark, false, + 0.0, false, + 0.0)) + { + continue; + } - const auto &point = landmark.point(); + const auto &point = landmark; int x = -1; int y = -1; - if (point.normalized()) - { - CHECK(NormalizedtoPixelCoordinates(point.x(), point.y(), image_width_, - image_height_, &x, &y)); - } - else - { - x = static_cast(point.x() * scale_factor_); - y = static_cast(point.y() * scale_factor_); - } + CHECK(NormalizedtoPixelCoordinates(point.x(), point.y(), image_width_, + image_height_, &x, &y)); x_s.push_back(point.x()); x_s.push_back(point.y()); } - + cv::minMaxLoc(y_s, &box_min_y, &box_max_y); + cv::minMaxLoc(x_s, &box_min_x, &box_max_x); + box_min_y = box_min_y * 0.9; + face_box = std::make_tuple(box_min_x, box_min_y, box_max_x, box_max_y); + } + + if (cc->Inputs().HasTag(kNormLandmarksTag)) + { + const NormalizedLandmarkList &landmarks = + cc->Inputs().Tag(kNormLandmarksTag).Get(); + + for (int i = 0; i < landmarks.landmark_size(); ++i) + { + const NormalizedLandmark &landmark = landmarks.landmark(i); + + if 
(!IsLandmarkVisibleAndPresent( + landmark, false, + 0.0, false, + 0.0)) + { + continue; + } + + const auto &point = landmark; + int x = -1; + int y = -1; + CHECK(NormalizedtoPixelCoordinates(point.x(), point.y(), image_width_, + image_height_, &x, &y)); + x_s.push_back(point.x()); + x_s.push_back(point.y()); + } + cv::minMaxLoc(y_s, &box_min_y, &box_max_y); + cv::minMaxLoc(x_s, &box_min_x, &box_max_x); + box_min_y = box_min_y * 0.9; + face_box = std::make_tuple(box_min_x, box_min_y, box_max_x, box_max_y); } - cv::minMaxLoc(y_s, &box_min_y, &box_max_y); - cv::minMaxLoc(x_s, &box_min_x, &box_max_x); - box_min_y = box_min_y * 0.9; - face_box = std::make_tuple(box_min_x, box_min_y, box_max_x, box_max_y); return absl::OkStatus(); - } */ + } REGISTER_CALCULATOR(LandmarksToMaskCalculator); } // namespace mediapipe diff --git a/mediapipe/calculators/landmarks/landmarks_to_mask_calculator.h b/mediapipe/calculators/landmarks/landmarks_to_mask_calculator.h index c8798b350..7454f206c 100644 --- a/mediapipe/calculators/landmarks/landmarks_to_mask_calculator.h +++ b/mediapipe/calculators/landmarks/landmarks_to_mask_calculator.h @@ -24,22 +24,11 @@ #include "mediapipe/framework/port/ret_check.h" #include "mediapipe/util/color.pb.h" #include "mediapipe/util/render_data.pb.h" -#include "absl/memory/memory.h" -#include "absl/strings/str_cat.h" -#include "absl/strings/str_join.h" -#include "mediapipe/framework/calculator_framework.h" -#include "mediapipe/framework/calculator_options.pb.h" #include "mediapipe/framework/formats/image_format.pb.h" #include "mediapipe/framework/formats/image_frame.h" #include "mediapipe/framework/formats/image_frame_opencv.h" -#include "mediapipe/framework/formats/landmark.pb.h" #include "mediapipe/framework/port/opencv_core_inc.h" #include "mediapipe/framework/port/opencv_imgproc_inc.h" -#include "mediapipe/framework/formats/location_data.pb.h" -#include "mediapipe/framework/port/ret_check.h" -#include "mediapipe/util/color.pb.h" -#include 
"mediapipe/util/render_data.pb.h" -#include "absl/strings/str_cat.h" #include "mediapipe/framework/port/logging.h" #include "mediapipe/framework/port/status.h" #include "mediapipe/framework/port/vector.h" @@ -85,8 +74,10 @@ namespace mediapipe absl::Status RenderToCpu(CalculatorContext *cc, std::unordered_map &all_masks); - absl::Status GetFaceBox(std::unique_ptr &image_mat, - const RenderData &render_data); + absl::Status GetFaceBox(CalculatorContext *cc, std::unique_ptr &image_mat); + + absl::Status GetMasks(CalculatorContext *cc, std::unordered_map &all_masks, std::unique_ptr &image_mat); + absl::Status CreateRenderTargetCpu( CalculatorContext *cc, std::unique_ptr &image_mat, ImageFormat::Format *target_format); diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/imagestylegpu/BUILD b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/imagestylegpu/BUILD index 9e50d2e74..19b91666a 100644 --- a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/imagestylegpu/BUILD +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/imagestylegpu/BUILD @@ -38,6 +38,8 @@ android_binary( assets = [ "//mediapipe/graphs/image_style:mobile_gpu.binarypb", "//mediapipe/models:model_float32.tflite", + "//mediapipe/modules/face_landmark:face_landmark_with_attention.tflite", + "//mediapipe/modules/face_detection:face_detection_short_range.tflite", ], assets_dir = "", manifest = "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:AndroidManifest.xml", @@ -56,5 +58,7 @@ android_binary( deps = [ ":mediapipe_jni_lib", "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:basic_lib", + "//mediapipe/framework/formats:landmark_java_proto_lite", + "//mediapipe/java/com/google/mediapipe/framework:android_framework", ], ) diff --git a/mediapipe/graphs/image_style/BUILD b/mediapipe/graphs/image_style/BUILD index 27ed8b823..9515ca573 100644 --- a/mediapipe/graphs/image_style/BUILD +++ 
b/mediapipe/graphs/image_style/BUILD @@ -26,12 +26,17 @@ cc_library( deps = [ "//mediapipe/calculators/core:flow_limiter_calculator", "//mediapipe/calculators/tensor:image_to_tensor_calculator", + "//mediapipe/calculators/tensor:tensor_converter_calculator", "//mediapipe/calculators/tensor:inference_calculator", + "//mediapipe/calculators/image:image_transformation_calculator", "//mediapipe/calculators/tensor:tensors_to_segmentation_calculator", "//mediapipe/calculators/util:to_image_calculator", "//mediapipe/calculators/util:from_image_calculator", - "//mediapipe/calculators/image:image_properties_calculator", - "//mediapipe/calculators/tflite:tflite_custom_op_resolver_calculator", + "//mediapipe/calculators/image:image_properties_calculator", + "//mediapipe/modules/face_landmark:face_landmark_front_gpu", + "//mediapipe/calculators/image_style:fast_utils_calculator", + "//mediapipe/gpu:gpu_buffer_to_image_frame_calculator", + "//mediapipe/calculators/core:constant_side_packet_calculator", ], ) @@ -40,10 +45,16 @@ cc_library( deps = [ "//mediapipe/calculators/core:flow_limiter_calculator", "//mediapipe/calculators/tensor:image_to_tensor_calculator", + "//mediapipe/calculators/image:image_transformation_calculator", "//mediapipe/calculators/tensor:inference_calculator", + "//mediapipe/calculators/tensor:tensor_converter_calculator", "//mediapipe/calculators/tensor:tensors_to_segmentation_calculator", "//mediapipe/calculators/util:to_image_calculator", "//mediapipe/calculators/util:from_image_calculator", + "//mediapipe/modules/face_landmark:face_landmark_front_cpu", + "//mediapipe/calculators/image_style:fast_utils_calculator", + "//mediapipe/calculators/image:image_properties_calculator", + "//mediapipe/calculators/core:constant_side_packet_calculator", ], ) diff --git a/mediapipe/graphs/image_style/image_style_cpu.pbtxt b/mediapipe/graphs/image_style/image_style_cpu.pbtxt index 24074ac84..6dcb48f9e 100644 --- a/mediapipe/graphs/image_style/image_style_cpu.pbtxt 
+++ b/mediapipe/graphs/image_style/image_style_cpu.pbtxt @@ -6,7 +6,6 @@ input_stream: "input_video" # Output image with rendered results. (ImageFrame) output_stream: "output_video" - node { calculator: "FlowLimiterCalculator" input_stream: "input_video" @@ -19,30 +18,57 @@ node { } -node: { - calculator: "ToImageCalculator" - input_stream: "IMAGE_CPU:throttled_input_video" - output_stream: "IMAGE:image_input_video" -} - +# Defines side packets for further use in the graph. node { - calculator: "ImageToTensorCalculator" - input_stream: "IMAGE:image_input_video" - output_stream: "TENSORS:input_tensor" - options: { - [mediapipe.ImageToTensorCalculatorOptions.ext] { - output_tensor_width: 256 - output_tensor_height: 256 - keep_aspect_ratio: true - output_tensor_float_range { - min: -1.0 - max: 1.0 - } - border_mode: BORDER_ZERO + calculator: "ConstantSidePacketCalculator" + output_side_packet: "PACKET:0:num_faces" + output_side_packet: "PACKET:1:with_attention" + node_options: { + [type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: { + packet { int_value: 1 } + packet { bool_value: true } } } } +# Subgraph that detects faces and corresponding landmarks. 
+node { + calculator: "FaceLandmarkFrontCpu" + input_stream: "IMAGE:throttled_input_video" + input_side_packet: "NUM_FACES:num_faces" + input_side_packet: "WITH_ATTENTION:with_attention" + output_stream: "LANDMARKS:multi_face_landmarks" +} + +node { + calculator: "FastUtilsCalculator" + input_stream: "NORM_LANDMARKS:multi_face_landmarks" + input_stream: "IMAGE:throttled_input_video" + output_stream: "IMAGE:out_image_frame" +} + +node: { + calculator: "ImageTransformationCalculator" + input_stream: "IMAGE:out_image_frame" + output_stream: "IMAGE:transformed_input_video" + node_options: { + [type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] { + output_width: 256 + output_height: 256 + } + } +} + +node { + calculator: "TensorConverterCalculator" + input_stream: "IMAGE:transformed_input_video" + output_stream: "TENSORS:input_tensor" + options: { + [mediapipe.TensorConverterCalculatorOptions.ext] { + zero_center: true + } + } +} node { calculator: "InferenceCalculator" @@ -56,10 +82,16 @@ node { } } +node { + calculator: "ImagePropertiesCalculator" + input_stream: "IMAGE:transformed_input_video" + output_stream: "SIZE:input_size" +} node { calculator: "TensorsToSegmentationCalculator" input_stream: "TENSORS:output_tensor" + input_stream: "OUTPUT_SIZE:input_size" output_stream: "MASK:output" options: { [mediapipe.TensorsToSegmentationCalculatorOptions.ext] { diff --git a/mediapipe/graphs/image_style/image_style_gpu.pbtxt b/mediapipe/graphs/image_style/image_style_gpu.pbtxt index 9ec991440..a1d73a832 100644 --- a/mediapipe/graphs/image_style/image_style_gpu.pbtxt +++ b/mediapipe/graphs/image_style/image_style_gpu.pbtxt @@ -18,25 +18,81 @@ node { output_stream: "throttled_input_video" } -node: { - calculator: "ImageToTensorCalculator" - input_stream: "IMAGE_GPU:throttled_input_video" - output_stream: "TENSORS:input_tensors" - options { - [mediapipe.ImageToTensorCalculatorOptions.ext] { - output_tensor_width: 256 - output_tensor_height: 256 - 
keep_aspect_ratio: false - output_tensor_float_range { - min: -1.0 - max: 1.0 - } - gpu_origin: TOP_LEFT - border_mode: BORDER_REPLICATE - } - } +# Defines side packets for further use in the graph. +node { + calculator: "ConstantSidePacketCalculator" + output_side_packet: "PACKET:with_attention" + node_options: { + [type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: { + packet { bool_value: true } + } + } } +node: { + calculator: "ImageTransformationCalculator" + input_stream: "IMAGE_GPU:throttled_input_video" + output_stream: "IMAGE_GPU:transformed_input_video" + node_options: { + [type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] { + output_width: 256 + output_height: 256 + } + } +} + +# Converts the resized GPU frame into a CPU ImageFrame. +node { + calculator: "GpuBufferToImageFrameCalculator" + input_stream: "transformed_input_video" + output_stream: "throttled_input_video_cpu" +} + +# Subgraph that detects faces and corresponding landmarks. +node { + calculator: "FaceLandmarkFrontGpu" + input_stream: "IMAGE:throttled_input_video" + input_side_packet: "WITH_ATTENTION:with_attention" + output_stream: "LANDMARKS:multi_face_landmarks" +} + +node { + calculator: "FastUtilsCalculator" + input_stream: "NORM_LANDMARKS:multi_face_landmarks" + input_stream: "IMAGE:throttled_input_video_cpu" + output_stream: "IMAGE:out_image_frame" +} + +node { + calculator: "TensorConverterCalculator" + input_stream: "IMAGE:out_image_frame" + output_stream: "TENSORS:input_tensors" + options: { + [mediapipe.TensorConverterCalculatorOptions.ext] { + zero_center: true + } + } +} + +#node: { +# calculator: "ImageToTensorCalculator" +# input_stream: "IMAGE_GPU:throttled_input_video" +# output_stream: "TENSORS:input_tensors" +# options { +# [mediapipe.ImageToTensorCalculatorOptions.ext] { +# output_tensor_width: 256 +# output_tensor_height: 256 +# keep_aspect_ratio: false +# output_tensor_float_range { +# min: -1.0 +# max: 1.0 +# } +# gpu_origin: TOP_LEFT +#
border_mode: BORDER_REPLICATE +# } +# } +#} + node { @@ -45,8 +101,8 @@ node { output_stream: "TENSORS:output_tensors" options: { [mediapipe.InferenceCalculatorOptions.ext] { - model_path: "mediapipe/models/model_float32.tflite" - delegate { xnnpack {} } + model_path:"mediapipe/models/model_float32.tflite" + delegate { gpu {} } } } }