diff --git a/mediapipe/calculators/beauty/BUILD b/mediapipe/calculators/beauty/BUILD
new file mode 100644
index 000000000..16ff31356
--- /dev/null
+++ b/mediapipe/calculators/beauty/BUILD
@@ -0,0 +1,128 @@
+# Copyright 2019 The MediaPipe Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library")
+
+licenses(["notice"])
+
+package(default_visibility = ["//visibility:public"])
+
+cc_library(
+    name = "draw_lipstick_calculator",
+    srcs = ["draw_lipstick_calculator.cc"],
+    visibility = ["//visibility:public"],
+    deps = [
+        "//mediapipe/framework:calculator_options_cc_proto",
+        "//mediapipe/framework/formats:image_format_cc_proto",
+        "//mediapipe/util:color_cc_proto",
+        "@com_google_absl//absl/strings",
+        "//mediapipe/framework:calculator_framework",
+        "//mediapipe/framework/formats:image_frame",
+        "//mediapipe/framework/formats:image_frame_opencv",
+        "//mediapipe/framework/formats:video_stream_header",
+        "//mediapipe/framework/port:logging",
+        "//mediapipe/framework/port:opencv_core",
+        "//mediapipe/framework/port:opencv_imgproc",
+        "//mediapipe/framework/port:opencv_highgui",
+        "//mediapipe/framework/port:status",
+        "//mediapipe/framework/port:vector",
+        "//mediapipe/util:annotation_renderer",
+        "//mediapipe/util:render_data_cc_proto",
+    ],
+    alwayslink = 1,
+)
+
+cc_library(
+    name = "form_face_mask_calculator",
+    srcs = ["form_face_mask_calculator.cc"],
+    visibility = ["//visibility:public"],
+    deps = [
+        "//mediapipe/framework:calculator_options_cc_proto",
+        "//mediapipe/framework/formats:image_format_cc_proto",
+        "//mediapipe/util:color_cc_proto",
+        "@com_google_absl//absl/strings",
+        "//mediapipe/framework:calculator_framework",
+        "//mediapipe/framework/formats:image_frame",
+        "//mediapipe/framework/formats:image_frame_opencv",
+        "//mediapipe/framework/formats:video_stream_header",
+        "//mediapipe/framework/port:logging",
+        "//mediapipe/framework/port:opencv_core",
+        "//mediapipe/framework/port:opencv_imgproc",
+        "//mediapipe/framework/port:opencv_highgui",
+        "//mediapipe/framework/port:status",
+        "//mediapipe/framework/port:vector",
+        "//mediapipe/util:annotation_renderer",
+        "//mediapipe/util:render_data_cc_proto",
+    ],
+    alwayslink = 1,
+)
+
+cc_library(
+    name = "smooth_face_calculator",
+    srcs = ["smooth_face_calculator.cc"],
+    visibility = ["//visibility:public"],
+    deps = [
+        "//mediapipe/framework:calculator_options_cc_proto",
+        "//mediapipe/framework/formats:image_format_cc_proto",
+        "//mediapipe/util:color_cc_proto",
+        "@com_google_absl//absl/strings",
+        "//mediapipe/framework:calculator_framework",
+        "//mediapipe/framework/formats:image_frame",
+        "//mediapipe/framework/formats:image_frame_opencv",
+        "//mediapipe/framework/formats:video_stream_header",
+        "//mediapipe/framework/port:logging",
+        "//mediapipe/framework/port:opencv_core",
+        "//mediapipe/framework/port:opencv_imgproc",
+        "//mediapipe/framework/port:opencv_highgui",
+        "//mediapipe/framework/port:status",
"//mediapipe/framework/port:vector", + "//mediapipe/util:annotation_renderer", + "//mediapipe/util:render_data_cc_proto", + ], + alwayslink = 1, +) + +cc_library( + name = "whiten_teeth_calculator", + srcs = ["whiten_teeth_calculator.cc"], + visibility = ["//visibility:public"], + deps = [ + "//mediapipe/framework:calculator_options_cc_proto", + "//mediapipe/framework/formats:image_format_cc_proto", + "//mediapipe/util:color_cc_proto", + "@com_google_absl//absl/strings", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework/formats:image_frame", + "//mediapipe/framework/formats:image_frame_opencv", + "//mediapipe/framework/formats:video_stream_header", + "//mediapipe/framework/port:logging", + "//mediapipe/framework/port:opencv_core", + "//mediapipe/framework/port:opencv_imgproc", + "//mediapipe/framework/port:opencv_highgui", + "//mediapipe/framework/port:status", + "//mediapipe/framework/port:vector", + "//mediapipe/util:annotation_renderer", + "//mediapipe/util:render_data_cc_proto", + ], + alwayslink = 1, +) + + + + + + diff --git a/mediapipe/calculators/beauty/draw_lipstick_calculator.cc b/mediapipe/calculators/beauty/draw_lipstick_calculator.cc new file mode 100644 index 000000000..a898e6da3 --- /dev/null +++ b/mediapipe/calculators/beauty/draw_lipstick_calculator.cc @@ -0,0 +1,394 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include +#include +#include +#include +//#include + +#include + +#include "absl/strings/str_cat.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/calculator_options.pb.h" +#include "mediapipe/framework/formats/image_format.pb.h" +#include "mediapipe/framework/formats/image_frame.h" +#include "mediapipe/framework/formats/image_frame_opencv.h" +#include "mediapipe/framework/formats/video_stream_header.h" +#include "mediapipe/framework/port/logging.h" +#include "mediapipe/framework/port/opencv_core_inc.h" +#include "mediapipe/framework/port/opencv_imgproc_inc.h" +#include "mediapipe/framework/port/status.h" +#include "mediapipe/util/annotation_renderer.h" +#include "mediapipe/util/render_data.pb.h" +#include "mediapipe/framework/port/logging.h" +#include "mediapipe/framework/port/vector.h" +#include "mediapipe/util/color.pb.h" + +namespace mediapipe +{ + namespace + { + + constexpr char kMaskTag[] = "MASK"; + constexpr char kImageFrameTag[] = "IMAGE"; + + enum + { + ATTRIB_VERTEX, + ATTRIB_TEXTURE_POSITION, + NUM_ATTRIBUTES + }; + + // Round up n to next multiple of m. + size_t RoundUp(size_t n, size_t m) { return ((n + m - 1) / m) * m; } // NOLINT + inline bool HasImageTag(mediapipe::CalculatorContext *cc) { return false; } + + using Point = RenderAnnotation::Point; + + } // namespace + + class DrawLipstickCalculator : public CalculatorBase + { + public: + DrawLipstickCalculator() = default; + ~DrawLipstickCalculator() override = default; + + static absl::Status GetContract(CalculatorContract *cc); + + // From Calculator. 
+    absl::Status Open(CalculatorContext *cc) override;
+    absl::Status Process(CalculatorContext *cc) override;
+    absl::Status Close(CalculatorContext *cc) override;
+
+  private:
+    absl::Status CreateRenderTargetCpu(CalculatorContext *cc,
+                                       std::unique_ptr<cv::Mat> &image_mat,
+                                       ImageFormat::Format *target_format);
+
+    absl::Status RenderToCpu(
+        CalculatorContext *cc, const ImageFormat::Format &target_format,
+        uchar *data_image, std::unique_ptr<cv::Mat> &image_mat);
+
+    absl::Status DrawLipstick(CalculatorContext *cc, std::unique_ptr<cv::Mat> &image_mat,
+                              ImageFormat::Format *target_format,
+                              const std::unordered_map<std::string, cv::Mat> &mask_vec);
+
+    // Indicates if image frame is available as input.
+    bool image_frame_available_ = false;
+    std::unordered_map<std::string, cv::Mat> all_masks;
+    int width_ = 0;
+    int height_ = 0;
+    int width_canvas_ = 0; // Size of overlay drawing texture canvas.
+    int height_canvas_ = 0;
+  };
+  REGISTER_CALCULATOR(DrawLipstickCalculator);
+
+  absl::Status DrawLipstickCalculator::GetContract(CalculatorContract *cc)
+  {
+    CHECK_GE(cc->Inputs().NumEntries(), 1);
+
+    if (cc->Inputs().HasTag(kImageFrameTag))
+    {
+      cc->Inputs().Tag(kImageFrameTag).Set<ImageFrame>();
+      CHECK(cc->Outputs().HasTag(kImageFrameTag));
+    }
+
+    // Data streams to render.
+    for (CollectionItemId id = cc->Inputs().BeginId(); id < cc->Inputs().EndId();
+         ++id)
+    {
+      auto tag_and_index = cc->Inputs().TagAndIndexFromId(id);
+      std::string tag = tag_and_index.first;
+      if (tag == kMaskTag)
+      {
+        cc->Inputs().Get(id).Set<std::unordered_map<std::string, cv::Mat>>();
+      }
+      else if (tag.empty())
+      {
+        // Empty tag defaults to accepting a single object of Mat type.
+        cc->Inputs().Get(id).Set<cv::Mat>();
+      }
+    }
+
+    if (cc->Outputs().HasTag(kImageFrameTag))
+    {
+      cc->Outputs().Tag(kImageFrameTag).Set<ImageFrame>();
+    }
+
+    return absl::OkStatus();
+  }
+
+  absl::Status DrawLipstickCalculator::Open(CalculatorContext *cc)
+  {
+    cc->SetOffset(TimestampDiff(0));
+
+    if (cc->Inputs().HasTag(kImageFrameTag) || HasImageTag(cc))
+    {
+      image_frame_available_ = true;
+    }
+
+    // Set the output header based on the input header (if present).
+    const char *tag = kImageFrameTag;
+    if (image_frame_available_ && !cc->Inputs().Tag(tag).Header().IsEmpty())
+    {
+      const auto &input_header =
+          cc->Inputs().Tag(tag).Header().Get<VideoHeader>();
+      auto *output_video_header = new VideoHeader(input_header);
+      cc->Outputs().Tag(tag).SetHeader(Adopt(output_video_header));
+    }
+
+    return absl::OkStatus();
+  }
+
+  absl::Status DrawLipstickCalculator::Process(CalculatorContext *cc)
+  {
+    if (cc->Inputs().HasTag(kImageFrameTag) &&
+        cc->Inputs().Tag(kImageFrameTag).IsEmpty())
+    {
+      return absl::OkStatus();
+    }
+    if (cc->Inputs().HasTag(kMaskTag) &&
+        cc->Inputs().Tag(kMaskTag).IsEmpty())
+    {
+      return absl::OkStatus();
+    }
+
+    // Initialize render target, drawn with OpenCV.
+    std::unique_ptr<cv::Mat> image_mat;
+    ImageFormat::Format target_format;
+
+    if (cc->Outputs().HasTag(kImageFrameTag))
+    {
+      MP_RETURN_IF_ERROR(CreateRenderTargetCpu(cc, image_mat, &target_format));
+    }
+
+    // Render streams onto render target.
+    for (CollectionItemId id = cc->Inputs().BeginId(); id < cc->Inputs().EndId();
+         ++id)
+    {
+      auto tag_and_index = cc->Inputs().TagAndIndexFromId(id);
+      std::string tag = tag_and_index.first;
+      if (!tag.empty() && tag != kMaskTag)
+      {
+        continue;
+      }
+      if (cc->Inputs().Get(id).IsEmpty())
+      {
+        continue;
+      }
+
+      RET_CHECK_EQ(kMaskTag, tag);
+      const std::unordered_map<std::string, cv::Mat> &mask_vec =
+          cc->Inputs().Get(id).Get<std::unordered_map<std::string, cv::Mat>>();
+      if (mask_vec.size() > 1)
+        MP_RETURN_IF_ERROR(DrawLipstick(cc, image_mat, &target_format, mask_vec));
+    }
+
+    // Copy the rendered image to output.
+    uchar *image_mat_ptr = image_mat->data;
+    MP_RETURN_IF_ERROR(RenderToCpu(cc, target_format, image_mat_ptr, image_mat));
+
+    return absl::OkStatus();
+  }
+
+  absl::Status DrawLipstickCalculator::Close(CalculatorContext *cc)
+  {
+    return absl::OkStatus();
+  }
+
+  absl::Status DrawLipstickCalculator::RenderToCpu(
+      CalculatorContext *cc, const ImageFormat::Format &target_format,
+      uchar *data_image, std::unique_ptr<cv::Mat> &image_mat)
+  {
+    cv::Mat mat_image_ = *image_mat.get();
+
+    auto output_frame = absl::make_unique<ImageFrame>(
+        target_format, mat_image_.cols, mat_image_.rows);
+
+    output_frame->CopyPixelData(target_format, mat_image_.cols, mat_image_.rows, data_image,
+                                ImageFrame::kDefaultAlignmentBoundary);
+
+    if (cc->Outputs().HasTag(kImageFrameTag))
+    {
+      cc->Outputs()
+          .Tag(kImageFrameTag)
+          .Add(output_frame.release(), cc->InputTimestamp());
+    }
+
+    return absl::OkStatus();
+  }
+
+  absl::Status DrawLipstickCalculator::CreateRenderTargetCpu(
+      CalculatorContext *cc, std::unique_ptr<cv::Mat> &image_mat,
+      ImageFormat::Format *target_format)
+  {
+    if (image_frame_available_)
+    {
+      const auto &input_frame =
+          cc->Inputs().Tag(kImageFrameTag).Get<ImageFrame>();
+
+      int target_mat_type;
+      switch (input_frame.Format())
+      {
+      case ImageFormat::SRGBA:
+        *target_format = ImageFormat::SRGBA;
+        target_mat_type = CV_8UC4;
+        break;
+      case ImageFormat::SRGB:
+        *target_format = ImageFormat::SRGB;
+        target_mat_type = CV_8UC3;
+        break;
+      case ImageFormat::GRAY8:
+        *target_format = ImageFormat::SRGB;
+        target_mat_type = CV_8UC3;
+        break;
+      default:
+        return absl::UnknownError("Unexpected image frame format.");
+      }
+
+      image_mat = absl::make_unique<cv::Mat>(
+          input_frame.Height(), input_frame.Width(), target_mat_type);
+
+      auto input_mat = formats::MatView(&input_frame);
+
+      if (input_frame.Format() == ImageFormat::GRAY8)
+      {
+        cv::Mat rgb_mat;
+        cv::cvtColor(input_mat, rgb_mat, CV_GRAY2RGB);
+        rgb_mat.copyTo(*image_mat);
+      }
+      else
+      {
+        input_mat.copyTo(*image_mat);
+      }
+    }
+    else
+    {
+      image_mat = absl::make_unique<cv::Mat>(
+          150, 150, CV_8UC4, cv::Scalar(255, 255, 255));
+      *target_format = ImageFormat::SRGBA;
+    }
+
+    return absl::OkStatus();
+  }
+
+  absl::Status DrawLipstickCalculator::DrawLipstick(CalculatorContext *cc,
+                                                    std::unique_ptr<cv::Mat> &image_mat,
+                                                    ImageFormat::Format *target_format,
+                                                    const std::unordered_map<std::string, cv::Mat> &mask_vec)
+  {
+    cv::Mat mat_image__ = *image_mat.get();
+
+    cv::Mat spec_lips_mask, upper_lips_mask, lower_lips_mask;
+    spec_lips_mask = cv::Mat::zeros(mat_image__.size(), CV_32F);
+    upper_lips_mask = cv::Mat::zeros(mat_image__.size(), CV_32F);
+    lower_lips_mask = cv::Mat::zeros(mat_image__.size(), CV_32F);
+
+    //__android_log_print(ANDROID_LOG_ERROR, "OVERSEAS", "%d ", mask_vec[1].size().height);
+
+    upper_lips_mask = mask_vec.find("UPPER_LIP")->second;
+    lower_lips_mask = mask_vec.find("LOWER_LIP")->second;
+
+    spec_lips_mask = upper_lips_mask + lower_lips_mask;
+
+    spec_lips_mask.convertTo(spec_lips_mask, CV_8U);
+
+    cv::resize(spec_lips_mask, spec_lips_mask, mat_image__.size(), 0, 0, cv::INTER_LINEAR);
+
+    std::vector<int> x, y;
+    std::vector<cv::Point> location;
+
+    cv::findNonZero(spec_lips_mask, location);
+
+    for (auto &i : location)
+    {
+      x.push_back(i.x);
+      y.push_back(i.y);
+    }
+
+    if (!(x.empty()) && !(y.empty()))
+    {
+      double min_y, max_y, max_x, min_x;
+      cv::minMaxLoc(y, &min_y, &max_y);
+      cv::minMaxLoc(x, &min_x, &max_x);
+
+      cv::Mat lips_crop_mask = spec_lips_mask(cv::Range(min_y, max_y), cv::Range(min_x, max_x));
+      lips_crop_mask.convertTo(lips_crop_mask, CV_32F, 1.0 / 255);
+
+      cv::Mat lips_crop =
+          cv::Mat(mat_image__(cv::Range(min_y, max_y), cv::Range(min_x, max_x)));
+
+      cv::Mat lips_blend = cv::Mat(lips_crop.size().height, lips_crop.size().width, CV_32FC4, cv::Scalar(255.0, 0, 0, 0));
+
+      std::vector<cv::Mat> channels(4);
+
+      cv::split(lips_blend, channels);
+      channels[3] = lips_crop_mask * 20;
+
+      cv::merge(channels, lips_blend);
+
+      cv::Mat tmp_lip_mask;
+
+      channels[3].convertTo(tmp_lip_mask, CV_32FC1, 1.0 / 255);
+
+      cv::split(lips_blend, channels);
+      for (auto &ch : channels)
+      {
+        cv::multiply(ch, tmp_lip_mask, ch, 1.0, CV_32F);
+      }
+      cv::merge(channels, lips_blend);
+
+      cv::subtract(1.0, tmp_lip_mask, tmp_lip_mask, cv::noArray(), CV_32F);
+
+      cv::split(lips_crop, channels);
+      for (auto &ch : channels)
+      {
+        cv::multiply(ch, tmp_lip_mask, ch, 1.0, CV_8U);
+      }
+      cv::merge(channels, lips_crop);
+
+      cv::add(lips_blend, lips_crop, lips_crop, cv::noArray(), CV_8U);
+
+      lips_crop = cv::abs(lips_crop);
+
+      cvtColor(lips_crop, lips_crop, cv::COLOR_RGBA2RGB);
+
+      cv::Mat slice = mat_image__(cv::Range(min_y, max_y), cv::Range(min_x, max_x));
+      lips_crop_mask.convertTo(lips_crop_mask, slice.type());
+      slice.copyTo(slice, lips_crop_mask);
+
+      cv::Mat masked_lips_crop, slice_gray;
+      lips_crop.copyTo(masked_lips_crop, lips_crop_mask);
+
+      cv::cvtColor(masked_lips_crop, slice_gray, cv::COLOR_RGB2GRAY);
+
+      masked_lips_crop.copyTo(slice, slice_gray);
+    }
+
+    return absl::OkStatus();
+  }
+
+} // namespace mediapipe
diff --git a/mediapipe/calculators/beauty/form_face_mask_calculator.cc b/mediapipe/calculators/beauty/form_face_mask_calculator.cc
new file mode 100644
index 000000000..4650df5ab
--- /dev/null
+++ b/mediapipe/calculators/beauty/form_face_mask_calculator.cc
@@ -0,0 +1,445 @@
+// Copyright 2019 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <cmath>
+
+#include <algorithm>
+#include <memory>
+#include <string>
+#include <vector>
+//#include <android/log.h>
+
+#include <unordered_map>
+
+#include "absl/strings/str_cat.h"
+#include "mediapipe/framework/calculator_framework.h"
+#include "mediapipe/framework/calculator_options.pb.h"
+#include "mediapipe/framework/formats/image_format.pb.h"
+#include "mediapipe/framework/formats/image_frame.h"
+#include "mediapipe/framework/formats/image_frame_opencv.h"
+#include "mediapipe/framework/formats/video_stream_header.h"
+#include "mediapipe/framework/port/logging.h"
+#include "mediapipe/framework/port/opencv_core_inc.h"
+#include "mediapipe/framework/port/opencv_imgproc_inc.h"
+#include "mediapipe/framework/port/status.h"
+#include "mediapipe/util/annotation_renderer.h"
+#include "mediapipe/util/render_data.pb.h"
+#include "mediapipe/framework/port/vector.h"
+#include "mediapipe/util/color.pb.h"
+
+namespace mediapipe
+{
+  namespace
+  {
+
+    constexpr char kVectorTag[] = "VECTOR";
+    constexpr char kMaskTag[] = "MASK";
+    constexpr char kFaceBoxTag[] = "FACEBOX";
+    constexpr char kImageFrameTag[] = "IMAGE";
+
+    enum
+    {
+      ATTRIB_VERTEX,
+      ATTRIB_TEXTURE_POSITION,
+      NUM_ATTRIBUTES
+    };
+
+    // Round up n to next multiple of m.
+    size_t RoundUp(size_t n, size_t m) { return ((n + m - 1) / m) * m; } // NOLINT
+
+    // When using GPU, this color will become transparent when the calculator
+    // merges the annotation overlay with the image frame. As a result, drawing in
+    // this color is not supported and it should be set to something unlikely used.
+    constexpr uchar kAnnotationBackgroundColor = 2; // Grayscale value.
+
+    // Future Image type.
+    inline bool HasImageTag(mediapipe::CalculatorContext *cc) { return false; }
+
+    static const std::vector<int> UPPER_LIP = {61, 185, 40, 39, 37, 0, 267, 269, 270, 409, 291, 308, 415, 310, 311, 312, 13, 82, 81, 80, 191, 78};
+    static const std::vector<int> LOWER_LIP = {61, 78, 95, 88, 178, 87, 14, 317, 402, 318, 324, 308, 291, 375, 321, 405, 314, 17, 84, 181, 91, 146};
+    static const std::vector<int> FACE_OVAL = {10, 338, 338, 297, 297, 332, 332, 284, 284, 251, 251, 389, 389, 356, 356,
+                                               454, 454, 323, 323, 361, 361, 288, 288, 397, 397, 365, 365, 379, 379, 378,
+                                               378, 400, 400, 377, 377, 152, 152, 148, 148, 176, 176, 149, 149, 150, 150,
+                                               136, 136, 172, 172, 58, 58, 132, 132, 93, 93, 234, 234, 127, 127, 162, 162,
+                                               21, 21, 54, 54, 103, 103, 67, 67, 109, 109, 10};
+    static const std::vector<int> MOUTH_INSIDE = {78, 191, 80, 81, 13, 312, 311, 310, 415, 308, 324, 318, 402, 317, 14, 87, 178, 88, 95};
+    static const std::vector<int> PART_FOREHEAD_B = {21, 54, 103, 67, 109, 10, 338, 297, 332, 284, 251, 301, 293, 334, 296, 336, 9, 107, 66, 105, 63, 71};
+    static const std::vector<int> LEFT_EYE = {130, 33, 246, 161, 160, 159, 157, 173, 133, 155, 154, 153, 145, 144, 163, 7};
+    static const std::vector<int> RIGHT_EYE = {362, 398, 384, 385, 386, 387, 388, 466, 263, 249, 390, 373, 374, 380, 381, 382};
+    static const std::vector<int> LIPS = {61, 185, 40, 39, 37, 0, 267, 269, 270, 409, 291, 375, 321, 405, 314, 17, 84, 181, 91, 146};
+    static const std::vector<int> LEFT_BROW = {70, 63, 105, 66, 107, 55, 65, 52, 53, 46};
+    static const std::vector<int> RIGHT_BROW = {336, 296, 334, 293, 301, 300, 283, 282, 295, 285};
+
+    bool NormalizedtoPixelCoordinates(double normalized_x, double normalized_y,
+                                      int image_width, int image_height, int *x_px,
+                                      int *y_px)
+    {
+      CHECK(x_px != nullptr);
+      CHECK(y_px != nullptr);
+      CHECK_GT(image_width, 0);
+      CHECK_GT(image_height, 0);
+
+      if (normalized_x < 0 || normalized_x > 1.0 || normalized_y < 0 ||
+          normalized_y > 1.0)
+      {
+        VLOG(1) << "Normalized coordinates must be between 0.0 and 1.0";
+      }
+
+      *x_px = static_cast<int>(round(normalized_x * image_width));
+      *y_px = static_cast<int>(round(normalized_y * image_height));
+
+      return true;
+    }
+
+  } // namespace
+
+  class FormFaceMaskCalculator : public CalculatorBase
+  {
+  public:
+    FormFaceMaskCalculator() = default;
+    ~FormFaceMaskCalculator() override = default;
+
+    static absl::Status GetContract(CalculatorContract *cc);
+
+    // From Calculator.
+    absl::Status Open(CalculatorContext *cc) override;
+    absl::Status Process(CalculatorContext *cc) override;
+    absl::Status Close(CalculatorContext *cc) override;
+
+  private:
+    absl::Status CreateRenderTargetCpu(CalculatorContext *cc,
+                                       std::unique_ptr<cv::Mat> &image_mat,
+                                       ImageFormat::Format *target_format);
+
+    absl::Status RenderToCpu(CalculatorContext *cc, std::unordered_map<std::string, cv::Mat> &all_masks);
+
+    absl::Status FormFacePartMask(CalculatorContext *cc, std::unique_ptr<cv::Mat> &image_mat,
+                                  ImageFormat::Format *target_format,
+                                  const RenderData &render_data,
+                                  std::unordered_map<std::string, cv::Mat> &all_masks);
+
+    absl::Status GetFaceBox(std::unique_ptr<cv::Mat> &image_mat,
+                            const RenderData &render_data);
+
+    // Indicates if image frame is available as input.
+    bool image_frame_available_ = false;
+
+    int width_ = 0;
+    int height_ = 0;
+    int width_canvas_ = 0; // Size of overlay drawing texture canvas.
+    int height_canvas_ = 0;
+    float scale_factor_ = 1.0;
+    std::tuple<double, double, double, double> face_box;
+  };
+  REGISTER_CALCULATOR(FormFaceMaskCalculator);
+
+  absl::Status FormFaceMaskCalculator::GetContract(CalculatorContract *cc)
+  {
+    CHECK_GE(cc->Inputs().NumEntries(), 1);
+
+    if (cc->Inputs().HasTag(kImageFrameTag))
+    {
+      cc->Inputs().Tag(kImageFrameTag).Set<ImageFrame>();
+      CHECK(cc->Outputs().HasTag(kMaskTag));
+    }
+
+    // Data streams to render.
+    for (CollectionItemId id = cc->Inputs().BeginId(); id < cc->Inputs().EndId();
+         ++id)
+    {
+      auto tag_and_index = cc->Inputs().TagAndIndexFromId(id);
+      std::string tag = tag_and_index.first;
+      if (tag == kVectorTag)
+      {
+        cc->Inputs().Get(id).Set<std::vector<RenderData>>();
+      }
+      else if (tag.empty())
+      {
+        // Empty tag defaults to accepting a single object of RenderData type.
+        cc->Inputs().Get(id).Set<RenderData>();
+      }
+    }
+
+    if (cc->Outputs().HasTag(kMaskTag))
+    {
+      cc->Outputs().Tag(kMaskTag).Set<std::unordered_map<std::string, cv::Mat>>();
+    }
+
+    if (cc->Outputs().HasTag(kFaceBoxTag))
+    {
+      cc->Outputs().Tag(kFaceBoxTag).Set<std::tuple<double, double, double, double>>();
+    }
+
+    return absl::OkStatus();
+  }
+
+  absl::Status FormFaceMaskCalculator::Open(CalculatorContext *cc)
+  {
+    cc->SetOffset(TimestampDiff(0));
+
+    return absl::OkStatus();
+  }
+
+  absl::Status FormFaceMaskCalculator::Process(CalculatorContext *cc)
+  {
+    if (cc->Inputs().HasTag(kImageFrameTag) &&
+        cc->Inputs().Tag(kImageFrameTag).IsEmpty())
+    {
+      return absl::OkStatus();
+    }
+
+    // Initialize render target, drawn with OpenCV.
+    std::unique_ptr<cv::Mat> image_mat;
+    ImageFormat::Format target_format;
+    std::unordered_map<std::string, cv::Mat> all_masks;
+
+    if (cc->Outputs().HasTag(kMaskTag))
+    {
+      MP_RETURN_IF_ERROR(CreateRenderTargetCpu(cc, image_mat, &target_format));
+    }
+
+    // Render streams onto render target.
+    for (CollectionItemId id = cc->Inputs().BeginId(); id < cc->Inputs().EndId();
+         ++id)
+    {
+      auto tag_and_index = cc->Inputs().TagAndIndexFromId(id);
+      std::string tag = tag_and_index.first;
+      if (!tag.empty() && tag != kVectorTag)
+      {
+        continue;
+      }
+      if (cc->Inputs().Get(id).IsEmpty())
+      {
+        continue;
+      }
+      if (tag.empty())
+      {
+        // Empty tag defaults to accepting a single object of RenderData type.
+        const RenderData &render_data = cc->Inputs().Get(id).Get<RenderData>();
+        MP_RETURN_IF_ERROR(FormFacePartMask(cc, image_mat, &target_format, render_data, all_masks));
+
+        if (cc->Outputs().HasTag(kFaceBoxTag))
+        {
+          MP_RETURN_IF_ERROR(GetFaceBox(image_mat, render_data));
+        }
+      }
+      else
+      {
+        RET_CHECK_EQ(kVectorTag, tag);
+        const std::vector<RenderData> &render_data_vec =
+            cc->Inputs().Get(id).Get<std::vector<RenderData>>();
+        for (const RenderData &render_data : render_data_vec)
+        {
+          MP_RETURN_IF_ERROR(FormFacePartMask(cc, image_mat, &target_format, render_data, all_masks));
+        }
+      }
+    }
+
+    // Copy the rendered masks to output.
+    MP_RETURN_IF_ERROR(RenderToCpu(cc, all_masks));
+
+    return absl::OkStatus();
+  }
+
+  absl::Status FormFaceMaskCalculator::Close(CalculatorContext *cc)
+  {
+    return absl::OkStatus();
+  }
+
+  absl::Status FormFaceMaskCalculator::RenderToCpu(CalculatorContext *cc,
+                                                   std::unordered_map<std::string, cv::Mat> &all_masks)
+  {
+    auto output_frame = absl::make_unique<std::unordered_map<std::string, cv::Mat>>(
+        all_masks, all_masks.get_allocator());
+
+    if (cc->Outputs().HasTag(kMaskTag))
+    {
+      cc->Outputs()
+          .Tag(kMaskTag)
+          .Add(output_frame.release(), cc->InputTimestamp());
+    }
+
+    auto output_frame2 = absl::make_unique<std::tuple<double, double, double, double>>(face_box);
+
+    if (cc->Outputs().HasTag(kFaceBoxTag))
+    {
+      cc->Outputs()
+          .Tag(kFaceBoxTag)
+          .Add(output_frame2.release(), cc->InputTimestamp());
+    }
+
+    all_masks.clear();
+    return absl::OkStatus();
+  }
+
+  absl::Status FormFaceMaskCalculator::CreateRenderTargetCpu(
+      CalculatorContext *cc, std::unique_ptr<cv::Mat> &image_mat,
+      ImageFormat::Format *target_format)
+  {
+    if (image_frame_available_)
+    {
+      const auto &input_frame =
+          cc->Inputs().Tag(kImageFrameTag).Get<ImageFrame>();
+
+      int target_mat_type;
+      switch (input_frame.Format())
+      {
+      case ImageFormat::SRGBA:
+        *target_format = ImageFormat::SRGBA;
+        target_mat_type = CV_8UC4;
+        break;
+      case ImageFormat::SRGB:
+        *target_format = ImageFormat::SRGB;
+        target_mat_type = CV_8UC3;
+        break;
+      case ImageFormat::GRAY8:
+        *target_format = ImageFormat::SRGB;
+        target_mat_type = CV_8UC3;
+        break;
+      default:
+        return absl::UnknownError("Unexpected image frame format.");
+      }
+
+      image_mat = absl::make_unique<cv::Mat>(
+          input_frame.Height(), input_frame.Width(), target_mat_type);
+
+      auto input_mat = formats::MatView(&input_frame);
+      if (input_frame.Format() == ImageFormat::GRAY8)
+      {
+        cv::Mat rgb_mat;
+        cv::cvtColor(input_mat, rgb_mat, CV_GRAY2RGB);
+        rgb_mat.copyTo(*image_mat);
+      }
+      else
+      {
+        input_mat.copyTo(*image_mat);
+      }
+    }
+    else
+    {
+      image_mat = absl::make_unique<cv::Mat>(
+          150, 150, CV_8UC3, cv::Scalar(255, 255, 255));
+      *target_format = ImageFormat::SRGB;
+    }
+
+    return absl::OkStatus();
+  }
+
+  absl::Status FormFaceMaskCalculator::GetFaceBox(std::unique_ptr<cv::Mat> &image_mat,
+                                                  const RenderData &render_data)
+  {
+    cv::Mat mat_image_ = *image_mat.get();
+
+    int image_width_ = image_mat->cols;
+    int image_height_ = image_mat->rows;
+
+    std::vector<double> x_s, y_s;
+    double box_min_y, box_max_y, box_max_x, box_min_x;
+
+    for (auto &annotation : render_data.render_annotations())
+    {
+      if (annotation.data_case() == RenderAnnotation::kPoint)
+      {
+        const auto &point = annotation.point();
+        int x = -1;
+        int y = -1;
+        if (point.normalized())
+        {
+          CHECK(NormalizedtoPixelCoordinates(point.x(), point.y(), image_width_,
+                                             image_height_, &x, &y));
+        }
+        else
+        {
+          x = static_cast<int>(point.x() * scale_factor_);
+          y = static_cast<int>(point.y() * scale_factor_);
+        }
+        x_s.push_back(point.x());
+        y_s.push_back(point.y());
+      }
+    }
+    cv::minMaxLoc(y_s, &box_min_y, &box_max_y);
+    cv::minMaxLoc(x_s, &box_min_x, &box_max_x);
+    box_min_y = box_min_y * 0.9;
+    face_box = std::make_tuple(box_min_x, box_min_y, box_max_x, box_max_y);
+
+    return absl::OkStatus();
+  }
+
+  absl::Status FormFaceMaskCalculator::FormFacePartMask(CalculatorContext *cc,
+                                                        std::unique_ptr<cv::Mat> &image_mat,
+                                                        ImageFormat::Format *target_format,
+                                                        const RenderData &render_data,
+                                                        std::unordered_map<std::string, cv::Mat> &all_masks)
+  {
+    cv::Mat mat_image_ = *image_mat.get();
+
+    int image_width_ = image_mat->cols;
+    int image_height_ = image_mat->rows;
+
+    std::unordered_map<std::string, std::vector<int>> orderList;
+    orderList.insert(make_pair("UPPER_LIP", UPPER_LIP));
orderList.insert(make_pair("LOWER_LIP", LOWER_LIP)); + orderList.insert(make_pair("FACE_OVAL", FACE_OVAL)); + orderList.insert(make_pair("MOUTH_INSIDE", MOUTH_INSIDE)); + orderList.insert(make_pair("LEFT_EYE", LEFT_EYE)); + orderList.insert(make_pair("RIGHT_EYE", RIGHT_EYE)); + orderList.insert(make_pair("LEFT_BROW", LEFT_BROW)); + orderList.insert(make_pair("RIGHT_BROW", RIGHT_BROW)); + orderList.insert(make_pair("LIPS", LIPS)); + orderList.insert(make_pair("PART_FOREHEAD_B", PART_FOREHEAD_B)); + + cv::Mat mask; + std::vector point_array; + int c = 0; + for (const auto &[key, value] : orderList) + { + for (auto order : value) + { + c = 0; + for (auto &annotation : render_data.render_annotations()) + { + if (annotation.data_case() == RenderAnnotation::kPoint) + { + if (order == c) + { + const auto &point = annotation.point(); + int x = -1; + int y = -1; + CHECK(NormalizedtoPixelCoordinates(point.x(), point.y(), image_width_, + image_height_, &x, &y)); + point_array.push_back(cv::Point(x, y)); + } + c += 1; + } + } + } + std::vector> point_vec; + point_vec.push_back(point_array); + mask = cv::Mat::zeros(mat_image_.size(), CV_32FC1); + cv::fillPoly(mask, point_vec, cv::Scalar::all(255), cv::LINE_AA); + mask.convertTo(mask, CV_8U); + all_masks.insert(make_pair(key, mask)); + point_vec.clear(); + point_array.clear(); + } + + return absl::OkStatus(); + } + +} // namespace mediapipe diff --git a/mediapipe/calculators/beauty/smooth_face_calculator.cc b/mediapipe/calculators/beauty/smooth_face_calculator.cc new file mode 100644 index 000000000..f63abad09 --- /dev/null +++ b/mediapipe/calculators/beauty/smooth_face_calculator.cc @@ -0,0 +1,456 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include +#include +//#include + +#include + +#include "absl/strings/str_cat.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/calculator_options.pb.h" +#include "mediapipe/framework/formats/image_format.pb.h" +#include "mediapipe/framework/formats/image_frame.h" +#include "mediapipe/framework/formats/image_frame_opencv.h" +#include "mediapipe/framework/formats/video_stream_header.h" +#include "mediapipe/framework/port/logging.h" +#include "mediapipe/framework/port/opencv_core_inc.h" +#include "mediapipe/framework/port/opencv_imgproc_inc.h" +#include "mediapipe/framework/port/status.h" +#include "mediapipe/util/annotation_renderer.h" +#include "mediapipe/util/render_data.pb.h" +#include "mediapipe/framework/port/logging.h" +#include "mediapipe/framework/port/vector.h" +#include "mediapipe/util/color.pb.h" + +namespace mediapipe +{ + namespace + { + + constexpr char kMaskTag[] = "MASK"; + constexpr char kFaceBoxTag[] = "FACEBOX"; + constexpr char kImageFrameTag[] = "IMAGE"; + + enum + { + ATTRIB_VERTEX, + ATTRIB_TEXTURE_POSITION, + NUM_ATTRIBUTES + }; + + // Round up n to next multiple of m. 
+    size_t RoundUp(size_t n, size_t m) { return ((n + m - 1) / m) * m; } // NOLINT
+    inline bool HasImageTag(mediapipe::CalculatorContext *cc) { return false; }
+
+    using Point = RenderAnnotation::Point;
+
+  } // namespace
+
+  class SmoothFaceCalculator : public CalculatorBase
+  {
+  public:
+    SmoothFaceCalculator() = default;
+    ~SmoothFaceCalculator() override = default;
+
+    static absl::Status GetContract(CalculatorContract *cc);
+
+    // From Calculator.
+    absl::Status Open(CalculatorContext *cc) override;
+    absl::Status Process(CalculatorContext *cc) override;
+    absl::Status Close(CalculatorContext *cc) override;
+
+  private:
+    absl::Status CreateRenderTargetCpu(CalculatorContext *cc,
+                                       std::unique_ptr<cv::Mat> &image_mat,
+                                       ImageFormat::Format *target_format);
+
+    absl::Status RenderToCpu(
+        CalculatorContext *cc, const ImageFormat::Format &target_format,
+        uchar *data_image, std::unique_ptr<cv::Mat> &image_mat);
+
+    absl::Status SmoothFace(CalculatorContext *cc, std::unique_ptr<cv::Mat> &image_mat,
+                            ImageFormat::Format *target_format,
+                            const std::unordered_map<std::string, cv::Mat> &mask_vec,
+                            const std::tuple<double, double, double, double> &face_box);
+
+    cv::Mat predict_forehead_mask(std::unique_ptr<cv::Mat> &image_mat,
+                                  const std::unordered_map<std::string, cv::Mat> &mask_vec,
+                                  double face_box_min_y);
+
+    // Indicates if image frame is available as input.
+    bool image_frame_available_ = false;
+    std::unordered_map<std::string, cv::Mat> all_masks;
+    int width_ = 0;
+    int height_ = 0;
+    int width_canvas_ = 0; // Size of overlay drawing texture canvas.
+    int height_canvas_ = 0;
+  };
+  REGISTER_CALCULATOR(SmoothFaceCalculator);
+
+  absl::Status SmoothFaceCalculator::GetContract(CalculatorContract *cc)
+  {
+    CHECK_GE(cc->Inputs().NumEntries(), 1);
+
+    if (cc->Inputs().HasTag(kImageFrameTag))
+    {
+      cc->Inputs().Tag(kImageFrameTag).Set<ImageFrame>();
+      CHECK(cc->Outputs().HasTag(kImageFrameTag));
+    }
+
+    // Data streams to render.
+    for (CollectionItemId id = cc->Inputs().BeginId(); id < cc->Inputs().EndId();
+         ++id)
+    {
+      auto tag_and_index = cc->Inputs().TagAndIndexFromId(id);
+      std::string tag = tag_and_index.first;
+      if (tag == kMaskTag)
+      {
+        cc->Inputs().Get(id).Set<std::unordered_map<std::string, cv::Mat>>();
+      }
+      else if (tag.empty())
+      {
+        // Empty tag defaults to accepting a single object of Mat type.
+        cc->Inputs().Get(id).Set<cv::Mat>();
+      }
+
+      if (tag == kFaceBoxTag)
+      {
+        cc->Inputs().Get(id).Set<std::tuple<double, double, double, double>>();
+      }
+    }
+
+    if (cc->Outputs().HasTag(kImageFrameTag))
+    {
+      cc->Outputs().Tag(kImageFrameTag).Set<ImageFrame>();
+    }
+
+    return absl::OkStatus();
+  }
+
+  absl::Status SmoothFaceCalculator::Open(CalculatorContext *cc)
+  {
+    cc->SetOffset(TimestampDiff(0));
+
+    if (cc->Inputs().HasTag(kImageFrameTag) || HasImageTag(cc))
+    {
+      image_frame_available_ = true;
+    }
+
+    // Set the output header based on the input header (if present).
+    const char *tag = kImageFrameTag;
+    if (image_frame_available_ && !cc->Inputs().Tag(tag).Header().IsEmpty())
+    {
+      const auto &input_header =
+          cc->Inputs().Tag(tag).Header().Get<VideoHeader>();
+      auto *output_video_header = new VideoHeader(input_header);
+      cc->Outputs().Tag(tag).SetHeader(Adopt(output_video_header));
+    }
+
+    return absl::OkStatus();
+  }
+
+  absl::Status SmoothFaceCalculator::Process(CalculatorContext *cc)
+  {
+    if (cc->Inputs().HasTag(kImageFrameTag) &&
+        cc->Inputs().Tag(kImageFrameTag).IsEmpty())
+    {
+      return absl::OkStatus();
+    }
+    if (cc->Inputs().HasTag(kMaskTag) &&
+        cc->Inputs().Tag(kMaskTag).IsEmpty())
+    {
+      return absl::OkStatus();
+    }
+
+    if (cc->Inputs().HasTag(kFaceBoxTag) &&
+        cc->Inputs().Tag(kFaceBoxTag).IsEmpty())
+    {
+      return absl::OkStatus();
+    }
+
+    // Initialize render target, drawn with OpenCV.
+    std::unique_ptr<cv::Mat> image_mat;
+    ImageFormat::Format target_format;
+
+    if (cc->Outputs().HasTag(kImageFrameTag))
+    {
+      MP_RETURN_IF_ERROR(CreateRenderTargetCpu(cc, image_mat, &target_format));
+    }
+
+    // Render streams onto render target.
+    for (CollectionItemId id = cc->Inputs().BeginId(); id < cc->Inputs().EndId();
+         ++id)
+    {
+      auto tag_and_index = cc->Inputs().TagAndIndexFromId(id);
+      std::string tag = tag_and_index.first;
+      if (!tag.empty() && tag != kMaskTag && tag != kFaceBoxTag)
+      {
+        continue;
+      }
+      if (cc->Inputs().Get(id).IsEmpty())
+      {
+        continue;
+      }
+      if (tag != kMaskTag)
+      {
+        continue;
+      }
+
+      const std::unordered_map<std::string, cv::Mat> &mask_vec =
+          cc->Inputs().Get(id).Get<std::unordered_map<std::string, cv::Mat>>();
+
+      const std::tuple<double, double, double, double> &face_box =
+          cc->Inputs().Tag(kFaceBoxTag).Get<std::tuple<double, double, double, double>>();
+
+      if (mask_vec.size() > 1)
+        MP_RETURN_IF_ERROR(SmoothFace(cc, image_mat, &target_format, mask_vec, face_box));
+    }
+    // Copy the rendered image to output.
+    uchar *image_mat_ptr = image_mat->data;
+    MP_RETURN_IF_ERROR(RenderToCpu(cc, target_format, image_mat_ptr, image_mat));
+
+    return absl::OkStatus();
+  }
+
+  absl::Status SmoothFaceCalculator::Close(CalculatorContext *cc)
+  {
+    return absl::OkStatus();
+  }
+
+  absl::Status SmoothFaceCalculator::RenderToCpu(
+      CalculatorContext *cc, const ImageFormat::Format &target_format,
+      uchar *data_image, std::unique_ptr<cv::Mat> &image_mat)
+  {
+    cv::Mat mat_image__ = *image_mat.get();
+
+    auto output_frame = absl::make_unique<ImageFrame>(
+        target_format, mat_image__.cols, mat_image__.rows);
+
+    output_frame->CopyPixelData(target_format, mat_image__.cols, mat_image__.rows, data_image,
+                                ImageFrame::kDefaultAlignmentBoundary);
+
+    if (cc->Outputs().HasTag(kImageFrameTag))
+    {
+      cc->Outputs()
+          .Tag(kImageFrameTag)
+          .Add(output_frame.release(), cc->InputTimestamp());
+    }
+
+    return absl::OkStatus();
+  }
+
+  absl::Status SmoothFaceCalculator::CreateRenderTargetCpu(
+      CalculatorContext *cc, std::unique_ptr<cv::Mat> &image_mat,
+      ImageFormat::Format *target_format)
+  {
+    if (image_frame_available_)
+    {
+      const auto &input_frame =
+          cc->Inputs().Tag(kImageFrameTag).Get<ImageFrame>();
+
+      int target_mat_type;
+      switch (input_frame.Format())
+      {
+      case ImageFormat::SRGBA:
+        *target_format = ImageFormat::SRGBA;
+        target_mat_type = CV_8UC4;
+        break;
+      case ImageFormat::SRGB:
+        *target_format = ImageFormat::SRGB;
+        target_mat_type = CV_8UC3;
+        break;
+      case ImageFormat::GRAY8:
+        *target_format = ImageFormat::SRGB;
+        target_mat_type = CV_8UC3;
+        break;
+      default:
+        return absl::UnknownError("Unexpected image frame format.");
+      }
+
+      image_mat = absl::make_unique<cv::Mat>(
+          input_frame.Height(), input_frame.Width(), target_mat_type);
+
+      auto input_mat = formats::MatView(&input_frame);
+
+      if (input_frame.Format() == ImageFormat::GRAY8)
+      {
+        cv::Mat rgb_mat;
+        cv::cvtColor(input_mat, rgb_mat, CV_GRAY2RGB);
+        rgb_mat.copyTo(*image_mat);
+      }
+      else
+      {
+        input_mat.copyTo(*image_mat);
+      }
+    }
+    else
+    {
+      image_mat = absl::make_unique<cv::Mat>(
+          150, 150, CV_8UC4, cv::Scalar(255, 255, 255));
+      *target_format = ImageFormat::SRGBA;
+    }
+
+    return absl::OkStatus();
+  }
+
+  cv::Mat SmoothFaceCalculator::predict_forehead_mask(std::unique_ptr<cv::Mat> &image_mat,
+                                                      const std::unordered_map<std::string, cv::Mat> &mask_vec,
+                                                      double face_box_min_y)
+  {
+    cv::Mat mat_image__ = *image_mat.get();
+    int image_width_ = image_mat->cols;
+    int image_height_ = image_mat->rows;
+
+    cv::Mat part_forehead_mask = mask_vec.find("PART_FOREHEAD_B")->second.clone();
+    part_forehead_mask.convertTo(part_forehead_mask, CV_32F, 1.0 / 255);
+    part_forehead_mask.convertTo(part_forehead_mask, CV_8U);
+
+    cv::Mat image_sm, image_sm_hsv, skinMask;
+
+    cv::resize(mat_image__, image_sm, cv::Size(mat_image__.size().width, mat_image__.size().height));
+    cv::cvtColor(image_sm, image_sm_hsv, cv::COLOR_BGR2HSV);
+
+    std::vector<int> x, y;
+    std::vector<cv::Point> location;
+
+    cv::Vec3d hsv_min, hsv_max;
+
+    std::vector<cv::Mat> channels(3);
+    cv::split(image_sm_hsv, channels);
+    std::vector<std::vector<double>> minx(3), maxx(3);
+    int c = 0;
+    for (auto ch : channels)
+    {
+      cv::Mat row, mask_row;
+      double min, max;
+      for (int i = 0; i < ch.rows; i++)
+      {
+        row = ch.row(i);
+        mask_row = part_forehead_mask.row(i);
+        cv::minMaxLoc(row, &min, &max, 0, 0, mask_row);
+        minx[c].push_back(min);
+        maxx[c].push_back(max);
+      }
+      c++;
+    }
+    for (int i = 0; i < 3; i++)
+    {
+      hsv_min[i] = *std::min_element(minx[i].begin(), minx[i].end());
+    }
+    for (int i = 0; i < 3; i++)
+    {
+      hsv_max[i] = *std::max_element(maxx[i].begin(), maxx[i].end());
+    }
+
+    cv::Mat _forehead_kernel = cv::getStructuringElement(cv::MORPH_ELLIPSE, cv::Size(1, 1));
+    cv::inRange(image_sm_hsv, hsv_min, hsv_max, skinMask);
+    cv::erode(skinMask, skinMask, _forehead_kernel, cv::Point(-1, -1), 2);
+    cv::dilate(skinMask, skinMask, _forehead_kernel, cv::Point(-1, -1), 2);
+    skinMask.convertTo(skinMask, CV_8U, 1.0 / 255);
+
+    cv::findNonZero(skinMask, location);
+
+    double max_part_f, x_min_part, x_max_part;
+
+    for (auto &i : location)
+    {
+      x.push_back(i.x);
+      y.push_back(i.y);
+    }
+
+    cv::minMaxLoc(y, NULL, &max_part_f);
+    cv::minMaxLoc(x, &x_min_part, &x_max_part);
+
+    cv::Mat new_skin_mask = cv::Mat::zeros(skinMask.size(), CV_8U);
+
+    skinMask(cv::Range(face_box_min_y, max_part_f), cv::Range(x_min_part, x_max_part))
+        .copyTo(new_skin_mask(cv::Range(face_box_min_y, max_part_f), cv::Range(x_min_part, x_max_part)));
+
+    return new_skin_mask;
+  }
+
+  absl::Status SmoothFaceCalculator::SmoothFace(CalculatorContext *cc,
+                                                std::unique_ptr<cv::Mat> &image_mat,
+                                                ImageFormat::Format *target_format,
+                                                const std::unordered_map<std::string, cv::Mat> &mask_vec,
+                                                const std::tuple<double, double, double, double> &face_box)
+  {
+    cv::Mat mat_image__ = *image_mat.get();
+
+    int image_width_ = image_mat->cols;
+    int image_height_ = image_mat->rows;
+
+    cv::Mat not_full_face = mask_vec.find("FACE_OVAL")->second.clone() +
+                            predict_forehead_mask(image_mat, mask_vec, std::get<1>(face_box)) -
+                            mask_vec.find("LEFT_EYE")->second.clone() -
+                            mask_vec.find("RIGHT_EYE")->second.clone() -
+                            mask_vec.find("LEFT_BROW")->second.clone() -
+                            mask_vec.find("RIGHT_BROW")->second.clone() -
+                            mask_vec.find("LIPS")->second.clone();
+
+    cv::resize(not_full_face,
+               not_full_face,
+               mat_image__.size(), 0, 0,
+               cv::INTER_LINEAR);
+
+    std::vector<int> x, y;
+    std::vector<cv::Point> location;
+
+    cv::findNonZero(not_full_face, location);
+
+    double min_y, min_x, max_x, max_y;
+
+    for (auto &i : location)
+    {
+      x.push_back(i.x);
+      y.push_back(i.y);
+    }
+
+    cv::minMaxLoc(x, &min_x, &max_x);
+    cv::minMaxLoc(y, &min_y, &max_y);
+
+    cv::Mat patch_face = mat_image__(cv::Range(min_y, max_y), cv::Range(min_x, max_x));
+    cv::Mat patch_nff = not_full_face(cv::Range(min_y, max_y), cv::Range(min_x, max_x));
+    cv::Mat patch_new, patch_wow;
+    cv::cvtColor(patch_face, patch_wow, cv::COLOR_RGBA2RGB);
+    cv::bilateralFilter(patch_wow, patch_new, 12, 50, 50);
+
+    cv::Mat patch_new_nff, patch_new_mask, patch, patch_face_nff;
+
+    patch_new.copyTo(patch_new_nff, patch_nff);
+
+    patch_face.copyTo(patch_face_nff, patch_nff);
+    cv::cvtColor(patch_face_nff, patch_face_nff, cv::COLOR_RGBA2RGB);
+
+    patch_new_mask = 0.85 * patch_new_nff + 0.15 * patch_face_nff;
+
+    patch = cv::min(255, patch_new_mask);
+
+    cv::cvtColor(patch, patch, cv::COLOR_RGB2RGBA);
+
+    patch.copyTo(patch_face, patch_nff);
+
+    return absl::OkStatus();
+  }
+
+} // namespace mediapipe
diff --git a/mediapipe/calculators/beauty/whiten_teeth_calculator.cc b/mediapipe/calculators/beauty/whiten_teeth_calculator.cc
new file mode 100644
index 000000000..34eaafb17
--- /dev/null
+++ b/mediapipe/calculators/beauty/whiten_teeth_calculator.cc
@@ -0,0 +1,382 @@
+// Copyright 2019 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <cmath>
+
+#include <algorithm>
+#include <memory>
+#include <vector>
+//#include <android/log.h>
+
+#include <unordered_map>
+
+#include "absl/strings/str_cat.h"
+#include "mediapipe/framework/calculator_framework.h"
+#include "mediapipe/framework/calculator_options.pb.h"
+#include "mediapipe/framework/formats/image_format.pb.h"
+#include "mediapipe/framework/formats/image_frame.h"
+#include "mediapipe/framework/formats/image_frame_opencv.h"
+#include "mediapipe/framework/formats/video_stream_header.h"
+#include "mediapipe/framework/port/logging.h"
+#include "mediapipe/framework/port/opencv_core_inc.h"
+#include "mediapipe/framework/port/opencv_imgproc_inc.h"
+#include "mediapipe/framework/port/status.h"
+#include "mediapipe/util/annotation_renderer.h"
+#include "mediapipe/util/render_data.pb.h"
+#include "mediapipe/framework/port/vector.h"
+#include "mediapipe/util/color.pb.h"
+
+namespace mediapipe
+{
+  namespace
+  {
+
+    constexpr char kMaskTag[] = "MASK";
+    constexpr char kImageFrameTag[] = "IMAGE";
+
+    enum
+    {
+      ATTRIB_VERTEX,
+      ATTRIB_TEXTURE_POSITION,
+      NUM_ATTRIBUTES
+    };
+
+    // Round up n to next multiple of m.
+    size_t RoundUp(size_t n, size_t m) { return ((n + m - 1) / m) * m; } // NOLINT
+    inline bool HasImageTag(mediapipe::CalculatorContext *cc) { return false; }
+
+    using Point = RenderAnnotation::Point;
+
+  } // namespace
+
+  class WhitenTeethCalculator : public CalculatorBase
+  {
+  public:
+    WhitenTeethCalculator() = default;
+    ~WhitenTeethCalculator() override = default;
+
+    static absl::Status GetContract(CalculatorContract *cc);
+
+    // From Calculator.
+    absl::Status Open(CalculatorContext *cc) override;
+    absl::Status Process(CalculatorContext *cc) override;
+    absl::Status Close(CalculatorContext *cc) override;
+
+  private:
+    absl::Status CreateRenderTargetCpu(CalculatorContext *cc,
+                                       std::unique_ptr<cv::Mat> &image_mat,
+                                       ImageFormat::Format *target_format);
+
+    absl::Status RenderToCpu(
+        CalculatorContext *cc, const ImageFormat::Format &target_format,
+        uchar *data_image, std::unique_ptr<cv::Mat> &image_mat);
+
+    absl::Status WhitenTeeth(CalculatorContext *cc, std::unique_ptr<cv::Mat> &image_mat,
+                             ImageFormat::Format *target_format,
+                             const std::unordered_map<std::string, cv::Mat> &mask_vec);
+
+    // Indicates if image frame is available as input.
+    bool image_frame_available_ = false;
+    std::unordered_map<std::string, cv::Mat> all_masks;
+    int width_ = 0;
+    int height_ = 0;
+    int width_canvas_ = 0; // Size of overlay drawing texture canvas.
+    int height_canvas_ = 0;
+  };
+  REGISTER_CALCULATOR(WhitenTeethCalculator);
+
+  absl::Status WhitenTeethCalculator::GetContract(CalculatorContract *cc)
+  {
+    CHECK_GE(cc->Inputs().NumEntries(), 1);
+
+    if (cc->Inputs().HasTag(kImageFrameTag))
+    {
+      cc->Inputs().Tag(kImageFrameTag).Set<ImageFrame>();
+      CHECK(cc->Outputs().HasTag(kImageFrameTag));
+    }
+
+    // Data streams to render.
+    for (CollectionItemId id = cc->Inputs().BeginId(); id < cc->Inputs().EndId();
+         ++id)
+    {
+      auto tag_and_index = cc->Inputs().TagAndIndexFromId(id);
+      std::string tag = tag_and_index.first;
+      if (tag == kMaskTag)
+      {
+        cc->Inputs().Get(id).Set<std::unordered_map<std::string, cv::Mat>>();
+      }
+      else if (tag.empty())
+      {
+        // Empty tag defaults to accepting a single object of Mat type.
+        cc->Inputs().Get(id).Set<cv::Mat>();
+      }
+    }
+
+    if (cc->Outputs().HasTag(kImageFrameTag))
+    {
+      cc->Outputs().Tag(kImageFrameTag).Set<ImageFrame>();
+    }
+
+    return absl::OkStatus();
+  }
+
+  absl::Status WhitenTeethCalculator::Open(CalculatorContext *cc)
+  {
+    cc->SetOffset(TimestampDiff(0));
+
+    if (cc->Inputs().HasTag(kImageFrameTag) || HasImageTag(cc))
+    {
+      image_frame_available_ = true;
+    }
+
+    // Set the output header based on the input header (if present).
+    const char *tag = kImageFrameTag;
+    if (image_frame_available_ && !cc->Inputs().Tag(tag).Header().IsEmpty())
+    {
+      const auto &input_header =
+          cc->Inputs().Tag(tag).Header().Get<VideoHeader>();
+      auto *output_video_header = new VideoHeader(input_header);
+      cc->Outputs().Tag(tag).SetHeader(Adopt(output_video_header));
+    }
+
+    return absl::OkStatus();
+  }
+
+  absl::Status WhitenTeethCalculator::Process(CalculatorContext *cc)
+  {
+    if (cc->Inputs().HasTag(kImageFrameTag) &&
+        cc->Inputs().Tag(kImageFrameTag).IsEmpty())
+    {
+      return absl::OkStatus();
+    }
+    if (cc->Inputs().HasTag(kMaskTag) &&
+        cc->Inputs().Tag(kMaskTag).IsEmpty())
+    {
+      return absl::OkStatus();
+    }
+
+    // Initialize render target, drawn with OpenCV.
+    std::unique_ptr<cv::Mat> image_mat;
+    ImageFormat::Format target_format;
+
+    if (cc->Outputs().HasTag(kImageFrameTag))
+    {
+      MP_RETURN_IF_ERROR(CreateRenderTargetCpu(cc, image_mat, &target_format));
+    }
+
+    // Render streams onto render target.
+    for (CollectionItemId id = cc->Inputs().BeginId(); id < cc->Inputs().EndId();
+         ++id)
+    {
+      auto tag_and_index = cc->Inputs().TagAndIndexFromId(id);
+      std::string tag = tag_and_index.first;
+      if (!tag.empty() && tag != kMaskTag)
+      {
+        continue;
+      }
+      if (cc->Inputs().Get(id).IsEmpty())
+      {
+        continue;
+      }
+
+      RET_CHECK_EQ(kMaskTag, tag);
+      const std::unordered_map<std::string, cv::Mat> &mask_vec =
+          cc->Inputs().Get(id).Get<std::unordered_map<std::string, cv::Mat>>();
+      if (mask_vec.size() > 1)
+        MP_RETURN_IF_ERROR(WhitenTeeth(cc, image_mat, &target_format, mask_vec));
+    }
+
+    // Copy the rendered image to output.
+    uchar *image_mat_ptr = image_mat->data;
+    MP_RETURN_IF_ERROR(RenderToCpu(cc, target_format, image_mat_ptr, image_mat));
+
+    return absl::OkStatus();
+  }
+
+  absl::Status WhitenTeethCalculator::Close(CalculatorContext *cc)
+  {
+    return absl::OkStatus();
+  }
+
+  absl::Status WhitenTeethCalculator::RenderToCpu(
+      CalculatorContext *cc, const ImageFormat::Format &target_format,
+      uchar *data_image, std::unique_ptr<cv::Mat> &image_mat)
+  {
+    cv::Mat mat_image_ = *image_mat.get();
+
+    auto output_frame = absl::make_unique<ImageFrame>(
+        target_format, mat_image_.cols, mat_image_.rows);
+
+    output_frame->CopyPixelData(target_format, mat_image_.cols, mat_image_.rows, data_image,
+                                ImageFrame::kDefaultAlignmentBoundary);
+
+    if (cc->Outputs().HasTag(kImageFrameTag))
+    {
+      cc->Outputs()
+          .Tag(kImageFrameTag)
+          .Add(output_frame.release(), cc->InputTimestamp());
+    }
+
+    return absl::OkStatus();
+  }
+
+  absl::Status WhitenTeethCalculator::CreateRenderTargetCpu(
+      CalculatorContext *cc, std::unique_ptr<cv::Mat> &image_mat,
+      ImageFormat::Format *target_format)
+  {
+    if (image_frame_available_)
+    {
+      const auto &input_frame =
+          cc->Inputs().Tag(kImageFrameTag).Get<ImageFrame>();
+
+      int target_mat_type;
+      switch (input_frame.Format())
+      {
+      case ImageFormat::SRGBA:
+        *target_format = ImageFormat::SRGBA;
+        target_mat_type = CV_8UC4;
+        break;
+      case ImageFormat::SRGB:
+        *target_format = ImageFormat::SRGB;
+        target_mat_type = CV_8UC3;
+        break;
+      case ImageFormat::GRAY8:
+        *target_format = ImageFormat::SRGB;
+        target_mat_type = CV_8UC3;
+        break;
+      default:
+        return absl::UnknownError("Unexpected image frame format.");
+      }
+
+      image_mat = absl::make_unique<cv::Mat>(
+          input_frame.Height(), input_frame.Width(), target_mat_type);
+
+      auto input_mat = formats::MatView(&input_frame);
+
+      if (input_frame.Format() == ImageFormat::GRAY8)
+      {
+        cv::Mat rgb_mat;
+        cv::cvtColor(input_mat, rgb_mat, CV_GRAY2RGB);
+        rgb_mat.copyTo(*image_mat);
+      }
+      else
+      {
+        input_mat.copyTo(*image_mat);
+      }
+    }
+    else
+    {
+      image_mat = absl::make_unique<cv::Mat>(
+          150, 150, CV_8UC4, cv::Scalar(255, 255, 255));
+      *target_format = ImageFormat::SRGBA;
+    }
+
+    return absl::OkStatus();
+  }
+
+  absl::Status WhitenTeethCalculator::WhitenTeeth(CalculatorContext *cc,
+                                                  std::unique_ptr<cv::Mat> &image_mat,
+                                                  ImageFormat::Format *target_format,
+                                                  const std::unordered_map<std::string, cv::Mat> &mask_vec)
+  {
+    cv::Mat mat_image__ = *image_mat.get();
+
+    cv::Mat mouth_mask, mouth;
+    int image_width_ = image_mat->cols;
+    int image_height_ = image_mat->rows;
+    mouth_mask = cv::Mat::zeros(mat_image__.size(), CV_32F);
+
+    mouth_mask = mask_vec.find("MOUTH_INSIDE")->second.clone();
+
+    cv::resize(mouth_mask, mouth, mat_image__.size(), 0, 0, cv::INTER_LINEAR);
+
+    std::vector<int> x, y;
+    std::vector<cv::Point> location;
+
+    cv::findNonZero(mouth, location);
+
+    for (auto &i : location)
+    {
+      x.push_back(i.x);
+      y.push_back(i.y);
+    }
+
+    if (!(x.empty()) && !(y.empty()))
+    {
+      double mouth_min_y, mouth_max_y, mouth_max_x, mouth_min_x;
+      cv::minMaxLoc(y, &mouth_min_y, &mouth_max_y);
+      cv::minMaxLoc(x, &mouth_min_x, &mouth_max_x);
+      double mh = mouth_max_y - mouth_min_y;
+      double mw = mouth_max_x - mouth_min_x;
+      cv::Mat mouth_crop_mask;
+      mouth.convertTo(mouth, CV_32F, 1.0 / 255);
+      if (mh / mw > 0.17)
+      {
+        mouth_min_y = static_cast<int>(std::max(mouth_min_y - mh * 0.1, 0.0));
+        mouth_max_y = static_cast<int>(std::min(mouth_max_y + mh * 0.1, (double)image_height_));
+        mouth_min_x = static_cast<int>(std::max(mouth_min_x - mw * 0.1, 0.0));
+        mouth_max_x = static_cast<int>(std::min(mouth_max_x + mw * 0.1,
+                                                (double)image_width_));
+        mouth_crop_mask = mouth(cv::Range(mouth_min_y, mouth_max_y), cv::Range(mouth_min_x, mouth_max_x));
+        cv::Mat img_hsv, tmp_mask, img_hls;
+        cv::cvtColor(mat_image__(cv::Range(mouth_min_y, mouth_max_y), cv::Range(mouth_min_x, mouth_max_x)), img_hsv,
+                     cv::COLOR_RGBA2RGB);
+        cv::cvtColor(img_hsv, img_hsv, cv::COLOR_RGB2HSV);
+
+        cv::Mat _mouth_erode_kernel = cv::getStructuringElement(
+            cv::MORPH_ELLIPSE, cv::Size(7, 7));
+
+        cv::erode(mouth_crop_mask * 255, tmp_mask, _mouth_erode_kernel, cv::Point(-1, -1), 3);
+        cv::GaussianBlur(tmp_mask, tmp_mask, cv::Size(51, 51), 0);
+
+        img_hsv.convertTo(img_hsv, CV_8U);
+
+        std::vector<cv::Mat> channels(3);
+        cv::split(img_hsv, channels);
+
+        cv::Mat tmp;
+        cv::multiply(channels[1], tmp_mask, tmp, 0.3, CV_8U);
+        cv::subtract(channels[1], tmp, channels[1], cv::noArray(), CV_8U);
+        channels[1] = cv::min(255, channels[1]);
+        cv::merge(channels, img_hsv);
+
+        cv::cvtColor(img_hsv, img_hsv, cv::COLOR_HSV2RGB);
+        cv::cvtColor(img_hsv, img_hls, cv::COLOR_RGB2HLS);
+
+        cv::split(img_hls, channels);
+        cv::multiply(channels[1], tmp_mask, tmp, 0.3, CV_8U);
+        cv::add(channels[1], tmp, channels[1], cv::noArray(), CV_8U);
+        channels[1] = cv::min(255, channels[1]);
+        cv::merge(channels, img_hls);
+
+        cv::cvtColor(img_hls, img_hls, cv::COLOR_HLS2RGB);
+        cv::cvtColor(img_hls, img_hls, cv::COLOR_RGB2RGBA);
+
+        cv::Mat slice = mat_image__(cv::Range(mouth_min_y, mouth_max_y), cv::Range(mouth_min_x, mouth_max_x));
+        img_hls.copyTo(slice);
+      }
+    }
+
+    return absl::OkStatus();
+  }
+
+} // namespace mediapipe
diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/beauty/BUILD b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/beauty/BUILD
new file mode 100644
index 000000000..4cbc66e50
--- /dev/null
+++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/beauty/BUILD
@@ -0,0 +1,63 @@
+# Copyright 2019 The MediaPipe Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+licenses(["notice"])
+
+package(default_visibility = ["//visibility:private"])
+
+cc_binary(
+    name = "libmediapipe_jni.so",
+    linkshared = 1,
+    linkstatic = 1,
+    deps = [
+        "//mediapipe/graphs/beauty:mobile_calculators",
+        "//mediapipe/java/com/google/mediapipe/framework/jni:mediapipe_framework_jni",
+    ],
+)
+
+cc_library(
+    name = "mediapipe_jni_lib",
+    srcs = [":libmediapipe_jni.so"],
+    alwayslink = 1,
+)
+
+android_binary(
+    name = "beautygpu",
+    srcs = glob(["*.java"]),
+    assets = [
+        "//mediapipe/graphs/beauty:beauty_mobile_gpu.binarypb",
+        "//mediapipe/modules/face_landmark:face_landmark_with_attention.tflite",
+        "//mediapipe/modules/face_detection:face_detection_short_range.tflite",
+    ],
+    assets_dir = "",
+    manifest = "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:AndroidManifest.xml",
+    manifest_values = {
+        "applicationId": "com.google.mediapipe.apps.beautygpu",
+        "appName": "Beauty",
+        "mainActivity": ".MainActivity",
+        "cameraFacingFront": "True",
+        "binaryGraphName": "beauty_mobile_gpu.binarypb",
+        "inputVideoStreamName": "input_video",
+        "outputVideoStreamName": "output_video",
+        "flipFramesVertically": "True",
+        "converterNumBuffers": "2",
+    },
+    multidex = "native",
+    deps = [
+        ":mediapipe_jni_lib",
+        "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:basic_lib",
+        "//mediapipe/framework/formats:landmark_java_proto_lite",
+        "//mediapipe/java/com/google/mediapipe/framework:android_framework",
+    ],
+)
diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/beauty/MainActivity.java b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/beauty/MainActivity.java
new file mode 100644
index 000000000..816ec14c3
--- /dev/null
+++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/beauty/MainActivity.java
@@ -0,0 +1,93 @@
+// Copyright 2019 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.mediapipe.apps.beautygpu;
+
+import android.os.Bundle;
+import android.util.Log;
+import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark;
+import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmarkList;
+import com.google.mediapipe.framework.AndroidPacketCreator;
+import com.google.mediapipe.framework.Packet;
+import com.google.mediapipe.framework.PacketGetter;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/** Main activity of MediaPipe face mesh app. */
+public class MainActivity extends com.google.mediapipe.apps.basic.MainActivity {
+  private static final String TAG = "MainActivity";
+
+  private static final String INPUT_NUM_FACES_SIDE_PACKET_NAME = "num_faces";
+  private static final String OUTPUT_LANDMARKS_STREAM_NAME = "multi_face_landmarks";
+  // Max number of faces to detect/process.
+  private static final int NUM_FACES = 1;
+
+  @Override
+  protected void onCreate(Bundle savedInstanceState) {
+    super.onCreate(savedInstanceState);
+
+    AndroidPacketCreator packetCreator = processor.getPacketCreator();
+    Map<String, Packet> inputSidePackets = new HashMap<>();
+    inputSidePackets.put(INPUT_NUM_FACES_SIDE_PACKET_NAME, packetCreator.createInt32(NUM_FACES));
+    processor.setInputSidePackets(inputSidePackets);
+
+    // To show verbose logging, run:
+    // adb shell setprop log.tag.MainActivity VERBOSE
+    if (Log.isLoggable(TAG, Log.VERBOSE)) {
+      processor.addPacketCallback(
+          OUTPUT_LANDMARKS_STREAM_NAME,
+          (packet) -> {
+            Log.v(TAG, "Received multi face landmarks packet.");
+            List<NormalizedLandmarkList> multiFaceLandmarks =
+                PacketGetter.getProtoVector(packet, NormalizedLandmarkList.parser());
+            Log.v(
+                TAG,
+                "[TS:"
+                    + packet.getTimestamp()
+                    + "] "
+                    + getMultiFaceLandmarksDebugString(multiFaceLandmarks));
+          });
+    }
+  }
+
+  private static String getMultiFaceLandmarksDebugString(
+      List<NormalizedLandmarkList> multiFaceLandmarks) {
+    if (multiFaceLandmarks.isEmpty()) {
+      return "No face landmarks";
+    }
+    String multiFaceLandmarksStr = "Number of faces detected: " + multiFaceLandmarks.size() + "\n";
+    int faceIndex = 0;
+    for (NormalizedLandmarkList landmarks : multiFaceLandmarks) {
+      multiFaceLandmarksStr +=
+          "\t#Face landmarks for face[" + faceIndex + "]: " + landmarks.getLandmarkCount() + "\n";
+      int landmarkIndex = 0;
+      for (NormalizedLandmark landmark : landmarks.getLandmarkList()) {
+        multiFaceLandmarksStr +=
+            "\t\tLandmark ["
+                + landmarkIndex
+                + "]: ("
+                + landmark.getX()
+                + ", "
+                + landmark.getY()
+                + ", "
+                + landmark.getZ()
+                + ")\n";
+        ++landmarkIndex;
+      }
+      ++faceIndex;
+    }
+    return multiFaceLandmarksStr;
+  }
+}
diff --git a/mediapipe/examples/desktop/beauty/BUILD b/mediapipe/examples/desktop/beauty/BUILD
new file mode 100644
index 000000000..e6d4f6909
--- /dev/null
+++ b/mediapipe/examples/desktop/beauty/BUILD
@@ -0,0 +1,33 @@
+# Copyright 2019 The MediaPipe Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+licenses(["notice"])
+
+package(default_visibility = ["//mediapipe/examples:__subpackages__"])
+
+cc_binary(
+    name = "face_mesh_tflite",
+    deps = [
+        "//mediapipe/examples/desktop:simple_run_graph_main",
+        "//mediapipe/graphs/beauty:desktop_calculators",
+    ],
+)
+
+cc_binary(
+    name = "face_mesh_cpu",
+    deps = [
+        "//mediapipe/examples/desktop:demo_run_graph_main",
+        "//mediapipe/graphs/beauty:desktop_live_calculators",
+    ],
+)
diff --git a/mediapipe/graphs/beauty/BUILD b/mediapipe/graphs/beauty/BUILD
new file mode 100644
index 000000000..7c191aec9
--- /dev/null
+++ b/mediapipe/graphs/beauty/BUILD
@@ -0,0 +1,71 @@
+# Copyright 2019 The MediaPipe Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +load( + "//mediapipe/framework/tool:mediapipe_graph.bzl", + "mediapipe_binary_graph", +) + +licenses(["notice"]) + +package(default_visibility = ["//visibility:public"]) + +cc_library( + name = "desktop_calculators", + deps = [ + "//mediapipe/calculators/core:constant_side_packet_calculator", + "//mediapipe/calculators/video:opencv_video_decoder_calculator", + "//mediapipe/calculators/video:opencv_video_encoder_calculator", + "//mediapipe/graphs/beauty/subgraphs:face_renderer_cpu", + "//mediapipe/modules/face_landmark:face_landmark_front_cpu", + ], +) + +cc_library( + name = "desktop_live_calculators", + deps = [ + "//mediapipe/calculators/core:constant_side_packet_calculator", + "//mediapipe/calculators/core:flow_limiter_calculator", + "//mediapipe/graphs/beauty/subgraphs:face_renderer_cpu", + "//mediapipe/modules/face_landmark:face_landmark_front_cpu", + ], +) + +cc_library( + name = "desktop_live_gpu_calculators", + deps = [ + "//mediapipe/calculators/core:constant_side_packet_calculator", + "//mediapipe/calculators/core:flow_limiter_calculator", + "//mediapipe/graphs/beauty/subgraphs:face_renderer_gpu", + "//mediapipe/modules/face_landmark:face_landmark_front_gpu", + ], +) + +cc_library( + name = "mobile_calculators", + deps = [ + "//mediapipe/gpu:gpu_buffer_to_image_frame_calculator", + "//mediapipe/gpu:image_frame_to_gpu_buffer_calculator", + "//mediapipe/calculators/core:flow_limiter_calculator", + "//mediapipe/graphs/beauty/subgraphs:face_renderer_cpu", + "//mediapipe/modules/face_landmark:face_landmark_front_gpu", + ], +) + +mediapipe_binary_graph( + name = "beauty_mobile_gpu_binary_graph", + graph = "beauty_mobile.pbtxt", + output_name = "beauty_mobile_gpu.binarypb", + deps = [":mobile_calculators"], +) diff --git a/mediapipe/graphs/beauty/beauty.pbtxt b/mediapipe/graphs/beauty/beauty.pbtxt new file mode 100644 index 000000000..2cc563424 --- /dev/null +++ b/mediapipe/graphs/beauty/beauty.pbtxt @@ -0,0 +1,66 @@ +# MediaPipe graph that performs face mesh with TensorFlow Lite on CPU. + +# Input image. (ImageFrame) +input_stream: "input_video" + +# Output image with rendered results. (ImageFrame) +output_stream: "output_video" +# Collection of detected/processed faces, each represented as a list of +# landmarks. (std::vector) +output_stream: "multi_face_landmarks" + +# Throttles the images flowing downstream for flow control. It passes through +# the very first incoming image unaltered, and waits for downstream nodes +# (calculators and subgraphs) in the graph to finish their tasks before it +# passes through another image. All images that come in while waiting are +# dropped, limiting the number of in-flight images in most part of the graph to +# 1. This prevents the downstream nodes from queuing up incoming images and data +# excessively, which leads to increased latency and memory usage, unwanted in +# real-time mobile applications. It also eliminates unnecessarily computation, +# e.g., the output produced by a node may get dropped downstream if the +# subsequent nodes are still busy processing previous inputs. 
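+# The policy above can be restated as a plain C++ sketch, independent of the
+# MediaPipe API (names invented; at most one frame in flight, later frames
+# are dropped rather than queued):
+#
+#   #include <atomic>
+#
+#   class FrameThrottle {
+#    public:
+#     bool TryAcquire() {  // Called when a frame arrives: forward or drop?
+#       bool expected = false;
+#       // Succeeds only when no frame is currently in flight.
+#       return in_flight_.compare_exchange_strong(expected, true);
+#     }
+#     void Release() { in_flight_.store(false); }  // The FINISHED back edge.
+#    private:
+#     std::atomic<bool> in_flight_{false};
+#   };
+#
+# The FINISHED stream below is declared as a back edge so the scheduler does
+# not wait on it for the very first frame; without it, the flow limiter and
+# the downstream renderer would each wait for the other's first packet.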
+node { + calculator: "FlowLimiterCalculator" + input_stream: "input_video" + input_stream: "FINISHED:output_video" + input_stream_info: { + tag_index: "FINISHED" + back_edge: true + } + output_stream: "throttled_input_video" +} + +# Defines side packets for further use in the graph. +node { + calculator: "ConstantSidePacketCalculator" + output_side_packet: "PACKET:0:num_faces" + output_side_packet: "PACKET:1:with_attention" + node_options: { + [type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: { + packet { int_value: 1 } + packet { bool_value: true } + } + } +} + +# Subgraph that detects faces and corresponding landmarks. +node { + calculator: "FaceLandmarkFrontCpu" + input_stream: "IMAGE:throttled_input_video" + input_side_packet: "NUM_FACES:num_faces" + input_side_packet: "WITH_ATTENTION:with_attention" + output_stream: "LANDMARKS:multi_face_landmarks" + output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks" + output_stream: "DETECTIONS:face_detections" + output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections" +} + +# Subgraph that renders face-landmark annotation onto the input image. +node { + calculator: "FaceRendererCpu" + input_stream: "IMAGE:throttled_input_video" + input_stream: "LANDMARKS:multi_face_landmarks" + input_stream: "NORM_RECTS:face_rects_from_landmarks" + input_stream: "DETECTIONS:face_detections" + output_stream: "IMAGE:output_video" +} diff --git a/mediapipe/graphs/beauty/beauty_mobile.pbtxt b/mediapipe/graphs/beauty/beauty_mobile.pbtxt new file mode 100644 index 000000000..8e4d262cc --- /dev/null +++ b/mediapipe/graphs/beauty/beauty_mobile.pbtxt @@ -0,0 +1,82 @@ +# MediaPipe graph that performs face mesh with TensorFlow Lite on GPU. + +# GPU buffer. (GpuBuffer) +input_stream: "input_video" + +# Max number of faces to detect/process. (int) +input_side_packet: "num_faces" + +# Output image with rendered results. (GpuBuffer) +output_stream: "output_video" +# Collection of detected/processed faces, each represented as a list of +# landmarks. (std::vector) +output_stream: "multi_face_landmarks" + +# Throttles the images flowing downstream for flow control. It passes through +# the very first incoming image unaltered, and waits for downstream nodes +# (calculators and subgraphs) in the graph to finish their tasks before it +# passes through another image. All images that come in while waiting are +# dropped, limiting the number of in-flight images in most part of the graph to +# 1. This prevents the downstream nodes from queuing up incoming images and data +# excessively, which leads to increased latency and memory usage, unwanted in +# real-time mobile applications. It also eliminates unnecessarily computation, +# e.g., the output produced by a node may get dropped downstream if the +# subsequent nodes are still busy processing previous inputs. +node { + calculator: "FlowLimiterCalculator" + input_stream: "input_video" + input_stream: "FINISHED:output_video" + input_stream_info: { + tag_index: "FINISHED" + back_edge: true + } + output_stream: "throttled_input_video" +} + +# Defines side packets for further use in the graph. +node { + calculator: "ConstantSidePacketCalculator" + output_side_packet: "PACKET:with_attention" + node_options: { + [type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: { + packet { bool_value: true } + } + } +} + +# Subgraph that detects faces and corresponding landmarks. 
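+# NUM_FACES arrives from the application as the "num_faces" input side packet
+# (MainActivity populates it), while WITH_ATTENTION is produced by the
+# ConstantSidePacketCalculator above.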
+node { + calculator: "FaceLandmarkFrontGpu" + input_stream: "IMAGE:throttled_input_video" + input_side_packet: "NUM_FACES:num_faces" + input_side_packet: "WITH_ATTENTION:with_attention" + output_stream: "LANDMARKS:multi_face_landmarks" + output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks" + output_stream: "DETECTIONS:face_detections" + output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections" +} + +# Defines side packets for further use in the graph. +node { + calculator: "GpuBufferToImageFrameCalculator" + input_stream: "throttled_input_video" + output_stream: "throttled_input_video_cpu" +} + + +# Subgraph that renders face-landmark annotation onto the input image. +node { + calculator: "FaceRendererCpu" + input_stream: "IMAGE:throttled_input_video_cpu" + input_stream: "LANDMARKS:multi_face_landmarks" + input_stream: "NORM_RECTS:face_rects_from_landmarks" + input_stream: "DETECTIONS:face_detections" + output_stream: "IMAGE:output_video_cpu" +} + +# Defines side packets for further use in the graph. +node { + calculator: "ImageFrameToGpuBufferCalculator" + input_stream: "output_video_cpu" + output_stream: "output_video" +} diff --git a/mediapipe/graphs/beauty/calculators/BUILD b/mediapipe/graphs/beauty/calculators/BUILD new file mode 100644 index 000000000..3bebfc9c8 --- /dev/null +++ b/mediapipe/graphs/beauty/calculators/BUILD @@ -0,0 +1,37 @@ +# Copyright 2020 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +licenses(["notice"]) + +package(default_visibility = ["//visibility:public"]) + +cc_library( + name = "face_landmarks_to_render_data_calculator", + srcs = ["face_landmarks_to_render_data_calculator.cc"], + visibility = ["//visibility:public"], + deps = [ + "//mediapipe/calculators/util:landmarks_to_render_data_calculator", + "//mediapipe/calculators/util:landmarks_to_render_data_calculator_cc_proto", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework:calculator_options_cc_proto", + "//mediapipe/framework/formats:landmark_cc_proto", + "//mediapipe/framework/formats:location_data_cc_proto", + "//mediapipe/framework/port:ret_check", + "//mediapipe/util:color_cc_proto", + "//mediapipe/util:render_data_cc_proto", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", + ], + alwayslink = 1, +) diff --git a/mediapipe/graphs/beauty/calculators/annotation_renderer.cc b/mediapipe/graphs/beauty/calculators/annotation_renderer.cc new file mode 100644 index 000000000..fcb5bf109 --- /dev/null +++ b/mediapipe/graphs/beauty/calculators/annotation_renderer.cc @@ -0,0 +1,953 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "mediapipe/util/annotation_renderer.h"
+
+#include <math.h>
+
+#include <algorithm>
+#include <tuple>
+//#include <iostream>
+
+#include "mediapipe/framework/port/logging.h"
+#include "mediapipe/framework/port/vector.h"
+#include "mediapipe/util/color.pb.h"
+
+namespace mediapipe {
+namespace {
+
+using Arrow = RenderAnnotation::Arrow;
+using FilledOval = RenderAnnotation::FilledOval;
+using FilledRectangle = RenderAnnotation::FilledRectangle;
+using FilledRoundedRectangle = RenderAnnotation::FilledRoundedRectangle;
+using Point = RenderAnnotation::Point;
+using Line = RenderAnnotation::Line;
+using GradientLine = RenderAnnotation::GradientLine;
+using Oval = RenderAnnotation::Oval;
+using Rectangle = RenderAnnotation::Rectangle;
+using RoundedRectangle = RenderAnnotation::RoundedRectangle;
+using Text = RenderAnnotation::Text;
+
+static const std::vector<int> UPPER_LIP = {61, 185, 40, 39, 37, 0, 267, 269, 270, 409, 291, 308, 415, 310, 311, 312, 13, 82, 81, 80, 191, 78};
+static const std::vector<int> LOWER_LIP = {61, 78, 95, 88, 178, 87, 14, 317, 402, 318, 324, 308, 291, 375, 321, 405, 314, 17, 84, 181, 91, 146};
+static const std::vector<int> FACE_OVAL = {10, 338, 338, 297, 297, 332, 332, 284, 284, 251, 251, 389, 389, 356, 356,
+                                           454, 454, 323, 323, 361, 361, 288, 288, 397, 397, 365, 365, 379, 379, 378,
+                                           378, 400, 400, 377, 377, 152, 152, 148, 148, 176, 176, 149, 149, 150, 150,
+                                           136, 136, 172, 172, 58, 58, 132, 132, 93, 93, 234, 234, 127, 127, 162, 162,
+                                           21, 21, 54, 54, 103, 103, 67, 67, 109, 109, 10};
+static const std::vector<int> MOUTH_INSIDE = {78, 191, 80, 81, 13, 312, 311, 310, 415, 308, 324, 318, 402, 317, 14, 87, 178, 88, 95};
+static const std::vector<int> PART_FOREHEAD_B = {21, 54, 103, 67, 109, 10, 338, 297, 332, 284, 251, 301, 293, 334, 296, 336, 9, 107, 66, 105, 63, 71};
+static const std::vector<int> LEFT_EYE = {130, 33, 246, 161, 160, 159, 157, 173, 133, 155, 154, 153, 145, 144, 163, 7};
+static const std::vector<int> RIGHT_EYE = {362, 398, 384, 385, 386, 387, 388, 466, 263, 249, 390, 373, 374, 380, 381, 382};
+static const std::vector<int> LIPS = {61, 185, 40, 39, 37, 0, 267, 269, 270, 409, 291, 375, 321, 405, 314, 17, 84, 181, 91, 146};
+static const std::vector<int> LEFT_BROW = {70, 63, 105, 66, 107, 55, 65, 52, 53, 46};
+static const std::vector<int> RIGHT_BROW = {336, 296, 334, 293, 301, 300, 283, 282, 295, 285};
+
+int ClampThickness(int thickness) {
+  constexpr int kMaxThickness = 32767;  // OpenCV MAX_THICKNESS
+  return std::clamp(thickness, 1, kMaxThickness);
+}
+
+bool NormalizedtoPixelCoordinates(double normalized_x, double normalized_y,
+                                  int image_width, int image_height, int* x_px,
+                                  int* y_px) {
+  CHECK(x_px != nullptr);
+  CHECK(y_px != nullptr);
+  CHECK_GT(image_width, 0);
+  CHECK_GT(image_height, 0);
+
+  if (normalized_x < 0 || normalized_x > 1.0 || normalized_y < 0 ||
+      normalized_y > 1.0) {
+    VLOG(1) << "Normalized coordinates must be between 0.0 and 1.0";
+  }
+
+  *x_px = static_cast<int>(round(normalized_x * image_width));
+  *y_px = static_cast<int>(round(normalized_y * image_height));
+
+  return true;
+}
+
+cv::Scalar MediapipeColorToOpenCVColor(const Color& color) {
+  return cv::Scalar(color.r(), color.g(), color.b());
+}
+
+cv::RotatedRect RectangleToOpenCVRotatedRect(int left, int top, int right,
+                                             int bottom, double rotation) {
+  return cv::RotatedRect(
+      cv::Point2f((left + right) / 2.f, (top + bottom) / 2.f),
+      cv::Size2f(right - left, bottom - top), rotation / M_PI * 180.f);
+}
+
+void cv_line2(cv::Mat& img, const cv::Point& start, const cv::Point& end,
+              const cv::Scalar& color1, const cv::Scalar& color2,
+              int thickness) {
+  cv::LineIterator iter(img, start, end, /*cv::LINE_4=*/4);
+  for (int i = 0; i < iter.count; i++, iter++) {
+    const double alpha = static_cast<double>(i) / iter.count;
+    const cv::Scalar new_color(color1 * (1.0 - alpha) + color2 * alpha);
+    const cv::Rect rect(iter.pos(), cv::Size(thickness, thickness));
+    cv::rectangle(img, rect, new_color, /*cv::FILLED=*/-1, /*cv::LINE_4=*/4);
+  }
+}
+
+} // namespace
+
+void AnnotationRenderer::RenderDataOnImage(const RenderData &render_data)
+{
+  if (render_data.render_annotations().size()){
+    DrawLipstick(render_data);
+    WhitenTeeth(render_data);
+    smooth_face(render_data);
+  }
+  else
+  {
+    LOG(FATAL) << "Received RenderData with no annotations.";
+  }
+}
+
+void AnnotationRenderer::AdoptImage(cv::Mat* input_image) {
+  image_width_ = input_image->cols;
+  image_height_ = input_image->rows;
+
+  // No pixel data copy here, only headers are copied.
+  mat_image_ = *input_image;
+}
+
+int AnnotationRenderer::GetImageWidth() const { return mat_image_.cols; }
+int AnnotationRenderer::GetImageHeight() const { return mat_image_.rows; }
+
+void AnnotationRenderer::SetFlipTextVertically(bool flip) {
+  flip_text_vertically_ = flip;
+}
+
+void AnnotationRenderer::SetScaleFactor(float scale_factor) {
+  if (scale_factor > 0.0f) scale_factor_ = std::min(scale_factor, 1.0f);
+}
+
+cv::Mat AnnotationRenderer::FormFacePartMask(std::vector<int> orderList, const RenderData &render_data)
+{
+  int c = 0;
+  std::vector<cv::Point> point_array;
+  for (auto order : orderList)
+  {
+    c = 0;
+    for (auto &annotation : render_data.render_annotations())
+    {
+      if (annotation.data_case() == RenderAnnotation::kPoint)
+      {
+        if (order == c)
+        {
+          const auto &point = annotation.point();
+          int x = -1;
+          int y = -1;
+          if (point.normalized())
+          {
+            CHECK(NormalizedtoPixelCoordinates(point.x(), point.y(), image_width_,
+                                               image_height_, &x, &y));
+          }
+          else
+          {
+            x = static_cast<int>(point.x() * scale_factor_);
+            y = static_cast<int>(point.y() * scale_factor_);
+          }
+          point_array.push_back(cv::Point(x, y));
+        }
+        c += 1;
+      }
+    }
+  }
+
+  cv::Mat mask;
+  std::vector<std::vector<cv::Point>> point;
+  point.push_back(point_array);
+  mask = cv::Mat::zeros(mat_image_.size(), CV_32F);
+
+  cv::fillPoly(mask, point, cv::Scalar::all(255), cv::LINE_AA);
+  mask.convertTo(mask, CV_8U);
+
+  return mask;
+}
+
+std::tuple<double, double, double, double> AnnotationRenderer::GetFaceBox(const RenderData &render_data)
+{
+  std::vector<double> x_s, y_s;
+  double box_min_y, box_max_y, box_max_x, box_min_x;
+
+  for (auto &annotation : render_data.render_annotations())
+  {
+    if (annotation.data_case() == RenderAnnotation::kPoint)
+    {
+      const auto &point = annotation.point();
+      int x = -1;
+      int y = -1;
+      if (point.normalized())
+      {
+        CHECK(NormalizedtoPixelCoordinates(point.x(), point.y(), image_width_,
+                                           image_height_, &x, &y));
+      }
+      else
+      {
+        x = static_cast<int>(point.x() * scale_factor_);
+        y = static_cast<int>(point.y() * scale_factor_);
+      }
+      x_s.push_back(x);
+      y_s.push_back(y);
+    }
+  }
+  cv::minMaxLoc(y_s, &box_min_y, &box_max_y);
+  cv::minMaxLoc(x_s, &box_min_x, &box_max_x);
+  box_min_y = box_min_y * 0.9;
+
+  return std::make_tuple(box_min_x, box_min_y, box_max_x, box_max_y);
+}
+
+cv::Mat
+AnnotationRenderer::predict_forehead_mask(const RenderData &render_data, double face_box_min_y)
+{
+
+  cv::Mat part_forehead_mask = AnnotationRenderer::FormFacePartMask(PART_FOREHEAD_B, render_data);
+  part_forehead_mask.convertTo(part_forehead_mask, CV_32F, 1.0 / 255);
+  part_forehead_mask.convertTo(part_forehead_mask, CV_8U);
+
+  cv::Mat image_sm, image_sm_hsv, skinMask;
+
+  cv::resize(mat_image_, image_sm, cv::Size(mat_image_.size().width, mat_image_.size().height));
+  cv::cvtColor(image_sm, image_sm_hsv, cv::COLOR_BGR2HSV);
+
+  std::vector<int> x, y;
+  std::vector<cv::Point> location;
+
+  cv::Vec3d hsv_min, hsv_max;
+
+  std::vector<cv::Mat> channels(3);
+  cv::split(image_sm_hsv, channels);
+  std::vector<std::vector<double>> minx(3), maxx(3);
+  int c = 0;
+  for (auto ch : channels)
+  {
+    cv::Mat row, mask_row;
+    double min, max;
+    for (int i = 0; i < ch.rows; i++)
+    {
+      row = ch.row(i);
+      mask_row = part_forehead_mask.row(i);
+      cv::minMaxLoc(row, &min, &max, 0, 0, mask_row);
+      minx[c].push_back(min);
+      maxx[c].push_back(max);
+    }
+    c++;
+  }
+  for (int i = 0; i < 3; i++)
+  {
+    hsv_min[i] = *std::min_element(minx[i].begin(), minx[i].end());
+  }
+  for (int i = 0; i < 3; i++)
+  {
+    hsv_max[i] = *std::max_element(maxx[i].begin(), maxx[i].end());
+  }
+
+  cv::Mat _forehead_kernel = cv::getStructuringElement(cv::MORPH_ELLIPSE, cv::Size(1, 1));
+  cv::inRange(image_sm_hsv, hsv_min, hsv_max, skinMask);
+  cv::erode(skinMask, skinMask, _forehead_kernel, cv::Point(-1, -1), 2);
+  cv::dilate(skinMask, skinMask, _forehead_kernel, cv::Point(-1, -1), 2);
+  skinMask.convertTo(skinMask, CV_8U, 1.0 / 255);
+
+  cv::findNonZero(skinMask, location);
+
+  double max_part_f, x_min_part, x_max_part;
+
+  for (auto &i : location)
+  {
+    x.push_back(i.x);
+    y.push_back(i.y);
+  }
+
+  cv::minMaxLoc(y, NULL, &max_part_f);
+  cv::minMaxLoc(x, &x_min_part, &x_max_part);
+
+  cv::Mat new_skin_mask = cv::Mat::zeros(skinMask.size(), CV_8U);
+
+  new_skin_mask(cv::Range(face_box_min_y, max_part_f), cv::Range(x_min_part, x_max_part)) =
+      skinMask(cv::Range(face_box_min_y, max_part_f), cv::Range(x_min_part, x_max_part));
+
+  return new_skin_mask;
+}
+
+void AnnotationRenderer::smooth_face(const RenderData &render_data)
+{
+
+  cv::Mat not_full_face = cv::Mat(FormFacePartMask(FACE_OVAL, render_data)) +
+                          cv::Mat(predict_forehead_mask(render_data, std::get<1>(GetFaceBox(render_data)))) -
+                          cv::Mat(FormFacePartMask(LEFT_EYE, render_data)) -
+                          cv::Mat(FormFacePartMask(RIGHT_EYE, render_data)) -
+                          cv::Mat(FormFacePartMask(LEFT_BROW, render_data)) -
+                          cv::Mat(FormFacePartMask(RIGHT_BROW, render_data)) -
+                          cv::Mat(FormFacePartMask(LIPS, render_data));
+
+  cv::resize(not_full_face,
+             not_full_face,
+             mat_image_.size(), 0, 0,
+             cv::INTER_LINEAR);
+
+  std::vector<int> x, y;
+  std::vector<cv::Point> location;
+
+  cv::findNonZero(not_full_face, location);
+
+  double min_y, min_x, max_x, max_y;
+
+  for (auto &i : location)
+  {
+    x.push_back(i.x);
+    y.push_back(i.y);
+  }
+
+  cv::minMaxLoc(x, &min_x, &max_x);
+  cv::minMaxLoc(y, &min_y, &max_y);
+
+  cv::Mat patch_face = mat_image_(cv::Range(min_y, max_y), cv::Range(min_x, max_x));
+  cv::Mat patch_nff = not_full_face(cv::Range(min_y, max_y), cv::Range(min_x, max_x));
+  cv::Mat patch_new;
+  cv::bilateralFilter(patch_face, patch_new, 12, 50, 50);
+
+  cv::Mat patch_new_nff, patch_new_mask, patch, patch_face_nff;
+
+  patch_new.copyTo(patch_new_nff, patch_nff);
+  patch_face.copyTo(patch_face_nff, patch_nff);
+
+  patch_new_mask = 0.85
* patch_new_nff + 0.15 * patch_face_nff; + + patch = cv::min(255, patch_new_mask); + patch.copyTo(patch_face, patch_nff); +} + +cv::Mat matmul32F(cv::Mat& bgr, cv::Mat& mask) +{ + assert(bgr.type() == CV_32FC3 && mask.type() == CV_32FC1 && bgr.size() == mask.size()); + int H = bgr.rows; + int W = bgr.cols; + cv::Mat dst(bgr.size(), bgr.type()); + + if (bgr.isContinuous() && mask.isContinuous()) + { + W *= H; + H = 1; + } + + for( int i = 0; i < H; ++i) + { + float* pdst = ((float*)dst.data)+i*W*3; + float* pbgr = ((float*)bgr.data)+i*W*3; + float* pmask = ((float*)mask.data) + i*W; + for ( int j = 0; j < W; ++j) + { + (*pdst++) = (*pbgr++) *(*pmask); + (*pdst++) = (*pbgr++) *(*pmask); + (*pdst++) = (*pbgr++) *(*pmask); + pmask+=1; + } + } + return dst; +} + +void AnnotationRenderer::DrawLipstick(const RenderData &render_data) +{ + cv::Mat spec_lips_mask, upper_lips_mask, lower_lips_mask; + spec_lips_mask = cv::Mat::zeros(mat_image_.size(), CV_32F); + upper_lips_mask = cv::Mat::zeros(mat_image_.size(), CV_32F); + lower_lips_mask = cv::Mat::zeros(mat_image_.size(), CV_32F); + + upper_lips_mask = AnnotationRenderer::FormFacePartMask(UPPER_LIP, render_data); + lower_lips_mask = AnnotationRenderer::FormFacePartMask(LOWER_LIP, render_data); + + spec_lips_mask = upper_lips_mask + lower_lips_mask; + + spec_lips_mask.convertTo(spec_lips_mask, CV_8U); + + cv::resize(spec_lips_mask, spec_lips_mask, mat_image_.size(), cv::INTER_LINEAR); + + std::vector x, y; + std::vector location; + + cv::findNonZero(spec_lips_mask, location); + + for (auto &i : location) + { + x.push_back(i.x); + y.push_back(i.y); + } + + if (!(x.empty()) && !(y.empty())) + { + double min_y, max_y, max_x, min_x; + cv::minMaxLoc(y, &min_y, &max_y); + cv::minMaxLoc(x, &min_x, &max_x); + + cv::Mat lips_crop_mask = spec_lips_mask(cv::Range(min_y, max_y), cv::Range(min_x, max_x)); + lips_crop_mask.convertTo(lips_crop_mask, CV_32F, 1.0 / 255); + + cv::Mat lips_crop = cv::Mat(mat_image_(cv::Range(min_y, max_y), cv::Range(min_x, max_x)).size(), CV_8UC3); + mat_image_(cv::Range(min_y, max_y), cv::Range(min_x, max_x)).copyTo(lips_crop); + + lips_crop.convertTo(lips_crop, CV_32FC3); + + cv::Mat lips_blend; + lips_blend = cv::Mat(lips_crop.size().height, lips_crop.size().width, CV_32FC3, cv::Scalar(255.0, 0, 0)); + lips_crop_mask *= 50; + lips_crop_mask.convertTo(lips_crop_mask, CV_32F, 1.0 / 255); + + lips_blend = matmul32F(lips_blend, lips_crop_mask); + + cv::Mat tmp_crop_mask = 1.0 - lips_crop_mask; + + cv::Mat slice = mat_image_(cv::Range(min_y, max_y), cv::Range(min_x, max_x)); + + lips_crop = matmul32F(lips_crop, tmp_crop_mask); + + cv::add(lips_blend, lips_crop, slice, cv::noArray(), CV_8U); + } +} + +void AnnotationRenderer::WhitenTeeth(const RenderData &render_data) +{ + cv::Mat mouth_mask, mouth; + + mouth_mask = cv::Mat::zeros(mat_image_.size(), CV_32F); + mouth_mask = AnnotationRenderer::FormFacePartMask(MOUTH_INSIDE, render_data); + + cv::resize(mouth_mask, mouth, mat_image_.size(), cv::INTER_LINEAR); + + std::vector x, y; + std::vector location; + + cv::findNonZero(mouth, location); + + for (auto &i : location) + { + x.push_back(i.x); + y.push_back(i.y); + } + + if (!(x.empty()) && !(y.empty())) + { + double mouth_min_y, mouth_max_y, mouth_max_x, mouth_min_x; + cv::minMaxLoc(y, &mouth_min_y, &mouth_max_y); + cv::minMaxLoc(x, &mouth_min_x, &mouth_max_x); + double mh = mouth_max_y - mouth_min_y; + double mw = mouth_max_x - mouth_min_x; + cv::Mat mouth_crop_mask; + mouth.convertTo(mouth, CV_32F, 1.0 / 255); + mouth.convertTo(mouth, 
CV_32F, 1.0 / 255); + if (mh / mw > 0.17) + { + mouth_min_y = static_cast(std::max(mouth_min_y - mh * 0.1, 0.0)); + mouth_max_y = static_cast(std::min(mouth_max_y + mh * 0.1, (double)image_height_)); + mouth_min_x = static_cast(std::max(mouth_min_x - mw * 0.1, 0.0)); + mouth_max_x = static_cast(std::min(mouth_max_x + mw * 0.1, (double)image_width_)); + mouth_crop_mask = mouth(cv::Range(mouth_min_y, mouth_max_y), cv::Range(mouth_min_x, mouth_max_x)); + cv::Mat img_hsv, tmp_mask, img_hls; + cv::cvtColor(mat_image_(cv::Range(mouth_min_y, mouth_max_y), cv::Range(mouth_min_x, mouth_max_x)), img_hsv, + cv::COLOR_RGB2HSV); + + cv::Mat _mouth_erode_kernel = cv::getStructuringElement( + cv::MORPH_ELLIPSE, cv::Size(7, 7)); + + cv::erode(mouth_crop_mask * 255, tmp_mask, _mouth_erode_kernel, cv::Point(-1, -1), 3); + cv::GaussianBlur(tmp_mask, tmp_mask, cv::Size(51, 51), 0); + + img_hsv.convertTo(img_hsv, CV_8U); + + std::vector channels(3); + cv::split(img_hsv, channels); + + cv::Mat tmp; + cv::multiply(channels[1], tmp_mask, tmp, 0.3, CV_8U); + cv::subtract(channels[1], tmp, channels[1], cv::noArray(), CV_8U); + channels[1] = cv::min(255, channels[1]); + cv::merge(channels, img_hsv); + + cv::cvtColor(img_hsv, img_hsv, cv::COLOR_HSV2RGB); + cv::cvtColor(img_hsv, img_hls, cv::COLOR_RGB2HLS); + + cv::split(img_hls, channels); + cv::multiply(channels[1], tmp_mask, tmp, 0.3, CV_8U); + cv::add(channels[1], tmp, channels[1], cv::noArray(), CV_8U); + channels[1] = cv::min(255, channels[1]); + cv::merge(channels, img_hls); + + cv::cvtColor(img_hls, img_hls, cv::COLOR_HLS2RGB); + + // std::cout << "R (numpy) = " << std::endl << cv::format(img_hls, cv::Formatter::FMT_NUMPY ) << std::endl << std::endl; + + cv::Mat slice = mat_image_(cv::Range(mouth_min_y, mouth_max_y), cv::Range(mouth_min_x, mouth_max_x)); + img_hls.copyTo(slice); + } + } +} + +void AnnotationRenderer::DrawRectangle(const RenderAnnotation& annotation) { + int left = -1; + int top = -1; + int right = -1; + int bottom = -1; + const auto& rectangle = annotation.rectangle(); + if (rectangle.normalized()) { + CHECK(NormalizedtoPixelCoordinates(rectangle.left(), rectangle.top(), + image_width_, image_height_, &left, + &top)); + CHECK(NormalizedtoPixelCoordinates(rectangle.right(), rectangle.bottom(), + image_width_, image_height_, &right, + &bottom)); + } else { + left = static_cast(rectangle.left() * scale_factor_); + top = static_cast(rectangle.top() * scale_factor_); + right = static_cast(rectangle.right() * scale_factor_); + bottom = static_cast(rectangle.bottom() * scale_factor_); + } + + const cv::Scalar color = MediapipeColorToOpenCVColor(annotation.color()); + const int thickness = + ClampThickness(round(annotation.thickness() * scale_factor_)); + if (rectangle.rotation() != 0.0) { + const auto& rect = RectangleToOpenCVRotatedRect(left, top, right, bottom, + rectangle.rotation()); + const int kNumVertices = 4; + cv::Point2f vertices[kNumVertices]; + rect.points(vertices); + for (int i = 0; i < kNumVertices; i++) { + cv::line(mat_image_, vertices[i], vertices[(i + 1) % kNumVertices], color, + thickness); + } + } else { + cv::Rect rect(left, top, right - left, bottom - top); + cv::rectangle(mat_image_, rect, color, thickness); + } + if (rectangle.has_top_left_thickness()) { + const auto& rect = RectangleToOpenCVRotatedRect(left, top, right, bottom, + rectangle.rotation()); + const int kNumVertices = 4; + cv::Point2f vertices[kNumVertices]; + rect.points(vertices); + const int top_left_thickness = + 
ClampThickness(round(rectangle.top_left_thickness() * scale_factor_)); + cv::ellipse(mat_image_, vertices[1], + cv::Size(top_left_thickness, top_left_thickness), 0.0, 0, 360, + color, -1); + } +} + +void AnnotationRenderer::DrawFilledRectangle( + const RenderAnnotation& annotation) { + int left = -1; + int top = -1; + int right = -1; + int bottom = -1; + const auto& rectangle = annotation.filled_rectangle().rectangle(); + if (rectangle.normalized()) { + CHECK(NormalizedtoPixelCoordinates(rectangle.left(), rectangle.top(), + image_width_, image_height_, &left, + &top)); + CHECK(NormalizedtoPixelCoordinates(rectangle.right(), rectangle.bottom(), + image_width_, image_height_, &right, + &bottom)); + } else { + left = static_cast(rectangle.left() * scale_factor_); + top = static_cast(rectangle.top() * scale_factor_); + right = static_cast(rectangle.right() * scale_factor_); + bottom = static_cast(rectangle.bottom() * scale_factor_); + } + + const cv::Scalar color = MediapipeColorToOpenCVColor(annotation.color()); + if (rectangle.rotation() != 0.0) { + const auto& rect = RectangleToOpenCVRotatedRect(left, top, right, bottom, + rectangle.rotation()); + const int kNumVertices = 4; + cv::Point2f vertices2f[kNumVertices]; + rect.points(vertices2f); + // Convert cv::Point2f[] to cv::Point[]. + cv::Point vertices[kNumVertices]; + for (int i = 0; i < kNumVertices; ++i) { + vertices[i] = vertices2f[i]; + } + cv::fillConvexPoly(mat_image_, vertices, kNumVertices, color); + } else { + cv::Rect rect(left, top, right - left, bottom - top); + cv::rectangle(mat_image_, rect, color, -1); + } +} + +void AnnotationRenderer::DrawRoundedRectangle( + const RenderAnnotation& annotation) { + int left = -1; + int top = -1; + int right = -1; + int bottom = -1; + const auto& rectangle = annotation.rounded_rectangle().rectangle(); + if (rectangle.normalized()) { + CHECK(NormalizedtoPixelCoordinates(rectangle.left(), rectangle.top(), + image_width_, image_height_, &left, + &top)); + CHECK(NormalizedtoPixelCoordinates(rectangle.right(), rectangle.bottom(), + image_width_, image_height_, &right, + &bottom)); + } else { + left = static_cast(rectangle.left() * scale_factor_); + top = static_cast(rectangle.top() * scale_factor_); + right = static_cast(rectangle.right() * scale_factor_); + bottom = static_cast(rectangle.bottom() * scale_factor_); + } + + const cv::Scalar color = MediapipeColorToOpenCVColor(annotation.color()); + const int thickness = + ClampThickness(round(annotation.thickness() * scale_factor_)); + const int corner_radius = + round(annotation.rounded_rectangle().corner_radius() * scale_factor_); + const int line_type = annotation.rounded_rectangle().line_type(); + DrawRoundedRectangle(mat_image_, cv::Point(left, top), + cv::Point(right, bottom), color, thickness, line_type, + corner_radius); +} + +void AnnotationRenderer::DrawFilledRoundedRectangle( + const RenderAnnotation& annotation) { + int left = -1; + int top = -1; + int right = -1; + int bottom = -1; + const auto& rectangle = + annotation.filled_rounded_rectangle().rounded_rectangle().rectangle(); + if (rectangle.normalized()) { + CHECK(NormalizedtoPixelCoordinates(rectangle.left(), rectangle.top(), + image_width_, image_height_, &left, + &top)); + CHECK(NormalizedtoPixelCoordinates(rectangle.right(), rectangle.bottom(), + image_width_, image_height_, &right, + &bottom)); + } else { + left = static_cast(rectangle.left() * scale_factor_); + top = static_cast(rectangle.top() * scale_factor_); + right = static_cast(rectangle.right() * scale_factor_); + 
bottom = static_cast(rectangle.bottom() * scale_factor_); + } + + const cv::Scalar color = MediapipeColorToOpenCVColor(annotation.color()); + const int corner_radius = + annotation.rounded_rectangle().corner_radius() * scale_factor_; + const int line_type = annotation.rounded_rectangle().line_type(); + DrawRoundedRectangle(mat_image_, cv::Point(left, top), + cv::Point(right, bottom), color, -1, line_type, + corner_radius); +} + +void AnnotationRenderer::DrawRoundedRectangle(cv::Mat src, cv::Point top_left, + cv::Point bottom_right, + const cv::Scalar& line_color, + int thickness, int line_type, + int corner_radius) { + // Corners: + // p1 - p2 + // | | + // p4 - p3 + cv::Point p1 = top_left; + cv::Point p2 = cv::Point(bottom_right.x, top_left.y); + cv::Point p3 = bottom_right; + cv::Point p4 = cv::Point(top_left.x, bottom_right.y); + + // Draw edges of the rectangle + cv::line(src, cv::Point(p1.x + corner_radius, p1.y), + cv::Point(p2.x - corner_radius, p2.y), line_color, thickness, + line_type); + cv::line(src, cv::Point(p2.x, p2.y + corner_radius), + cv::Point(p3.x, p3.y - corner_radius), line_color, thickness, + line_type); + cv::line(src, cv::Point(p4.x + corner_radius, p4.y), + cv::Point(p3.x - corner_radius, p3.y), line_color, thickness, + line_type); + cv::line(src, cv::Point(p1.x, p1.y + corner_radius), + cv::Point(p4.x, p4.y - corner_radius), line_color, thickness, + line_type); + + // Draw arcs at corners. + cv::ellipse(src, p1 + cv::Point(corner_radius, corner_radius), + cv::Size(corner_radius, corner_radius), 180.0, 0, 90, line_color, + thickness, line_type); + cv::ellipse(src, p2 + cv::Point(-corner_radius, corner_radius), + cv::Size(corner_radius, corner_radius), 270.0, 0, 90, line_color, + thickness, line_type); + cv::ellipse(src, p3 + cv::Point(-corner_radius, -corner_radius), + cv::Size(corner_radius, corner_radius), 0.0, 0, 90, line_color, + thickness, line_type); + cv::ellipse(src, p4 + cv::Point(corner_radius, -corner_radius), + cv::Size(corner_radius, corner_radius), 90.0, 0, 90, line_color, + thickness, line_type); +} + +void AnnotationRenderer::DrawOval(const RenderAnnotation& annotation) { + int left = -1; + int top = -1; + int right = -1; + int bottom = -1; + const auto& enclosing_rectangle = annotation.oval().rectangle(); + if (enclosing_rectangle.normalized()) { + CHECK(NormalizedtoPixelCoordinates(enclosing_rectangle.left(), + enclosing_rectangle.top(), image_width_, + image_height_, &left, &top)); + CHECK(NormalizedtoPixelCoordinates( + enclosing_rectangle.right(), enclosing_rectangle.bottom(), image_width_, + image_height_, &right, &bottom)); + } else { + left = static_cast(enclosing_rectangle.left() * scale_factor_); + top = static_cast(enclosing_rectangle.top() * scale_factor_); + right = static_cast(enclosing_rectangle.right() * scale_factor_); + bottom = static_cast(enclosing_rectangle.bottom() * scale_factor_); + } + + cv::Point center((left + right) / 2, (top + bottom) / 2); + cv::Size size((right - left) / 2, (bottom - top) / 2); + const double rotation = enclosing_rectangle.rotation() / M_PI * 180.f; + const cv::Scalar color = MediapipeColorToOpenCVColor(annotation.color()); + const int thickness = + ClampThickness(round(annotation.thickness() * scale_factor_)); + cv::ellipse(mat_image_, center, size, rotation, 0, 360, color, thickness); +} + +void AnnotationRenderer::DrawFilledOval(const RenderAnnotation& annotation) { + int left = -1; + int top = -1; + int right = -1; + int bottom = -1; + const auto& enclosing_rectangle = 
annotation.filled_oval().oval().rectangle(); + if (enclosing_rectangle.normalized()) { + CHECK(NormalizedtoPixelCoordinates(enclosing_rectangle.left(), + enclosing_rectangle.top(), image_width_, + image_height_, &left, &top)); + CHECK(NormalizedtoPixelCoordinates( + enclosing_rectangle.right(), enclosing_rectangle.bottom(), image_width_, + image_height_, &right, &bottom)); + } else { + left = static_cast(enclosing_rectangle.left() * scale_factor_); + top = static_cast(enclosing_rectangle.top() * scale_factor_); + right = static_cast(enclosing_rectangle.right() * scale_factor_); + bottom = static_cast(enclosing_rectangle.bottom() * scale_factor_); + } + + cv::Point center((left + right) / 2, (top + bottom) / 2); + cv::Size size(std::max(0, (right - left) / 2), + std::max(0, (bottom - top) / 2)); + const double rotation = enclosing_rectangle.rotation() / M_PI * 180.f; + const cv::Scalar color = MediapipeColorToOpenCVColor(annotation.color()); + cv::ellipse(mat_image_, center, size, rotation, 0, 360, color, -1); +} + +void AnnotationRenderer::DrawArrow(const RenderAnnotation& annotation) { + int x_start = -1; + int y_start = -1; + int x_end = -1; + int y_end = -1; + + const auto& arrow = annotation.arrow(); + if (arrow.normalized()) { + CHECK(NormalizedtoPixelCoordinates(arrow.x_start(), arrow.y_start(), + image_width_, image_height_, &x_start, + &y_start)); + CHECK(NormalizedtoPixelCoordinates(arrow.x_end(), arrow.y_end(), + image_width_, image_height_, &x_end, + &y_end)); + } else { + x_start = static_cast(arrow.x_start() * scale_factor_); + y_start = static_cast(arrow.y_start() * scale_factor_); + x_end = static_cast(arrow.x_end() * scale_factor_); + y_end = static_cast(arrow.y_end() * scale_factor_); + } + + cv::Point arrow_start(x_start, y_start); + cv::Point arrow_end(x_end, y_end); + const cv::Scalar color = MediapipeColorToOpenCVColor(annotation.color()); + const int thickness = + ClampThickness(round(annotation.thickness() * scale_factor_)); + + // Draw the main arrow line. + cv::line(mat_image_, arrow_start, arrow_end, color, thickness); + + // Compute the arrowtip left and right vectors. + Vector2_d L_start(static_cast(x_start), static_cast(y_start)); + Vector2_d L_end(static_cast(x_end), static_cast(y_end)); + Vector2_d U = (L_end - L_start).Normalize(); + Vector2_d V = U.Ortho(); + double line_length = (L_end - L_start).Norm(); + constexpr double kArrowTipLengthProportion = 0.2; + double arrowtip_length = kArrowTipLengthProportion * line_length; + Vector2_d arrowtip_left = L_end - arrowtip_length * U + arrowtip_length * V; + Vector2_d arrowtip_right = L_end - arrowtip_length * U - arrowtip_length * V; + + // Draw the arrowtip left and right lines. 
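+  // Each tip segment starts at L_end - k * U +/- k * V with
+  // k = kArrowTipLengthProportion * |line|, i.e. backed off along the shaft
+  // and offset sideways by equal amounts, yielding 45-degree arrow heads.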
+ cv::Point arrowtip_left_start(static_cast(round(arrowtip_left[0])), + static_cast(round(arrowtip_left[1]))); + cv::Point arrowtip_right_start(static_cast(round(arrowtip_right[0])), + static_cast(round(arrowtip_right[1]))); + cv::line(mat_image_, arrowtip_left_start, arrow_end, color, thickness); + cv::line(mat_image_, arrowtip_right_start, arrow_end, color, thickness); +} + +void AnnotationRenderer::DrawPoint(const RenderAnnotation& annotation) { + const auto& point = annotation.point(); + int x = -1; + int y = -1; + if (point.normalized()) { + CHECK(NormalizedtoPixelCoordinates(point.x(), point.y(), image_width_, + image_height_, &x, &y)); + } else { + x = static_cast(point.x() * scale_factor_); + y = static_cast(point.y() * scale_factor_); + } + + cv::Point point_to_draw(x, y); + const cv::Scalar color = MediapipeColorToOpenCVColor(annotation.color()); + const int thickness = + ClampThickness(round(annotation.thickness() * scale_factor_)); + cv::circle(mat_image_, point_to_draw, thickness, color, -1); +} + +void AnnotationRenderer::DrawLine(const RenderAnnotation& annotation) { + int x_start = -1; + int y_start = -1; + int x_end = -1; + int y_end = -1; + + const auto& line = annotation.line(); + if (line.normalized()) { + CHECK(NormalizedtoPixelCoordinates(line.x_start(), line.y_start(), + image_width_, image_height_, &x_start, + &y_start)); + CHECK(NormalizedtoPixelCoordinates(line.x_end(), line.y_end(), image_width_, + image_height_, &x_end, &y_end)); + } else { + x_start = static_cast(line.x_start() * scale_factor_); + y_start = static_cast(line.y_start() * scale_factor_); + x_end = static_cast(line.x_end() * scale_factor_); + y_end = static_cast(line.y_end() * scale_factor_); + } + + cv::Point start(x_start, y_start); + cv::Point end(x_end, y_end); + const cv::Scalar color = MediapipeColorToOpenCVColor(annotation.color()); + const int thickness = + ClampThickness(round(annotation.thickness() * scale_factor_)); + cv::line(mat_image_, start, end, color, thickness); +} + +void AnnotationRenderer::DrawGradientLine(const RenderAnnotation& annotation) { + int x_start = -1; + int y_start = -1; + int x_end = -1; + int y_end = -1; + + const auto& line = annotation.gradient_line(); + if (line.normalized()) { + CHECK(NormalizedtoPixelCoordinates(line.x_start(), line.y_start(), + image_width_, image_height_, &x_start, + &y_start)); + CHECK(NormalizedtoPixelCoordinates(line.x_end(), line.y_end(), image_width_, + image_height_, &x_end, &y_end)); + } else { + x_start = static_cast(line.x_start() * scale_factor_); + y_start = static_cast(line.y_start() * scale_factor_); + x_end = static_cast(line.x_end() * scale_factor_); + y_end = static_cast(line.y_end() * scale_factor_); + } + + const cv::Point start(x_start, y_start); + const cv::Point end(x_end, y_end); + const int thickness = + ClampThickness(round(annotation.thickness() * scale_factor_)); + const cv::Scalar color1 = MediapipeColorToOpenCVColor(line.color1()); + const cv::Scalar color2 = MediapipeColorToOpenCVColor(line.color2()); + cv_line2(mat_image_, start, end, color1, color2, thickness); +} + +void AnnotationRenderer::DrawText(const RenderAnnotation& annotation) { + int left = -1; + int baseline = -1; + int font_size = -1; + + const auto& text = annotation.text(); + if (text.normalized()) { + CHECK(NormalizedtoPixelCoordinates(text.left(), text.baseline(), + image_width_, image_height_, &left, + &baseline)); + font_size = static_cast(round(text.font_height() * image_height_)); + } else { + left = static_cast(text.left() * 
scale_factor_); + baseline = static_cast(text.baseline() * scale_factor_); + font_size = static_cast(text.font_height() * scale_factor_); + } + + cv::Point origin(left, baseline); + const cv::Scalar color = MediapipeColorToOpenCVColor(annotation.color()); + const int thickness = + ClampThickness(round(annotation.thickness() * scale_factor_)); + const int font_face = text.font_face(); + + const double font_scale = ComputeFontScale(font_face, font_size, thickness); + int text_baseline = 0; + cv::Size text_size = cv::getTextSize(text.display_text(), font_face, + font_scale, thickness, &text_baseline); + + if (text.center_horizontally()) { + origin.x -= text_size.width / 2; + } + if (text.center_vertically()) { + origin.y += text_size.height / 2; + } + + cv::putText(mat_image_, text.display_text(), origin, font_face, font_scale, + color, thickness, /*lineType=*/8, + /*bottomLeftOrigin=*/flip_text_vertically_); +} + +double AnnotationRenderer::ComputeFontScale(int font_face, int font_size, + int thickness) { + double base_line; + double cap_line; + + // The details below of how to compute the font scale from font face, + // thickness, and size were inferred from the OpenCV implementation. + switch (font_face) { + case cv::FONT_HERSHEY_SIMPLEX: + case cv::FONT_HERSHEY_DUPLEX: + case cv::FONT_HERSHEY_COMPLEX: + case cv::FONT_HERSHEY_TRIPLEX: + case cv::FONT_HERSHEY_SCRIPT_SIMPLEX: + case cv::FONT_HERSHEY_SCRIPT_COMPLEX: + base_line = 9; + cap_line = 12; + break; + case cv::FONT_HERSHEY_PLAIN: + base_line = 5; + cap_line = 4; + break; + case cv::FONT_HERSHEY_COMPLEX_SMALL: + base_line = 6; + cap_line = 7; + break; + default: + return -1; + } + + const double thick = static_cast(thickness + 1); + return (static_cast(font_size) - (thick / 2.0F)) / + (cap_line + base_line); +} + +} // namespace mediapipe diff --git a/mediapipe/graphs/beauty/calculators/annotation_renderer.h b/mediapipe/graphs/beauty/calculators/annotation_renderer.h new file mode 100644 index 000000000..b2c4ecbea --- /dev/null +++ b/mediapipe/graphs/beauty/calculators/annotation_renderer.h @@ -0,0 +1,158 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MEDIAPIPE_UTIL_ANNOTATION_RENDERER_H_ +#define MEDIAPIPE_UTIL_ANNOTATION_RENDERER_H_ + +#include + +#include "mediapipe/framework/port/opencv_core_inc.h" +#include "mediapipe/framework/port/opencv_imgproc_inc.h" +#include "mediapipe/framework/port/opencv_highgui_inc.h" +#include "mediapipe/util/render_data.pb.h" + +namespace mediapipe { + +// The renderer library for rendering data on images. 
+// +// Example usage: +// +// AnnotationRenderer renderer; +// +// std::unique_ptr mat_image(new cv::Mat(kImageHeight, kImageWidth, +// CV_8UC3)); +// +// renderer.AdoptImage(mat_image.get()); +// +// RenderData render_data_0; +// +// +// renderer.RenderDataOnImage(render_data_0); +// +// RenderData render_data_1; +// +// +// renderer.RenderDataOnImage(render_data_1); +// +// UseRenderedImage(mat_image.get()); +class AnnotationRenderer { + public: + explicit AnnotationRenderer() {} + + explicit AnnotationRenderer(const cv::Mat& mat_image) + : image_width_(mat_image.cols), + image_height_(mat_image.rows), + mat_image_(mat_image.clone()) {} + + // Renders the image with the input render data. + void RenderDataOnImage(const RenderData& render_data); + + // Resets the renderer with a new image. Does not own input_image. input_image + // must not be modified by caller during rendering. + void AdoptImage(cv::Mat* input_image); + + // Gets image dimensions. + int GetImageWidth() const; + int GetImageHeight() const; + + // Sets whether text should be rendered upside down. This is default to false + // and text is rendered assuming the underlying image has its origin at the + // top-left corner. Set it to true if the image origin is at the bottom-left + // corner. + void SetFlipTextVertically(bool flip); + + // For GPU rendering optimization in AnnotationOverlayCalculator. + // Scale all incoming coordinates,sizes,thickness,etc. by this amount. + // Should be in the range (0-1]. + // See 'gpu_scale_factor' in annotation_overlay_calculator.proto + void SetScaleFactor(float scale_factor); + float GetScaleFactor() { return scale_factor_; } + + private: + // Draws a rectangle on the image as described in the annotation. + void DrawRectangle(const RenderAnnotation& annotation); + + // Draws a filled rectangle on the image as described in the annotation. + void DrawFilledRectangle(const RenderAnnotation& annotation); + + // Draws an oval on the image as described in the annotation. + void DrawOval(const RenderAnnotation& annotation); + + // Draws a filled oval on the image as described in the annotation. + void DrawFilledOval(const RenderAnnotation& annotation); + + // Draws an arrow on the image as described in the annotation. + void DrawArrow(const RenderAnnotation& annotation); + + // Draws a point on the image as described in the annotation. + void DrawPoint(const RenderAnnotation& annotation); + + // Draws lipstick on the face. + void DrawLipstick(const RenderData& render_data); + + // Whitens teeth. + void WhitenTeeth(const RenderData& render_data); + + // Draws a line segment on the image as described in the annotation. + void DrawLine(const RenderAnnotation& annotation); + + // Draws a 2-tone line segment on the image as described in the annotation. + void DrawGradientLine(const RenderAnnotation& annotation); + + // Draws a text on the image as described in the annotation. + void DrawText(const RenderAnnotation& annotation); + + // Draws a rounded rectangle on the image as described in the annotation. + void DrawRoundedRectangle(const RenderAnnotation& annotation); + + // Draws a filled rounded rectangle on the image as described in the + // annotation. + void DrawFilledRoundedRectangle(const RenderAnnotation& annotation); + + // Helper function for drawing a rectangle with rounded corners. The + // parameters are the same as in the OpenCV function rectangle(). + // corner_radius: A positive int value defining the radius of the round + // corners. 
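+  // With corner_radius == 0 the call degenerates to a plain rectangle drawn
+  // from four straight edges.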
+ void DrawRoundedRectangle(cv::Mat src, cv::Point top_left, + cv::Point bottom_right, + const cv::Scalar& line_color, int thickness = 1, + int line_type = 8, int corner_radius = 0); + + // Computes the font scale from font_face, size and thickness. + double ComputeFontScale(int font_face, int font_size, int thickness); + + cv::Mat FormFacePartMask(std::vector orderList, const RenderData &render_data); + + cv::Mat predict_forehead_mask(const RenderData &render_data, double face_box_min_y); + + void smooth_face(const RenderData &render_data); + + std::tuple GetFaceBox(const RenderData &render_data); + + // Width and Height of the image (in pixels). + int image_width_ = -1; + int image_height_ = -1; + + // The image for rendering. + cv::Mat mat_image_; + + // See SetFlipTextVertically(bool). + bool flip_text_vertically_ = false; + + // See SetScaleFactor(float) + float scale_factor_ = 1.0; +}; +} // namespace mediapipe + +#endif // MEDIAPIPE_UTIL_ANNOTATION_RENDERER_H_ diff --git a/mediapipe/graphs/beauty/calculators/face_landmarks_to_render_data_calculator.cc b/mediapipe/graphs/beauty/calculators/face_landmarks_to_render_data_calculator.cc new file mode 100644 index 000000000..093a7325d --- /dev/null +++ b/mediapipe/graphs/beauty/calculators/face_landmarks_to_render_data_calculator.cc @@ -0,0 +1,104 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/memory/memory.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_join.h" +#include "mediapipe/calculators/util/landmarks_to_render_data_calculator.h" +#include "mediapipe/calculators/util/landmarks_to_render_data_calculator.pb.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/calculator_options.pb.h" +#include "mediapipe/framework/formats/landmark.pb.h" +#include "mediapipe/framework/formats/location_data.pb.h" +#include "mediapipe/framework/port/ret_check.h" +#include "mediapipe/util/color.pb.h" +#include "mediapipe/util/render_data.pb.h" +namespace mediapipe { + +namespace { + +constexpr int kNumFaceLandmarkConnections = 132; +// Pairs of landmark indices to be rendered with connections. +constexpr int kFaceLandmarkConnections[] = { + // Lips. + 61, 146, 146, 91, 91, 181, 181, 84, 84, 17, 17, 314, 314, 405, 405, 321, + 321, 375, 375, 291, 61, 185, 185, 40, 40, 39, 39, 37, 37, 0, 0, 267, 267, + 269, 269, 270, 270, 409, 409, 291, 78, 95, 95, 88, 88, 178, 178, 87, 87, 14, + 14, 317, 317, 402, 402, 318, 318, 324, 324, 308, 78, 191, 191, 80, 80, 81, + 81, 82, 82, 13, 13, 312, 312, 311, 311, 310, 310, 415, 415, 308, + // Left eye. + 33, 7, 7, 163, 163, 144, 144, 145, 145, 153, 153, 154, 154, 155, 155, 133, + 33, 246, 246, 161, 161, 160, 160, 159, 159, 158, 158, 157, 157, 173, 173, + 133, + // Left eyebrow. + 46, 53, 53, 52, 52, 65, 65, 55, 70, 63, 63, 105, 105, 66, 66, 107, + // Left iris. + 474, 475, 475, 476, 476, 477, 477, 474, + // Right eye. 
+ 263, 249, 249, 390, 390, 373, 373, 374, 374, 380, 380, 381, 381, 382, 382, + 362, 263, 466, 466, 388, 388, 387, 387, 386, 386, 385, 385, 384, 384, 398, + 398, 362, + // Right eyebrow. + 276, 283, 283, 282, 282, 295, 295, 285, 300, 293, 293, 334, 334, 296, 296, + 336, + // Right iris. + 469, 470, 470, 471, 471, 472, 472, 469, + // Face oval. + 10, 338, 338, 297, 297, 332, 332, 284, 284, 251, 251, 389, 389, 356, 356, + 454, 454, 323, 323, 361, 361, 288, 288, 397, 397, 365, 365, 379, 379, 378, + 378, 400, 400, 377, 377, 152, 152, 148, 148, 176, 176, 149, 149, 150, 150, + 136, 136, 172, 172, 58, 58, 132, 132, 93, 93, 234, 234, 127, 127, 162, 162, + 21, 21, 54, 54, 103, 103, 67, 67, 109, 109, 10}; + +} // namespace + +// A calculator that converts face landmarks to RenderData proto for +// visualization. Ignores landmark_connections specified in +// LandmarksToRenderDataCalculatorOptions, if any, and always uses a fixed set +// of landmark connections specific to face landmark (defined in +// kFaceLandmarkConnections[] above). +// +// Example config: +// node { +// calculator: "FaceLandmarksToRenderDataCalculator" +// input_stream: "NORM_LANDMARKS:landmarks" +// output_stream: "RENDER_DATA:render_data" +// options { +// [LandmarksToRenderDataCalculatorOptions.ext] { +// landmark_color { r: 0 g: 255 b: 0 } +// connection_color { r: 0 g: 255 b: 0 } +// thickness: 4.0 +// } +// } +// } +class FaceLandmarksToRenderDataCalculator + : public LandmarksToRenderDataCalculator { + public: + absl::Status Open(CalculatorContext* cc) override; +}; +REGISTER_CALCULATOR(FaceLandmarksToRenderDataCalculator); + +absl::Status FaceLandmarksToRenderDataCalculator::Open(CalculatorContext* cc) { + cc->SetOffset(TimestampDiff(0)); + options_ = cc->Options(); + + for (int i = 0; i < kNumFaceLandmarkConnections; ++i) { + landmark_connections_.push_back(kFaceLandmarkConnections[i * 2]); + landmark_connections_.push_back(kFaceLandmarkConnections[i * 2 + 1]); + } + + return absl::OkStatus(); +} + +} // namespace mediapipe diff --git a/mediapipe/graphs/beauty/face_mesh_desktop.pbtxt b/mediapipe/graphs/beauty/face_mesh_desktop.pbtxt new file mode 100644 index 000000000..215791a36 --- /dev/null +++ b/mediapipe/graphs/beauty/face_mesh_desktop.pbtxt @@ -0,0 +1,70 @@ +# MediaPipe graph that performs face mesh on desktop with TensorFlow Lite +# on CPU. + +# Path to the input video file. (string) +input_side_packet: "input_video_path" +# Path to the output video file. (string) +input_side_packet: "output_video_path" + +# max_queue_size limits the number of packets enqueued on any input stream +# by throttling inputs to the graph. This makes the graph only process one +# frame per time. +max_queue_size: 1 + +# Decodes an input video file into images and a video header. +node { + calculator: "OpenCvVideoDecoderCalculator" + input_side_packet: "INPUT_FILE_PATH:input_video_path" + output_stream: "VIDEO:input_video" + output_stream: "VIDEO_PRESTREAM:input_video_header" +} + +# Defines side packets for further use in the graph. +node { + calculator: "ConstantSidePacketCalculator" + output_side_packet: "PACKET:0:num_faces" + output_side_packet: "PACKET:1:with_attention" + node_options: { + [type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: { + packet { int_value: 1 } + packet { bool_value: true } + } + } +} + +# Subgraph that detects faces and corresponding landmarks. 
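+# No FlowLimiterCalculator is needed in this file-based graph: max_queue_size: 1
+# above already throttles processing to one frame at a time.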
diff --git a/mediapipe/graphs/beauty/face_mesh_desktop.pbtxt b/mediapipe/graphs/beauty/face_mesh_desktop.pbtxt
new file mode 100644
index 000000000..215791a36
--- /dev/null
+++ b/mediapipe/graphs/beauty/face_mesh_desktop.pbtxt
@@ -0,0 +1,70 @@
+# MediaPipe graph that performs face mesh on desktop with TensorFlow Lite
+# on CPU.
+
+# Path to the input video file. (string)
+input_side_packet: "input_video_path"
+# Path to the output video file. (string)
+input_side_packet: "output_video_path"
+
+# max_queue_size limits the number of packets enqueued on any input stream
+# by throttling inputs to the graph. This makes the graph process only one
+# frame at a time.
+max_queue_size: 1
+
+# Decodes an input video file into images and a video header.
+node {
+  calculator: "OpenCvVideoDecoderCalculator"
+  input_side_packet: "INPUT_FILE_PATH:input_video_path"
+  output_stream: "VIDEO:input_video"
+  output_stream: "VIDEO_PRESTREAM:input_video_header"
+}
+
+# Defines side packets for further use in the graph.
+node {
+  calculator: "ConstantSidePacketCalculator"
+  output_side_packet: "PACKET:0:num_faces"
+  output_side_packet: "PACKET:1:with_attention"
+  node_options: {
+    [type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
+      packet { int_value: 1 }
+      packet { bool_value: true }
+    }
+  }
+}
+
+# Subgraph that detects faces and corresponding landmarks.
+node {
+  calculator: "FaceLandmarkFrontCpu"
+  input_stream: "IMAGE:input_video"
+  input_side_packet: "NUM_FACES:num_faces"
+  input_side_packet: "WITH_ATTENTION:with_attention"
+  output_stream: "LANDMARKS:multi_face_landmarks"
+  output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
+  output_stream: "DETECTIONS:face_detections"
+  output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
+}
+
+# Subgraph that renders face-landmark annotation onto the input video.
+node {
+  calculator: "FaceRendererCpu"
+  input_stream: "IMAGE:input_video"
+  input_stream: "LANDMARKS:multi_face_landmarks"
+  input_stream: "NORM_RECTS:face_rects_from_landmarks"
+  input_stream: "DETECTIONS:face_detections"
+  output_stream: "IMAGE:output_video"
+}
+
+# Encodes the annotated images into a video file, adopting properties specified
+# in the input video header, e.g., video framerate.
+node {
+  calculator: "OpenCvVideoEncoderCalculator"
+  input_stream: "VIDEO:output_video"
+  input_stream: "VIDEO_PRESTREAM:input_video_header"
+  input_side_packet: "OUTPUT_FILE_PATH:output_video_path"
+  node_options: {
+    [type.googleapis.com/mediapipe.OpenCvVideoEncoderCalculatorOptions]: {
+      codec: "avc1"
+      video_format: "mp4"
+    }
+  }
+}
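Editorial note: a graph like face_mesh_desktop.pbtxt is typically driven from a small C++ harness that supplies the two path side packets and waits for the encoder to drain. The sketch below uses the public CalculatorGraph API; the graph_text argument and the two file paths are placeholders, and error handling is reduced to status propagation.

    #include <map>
    #include <string>

    #include "mediapipe/framework/calculator_framework.h"
    #include "mediapipe/framework/port/parse_text_proto.h"
    #include "mediapipe/framework/port/status.h"

    // Sketch: load the pbtxt, feed the path side packets, run to completion.
    absl::Status RunBeautyGraph(const std::string& graph_text) {
      auto config = mediapipe::ParseTextProtoOrDie<mediapipe::CalculatorGraphConfig>(
          graph_text);
      mediapipe::CalculatorGraph graph;
      MP_RETURN_IF_ERROR(graph.Initialize(config));
      MP_RETURN_IF_ERROR(graph.StartRun({
          {"input_video_path", mediapipe::MakePacket<std::string>("video.mp4")},
          {"output_video_path", mediapipe::MakePacket<std::string>("out.mp4")},
      }));
      return graph.WaitUntilDone();  // returns once the output file is written
    }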
diff --git a/mediapipe/graphs/beauty/face_mesh_desktop_live_gpu.pbtxt b/mediapipe/graphs/beauty/face_mesh_desktop_live_gpu.pbtxt
new file mode 100644
index 000000000..ae03709fa
--- /dev/null
+++ b/mediapipe/graphs/beauty/face_mesh_desktop_live_gpu.pbtxt
@@ -0,0 +1,66 @@
+# MediaPipe graph that performs face mesh with TensorFlow Lite on GPU.
+
+# Input image. (GpuBuffer)
+input_stream: "input_video"
+
+# Output image with rendered results. (GpuBuffer)
+output_stream: "output_video"
+# Collection of detected/processed faces, each represented as a list of
+# landmarks. (std::vector<NormalizedLandmarkList>)
+output_stream: "multi_face_landmarks"
+
+# Throttles the images flowing downstream for flow control. It passes through
+# the very first incoming image unaltered, and waits for downstream nodes
+# (calculators and subgraphs) in the graph to finish their tasks before it
+# passes through another image. All images that come in while waiting are
+# dropped, limiting the number of in-flight images in most parts of the graph
+# to 1. This prevents the downstream nodes from queuing up incoming images and
+# data excessively, which leads to increased latency and memory usage, both
+# unwanted in real-time mobile applications. It also eliminates unnecessary
+# computation, e.g., the output produced by a node may get dropped downstream
+# if the subsequent nodes are still busy processing previous inputs.
+node {
+  calculator: "FlowLimiterCalculator"
+  input_stream: "input_video"
+  input_stream: "FINISHED:output_video"
+  input_stream_info: {
+    tag_index: "FINISHED"
+    back_edge: true
+  }
+  output_stream: "throttled_input_video"
+}
+
+# Defines side packets for further use in the graph.
+node {
+  calculator: "ConstantSidePacketCalculator"
+  output_side_packet: "PACKET:0:num_faces"
+  output_side_packet: "PACKET:1:with_attention"
+  node_options: {
+    [type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
+      packet { int_value: 1 }
+      packet { bool_value: true }
+    }
+  }
+}
+
+# Subgraph that detects faces and corresponding landmarks.
+node {
+  calculator: "FaceLandmarkFrontGpu"
+  input_stream: "IMAGE:throttled_input_video"
+  input_side_packet: "NUM_FACES:num_faces"
+  input_side_packet: "WITH_ATTENTION:with_attention"
+  output_stream: "LANDMARKS:multi_face_landmarks"
+  output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
+  output_stream: "DETECTIONS:face_detections"
+  output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
+}
+
+# Subgraph that renders face-landmark annotation onto the input image.
+node {
+  calculator: "FaceRendererGpu"
+  input_stream: "IMAGE:throttled_input_video"
+  input_stream: "LANDMARKS:multi_face_landmarks"
+  input_stream: "NORM_RECTS:face_rects_from_landmarks"
+  input_stream: "DETECTIONS:face_detections"
+  output_stream: "IMAGE:output_video"
+}
diff --git a/mediapipe/graphs/beauty/subgraphs/BUILD b/mediapipe/graphs/beauty/subgraphs/BUILD
new file mode 100644
index 000000000..7e3c18599
--- /dev/null
+++ b/mediapipe/graphs/beauty/subgraphs/BUILD
@@ -0,0 +1,57 @@
+# Copyright 2019 The MediaPipe Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+load(
+    "//mediapipe/framework/tool:mediapipe_graph.bzl",
+    "mediapipe_simple_subgraph",
+)
+
+licenses(["notice"])
+
+package(default_visibility = ["//visibility:public"])
+
+cc_library(
+    name = "renderer_calculators",
+    deps = [
+        "//mediapipe/calculators/core:split_proto_list_calculator",
+        "//mediapipe/util:annotation_renderer",
+        "//mediapipe/calculators/util:annotation_overlay_calculator",
+        "//mediapipe/calculators/beauty:form_face_mask_calculator",
+        "//mediapipe/calculators/beauty:smooth_face_calculator",
+        "//mediapipe/calculators/beauty:draw_lipstick_calculator",
+        "//mediapipe/calculators/beauty:whiten_teeth_calculator",
+        "//mediapipe/calculators/util:detections_to_render_data_calculator",
+        "//mediapipe/calculators/util:landmarks_to_render_data_calculator",
+        "//mediapipe/calculators/util:rect_to_render_data_calculator",
+        "//mediapipe/graphs/beauty/calculators:face_landmarks_to_render_data_calculator",
+    ],
+)
+
+mediapipe_simple_subgraph(
+    name = "face_renderer_gpu",
+    graph = "face_renderer_gpu.pbtxt",
+    register_as = "FaceRendererGpu",
+    deps = [
+        ":renderer_calculators",
+    ],
+)
+
+mediapipe_simple_subgraph(
+    name = "face_renderer_cpu",
+    graph = "face_renderer_cpu.pbtxt",
+    register_as = "FaceRendererCpu",
+    deps = [
+        ":renderer_calculators",
+    ],
+)
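Editorial note: the FlowLimiterCalculator in face_mesh_desktop_live_gpu.pbtxt above is what keeps the live graph real-time; the FINISHED back edge tells it when the previously admitted frame has left the graph. As a conceptual model only (not the calculator's actual implementation), its admission logic with one in-flight frame reduces to:

    // Toy model of the FlowLimiter back edge with max_in_flight = 1.
    // For exposition only; this is not MediaPipe code.
    class ToyFlowLimiter {
     public:
      // Called for each arriving camera frame; true means "enter the graph".
      bool Admit() {
        if (in_flight_) return false;  // graph still busy: drop this frame
        in_flight_ = true;
        return true;
      }
      // Called when the FINISHED back edge delivers the corresponding output.
      void Finished() { in_flight_ = false; }

     private:
      bool in_flight_ = false;
    };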
diff --git a/mediapipe/graphs/beauty/subgraphs/face_renderer_cpu.pbtxt b/mediapipe/graphs/beauty/subgraphs/face_renderer_cpu.pbtxt
new file mode 100644
index 000000000..37c5416ee
--- /dev/null
+++ b/mediapipe/graphs/beauty/subgraphs/face_renderer_cpu.pbtxt
@@ -0,0 +1,127 @@
+# MediaPipe face mesh rendering subgraph.
+
+type: "FaceRendererCpu"
+
+# CPU image. (ImageFrame)
+input_stream: "IMAGE:input_image"
+# Collection of detected/predicted faces, each represented as a list of
+# landmarks. (std::vector<NormalizedLandmarkList>)
+input_stream: "LANDMARKS:multi_face_landmarks"
+# Regions of interest calculated based on face detections.
+# (std::vector<NormalizedRect>)
+input_stream: "NORM_RECTS:rects"
+# Detected faces. (std::vector<Detection>)
+input_stream: "DETECTIONS:detections"
+
+# CPU image with rendered data. (ImageFrame)
+output_stream: "IMAGE:output_image"
+
+node {
+  calculator: "ImagePropertiesCalculator"
+  input_stream: "IMAGE:input_image"
+  output_stream: "SIZE:image_size"
+}
+
+# Converts detections to drawing primitives for annotation overlay.
+node {
+  calculator: "DetectionsToRenderDataCalculator"
+  input_stream: "DETECTIONS:detections"
+  output_stream: "RENDER_DATA:detections_render_data"
+  node_options: {
+    [type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
+      thickness: 4.0
+      color { r: 0 g: 255 b: 0 }
+    }
+  }
+}
+
+# Outputs each element of multi_face_landmarks at a fake timestamp for the rest
+# of the graph to process. At the end of the loop, outputs the BATCH_END
+# timestamp for downstream calculators to inform them that all elements in the
+# vector have been processed.
+node {
+  calculator: "BeginLoopNormalizedLandmarkListVectorCalculator"
+  input_stream: "ITERABLE:multi_face_landmarks"
+  output_stream: "ITEM:face_landmarks"
+  output_stream: "BATCH_END:landmark_timestamp"
+}
+
+# Converts landmarks to drawing primitives for annotation overlay.
+node {
+  calculator: "FaceLandmarksToRenderDataCalculator"
+  input_stream: "NORM_LANDMARKS:face_landmarks"
+  output_stream: "RENDER_DATA:landmarks_render_data"
+  node_options: {
+    [type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
+      landmark_color { r: 255 g: 0 b: 0 }
+      connection_color { r: 0 g: 255 b: 0 }
+      thickness: 2
+      visualize_landmark_depth: false
+    }
+  }
+}
+
+# Collects a RenderData object for each face into a vector. Upon receiving the
+# BATCH_END timestamp, outputs the vector of RenderData at the BATCH_END
+# timestamp.
+node {
+  calculator: "EndLoopRenderDataCalculator"
+  input_stream: "ITEM:landmarks_render_data"
+  input_stream: "BATCH_END:landmark_timestamp"
+  output_stream: "ITERABLE:multi_face_landmarks_render_data"
+}
+
+# Converts normalized rects to drawing primitives for annotation overlay.
+#node {
+#  calculator: "RectToRenderDataCalculator"
+#  input_stream: "NORM_RECTS:rects"
+#  output_stream: "RENDER_DATA:rects_render_data"
+#  node_options: {
+#    [type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] {
+#      filled: false
+#      color { r: 255 g: 0 b: 0 }
+#      thickness: 4.0
+#    }
+#  }
+#}
+
+# Forms per-face masks (and the face bounding box) from the landmark render
+# data; the masks drive the beauty calculators below.
+node {
+  calculator: "FormFaceMaskCalculator"
+  input_stream: "IMAGE:input_image"
+  input_stream: "VECTOR:0:multi_face_landmarks_render_data"
+  output_stream: "FACEBOX:face_box"
+  output_stream: "MASK:multi_mask"
+}
+
+node {
+  calculator: "DrawLipstickCalculator"
+  input_stream: "IMAGE:input_image"
+  input_stream: "MASK:0:multi_mask"
+  output_stream: "IMAGE:input_image_1"
+}
+
+node {
+  calculator: "WhitenTeethCalculator"
+  input_stream: "IMAGE:input_image_1"
+  input_stream: "MASK:0:multi_mask"
+  output_stream: "IMAGE:input_image_2"
+}
+
+node {
+  calculator: "SmoothFaceCalculator"
+  input_stream: "IMAGE:input_image_2"
+  input_stream: "MASK:0:multi_mask"
+  input_stream: "FACEBOX:face_box"
+  output_stream: "IMAGE:output_image"
+}
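Editorial note: the four nodes above form the beauty pipeline. FormFaceMaskCalculator derives the per-face masks and face box once, and DrawLipstickCalculator, WhitenTeethCalculator and SmoothFaceCalculator each combine the same MASK stream with the image, chaining it through input_image -> input_image_1 -> input_image_2 -> output_image. A consuming stage plausibly looks like the sketch below; the std::vector<cv::Mat> mask type is an assumption for illustration, since the calculators in this patch may carry a richer per-part mask structure.

    #include <vector>

    #include "absl/memory/memory.h"
    #include "mediapipe/framework/calculator_framework.h"
    #include "mediapipe/framework/formats/image_frame.h"
    #include "mediapipe/framework/formats/image_frame_opencv.h"
    #include "mediapipe/framework/port/opencv_core_inc.h"

    namespace mediapipe {

    // Hypothetical MASK-consuming stage, shaped like the nodes above.
    class ApplyMaskedEffectCalculator : public CalculatorBase {
     public:
      static absl::Status GetContract(CalculatorContract* cc) {
        cc->Inputs().Tag("IMAGE").Set<ImageFrame>();
        cc->Inputs().Get("MASK", 0).Set<std::vector<cv::Mat>>();  // assumed type
        cc->Outputs().Tag("IMAGE").Set<ImageFrame>();
        return absl::OkStatus();
      }

      absl::Status Process(CalculatorContext* cc) override {
        const auto& input = cc->Inputs().Tag("IMAGE").Get<ImageFrame>();
        cv::Mat frame = formats::MatView(&input).clone();  // writable copy

        const auto& masks =
            cc->Inputs().Get("MASK", 0).Get<std::vector<cv::Mat>>();
        for (const cv::Mat& mask : masks) {
          (void)mask;
          // Apply the effect only where the mask is set, e.g.:
          // frame.setTo(cv::Scalar(0, 0, 255), mask);
        }

        auto output = absl::make_unique<ImageFrame>(
            input.Format(), input.Width(), input.Height());
        cv::Mat out_view = formats::MatView(output.get());
        frame.copyTo(out_view);
        cc->Outputs().Tag("IMAGE").Add(output.release(), cc->InputTimestamp());
        return absl::OkStatus();
      }
    };
    REGISTER_CALCULATOR(ApplyMaskedEffectCalculator);

    }  // namespace mediapipe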
+
+# Draws annotations and overlays them on top of the input images. Disabled
+# here: the beauty calculators above produce the subgraph output instead.
+#node {
+#  calculator: "AnnotationOverlayCalculator"
+#  input_stream: "IMAGE:input_image"
+#  input_stream: "VECTOR:0:multi_face_landmarks_render_data"
+#  output_stream: "IMAGE:output_image"
+#}
+
+
diff --git a/mediapipe/graphs/beauty/subgraphs/face_renderer_gpu.pbtxt b/mediapipe/graphs/beauty/subgraphs/face_renderer_gpu.pbtxt
new file mode 100644
index 000000000..972c04cf4
--- /dev/null
+++ b/mediapipe/graphs/beauty/subgraphs/face_renderer_gpu.pbtxt
@@ -0,0 +1,96 @@
+# MediaPipe face mesh rendering subgraph.
+
+type: "FaceRendererGpu"
+
+# GPU image. (GpuBuffer)
+input_stream: "IMAGE:input_image"
+# Collection of detected/predicted faces, each represented as a list of
+# landmarks. (std::vector<NormalizedLandmarkList>)
+input_stream: "LANDMARKS:multi_face_landmarks"
+# Regions of interest calculated based on face detections.
+# (std::vector<NormalizedRect>)
+input_stream: "NORM_RECTS:rects"
+# Detected faces. (std::vector<Detection>)
+input_stream: "DETECTIONS:detections"
+
+# GPU image with rendered data. (GpuBuffer)
+output_stream: "IMAGE:output_image"
+
+node {
+  calculator: "ImagePropertiesCalculator"
+  input_stream: "IMAGE_GPU:input_image"
+  output_stream: "SIZE:image_size"
+}
+
+# Converts detections to drawing primitives for annotation overlay.
+node {
+  calculator: "DetectionsToRenderDataCalculator"
+  input_stream: "DETECTIONS:detections"
+  output_stream: "RENDER_DATA:detections_render_data"
+  node_options: {
+    [type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
+      thickness: 4.0
+      color { r: 0 g: 255 b: 0 }
+    }
+  }
+}
+
+# Outputs each element of multi_face_landmarks at a fake timestamp for the rest
+# of the graph to process. At the end of the loop, outputs the BATCH_END
+# timestamp for downstream calculators to inform them that all elements in the
+# vector have been processed.
+node {
+  calculator: "BeginLoopNormalizedLandmarkListVectorCalculator"
+  input_stream: "ITERABLE:multi_face_landmarks"
+  output_stream: "ITEM:face_landmarks"
+  output_stream: "BATCH_END:end_timestamp"
+}
+
+# Converts landmarks to drawing primitives for annotation overlay.
+node {
+  calculator: "FaceLandmarksToRenderDataCalculator"
+  input_stream: "NORM_LANDMARKS:face_landmarks"
+  output_stream: "RENDER_DATA:landmarks_render_data"
+  node_options: {
+    [type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
+      landmark_color { r: 255 g: 0 b: 0 }
+      connection_color { r: 0 g: 255 b: 0 }
+      thickness: 2
+      visualize_landmark_depth: false
+    }
+  }
+}
+
+# Collects a RenderData object for each face into a vector. Upon receiving the
+# BATCH_END timestamp, outputs the vector of RenderData at the BATCH_END
+# timestamp.
+node {
+  calculator: "EndLoopRenderDataCalculator"
+  input_stream: "ITEM:landmarks_render_data"
+  input_stream: "BATCH_END:end_timestamp"
+  output_stream: "ITERABLE:multi_face_landmarks_render_data"
+}
+
+# Converts normalized rects to drawing primitives for annotation overlay.
+node {
+  calculator: "RectToRenderDataCalculator"
+  input_stream: "NORM_RECTS:rects"
+  output_stream: "RENDER_DATA:rects_render_data"
+  node_options: {
+    [type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] {
+      filled: false
+      color { r: 255 g: 0 b: 0 }
+      thickness: 4.0
+    }
+  }
+}
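Editorial note: both renderer subgraphs use the same BeginLoop/EndLoop bracket seen above: the vector of per-face landmark lists is fanned out into one ITEM packet per face, each converted to RenderData, and the results gathered back into a vector when BATCH_END arrives. At a single input timestamp the trio is equivalent to an ordinary loop; the sketch below is a conceptual model with stand-in types, not MediaPipe code.

    #include <vector>

    struct NormalizedLandmarkList {};  // stand-in for the real proto
    struct RenderData {};              // stand-in for the real proto
    RenderData LandmarksToRenderData(const NormalizedLandmarkList&) { return {}; }

    // What BeginLoop... + the looped node + EndLoop... compute per timestamp.
    std::vector<RenderData> RenderAllFaces(
        const std::vector<NormalizedLandmarkList>& multi_face_landmarks) {
      std::vector<RenderData> collected;                   // EndLoop's ITERABLE
      for (const auto& face : multi_face_landmarks) {      // BeginLoop emits ITEM
        collected.push_back(LandmarksToRenderData(face));  // per-face node
      }
      return collected;                                    // output at BATCH_END
    }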
+
+# Draws annotations and overlays them on top of the input images.
+node {
+  calculator: "AnnotationOverlayCalculator"
+  input_stream: "IMAGE_GPU:input_image"
+  #input_stream: "detections_render_data"
+  input_stream: "VECTOR:0:multi_face_landmarks_render_data"
+  #input_stream: "rects_render_data"
+  output_stream: "IMAGE_GPU:output_image"
+}
diff --git a/mediapipe/graphs/beauty/video.mp4 b/mediapipe/graphs/beauty/video.mp4
new file mode 100644
index 000000000..49868c557
Binary files /dev/null and b/mediapipe/graphs/beauty/video.mp4 differ