diff --git a/mediapipe/calculators/landmarks/BUILD b/mediapipe/calculators/landmarks/BUILD
new file mode 100644
index 000000000..00a86f465
--- /dev/null
+++ b/mediapipe/calculators/landmarks/BUILD
@@ -0,0 +1,45 @@
+# Copyright 2019 The MediaPipe Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library")
+
+licenses(["notice"])
+
+package(default_visibility = ["//visibility:public"])
+
+cc_library(
+    name = "landmarks_to_mask_calculator",
+    srcs = ["landmarks_to_mask_calculator.cc"],
+    hdrs = ["landmarks_to_mask_calculator.h"],
+    visibility = ["//visibility:public"],
+    deps = [
+        "//mediapipe/framework:calculator_framework",
+        "//mediapipe/framework:calculator_options_cc_proto",
+        "//mediapipe/framework/formats:landmark_cc_proto",
+        "//mediapipe/framework/formats:location_data_cc_proto",
+        "//mediapipe/framework/port:ret_check",
+        "//mediapipe/util:color_cc_proto",
+        "//mediapipe/util:render_data_cc_proto",
+        "@com_google_absl//absl/memory",
+        "@com_google_absl//absl/strings",
+        "//mediapipe/framework/formats:image_format_cc_proto",
+        "//mediapipe/framework/formats:image_frame",
+        "//mediapipe/framework/formats:image_frame_opencv",
+        "//mediapipe/framework/port:opencv_core",
+        "//mediapipe/framework/port:opencv_imgproc",
+        "//mediapipe/framework/port:opencv_highgui",
+        "//mediapipe/framework/port:vector",
+    ],
+    alwayslink = 1,
+)
diff --git a/mediapipe/calculators/landmarks/landmarks_to_mask_calculator.cc b/mediapipe/calculators/landmarks/landmarks_to_mask_calculator.cc
new file mode 100644
index 000000000..64f81b481
--- /dev/null
+++ b/mediapipe/calculators/landmarks/landmarks_to_mask_calculator.cc
@@ -0,0 +1,450 @@
+// Copyright 2019 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "mediapipe/calculators/landmarks/landmarks_to_mask_calculator.h" + +#include + +#include +#include +#include +#include +#include + +#include + +#include "absl/memory/memory.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_join.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/calculator_options.pb.h" +#include "mediapipe/framework/formats/image_format.pb.h" +#include "mediapipe/framework/formats/image_frame.h" +#include "mediapipe/framework/formats/image_frame_opencv.h" +#include "mediapipe/framework/formats/landmark.pb.h" +#include "mediapipe/framework/port/opencv_core_inc.h" +#include "mediapipe/framework/port/opencv_imgproc_inc.h" +#include "mediapipe/framework/formats/location_data.pb.h" +#include "mediapipe/framework/port/ret_check.h" +#include "mediapipe/util/color.pb.h" +#include "mediapipe/util/render_data.pb.h" +#include "absl/strings/str_cat.h" +#include "mediapipe/framework/port/logging.h" +#include "mediapipe/framework/port/status.h" +#include "mediapipe/framework/port/vector.h" + +namespace mediapipe +{ + namespace + { + constexpr char kLandmarksTag[] = "LANDMARKS"; + constexpr char kNormLandmarksTag[] = "NORM_LANDMARKS"; + constexpr char kLandmarkLabel[] = "KEYPOINT"; + constexpr char kVectorTag[] = "VECTOR"; + constexpr char kMaskTag[] = "MASK"; + constexpr char kFaceBoxTag[] = "FACEBOX"; + constexpr char kImageFrameTag[] = "IMAGE"; + + static const std::vector UPPER_LIP = {61, 185, 40, 39, 37, 0, 267, 269, 270, 409, 291, 308, 415, 310, 311, 312, 13, 82, 81, 80, 191, 78}; + static const std::vector LOWER_LIP = {61, 78, 95, 88, 178, 87, 14, 317, 402, 318, 324, 308, 291, 375, 321, 405, 314, 17, 84, 181, 91, 146}; + static const std::vector FACE_OVAL = {10, 338, 338, 297, 297, 332, 332, 284, 284, 251, 251, 389, 389, 356, 356, + 454, 454, 323, 323, 361, 361, 288, 288, 397, 397, 365, 365, 379, 379, 378, + 378, 400, 400, 377, 377, 152, 152, 148, 148, 176, 176, 149, 149, 150, 150, + 136, 136, 172, 172, 58, 58, 132, 132, 93, 93, 234, 234, 127, 127, 162, 162, + 21, 21, 54, 54, 103, 103, 67, 67, 109, 109, 10}; + static const std::vector MOUTH_INSIDE = {78, 191, 80, 81, 13, 312, 311, 310, 415, 308, 324, 318, 402, 317, 14, 87, 178, 88, 95}; + static const std::vector PART_FOREHEAD_B = {21, 54, 103, 67, 109, 10, 338, 297, 332, 284, 251, 301, 293, 334, 296, 336, 9, 107, 66, 105, 63, 71}; + static const std::vector LEFT_EYE = {130, 33, 246, 161, 160, 159, 157, 173, 133, 155, 154, 153, 145, 144, 163, 7}; + static const std::vector RIGHT_EYE = {362, 398, 384, 385, 386, 387, 388, 466, 263, 249, 390, 373, 374, 380, 381, 382}; + static const std::vector LIPS = {61, 185, 40, 39, 37, 0, 267, 269, 270, 409, 291, 375, 321, 405, 314, 17, 84, 181, 91, 146}; + static const std::vector LEFT_BROW = {70, 63, 105, 66, 107, 55, 65, 52, 53, 46}; + static const std::vector RIGHT_BROW = {336, 296, 334, 293, 301, 300, 283, 282, 295, 285}; + + template + bool IsLandmarkVisibleAndPresent(const LandmarkType &landmark, + bool utilize_visibility, + float visibility_threshold, + bool utilize_presence, + float presence_threshold) + { + if (utilize_visibility && landmark.has_visibility() && + landmark.visibility() < visibility_threshold) + { + return false; + } + if (utilize_presence && landmark.has_presence() && + landmark.presence() < presence_threshold) + { + return false; + } + return true; + } + + bool NormalizedtoPixelCoordinates(double normalized_x, double normalized_y, + int image_width, int image_height, int *x_px, + int *y_px) + { + CHECK(x_px 
+      CHECK(y_px != nullptr);
+      CHECK_GT(image_width, 0);
+      CHECK_GT(image_height, 0);
+
+      if (normalized_x < 0 || normalized_x > 1.0 || normalized_y < 0 ||
+          normalized_y > 1.0)
+      {
+        VLOG(1) << "Normalized coordinates must be between 0.0 and 1.0";
+      }
+
+      *x_px = static_cast<int>(round(normalized_x * image_width));
+      *y_px = static_cast<int>(round(normalized_y * image_height));
+
+      return true;
+    }
+
+    // Face bounding box as (min_x, min_y, max_x, max_y).
+    std::tuple<double, double, double, double> face_box;
+
+    float scale_factor_ = 1.0;
+
+    bool image_frame_available_ = false;
+
+  } // namespace
+
+  absl::Status LandmarksToMaskCalculator::GetContract(
+      CalculatorContract *cc)
+  {
+    RET_CHECK(cc->Inputs().HasTag(kLandmarksTag) ||
+              cc->Inputs().HasTag(kNormLandmarksTag))
+        << "None of the input streams are provided.";
+    RET_CHECK(!(cc->Inputs().HasTag(kLandmarksTag) &&
+                cc->Inputs().HasTag(kNormLandmarksTag)))
+        << "Only one type of landmark can be taken: either absolute or "
+           "normalized landmarks.";
+
+    if (cc->Inputs().HasTag(kImageFrameTag))
+    {
+      cc->Inputs().Tag(kImageFrameTag).Set<ImageFrame>();
+    }
+
+    if (cc->Inputs().HasTag(kLandmarksTag))
+    {
+      cc->Inputs().Tag(kLandmarksTag).Set<LandmarkList>();
+    }
+    if (cc->Inputs().HasTag(kNormLandmarksTag))
+    {
+      cc->Inputs().Tag(kNormLandmarksTag).Set<NormalizedLandmarkList>();
+    }
+    if (cc->Outputs().HasTag(kMaskTag))
+    {
+      cc->Outputs().Tag(kMaskTag).Set<std::unordered_map<std::string, cv::Mat>>();
+    }
+    if (cc->Outputs().HasTag(kFaceBoxTag))
+    {
+      cc->Outputs().Tag(kFaceBoxTag).Set<std::tuple<double, double, double, double>>();
+    }
+
+    return absl::OkStatus();
+  }
+
+  absl::Status LandmarksToMaskCalculator::Open(CalculatorContext *cc)
+  {
+    cc->SetOffset(TimestampDiff(0));
+
+    if (cc->Inputs().HasTag(kImageFrameTag))
+    {
+      image_frame_available_ = true;
+    }
+
+    return absl::OkStatus();
+  }
+
+  absl::Status LandmarksToMaskCalculator::Process(CalculatorContext *cc)
+  {
+    // Check that landmarks are not empty and skip rendering if so.
+    // Don't emit an empty packet for this timestamp.
+    if (cc->Inputs().HasTag(kLandmarksTag) &&
+        cc->Inputs().Tag(kLandmarksTag).IsEmpty())
+    {
+      return absl::OkStatus();
+    }
+    if (cc->Inputs().HasTag(kNormLandmarksTag) &&
+        cc->Inputs().Tag(kNormLandmarksTag).IsEmpty())
+    {
+      return absl::OkStatus();
+    }
+    if (cc->Inputs().HasTag(kImageFrameTag) &&
+        cc->Inputs().Tag(kImageFrameTag).IsEmpty())
+    {
+      return absl::OkStatus();
+    }
+
+    // Initialize render target, drawn with OpenCV.
+
+    std::unique_ptr<cv::Mat> image_mat;
+    ImageFormat::Format target_format;
+    std::unordered_map<std::string, cv::Mat> all_masks;
+
+    MP_RETURN_IF_ERROR(CreateRenderTargetCpu(cc, image_mat, &target_format));
+
+    int image_width_ = image_mat->cols;
+    int image_height_ = image_mat->rows;
+
+    // Maps each face-part name to the landmark indices that outline it.
+    std::unordered_map<std::string, std::vector<int>> orderList;
+    orderList.insert(make_pair("UPPER_LIP", UPPER_LIP));
+    orderList.insert(make_pair("LOWER_LIP", LOWER_LIP));
+    orderList.insert(make_pair("FACE_OVAL", FACE_OVAL));
+    orderList.insert(make_pair("MOUTH_INSIDE", MOUTH_INSIDE));
+    orderList.insert(make_pair("LEFT_EYE", LEFT_EYE));
+    orderList.insert(make_pair("RIGHT_EYE", RIGHT_EYE));
+    orderList.insert(make_pair("LEFT_BROW", LEFT_BROW));
+    orderList.insert(make_pair("RIGHT_BROW", RIGHT_BROW));
+    orderList.insert(make_pair("LIPS", LIPS));
+    orderList.insert(make_pair("PART_FOREHEAD_B", PART_FOREHEAD_B));
+
+    if (cc->Inputs().HasTag(kLandmarksTag))
+    {
+      const LandmarkList &landmarks =
+          cc->Inputs().Tag(kLandmarksTag).Get<LandmarkList>();
+
+      cv::Mat mask;
+      std::vector<cv::Point> point_array;
+      int c = 0;
+      for (const auto &[key, value] : orderList)
+      {
+        for (auto order : value)
+        {
+          c = 0;
+          for (int i = 0; i < landmarks.landmark_size(); ++i)
+          {
+            const Landmark &landmark = landmarks.landmark(i);
+
+            if (!IsLandmarkVisibleAndPresent<Landmark>(
+                    landmark, false,
+                    0.0, false,
+                    0.0))
+            {
+              continue;
+            }
+
+            if (order == c)
+            {
+              const auto &point = landmark;
+              int x = -1;
+              int y = -1;
+              CHECK(NormalizedtoPixelCoordinates(point.x(), point.y(), image_width_,
+                                                 image_height_, &x, &y));
+              point_array.push_back(cv::Point(x, y));
+            }
+            c += 1;
+          }
+        }
+        // Fill the polygon outlined by the collected points to get the part mask.
+        std::vector<std::vector<cv::Point>> point_vec;
+        point_vec.push_back(point_array);
+        mask = cv::Mat::zeros(image_mat->size(), CV_32FC1);
+        cv::fillPoly(mask, point_vec, cv::Scalar::all(255), cv::LINE_AA);
+        mask.convertTo(mask, CV_8U);
+        all_masks.insert(make_pair(key, mask));
+        point_vec.clear();
+        point_array.clear();
+      }
+    }
+
+    if (cc->Inputs().HasTag(kNormLandmarksTag))
+    {
+      const NormalizedLandmarkList &landmarks =
+          cc->Inputs().Tag(kNormLandmarksTag).Get<NormalizedLandmarkList>();
+
+      cv::Mat mask;
+      std::vector<cv::Point> point_array;
+      int c = 0;
+      for (const auto &[key, value] : orderList)
+      {
+        for (auto order : value)
+        {
+          c = 0;
+          for (int i = 0; i < landmarks.landmark_size(); ++i)
+          {
+            const NormalizedLandmark &landmark = landmarks.landmark(i);
+
+            if (!IsLandmarkVisibleAndPresent<NormalizedLandmark>(
+                    landmark, false,
+                    0.0, false,
+                    0.0))
+            {
+              continue;
+            }
+
+            if (order == c)
+            {
+              const auto &point = landmark;
+              int x = -1;
+              int y = -1;
+              CHECK(NormalizedtoPixelCoordinates(point.x(), point.y(), image_width_,
+                                                 image_height_, &x, &y));
+              point_array.push_back(cv::Point(x, y));
+            }
+            c += 1;
+          }
+        }
+        // Fill the polygon outlined by the collected points to get the part mask.
+        std::vector<std::vector<cv::Point>> point_vec;
+        point_vec.push_back(point_array);
+        mask = cv::Mat::zeros(image_mat->size(), CV_32FC1);
+        cv::fillPoly(mask, point_vec, cv::Scalar::all(255), cv::LINE_AA);
+        mask.convertTo(mask, CV_8U);
+        all_masks.insert(make_pair(key, mask));
+        point_vec.clear();
+        point_array.clear();
+      }
+    }
+
+    MP_RETURN_IF_ERROR(RenderToCpu(cc, all_masks));
+
+    return absl::OkStatus();
+  }
+
+  absl::Status LandmarksToMaskCalculator::RenderToCpu(CalculatorContext *cc,
+                                                      std::unordered_map<std::string, cv::Mat> &all_masks)
+  {
+    auto output_frame = absl::make_unique<std::unordered_map<std::string, cv::Mat>>(
+        all_masks, all_masks.get_allocator());
+
+    if (cc->Outputs().HasTag(kMaskTag))
+    {
+      cc->Outputs()
+          .Tag(kMaskTag)
+          .Add(output_frame.release(), cc->InputTimestamp());
+    }
+
+    auto output_frame2 = absl::make_unique<std::tuple<double, double, double, double>>(face_box);
+
+    if (cc->Outputs().HasTag(kFaceBoxTag))
+    {
+      cc->Outputs()
+          .Tag(kFaceBoxTag)
+          .Add(output_frame2.release(), cc->InputTimestamp());
+    }
+
+    all_masks.clear();
+    return absl::OkStatus();
+  }
+
+  absl::Status LandmarksToMaskCalculator::CreateRenderTargetCpu(
+      CalculatorContext *cc, std::unique_ptr<cv::Mat> &image_mat,
+      ImageFormat::Format *target_format)
+  {
+    if (image_frame_available_)
+    {
+      const auto &input_frame =
+          cc->Inputs().Tag(kImageFrameTag).Get<ImageFrame>();
+
+      int target_mat_type;
+      switch (input_frame.Format())
+      {
+      case ImageFormat::SRGBA:
+        *target_format = ImageFormat::SRGBA;
+        target_mat_type = CV_8UC4;
+        break;
+      case ImageFormat::SRGB:
+        *target_format = ImageFormat::SRGB;
+        target_mat_type = CV_8UC3;
+        break;
+      case ImageFormat::GRAY8:
+        *target_format = ImageFormat::SRGB;
+        target_mat_type = CV_8UC3;
+        break;
+      default:
+        return absl::UnknownError("Unexpected image frame format.");
+        break;
+      }
+
+      image_mat = absl::make_unique<cv::Mat>(
+          input_frame.Height(), input_frame.Width(), target_mat_type);
+
+      auto input_mat = formats::MatView(&input_frame);
+
+      if (input_frame.Format() == ImageFormat::GRAY8)
+      {
+        cv::Mat rgb_mat;
+        cv::cvtColor(input_mat, rgb_mat, CV_GRAY2RGB);
+        rgb_mat.copyTo(*image_mat);
+      }
+      else
+      {
+        input_mat.copyTo(*image_mat);
+      }
+    }
+    else
+    {
+      image_mat = absl::make_unique<cv::Mat>(
+          150, 150, CV_8UC4,
+          cv::Scalar(255, 255, 255));
+      *target_format = ImageFormat::SRGBA;
+    }
+
+    return absl::OkStatus();
+  }
+
+  /* absl::Status LandmarksToMaskCalculator::GetFaceBox(std::unique_ptr<cv::Mat> &image_mat,
+                                                        const RenderData &render_data)
+  {
+    cv::Mat mat_image_ = *image_mat.get();
+
+    int image_width_ = image_mat->cols;
+    int image_height_ = image_mat->rows;
+
+    std::vector<double> x_s, y_s;
+    double box_min_y, box_max_y, box_max_x, box_min_x;
+
+    for (int i = 0; i < landmarks.landmark_size(); ++i)
+    {
+      const Landmark &landmark = landmarks.landmark(i);
+
+      if (!IsLandmarkVisibleAndPresent<Landmark>(
+              landmark, false,
+              0.0, false,
+              0.0))
+      {
+        continue;
+      }
+
+      const auto &point = landmark.point();
+      int x = -1;
+      int y = -1;
+      if (point.normalized())
+      {
+        CHECK(NormalizedtoPixelCoordinates(point.x(), point.y(), image_width_,
+                                           image_height_, &x, &y));
+      }
+      else
+      {
+        x = static_cast<int>(point.x() * scale_factor_);
+        y = static_cast<int>(point.y() * scale_factor_);
+      }
+      x_s.push_back(point.x());
+      x_s.push_back(point.y());
+    }
+
+    cv::minMaxLoc(y_s, &box_min_y, &box_max_y);
+    cv::minMaxLoc(x_s, &box_min_x, &box_max_x);
+    box_min_y = box_min_y * 0.9;
+    face_box = std::make_tuple(box_min_x, box_min_y, box_max_x, box_max_y);
+
+    return absl::OkStatus();
+  } */
+
+  REGISTER_CALCULATOR(LandmarksToMaskCalculator);
+} // namespace mediapipe
diff --git a/mediapipe/calculators/landmarks/landmarks_to_mask_calculator.h b/mediapipe/calculators/landmarks/landmarks_to_mask_calculator.h
new file mode 100644
index 000000000..c8798b350
--- /dev/null
+++ b/mediapipe/calculators/landmarks/landmarks_to_mask_calculator.h
@@ -0,0 +1,96 @@
+// Copyright 2020 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef MEDIAPIPE_CALCULATORS_UTIL_LANDMARKS_TO_MASK_CALCULATOR_H_
+#define MEDIAPIPE_CALCULATORS_UTIL_LANDMARKS_TO_MASK_CALCULATOR_H_
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+
+#include "absl/memory/memory.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/str_join.h"
+#include "mediapipe/framework/calculator_framework.h"
+#include "mediapipe/framework/calculator_options.pb.h"
+#include "mediapipe/framework/formats/image_format.pb.h"
+#include "mediapipe/framework/formats/image_frame.h"
+#include "mediapipe/framework/formats/image_frame_opencv.h"
+#include "mediapipe/framework/formats/landmark.pb.h"
+#include "mediapipe/framework/formats/location_data.pb.h"
+#include "mediapipe/framework/port/opencv_core_inc.h"
+#include "mediapipe/framework/port/opencv_imgproc_inc.h"
+#include "mediapipe/framework/port/ret_check.h"
+#include "mediapipe/framework/port/logging.h"
+#include "mediapipe/framework/port/status.h"
+#include "mediapipe/framework/port/vector.h"
+#include "mediapipe/util/color.pb.h"
+#include "mediapipe/util/render_data.pb.h"
+
+namespace mediapipe
+{
+
+  // A calculator that converts face landmarks into per-face-part masks
+  // (cv::Mat keyed by part name) and a face bounding box. The input should be
+  // a NormalizedLandmarkList (or LandmarkList) proto together with the image
+  // the landmarks refer to.
+  //
+  // Example config:
+  // node {
+  //   calculator: "LandmarksToMaskCalculator"
+  //   input_stream: "IMAGE:input_image"
+  //   input_stream: "NORM_LANDMARKS:face_landmarks"
+  //   output_stream: "FACEBOX:face_box"
+  //   output_stream: "MASK:mask"
+  // }
+  class LandmarksToMaskCalculator : public CalculatorBase
+  {
+  public:
+    LandmarksToMaskCalculator() = default;
+    ~LandmarksToMaskCalculator() override = default;
+    LandmarksToMaskCalculator(const LandmarksToMaskCalculator &) = delete;
+    LandmarksToMaskCalculator &operator=(
+        const LandmarksToMaskCalculator &) = delete;
+
+    static absl::Status GetContract(CalculatorContract *cc);
+
+    absl::Status Open(CalculatorContext *cc) override;
+
+    absl::Status Process(CalculatorContext *cc) override;
+
+  private:
+    absl::Status RenderToCpu(CalculatorContext *cc,
+                             std::unordered_map<std::string, cv::Mat> &all_masks);
+
+    absl::Status GetFaceBox(std::unique_ptr<cv::Mat> &image_mat,
+                            const RenderData &render_data);
+    absl::Status CreateRenderTargetCpu(
+        CalculatorContext *cc, std::unique_ptr<cv::Mat> &image_mat,
+        ImageFormat::Format *target_format);
+  };
+
+} // namespace mediapipe
+#endif // MEDIAPIPE_CALCULATORS_UTIL_LANDMARKS_TO_MASK_CALCULATOR_H_
diff --git a/mediapipe/examples/desktop/image_style/BUILD b/mediapipe/examples/desktop/image_style/BUILD
new file mode 100644
index 000000000..503cde103
--- /dev/null
+++ b/mediapipe/examples/desktop/image_style/BUILD
@@ -0,0 +1,36 @@
+# Copyright 2021 The MediaPipe Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+licenses(["notice"])
+
+package(default_visibility = ["//mediapipe/examples:__subpackages__"])
+
+# Linux only
+cc_binary(
+    name = "image_style_cpu",
+    deps = [
+        "//mediapipe/examples/desktop:demo_run_graph_main",
+        "//mediapipe/graphs/image_style:desktop_calculators",
+    ],
+)
+
+cc_binary(
+    name = "image_style_gpu",
+    deps = [
+        "//mediapipe/examples/desktop:demo_run_graph_main",
+        "//mediapipe/graphs/image_style:desktop_calculators",
+    ],
+)
+
+
diff --git a/mediapipe/graphs/beauty/beauty.pbtxt b/mediapipe/graphs/beauty/beauty.pbtxt
index 2cc563424..4b963ef88 100644
--- a/mediapipe/graphs/beauty/beauty.pbtxt
+++ b/mediapipe/graphs/beauty/beauty.pbtxt
@@ -50,9 +50,6 @@ node {
   input_side_packet: "NUM_FACES:num_faces"
   input_side_packet: "WITH_ATTENTION:with_attention"
   output_stream: "LANDMARKS:multi_face_landmarks"
-  output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
-  output_stream: "DETECTIONS:face_detections"
-  output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
 }
 
 # Subgraph that renders face-landmark annotation onto the input image.
@@ -60,7 +57,5 @@ node {
   calculator: "FaceRendererCpu"
   input_stream: "IMAGE:throttled_input_video"
   input_stream: "LANDMARKS:multi_face_landmarks"
-  input_stream: "NORM_RECTS:face_rects_from_landmarks"
-  input_stream: "DETECTIONS:face_detections"
   output_stream: "IMAGE:output_video"
 }
diff --git a/mediapipe/graphs/beauty/beauty_mobile.pbtxt b/mediapipe/graphs/beauty/beauty_mobile.pbtxt
index 8e4d262cc..aad5fab25 100644
--- a/mediapipe/graphs/beauty/beauty_mobile.pbtxt
+++ b/mediapipe/graphs/beauty/beauty_mobile.pbtxt
@@ -51,9 +51,6 @@ node {
   input_side_packet: "NUM_FACES:num_faces"
   input_side_packet: "WITH_ATTENTION:with_attention"
   output_stream: "LANDMARKS:multi_face_landmarks"
-  output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
-  output_stream: "DETECTIONS:face_detections"
-  output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
 }
 
 # Defines side packets for further use in the graph.
@@ -69,8 +66,6 @@ node {
   calculator: "FaceRendererCpu"
   input_stream: "IMAGE:throttled_input_video_cpu"
   input_stream: "LANDMARKS:multi_face_landmarks"
-  input_stream: "NORM_RECTS:face_rects_from_landmarks"
-  input_stream: "DETECTIONS:face_detections"
   output_stream: "IMAGE:output_video_cpu"
 }
diff --git a/mediapipe/graphs/beauty/subgraphs/BUILD b/mediapipe/graphs/beauty/subgraphs/BUILD
index 7e3c18599..0d92f566d 100644
--- a/mediapipe/graphs/beauty/subgraphs/BUILD
+++ b/mediapipe/graphs/beauty/subgraphs/BUILD
@@ -33,6 +33,7 @@ cc_library(
         "//mediapipe/calculators/beauty:whiten_teeth_calculator",
         "//mediapipe/calculators/util:detections_to_render_data_calculator",
         "//mediapipe/calculators/util:landmarks_to_render_data_calculator",
+        "//mediapipe/calculators/landmarks:landmarks_to_mask_calculator",
        "//mediapipe/calculators/util:rect_to_render_data_calculator",
         "//mediapipe/graphs/beauty/calculators:face_landmarks_to_render_data_calculator",
     ],
diff --git a/mediapipe/graphs/beauty/subgraphs/face_renderer_cpu.pbtxt b/mediapipe/graphs/beauty/subgraphs/face_renderer_cpu.pbtxt
index 37c5416ee..d019995f6 100644
--- a/mediapipe/graphs/beauty/subgraphs/face_renderer_cpu.pbtxt
+++ b/mediapipe/graphs/beauty/subgraphs/face_renderer_cpu.pbtxt
@@ -7,11 +7,6 @@ input_stream: "IMAGE:input_image"
 # Collection of detected/predicted faces, each represented as a list of
 # landmarks. (std::vector<NormalizedLandmarkList>)
 input_stream: "LANDMARKS:multi_face_landmarks"
-# Regions of interest calculated based on palm detections.
-# (std::vector<NormalizedRect>)
-input_stream: "NORM_RECTS:rects"
-# Detected palms. (std::vector<Detection>)
-input_stream: "DETECTIONS:detections"
 # CPU image with rendered data. (ImageFrame)
 output_stream: "IMAGE:output_image"
 
@@ -22,19 +17,6 @@ node {
   output_stream: "SIZE:image_size"
 }
 
-# Converts detections to drawing primitives for annotation overlay.
-node {
-  calculator: "DetectionsToRenderDataCalculator"
-  input_stream: "DETECTIONS:detections"
-  output_stream: "RENDER_DATA:detections_render_data"
-  node_options: {
-    [type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
-      thickness: 4.0
-      color { r: 0 g: 255 b: 0 }
-    }
-  }
-}
-
 # Outputs each element of multi_face_landmarks at a fake timestamp for the rest
 # of the graph to process. At the end of the loop, outputs the BATCH_END
 # timestamp for downstream calculators to inform them that all elements in the
@@ -42,57 +24,39 @@ node {
   calculator: "BeginLoopNormalizedLandmarkListVectorCalculator"
   input_stream: "ITERABLE:multi_face_landmarks"
+  input_stream: "CLONE:input_image"
   output_stream: "ITEM:face_landmarks"
+  output_stream: "CLONE:loop_image"
   output_stream: "BATCH_END:landmark_timestamp"
 }
 
 # Converts landmarks to drawing primitives for annotation overlay.
 node {
-  calculator: "FaceLandmarksToRenderDataCalculator"
+  calculator: "LandmarksToMaskCalculator"
+  input_stream: "IMAGE:loop_image"
   input_stream: "NORM_LANDMARKS:face_landmarks"
-  output_stream: "RENDER_DATA:landmarks_render_data"
-  node_options: {
-    [type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
-      landmark_color { r: 255 g: 0 b: 0 }
-      connection_color { r: 0 g: 255 b: 0 }
-      thickness: 2
-      visualize_landmark_depth: false
-    }
-  }
+  output_stream: "FACEBOX:face_box"
+  output_stream: "MASK:mask"
 }
 
 # Collects a RenderData object for each hand into a vector. Upon receiving the
 # BATCH_END timestamp, outputs the vector of RenderData at the BATCH_END
 # timestamp.
 node {
-  calculator: "EndLoopRenderDataCalculator"
-  input_stream: "ITEM:landmarks_render_data"
+  calculator: "EndLoopMapMaskCalculator"
+  input_stream: "ITEM:mask"
   input_stream: "BATCH_END:landmark_timestamp"
-  output_stream: "ITERABLE:multi_face_landmarks_render_data"
+  output_stream: "ITERABLE:multi_mask"
 }
 
-# Converts normalized rects to drawing primitives for annotation overlay.
-#node {
-#  calculator: "RectToRenderDataCalculator"
-#  input_stream: "NORM_RECTS:rects"
-#  output_stream: "RENDER_DATA:rects_render_data"
-#  node_options: {
-#    [type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] {
-#      filled: false
-#      color { r: 255 g: 0 b: 0 }
-#      thickness: 4.0
-#    }
-#  }
-#}
-
 node {
-  calculator: "FormFaceMaskCalculator"
-  input_stream: "IMAGE:input_image"
-  input_stream: "VECTOR:0:multi_face_landmarks_render_data"
-  output_stream: "FACEBOX:face_box"
-  output_stream: "MASK:multi_mask"
+  calculator: "EndLoopFaceBoxCalculator"
+  input_stream: "ITEM:face_box"
+  input_stream: "BATCH_END:landmark_timestamp"
+  output_stream: "ITERABLE:multi_face_box"
 }
+
 node {
   calculator: "DrawLipstickCalculator"
   input_stream: "IMAGE:input_image"
@@ -111,17 +75,9 @@ node {
   calculator: "SmoothFaceCalculator"
   input_stream: "IMAGE:input_image_2"
   input_stream: "MASK:0:multi_mask"
-  input_stream: "FACEBOX:face_box"
+  input_stream: "FACEBOX:multi_face_box"
   output_stream: "IMAGE:output_image"
 }
 
-# Draws annotations and overlays them on top of the input images.
-#node {
-#  calculator: "AnnotationOverlayCalculator"
-#  input_stream: "IMAGE:input_image"
-#  input_stream: "VECTOR:0:multi_face_landmarks_render_data"
-#  output_stream: "IMAGE:output_image"
-#}
-
diff --git a/mediapipe/graphs/image_style/BUILD b/mediapipe/graphs/image_style/BUILD
new file mode 100644
index 000000000..90e8d9346
--- /dev/null
+++ b/mediapipe/graphs/image_style/BUILD
@@ -0,0 +1,64 @@
+# Copyright 2019 The MediaPipe Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+load(
+    "//mediapipe/framework/tool:mediapipe_graph.bzl",
+    "mediapipe_binary_graph",
+)
+
+licenses(["notice"])
+
+package(default_visibility = ["//visibility:public"])
+
+cc_library(
+    name = "mobile_calculators",
+    deps = [
+        "//mediapipe/calculators/tensorflow:tensor_to_image_frame_calculator",
+        "//mediapipe/calculators/tensorflow:vector_float_to_tensor_calculator",
+        "//mediapipe/calculators/tensor:tensors_to_floats_calculator",
+        "//mediapipe/calculators/tensor:tensors_to_segmentation_calculator",
+        "//mediapipe/calculators/util:from_image_calculator",
+        "//mediapipe/calculators/tensor:image_to_tensor_calculator",
+        "//mediapipe/calculators/tensor:inference_calculator",
+        "//mediapipe/calculators/core:flow_limiter_calculator",
+        "//mediapipe/calculators/image:image_transformation_calculator",
+        "//mediapipe/calculators/tflite:tflite_converter_calculator",
+        "//mediapipe/calculators/tflite:tflite_custom_op_resolver_calculator",
+        "//mediapipe/calculators/tflite:tflite_inference_calculator",
+        "//mediapipe/gpu:gpu_buffer_to_image_frame_calculator",
+        "//mediapipe/gpu:image_frame_to_gpu_buffer_calculator",
+        "//mediapipe/calculators/tflite:tflite_tensors_to_segmentation_calculator",
+        "//mediapipe/calculators/image:image_properties_calculator",
+    ],
+)
+
+cc_library(
+    name = "desktop_calculators",
+    deps = [
+        "//mediapipe/calculators/core:flow_limiter_calculator",
+        "//mediapipe/calculators/image:image_transformation_calculator",
+        "//mediapipe/calculators/tflite:tflite_converter_calculator",
+        "//mediapipe/calculators/tflite:tflite_inference_calculator",
+        "//mediapipe/calculators/tflite:tflite_tensors_to_gpuimage_calculator",
+        "//mediapipe/calculators/tflite:tflite_custom_op_resolver_calculator",
+        "//mediapipe/calculators/tflite:tflite_tensors_to_segmentation_calculator",
+    ],
+)
+
+mediapipe_binary_graph(
+    name = "mobile_gpu_binary_graph",
+    graph = "image_style.pbtxt",
+    output_name = "mobile_gpu.binarypb",
+    deps = [":mobile_calculators"],
+)
diff --git a/mediapipe/graphs/image_style/image_style.pbtxt b/mediapipe/graphs/image_style/image_style.pbtxt
new file mode 100644
index 000000000..a1860d14f
--- /dev/null
+++ b/mediapipe/graphs/image_style/image_style.pbtxt
@@ -0,0 +1,84 @@
+# MediaPipe graph that performs image style transfer with TensorFlow Lite on GPU.
+
+# Images on GPU coming into and out of the graph.
+input_stream: "input_video"
+output_stream: "output_video"
+
+
+node {
+  calculator: "FlowLimiterCalculator"
+  input_stream: "input_video"
+  input_stream: "FINISHED:output_video"
+  input_stream_info: {
+    tag_index: "FINISHED"
+    back_edge: true
+  }
+  output_stream: "throttled_input_video"
+}
+
+node: {
+  calculator: "ImageTransformationCalculator"
+  input_stream: "IMAGE_GPU:throttled_input_video"
+  output_stream: "IMAGE_GPU:transformed_input_video"
+  node_options: {
+    [type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
+      output_width: 256
+      output_height: 256
+    }
+  }
+}
+
+# Converts the transformed input image on GPU into an image tensor stored in
+# tflite::gpu::GlBuffer. The output_tensor_float_range option keeps the pixel
+# values in the [0.f, 255.f] range expected by the style-transfer model.
+node {
+  calculator: "TfLiteConverterCalculator"
+  input_stream: "IMAGE_GPU:transformed_input_video"
+  output_stream: "TENSORS_GPU:image_tensor"
+  options {
+    [mediapipe.TfLiteConverterCalculatorOptions.ext] {
+      output_tensor_float_range {
+        min: 0
+        max: 255
+      }
+    }
+  }
+}
+
+# Runs the style-transfer TensorFlow Lite model on GPU.
+node {
+  calculator: "TfLiteInferenceCalculator"
+  input_stream: "TENSORS_GPU:image_tensor"
+  output_stream: "TENSORS:stylized_tensor"
+  node_options: {
+    [type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] {
+      model_path: "mediapipe/models/metaf-512-mobile3.tflite"
+      use_gpu: true
+    }
+  }
+}
+
+# Converts the stylized output tensor back into an image.
+node {
+  calculator: "TfLiteTensorsToSegmentationCalculator"
+  input_stream: "TENSORS:stylized_tensor"
+  output_stream: "MASK:mask_image"
+  node_options: {
+    [type.googleapis.com/mediapipe.TfLiteTensorsToSegmentationCalculatorOptions] {
+      tensor_width: 256
+      tensor_height: 256
+      tensor_channels: 3
+    }
+  }
+}
+
+# Transfers the output image from CPU back to GPU memory, to be sent out of
+# the graph.
+node: {
+  calculator: "ImageFrameToGpuBufferCalculator"
+  input_stream: "mask_image"
+  output_stream: "output_video"
+}
diff --git a/mediapipe/graphs/image_style/image_style_cpu (copy).pbtxt b/mediapipe/graphs/image_style/image_style_cpu (copy).pbtxt
new file mode 100644
index 000000000..6d9d64318
--- /dev/null
+++ b/mediapipe/graphs/image_style/image_style_cpu (copy).pbtxt
@@ -0,0 +1,96 @@
+# MediaPipe graph that performs image style transfer on desktop with
+# TensorFlow Lite on CPU, reading from and writing to video files.
+
+# max_queue_size limits the number of packets enqueued on any input stream
+# by throttling inputs to the graph. This makes the graph only process one
+# frame per time.
+max_queue_size: 1
+
+# Decodes an input video file into images and a video header.
+node {
+  calculator: "OpenCvVideoDecoderCalculator"
+  input_side_packet: "INPUT_FILE_PATH:input_video_path"
+  output_stream: "VIDEO:input_video"
+  output_stream: "VIDEO_PRESTREAM:input_video_header"
+}
+
+# Transforms the input image on CPU to a 512x512 image. To scale the image, by
+# default it uses the STRETCH scale mode that maps the entire input image to the
+# entire transformed image. As a result, image aspect ratio may be changed and
+# objects in the image may be deformed (stretched or squeezed), but the style
+# transfer model used in this graph is agnostic to that deformation.
+node: {
+  calculator: "ImageTransformationCalculator"
+  input_stream: "IMAGE:input_video"
+  output_stream: "IMAGE:transformed_input_video"
+  node_options: {
+    [type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
+      output_width: 512
+      output_height: 512
+    }
+  }
+}
+
+# Converts the transformed input image on CPU into an image tensor as a
+# TfLiteTensor. The zero_center option is set to true to normalize the
+# pixel values to [-1.f, 1.f] as opposed to [0.f, 1.f].
+node {
+  calculator: "TfLiteConverterCalculator"
+  input_stream: "IMAGE:transformed_input_video"
+  output_stream: "TENSORS:image_tensor"
+  node_options: {
+    [type.googleapis.com/mediapipe.TfLiteConverterCalculatorOptions] {
+      zero_center: true
+    }
+  }
+}
+
+# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs
+# the stylized image tensor.
+node {
+  calculator: "TfLiteInferenceCalculator"
+  input_stream: "TENSORS:image_tensor"
+  output_stream: "TENSORS:stylized_tensor"
+  node_options: {
+    [type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] {
+      model_path: "mediapipe/models/metaf-512-mobile3.tflite"
+    }
+  }
+}
+
+# Converts the stylized output tensor into an image.
+node {
+  calculator: "TfliteTensorsToGpuImageCalculator"
+  input_stream: "TENSORS:stylized_tensor"
+  output_stream: "IMAGE:image"
+}
+
+#node {
+#  calculator: "TfLiteTensorsToSegmentationCalculator"
+#  input_stream: "TENSORS:stylized_tensor"
+#  output_stream: "MASK:mask_image"
+#  node_options: {
+#    [type.googleapis.com/mediapipe.TfLiteTensorsToSegmentationCalculatorOptions] {
+#      tensor_width: 512
+#      tensor_height: 512
+#      tensor_channels: 3
+#    }
+#  }
+#}
+
+# Encodes the annotated images into a video file, adopting properties specified
+# in the input video header, e.g., video framerate.
+node {
+  calculator: "OpenCvVideoEncoderCalculator"
+  input_stream: "VIDEO:image"
+  input_stream: "VIDEO_PRESTREAM:input_video_header"
+  input_side_packet: "OUTPUT_FILE_PATH:output_video_path"
+  node_options: {
+    [type.googleapis.com/mediapipe.OpenCvVideoEncoderCalculatorOptions]: {
+      codec: "avc1"
+      video_format: "mp4"
+    }
+  }
+}
diff --git a/mediapipe/graphs/image_style/image_style_cpu.pbtxt b/mediapipe/graphs/image_style/image_style_cpu.pbtxt
new file mode 100644
index 000000000..1a78cf6c0
--- /dev/null
+++ b/mediapipe/graphs/image_style/image_style_cpu.pbtxt
@@ -0,0 +1,93 @@
+# MediaPipe graph that performs image style transfer with TensorFlow Lite on CPU.
+
+# Input image. (ImageFrame)
+input_stream: "input_video"
+
+# Output image with rendered results. (ImageFrame)
+output_stream: "output_video"
+
+# Throttles the images flowing downstream for flow control. It passes through
+# the very first incoming image unaltered, and waits for downstream nodes
+# (calculators and subgraphs) in the graph to finish their tasks before it
+# passes through another image. All images that come in while waiting are
+# dropped, limiting the number of in-flight images in most part of the graph to
+# 1. This prevents the downstream nodes from queuing up incoming images and data
+# excessively, which leads to increased latency and memory usage, unwanted in
+# real-time mobile applications. It also eliminates unnecessary computation,
+# e.g., the output produced by a node may get dropped downstream if the
+# subsequent nodes are still busy processing previous inputs.
+node {
+  calculator: "FlowLimiterCalculator"
+  input_stream: "input_video"
+  input_stream: "FINISHED:output_video"
+  input_stream_info: {
+    tag_index: "FINISHED"
+    back_edge: true
+  }
+  output_stream: "throttled_input_video"
+}
+
+# Transforms the input image on CPU to a 256x256 image. To scale the image, by
+# default it uses the STRETCH scale mode that maps the entire input image to the
+# entire transformed image. As a result, image aspect ratio may be changed and
+# objects in the image may be deformed (stretched or squeezed), but the style
+# transfer model used in this graph is agnostic to that deformation.
+node: {
+  calculator: "ImageTransformationCalculator"
+  input_stream: "IMAGE:throttled_input_video"
+  output_stream: "IMAGE:transformed_input_video"
+  node_options: {
+    [type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
+      output_width: 256
+      output_height: 256
+    }
+  }
+}
+
+# Converts the transformed input image on CPU into an image tensor as a
+# TfLiteTensor. The output_tensor_float_range option keeps the pixel values in
+# the [0.f, 255.f] range expected by the model, and max_num_channels is set to
+# 3 so only the RGB channels are passed into the tensor.
+node {
+  calculator: "TfLiteConverterCalculator"
+  input_stream: "IMAGE:transformed_input_video"
+  output_stream: "TENSORS:input_tensors"
+  options {
+    [mediapipe.TfLiteConverterCalculatorOptions.ext] {
+      output_tensor_float_range {
+        min: 0
+        max: 255
+      }
+      max_num_channels: 3
+    }
+  }
+}
+
+# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs
+# the stylized image tensor.
+node {
+  calculator: "TfLiteInferenceCalculator"
+  input_stream: "TENSORS:input_tensors"
+  output_stream: "TENSORS:output_tensors"
+  node_options: {
+    [type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] {
+      model_path: "mediapipe/models/model_float32.tflite"
+    }
+  }
+}
+
+# Converts the stylized output tensor into the output image.
+node {
+  calculator: "TfLiteTensorsToSegmentationCalculator"
+  input_stream: "TENSORS:output_tensors"
+  output_stream: "MASK:output_video"
+  node_options: {
+    [type.googleapis.com/mediapipe.TfLiteTensorsToSegmentationCalculatorOptions] {
+      tensor_width: 256
+      tensor_height: 256
+      tensor_channels: 3
+    }
+  }
+}
+
diff --git a/mediapipe/graphs/image_style/image_style_gpu.pbtxt b/mediapipe/graphs/image_style/image_style_gpu.pbtxt
new file mode 100644
index 000000000..7e48e800e
--- /dev/null
+++ b/mediapipe/graphs/image_style/image_style_gpu.pbtxt
@@ -0,0 +1,82 @@
+# MediaPipe graph that performs image style transfer with TensorFlow Lite on GPU.
+
+# Images on GPU coming into and out of the graph.
+input_stream: "input_video"
+output_stream: "output_video"
+
+
+node {
+  calculator: "FlowLimiterCalculator"
+  input_stream: "input_video"
+  input_stream: "FINISHED:output_video"
+  input_stream_info: {
+    tag_index: "FINISHED"
+    back_edge: true
+  }
+  output_stream: "throttled_input_video"
+}
+
+node: {
+  calculator: "ImageTransformationCalculator"
+  input_stream: "IMAGE_GPU:throttled_input_video"
+  output_stream: "IMAGE_GPU:transformed_input_video"
+  node_options: {
+    [type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
+      output_width: 512
+      output_height: 512
+    }
+  }
+}
+
+node: {
+  calculator: "ImageToTensorCalculator"
+  input_stream: "IMAGE_GPU:transformed_input_video"
+  output_stream: "TENSORS:input_tensors"
+  options {
+    [mediapipe.ImageToTensorCalculatorOptions.ext] {
+      output_tensor_width: 512
+      output_tensor_height: 512
+      keep_aspect_ratio: true
+      output_tensor_float_range {
+        min: 0.0
+        max: 255.0
+      }
+      gpu_origin: TOP_LEFT
+      border_mode: BORDER_REPLICATE
+    }
+  }
+}
+
+node {
+  calculator: "InferenceCalculator"
+  input_stream: "TENSORS_GPU:input_tensors"
+  output_stream: "TENSORS_GPU:output_tensors"
+  options: {
+    [mediapipe.InferenceCalculatorOptions.ext] {
+      model_path: "mediapipe/models/metaf-512-mobile3.tflite"
+      delegate { gpu {} }
+    }
+  }
+}
+
+# Processes the output tensors into a segmentation mask that has the same size
+# as the input image into the graph.
+node {
+  calculator: "TensorsToSegmentationCalculator"
+  input_stream: "TENSORS:output_tensors"
+  output_stream: "MASK:mask_image"
+  options: {
+    [mediapipe.TensorsToSegmentationCalculatorOptions.ext] {
+      activation: NONE
+    }
+  }
+}
+
+
+node: {
+  calculator: "FromImageCalculator"
+  input_stream: "IMAGE:mask_image"
+  output_stream: "IMAGE_GPU:output_video"
+}
diff --git a/mediapipe/models/model_float32.tflite b/mediapipe/models/model_float32.tflite
new file mode 100644
index 000000000..ef9762bb8
Binary files /dev/null and b/mediapipe/models/model_float32.tflite differ
diff --git a/video.mp4 b/video.mp4
new file mode 100644
index 000000000..49868c557
Binary files /dev/null and b/video.mp4 differ
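Usage note (a sketch, not part of the patch): the new desktop target and graph paths below come from this diff, while the bazel invocation and flag names follow MediaPipe's existing demo_run_graph_main conventions and are assumed to apply unchanged here.

    bazel build -c opt --define MEDIAPIPE_DISABLE_GPU=1 \
        mediapipe/examples/desktop/image_style:image_style_cpu
    bazel-bin/mediapipe/examples/desktop/image_style/image_style_cpu \
        --calculator_graph_config_file=mediapipe/graphs/image_style/image_style_cpu.pbtxt \
        --input_video_path=video.mp4 \
        --output_video_path=/tmp/stylized.mp4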