diff --git a/mediapipe/calculators/core/end_loop_calculator.cc b/mediapipe/calculators/core/end_loop_calculator.cc
index d21bc03a4..fd2fd89ae 100644
--- a/mediapipe/calculators/core/end_loop_calculator.cc
+++ b/mediapipe/calculators/core/end_loop_calculator.cc
@@ -17,11 +17,12 @@
 #include <vector>
 
 #include "mediapipe/framework/formats/classification.pb.h"
-#include "mediapipe/framework/formats/detection.pb.h"
 #include "mediapipe/framework/formats/landmark.pb.h"
 #include "mediapipe/framework/formats/rect.pb.h"
 #include "mediapipe/util/render_data.pb.h"
 #include "tensorflow/lite/interpreter.h"
+#include "mediapipe/framework/port/opencv_core_inc.h"
+#include "mediapipe/framework/port/opencv_imgproc_inc.h"
 
 namespace mediapipe {
 
@@ -48,11 +49,15 @@ typedef EndLoopCalculator<std::vector<ClassificationList>>
     EndLoopClassificationListCalculator;
 REGISTER_CALCULATOR(EndLoopClassificationListCalculator);
 
+typedef EndLoopCalculator<std::vector<std::unordered_map<std::string, cv::Mat>>>
+    EndLoopMapMaskCalculator;
+REGISTER_CALCULATOR(EndLoopMapMaskCalculator);
+
+typedef EndLoopCalculator<std::vector<std::tuple<double, double, double, double>>>
+    EndLoopFaceBoxCalculator;
+REGISTER_CALCULATOR(EndLoopFaceBoxCalculator);
+
 typedef EndLoopCalculator<std::vector<TfLiteTensor>> EndLoopTensorCalculator;
 REGISTER_CALCULATOR(EndLoopTensorCalculator);
 
-typedef EndLoopCalculator<std::vector<::mediapipe::Detection>>
-    EndLoopDetectionCalculator;
-REGISTER_CALCULATOR(EndLoopDetectionCalculator);
-
 }  // namespace mediapipe
diff --git a/mediapipe/graphs/beauty/BUILD b/mediapipe/graphs/beauty/BUILD
index 7c191aec9..1fff633f5 100644
--- a/mediapipe/graphs/beauty/BUILD
+++ b/mediapipe/graphs/beauty/BUILD
@@ -42,6 +42,16 @@ cc_library(
     ],
 )
 
+cc_library(
+    name = "desktop_live_over_calculators",
+    deps = [
+        "//mediapipe/calculators/core:constant_side_packet_calculator",
+        "//mediapipe/calculators/core:flow_limiter_calculator",
+        "//mediapipe/graphs/beauty/subgraphs:face_renderer_cpu_over",
+        "//mediapipe/modules/face_landmark:face_landmark_front_gpu",
+    ],
+)
+
 cc_library(
     name = "desktop_live_gpu_calculators",
     deps = [
@@ -63,9 +73,29 @@ cc_library(
     ],
 )
 
+cc_library(
+    name = "mobile_calculators_over",
+    deps = [
+        "//mediapipe/gpu:gpu_buffer_to_image_frame_calculator",
+        "//mediapipe/gpu:image_frame_to_gpu_buffer_calculator",
+        "//mediapipe/calculators/core:flow_limiter_calculator",
+        "//mediapipe/graphs/beauty/subgraphs:face_renderer_gpu_over",
+        "//mediapipe/modules/face_landmark:face_landmark_front_gpu",
+    ],
+)
+
+
 mediapipe_binary_graph(
     name = "beauty_mobile_gpu_binary_graph",
     graph = "beauty_mobile.pbtxt",
     output_name = "beauty_mobile_gpu.binarypb",
     deps = [":mobile_calculators"],
 )
+
+mediapipe_binary_graph(
+    name = "beauty_mobile_over_binary_graph",
+    graph = "beauty_over.pbtxt",
+    output_name = "beauty_mobile_over.binarypb",
+    deps = [":mobile_calculators_over"],
+)
+
diff --git a/mediapipe/graphs/beauty/beauty_over.pbtxt b/mediapipe/graphs/beauty/beauty_over.pbtxt
new file mode 100644
index 000000000..28a1e2f38
--- /dev/null
+++ b/mediapipe/graphs/beauty/beauty_over.pbtxt
@@ -0,0 +1,68 @@
+# MediaPipe graph that performs face mesh with TensorFlow Lite on GPU.
+
+# GPU buffer. (GpuBuffer)
+input_stream: "input_video"
+
+# Max number of faces to detect/process. (int)
+input_side_packet: "num_faces"
+
+# Output image with rendered results. (GpuBuffer)
+output_stream: "output_video"
+# Collection of detected/processed faces, each represented as a list of
+# landmarks. (std::vector<NormalizedLandmarkList>)
+output_stream: "multi_face_landmarks"
+
+# Throttles the images flowing downstream for flow control.
It passes through +# the very first incoming image unaltered, and waits for downstream nodes +# (calculators and subgraphs) in the graph to finish their tasks before it +# passes through another image. All images that come in while waiting are +# dropped, limiting the number of in-flight images in most part of the graph to +# 1. This prevents the downstream nodes from queuing up incoming images and data +# excessively, which leads to increased latency and memory usage, unwanted in +# real-time mobile applications. It also eliminates unnecessarily computation, +# e.g., the output produced by a node may get dropped downstream if the +# subsequent nodes are still busy processing previous inputs. +node { + calculator: "FlowLimiterCalculator" + input_stream: "input_video" + input_stream: "FINISHED:output_video" + input_stream_info: { + tag_index: "FINISHED" + back_edge: true + } + output_stream: "throttled_input_video" +} + +# Defines side packets for further use in the graph. +node { + calculator: "ConstantSidePacketCalculator" + output_side_packet: "PACKET:with_attention" + node_options: { + [type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: { + packet { bool_value: true } + } + } +} + +# Subgraph that detects faces and corresponding landmarks. +node { + calculator: "FaceLandmarkFrontGpu" + input_stream: "IMAGE:throttled_input_video" + input_side_packet: "NUM_FACES:num_faces" + input_side_packet: "WITH_ATTENTION:with_attention" + output_stream: "LANDMARKS:multi_face_landmarks" + output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks" + output_stream: "DETECTIONS:face_detections" + output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections" +} + +# Subgraph that renders face-landmark annotation onto the input image. +node { + calculator: "FaceRendererGpuOver" + input_stream: "IMAGE:throttled_input_video" + input_stream: "LANDMARKS:multi_face_landmarks" + input_stream: "NORM_RECTS:face_rects_from_landmarks" + input_stream: "DETECTIONS:face_detections" + output_stream: "IMAGE:output_video" +} + diff --git a/mediapipe/graphs/beauty/subgraphs/BUILD b/mediapipe/graphs/beauty/subgraphs/BUILD index 0d92f566d..d96ef2ec5 100644 --- a/mediapipe/graphs/beauty/subgraphs/BUILD +++ b/mediapipe/graphs/beauty/subgraphs/BUILD @@ -48,6 +48,15 @@ mediapipe_simple_subgraph( ], ) +mediapipe_simple_subgraph( + name = "face_renderer_gpu_over", + graph = "face_renderer_gpu_over.pbtxt", + register_as = "FaceRendererGpuOver", + deps = [ + ":renderer_calculators", + ], +) + mediapipe_simple_subgraph( name = "face_renderer_cpu", graph = "face_renderer_cpu.pbtxt", diff --git a/mediapipe/graphs/beauty/subgraphs/face_renderer_cpu.pbtxt b/mediapipe/graphs/beauty/subgraphs/face_renderer_cpu.pbtxt index d019995f6..8d6d9ddd3 100644 --- a/mediapipe/graphs/beauty/subgraphs/face_renderer_cpu.pbtxt +++ b/mediapipe/graphs/beauty/subgraphs/face_renderer_cpu.pbtxt @@ -75,7 +75,7 @@ node { calculator: "SmoothFaceCalculator" input_stream: "IMAGE:input_image_2" input_stream: "MASK:0:multi_mask" - input_stream: "FACEBOX:multi_face_box" + input_stream: "FACEBOX:0:multi_face_box" output_stream: "IMAGE:output_image" } diff --git a/mediapipe/graphs/beauty/subgraphs/face_renderer_cpu_old.pbtxt b/mediapipe/graphs/beauty/subgraphs/face_renderer_cpu_old.pbtxt new file mode 100644 index 000000000..0b866b21b --- /dev/null +++ b/mediapipe/graphs/beauty/subgraphs/face_renderer_cpu_old.pbtxt @@ -0,0 +1,109 @@ +# MediaPipe face mesh rendering subgraph. + +type: "FaceRendererCpu" + +# CPU image. 
(ImageFrame) +input_stream: "IMAGE:input_image" +# Collection of detected/predicted faces, each represented as a list of +# landmarks. (std::vector) +input_stream: "LANDMARKS:multi_face_landmarks" + +# CPU image with rendered data. (ImageFrame) +output_stream: "IMAGE:output_image" + +node { + calculator: "ImagePropertiesCalculator" + input_stream: "IMAGE:input_image" + output_stream: "SIZE:image_size" +} + +# Outputs each element of multi_face_landmarks at a fake timestamp for the rest +# of the graph to process. At the end of the loop, outputs the BATCH_END +# timestamp for downstream calculators to inform them that all elements in the +# vector have been processed. +node { + calculator: "BeginLoopNormalizedLandmarkListVectorCalculator" + input_stream: "ITERABLE:multi_face_landmarks" + output_stream: "ITEM:face_landmarks" + output_stream: "BATCH_END:landmark_timestamp" +} + +# Converts landmarks to drawing primitives for annotation overlay. +node { + calculator: "FaceLandmarksToRenderDataCalculator" + input_stream: "NORM_LANDMARKS:face_landmarks" + output_stream: "RENDER_DATA:landmarks_render_data" + node_options: { + [type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] { + landmark_color { r: 255 g: 0 b: 0 } + connection_color { r: 0 g: 255 b: 0 } + thickness: 2 + visualize_landmark_depth: false + } + } +} + +# Collects a RenderData object for each hand into a vector. Upon receiving the +# BATCH_END timestamp, outputs the vector of RenderData at the BATCH_END +# timestamp. +node { + calculator: "EndLoopRenderDataCalculator" + input_stream: "ITEM:landmarks_render_data" + input_stream: "BATCH_END:landmark_timestamp" + output_stream: "ITERABLE:multi_face_landmarks_render_data" +} + +# Converts normalized rects to drawing primitives for annotation overlay. +#node { +# calculator: "RectToRenderDataCalculator" +# input_stream: "NORM_RECTS:rects" +# output_stream: "RENDER_DATA:rects_render_data" +# node_options: { +# [type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] { +# filled: false +# color { r: 255 g: 0 b: 0 } +# thickness: 4.0 +# } +# } +#} + +node { + calculator: "FormFaceMaskCalculator" + input_stream: "IMAGE:input_image" + input_stream: "VECTOR:0:multi_face_landmarks_render_data" + output_stream: "FACEBOX:face_box" + output_stream: "MASK:multi_mask" +} + +node { + calculator: "DrawLipstickCalculator" + input_stream: "IMAGE:input_image" + input_stream: "MASK:0:multi_mask" + output_stream: "IMAGE:input_image_1" +} + +node { + calculator: "WhitenTeethCalculator" + input_stream: "IMAGE:input_image_1" + input_stream: "MASK:0:multi_mask" + output_stream: "IMAGE:input_image_2" +} + +node { + calculator: "SmoothFaceCalculator" + input_stream: "IMAGE:input_image_2" + input_stream: "MASK:0:multi_mask" + input_stream: "FACEBOX:face_box" + output_stream: "IMAGE:output_image" +} + +# Draws annotations and overlays them on top of the input images. +#node { +# calculator: "AnnotationOverlayCalculator" +# input_stream: "IMAGE:input_image" +# input_stream: "VECTOR:0:multi_face_landmarks_render_data" +# output_stream: "IMAGE:output_image" +#} + + + diff --git a/mediapipe/graphs/beauty/subgraphs/face_renderer_gpu_over.pbtxt b/mediapipe/graphs/beauty/subgraphs/face_renderer_gpu_over.pbtxt new file mode 100644 index 000000000..702c42086 --- /dev/null +++ b/mediapipe/graphs/beauty/subgraphs/face_renderer_gpu_over.pbtxt @@ -0,0 +1,96 @@ +# MediaPipe face mesh rendering subgraph. + +type: "FaceRendererGpu" + +# GPU image. 
(GpuBuffer)
+input_stream: "IMAGE:input_image"
+# Collection of detected/predicted faces, each represented as a list of
+# landmarks. (std::vector<NormalizedLandmarkList>)
+input_stream: "LANDMARKS:multi_face_landmarks"
+# Regions of interest calculated based on face landmarks.
+# (std::vector<NormalizedRect>)
+input_stream: "NORM_RECTS:rects"
+# Detected faces. (std::vector<Detection>)
+input_stream: "DETECTIONS:detections"
+
+# GPU image with rendered data. (GpuBuffer)
+output_stream: "IMAGE:output_image"
+
+node {
+  calculator: "ImagePropertiesCalculator"
+  input_stream: "IMAGE_GPU:input_image"
+  output_stream: "SIZE:image_size"
+}
+
+# Converts detections to drawing primitives for annotation overlay.
+node {
+  calculator: "DetectionsToRenderDataCalculator"
+  input_stream: "DETECTIONS:detections"
+  output_stream: "RENDER_DATA:detections_render_data"
+  node_options: {
+    [type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
+      thickness: 4.0
+      color { r: 0 g: 255 b: 0 }
+    }
+  }
+}
+
+# Outputs each element of multi_face_landmarks at a fake timestamp for the rest
+# of the graph to process. At the end of the loop, outputs the BATCH_END
+# timestamp for downstream calculators to inform them that all elements in the
+# vector have been processed.
+node {
+  calculator: "BeginLoopNormalizedLandmarkListVectorCalculator"
+  input_stream: "ITERABLE:multi_face_landmarks"
+  output_stream: "ITEM:face_landmarks"
+  output_stream: "BATCH_END:end_timestamp"
+}
+
+# Converts landmarks to drawing primitives for annotation overlay.
+node {
+  calculator: "FaceLandmarksToRenderDataCalculator"
+  input_stream: "NORM_LANDMARKS:face_landmarks"
+  output_stream: "RENDER_DATA:landmarks_render_data"
+  node_options: {
+    [type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
+      landmark_color { r: 255 g: 0 b: 0 }
+      connection_color { r: 0 g: 255 b: 0 }
+      thickness: 2
+      visualize_landmark_depth: false
+    }
+  }
+}
+
+# Collects a RenderData object for each face into a vector. Upon receiving the
+# BATCH_END timestamp, outputs the vector of RenderData at the BATCH_END
+# timestamp.
+node {
+  calculator: "EndLoopRenderDataCalculator"
+  input_stream: "ITEM:landmarks_render_data"
+  input_stream: "BATCH_END:end_timestamp"
+  output_stream: "ITERABLE:multi_face_landmarks_render_data"
+}
+
+# Converts normalized rects to drawing primitives for annotation overlay.
+#node {
+#  calculator: "RectToRenderDataCalculator"
+#  input_stream: "NORM_RECTS:rects"
+#  output_stream: "RENDER_DATA:rects_render_data"
+#  node_options: {
+#    [type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] {
+#      filled: false
+#      color { r: 255 g: 0 b: 0 }
+#      thickness: 4.0
+#    }
+#  }
+#}
+
+# Draws annotations and overlays them on top of the input images.
+node {
+  calculator: "AnnotationOverlayCalculator"
+  input_stream: "IMAGE_GPU:input_image"
+  #input_stream: "detections_render_data"
+  input_stream: "VECTOR:0:multi_face_landmarks_render_data"
+  #input_stream: "rects_render_data"
+  output_stream: "IMAGE_GPU:output_image"
+}
diff --git a/mediapipe/landmarks/BUILD b/mediapipe/landmarks/BUILD
new file mode 100644
index 000000000..00a86f465
--- /dev/null
+++ b/mediapipe/landmarks/BUILD
@@ -0,0 +1,45 @@
+# Copyright 2019 The MediaPipe Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library") + +licenses(["notice"]) + +package(default_visibility = ["//visibility:public"]) + +cc_library( + name = "landmarks_to_mask_calculator", + srcs = ["landmarks_to_mask_calculator.cc"], + hdrs = ["landmarks_to_mask_calculator.h"], + visibility = ["//visibility:public"], + deps = [ + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework:calculator_options_cc_proto", + "//mediapipe/framework/formats:landmark_cc_proto", + "//mediapipe/framework/formats:location_data_cc_proto", + "//mediapipe/framework/port:ret_check", + "//mediapipe/util:color_cc_proto", + "//mediapipe/util:render_data_cc_proto", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", + "//mediapipe/framework/formats:image_format_cc_proto", + "//mediapipe/framework/formats:image_frame", + "//mediapipe/framework/formats:image_frame_opencv", + "//mediapipe/framework/port:opencv_core", + "//mediapipe/framework/port:opencv_imgproc", + "//mediapipe/framework/port:opencv_highgui", + "//mediapipe/framework/port:vector", + ], + alwayslink = 1, +) diff --git a/mediapipe/landmarks/landmarks_to_mask_calculator.cc b/mediapipe/landmarks/landmarks_to_mask_calculator.cc new file mode 100644 index 000000000..600cd8ac3 --- /dev/null +++ b/mediapipe/landmarks/landmarks_to_mask_calculator.cc @@ -0,0 +1,455 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#include "mediapipe/landmarks/landmarks_to_mask_calculator.h"
+
+#include <cmath>
+
+#include <algorithm>
+#include <memory>
+#include <string>
+#include <tuple>
+#include <vector>
+
+#include <unordered_map>
+
+#include "absl/memory/memory.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/str_join.h"
+#include "mediapipe/framework/calculator_framework.h"
+#include "mediapipe/framework/calculator_options.pb.h"
+#include "mediapipe/framework/formats/image_format.pb.h"
+#include "mediapipe/framework/formats/image_frame.h"
+#include "mediapipe/framework/formats/image_frame_opencv.h"
+#include "mediapipe/framework/formats/landmark.pb.h"
+#include "mediapipe/framework/port/opencv_core_inc.h"
+#include "mediapipe/framework/port/opencv_imgproc_inc.h"
+#include "mediapipe/framework/formats/location_data.pb.h"
+#include "mediapipe/framework/port/ret_check.h"
+#include "mediapipe/util/color.pb.h"
+#include "mediapipe/util/render_data.pb.h"
+#include "mediapipe/framework/port/logging.h"
+#include "mediapipe/framework/port/status.h"
+#include "mediapipe/framework/port/vector.h"
+
+namespace mediapipe
+{
+  namespace
+  {
+    constexpr char kLandmarksTag[] = "LANDMARKS";
+    constexpr char kNormLandmarksTag[] = "NORM_LANDMARKS";
+    constexpr char kLandmarkLabel[] = "KEYPOINT";
+    constexpr char kVectorTag[] = "VECTOR";
+    constexpr char kMaskTag[] = "MASK";
+    constexpr char kFaceBoxTag[] = "FACEBOX";
+    constexpr char kImageFrameTag[] = "IMAGE";
+
+    // Face mesh landmark indices describing each face region that gets its
+    // own mask, keyed by region name.
+    std::unordered_map<std::string, std::vector<int>> orderList = {
+        {"UPPER_LIP", {61, 185, 40, 39, 37, 0, 267, 269, 270, 409, 291, 308, 415, 310, 311, 312, 13, 82, 81, 80, 191, 78}},
+        {"LOWER_LIP", {61, 78, 95, 88, 178, 87, 14, 317, 402, 318, 324, 308, 291, 375, 321, 405, 314, 17, 84, 181, 91, 146}},
+        {"FACE_OVAL", {10, 338, 338, 297, 297, 332, 332, 284, 284, 251, 251, 389, 389, 356, 356, 454, 454, 323, 323, 361, 361, 288, 288, 397, 397, 365, 365, 379, 379, 378, 378, 400, 400, 377, 377, 152, 152, 148, 148, 176, 176, 149, 149, 150, 150, 136, 136, 172, 172, 58, 58, 132, 132, 93, 93, 234, 234, 127, 127, 162, 162, 21, 21, 54, 54, 103, 103, 67, 67, 109, 109, 10}},
+        {"MOUTH_INSIDE", {78, 191, 80, 81, 13, 312, 311, 310, 415, 308, 324, 318, 402, 317, 14, 87, 178, 88, 95}},
+        {"LEFT_EYE", {130, 33, 246, 161, 160, 159, 157, 173, 133, 155, 154, 153, 145, 144, 163, 7}},
+        {"RIGHT_EYE", {362, 398, 384, 385, 386, 387, 388, 466, 263, 249, 390, 373, 374, 380, 381, 382}},
+        {"LEFT_BROW", {70, 63, 105, 66, 107, 55, 65, 52, 53, 46}},
+        {"RIGHT_BROW", {336, 296, 334, 293, 301, 300, 283, 282, 295, 285}},
+        {"LIPS", {61, 185, 40, 39, 37, 0, 267, 269, 270, 409, 291, 375, 321, 405, 314, 17, 84, 181, 91, 146}},
+        {"PART_FOREHEAD_B", {21, 54, 103, 67, 109, 10, 338, 297, 332, 284, 251, 301, 293, 334, 296, 336, 9, 107, 66, 105, 63, 71}},
+    };
+
+    // Returns false if the landmark fails the (optional) visibility or
+    // presence thresholds.
+    template <class LandmarkType>
+    bool IsLandmarkVisibleAndPresent(const LandmarkType &landmark,
+                                     bool utilize_visibility,
+                                     float visibility_threshold,
+                                     bool utilize_presence,
+                                     float presence_threshold)
+    {
+      if (utilize_visibility && landmark.has_visibility() &&
+          landmark.visibility() < visibility_threshold)
+      {
+        return false;
+      }
+      if (utilize_presence && landmark.has_presence() &&
+          landmark.presence() < presence_threshold)
+      {
+        return false;
+      }
+      return true;
+    }
+
+    // Converts normalized [0, 1] coordinates to pixel coordinates.
+    bool NormalizedtoPixelCoordinates(double normalized_x, double normalized_y,
+                                      int image_width, int image_height,
+                                      int *x_px, int *y_px)
+    {
+      CHECK(x_px != nullptr);
+      CHECK(y_px != nullptr);
+      CHECK_GT(image_width, 0);
+      CHECK_GT(image_height, 0);
+
+      if (normalized_x < 0 || normalized_x > 1.0 || normalized_y < 0 ||
+          normalized_y > 1.0)
+      {
+        VLOG(1) << "Normalized coordinates must be between 0.0 and 1.0";
+      }
+
+      *x_px = static_cast<int>(round(normalized_x * image_width));
+      *y_px = static_cast<int>(round(normalized_y * image_height));
+
+      return true;
+    }
+
+    std::tuple<double, double, double, double> face_box;
+
+    float scale_factor_ = 1.0;
+
+    bool image_frame_available_ = false;
+
+  } // namespace
+
+  absl::Status LandmarksToMaskCalculator::GetContract(
+      CalculatorContract *cc)
+  {
+    RET_CHECK(cc->Inputs().HasTag(kLandmarksTag) ||
+              cc->Inputs().HasTag(kNormLandmarksTag))
+        << "None of the input streams are provided.";
+    RET_CHECK(!(cc->Inputs().HasTag(kLandmarksTag) &&
+                cc->Inputs().HasTag(kNormLandmarksTag)))
+        << "Only one type of landmark can be taken. Either absolute or "
+           "normalized landmarks.";
+
+    if (cc->Inputs().HasTag(kImageFrameTag))
+    {
+      cc->Inputs().Tag(kImageFrameTag).Set<ImageFrame>();
+    }
+
+    if (cc->Inputs().HasTag(kLandmarksTag))
+    {
+      cc->Inputs().Tag(kLandmarksTag).Set<LandmarkList>();
+    }
+    if (cc->Inputs().HasTag(kNormLandmarksTag))
+    {
+      cc->Inputs().Tag(kNormLandmarksTag).Set<NormalizedLandmarkList>();
+    }
+    if (cc->Outputs().HasTag(kMaskTag))
+    {
+      cc->Outputs().Tag(kMaskTag).Set<std::unordered_map<std::string, cv::Mat>>();
+    }
+    if (cc->Outputs().HasTag(kFaceBoxTag))
+    {
+      cc->Outputs().Tag(kFaceBoxTag).Set<std::tuple<double, double, double, double>>();
+    }
+
+    return absl::OkStatus();
+  }
+
+  absl::Status LandmarksToMaskCalculator::Open(CalculatorContext *cc)
+  {
+    cc->SetOffset(TimestampDiff(0));
+
+    if (cc->Inputs().HasTag(kImageFrameTag))
+    {
+      image_frame_available_ = true;
+    }
+
+    return absl::OkStatus();
+  }
+
+  absl::Status LandmarksToMaskCalculator::Process(CalculatorContext *cc)
+  {
+    // Check that landmarks are not empty and skip rendering if so.
+    // Don't emit an empty packet for this timestamp.
+    if (cc->Inputs().HasTag(kLandmarksTag) &&
+        cc->Inputs().Tag(kLandmarksTag).IsEmpty())
+    {
+      return absl::OkStatus();
+    }
+    if (cc->Inputs().HasTag(kNormLandmarksTag) &&
+        cc->Inputs().Tag(kNormLandmarksTag).IsEmpty())
+    {
+      return absl::OkStatus();
+    }
+    if (cc->Inputs().HasTag(kImageFrameTag) &&
+        cc->Inputs().Tag(kImageFrameTag).IsEmpty())
+    {
+      return absl::OkStatus();
+    }
+
+    // Initialize render target, drawn with OpenCV.
+
+    std::unique_ptr<cv::Mat> image_mat;
+    ImageFormat::Format target_format;
+    std::unordered_map<std::string, cv::Mat> all_masks;
+
+    MP_RETURN_IF_ERROR(CreateRenderTargetCpu(cc, image_mat, &target_format));
+
+    MP_RETURN_IF_ERROR(GetMasks(cc, all_masks, image_mat));
+
+    MP_RETURN_IF_ERROR(GetFaceBox(cc, image_mat));
+
+    MP_RETURN_IF_ERROR(RenderToCpu(cc, all_masks));
+
+    return absl::OkStatus();
+  }
+
+  absl::Status LandmarksToMaskCalculator::RenderToCpu(
+      CalculatorContext *cc,
+      std::unordered_map<std::string, cv::Mat> &all_masks)
+  {
+    auto output_frame =
+        absl::make_unique<std::unordered_map<std::string, cv::Mat>>(all_masks);
+
+    if (cc->Outputs().HasTag(kMaskTag))
+    {
+      cc->Outputs()
+          .Tag(kMaskTag)
+          .Add(output_frame.release(), cc->InputTimestamp());
+    }
+
+    auto output_frame2 =
+        absl::make_unique<std::tuple<double, double, double, double>>(face_box);
+
+    if (cc->Outputs().HasTag(kFaceBoxTag))
+    {
+      cc->Outputs()
+          .Tag(kFaceBoxTag)
+          .Add(output_frame2.release(), cc->InputTimestamp());
+    }
+
+    all_masks.clear();
+    return absl::OkStatus();
+  }
+
+  absl::Status LandmarksToMaskCalculator::CreateRenderTargetCpu(
+      CalculatorContext *cc, std::unique_ptr<cv::Mat> &image_mat,
+      ImageFormat::Format *target_format)
+  {
+    if (image_frame_available_)
+    {
+      const auto &input_frame =
+          cc->Inputs().Tag(kImageFrameTag).Get<ImageFrame>();
+
+      int target_mat_type;
+      switch (input_frame.Format())
+      {
+      case ImageFormat::SRGBA:
+        *target_format = ImageFormat::SRGBA;
+        target_mat_type = CV_8UC4;
+        break;
+      case ImageFormat::SRGB:
+        *target_format = ImageFormat::SRGB;
+        target_mat_type = CV_8UC3;
+        break;
+      case ImageFormat::GRAY8:
+        *target_format = ImageFormat::SRGB;
+        target_mat_type = CV_8UC3;
+        break;
+      default:
+        return absl::UnknownError("Unexpected image frame format.");
+        break;
+      }
+
+      image_mat = absl::make_unique<cv::Mat>(
+          input_frame.Height(), input_frame.Width(), target_mat_type);
+
+      auto input_mat = formats::MatView(&input_frame);
+
+      if (input_frame.Format() == ImageFormat::GRAY8)
+      {
+        cv::Mat rgb_mat;
+        cv::cvtColor(input_mat, rgb_mat, cv::COLOR_GRAY2RGB);
+        rgb_mat.copyTo(*image_mat);
+      }
+      else
+      {
+        input_mat.copyTo(*image_mat);
+      }
+    }
+    else
+    {
+      // No input image: fall back to a small blank canvas.
+      image_mat = absl::make_unique<cv::Mat>(
+          150, 150, CV_8UC4,
+          cv::Scalar(255, 255, 255));
+      *target_format = ImageFormat::SRGBA;
+    }
+
+    return absl::OkStatus();
+  }
+
+  absl::Status LandmarksToMaskCalculator::GetMasks(
+      CalculatorContext *cc,
+      std::unordered_map<std::string, cv::Mat> &all_masks,
+      std::unique_ptr<cv::Mat> &image_mat)
+  {
+    int image_width_ = image_mat->cols;
+    int image_height_ = image_mat->rows;
+
+    if (cc->Inputs().HasTag(kLandmarksTag))
+    {
+      const LandmarkList &landmarks =
+          cc->Inputs().Tag(kLandmarksTag).Get<LandmarkList>();
+
+      cv::Mat mask;
+      std::vector<cv::Point> point_array;
+      for (const auto &[key, value] : orderList)
+      {
+        for (auto order : value)
+        {
+          const Landmark &landmark = landmarks.landmark(order);
+
+          if (!IsLandmarkVisibleAndPresent<Landmark>(
+                  landmark, false,
+                  0.0, false,
+                  0.0))
+          {
+            continue;
+          }
+
+          const auto &point = landmark;
+          int x = -1;
+          int y = -1;
+          CHECK(NormalizedtoPixelCoordinates(point.x(), point.y(), image_width_,
+                                             image_height_, &x, &y));
+          point_array.push_back(cv::Point(x, y));
+        }
+
+        // Fills the polygon through the region's points to build one binary
+        // mask per region.
+        std::vector<std::vector<cv::Point>> point_vec;
+        point_vec.push_back(point_array);
+        mask = cv::Mat::zeros(image_mat->size(), CV_32FC1);
+        cv::fillPoly(mask, point_vec, cv::Scalar::all(255), cv::LINE_AA);
+        mask.convertTo(mask, CV_8U);
+        all_masks.insert(make_pair(key, mask));
+        point_vec.clear();
+        point_array.clear();
+      }
+    }
+
+    if (cc->Inputs().HasTag(kNormLandmarksTag))
+    {
+      const NormalizedLandmarkList &landmarks =
+          cc->Inputs().Tag(kNormLandmarksTag).Get<NormalizedLandmarkList>();
+
+      cv::Mat mask;
+      std::vector<cv::Point> point_array;
+      for (const auto &[key, value] : orderList)
+      {
+        for (auto order : value)
+        {
+          const NormalizedLandmark &landmark = landmarks.landmark(order);
+
+          if (!IsLandmarkVisibleAndPresent<NormalizedLandmark>(
+                  landmark, false,
+                  0.0, false,
+                  0.0))
+          {
+            continue;
+          }
+
+          const auto &point = landmark;
+          int x = -1;
+          int y = -1;
+          CHECK(NormalizedtoPixelCoordinates(point.x(), point.y(), image_width_,
+                                             image_height_, &x, &y));
+          point_array.push_back(cv::Point(x, y));
+        }
+
+        std::vector<std::vector<cv::Point>> point_vec;
+        point_vec.push_back(point_array);
+        mask = cv::Mat::zeros(image_mat->size(), CV_32FC1);
+        cv::fillPoly(mask, point_vec, cv::Scalar::all(255), cv::LINE_AA);
+        mask.convertTo(mask, CV_8U);
+        all_masks.insert(make_pair(key, mask));
+        point_vec.clear();
+        point_array.clear();
+      }
+    }
+    return absl::OkStatus();
+  }
+
+  absl::Status LandmarksToMaskCalculator::GetFaceBox(
+      CalculatorContext *cc, std::unique_ptr<cv::Mat> &image_mat)
+  {
+    cv::Mat mat_image_ = *image_mat.get();
+
+    int image_width_ = image_mat->cols;
+    int image_height_ = image_mat->rows;
+
+    // Computes an axis-aligned bounding box over the visible landmarks (in the
+    // landmarks' own coordinate space) and expands the top edge by 10%.
+    std::vector<float> x_s, y_s;
+    double box_min_y, box_max_y, box_max_x, box_min_x;
+    if (cc->Inputs().HasTag(kLandmarksTag))
+    {
+      const LandmarkList &landmarks =
+          cc->Inputs().Tag(kLandmarksTag).Get<LandmarkList>();
+
+      for (int i = 0; i < landmarks.landmark_size(); ++i)
+      {
+        const Landmark &landmark = landmarks.landmark(i);
+
+        if (!IsLandmarkVisibleAndPresent<Landmark>(
+                landmark, false,
+                0.0, false,
+                0.0))
+        {
+          continue;
+        }
+
+        const auto &point = landmark;
+        int x = -1;
+        int y = -1;
+        CHECK(NormalizedtoPixelCoordinates(point.x(), point.y(), image_width_,
+                                           image_height_, &x, &y));
+        x_s.push_back(point.x());
+        y_s.push_back(point.y());
+      }
+      cv::minMaxLoc(y_s, &box_min_y, &box_max_y);
+      cv::minMaxLoc(x_s, &box_min_x, &box_max_x);
+      box_min_y = box_min_y * 0.9;
+      face_box = std::make_tuple(box_min_x, box_min_y, box_max_x, box_max_y);
+    }
+
+    if (cc->Inputs().HasTag(kNormLandmarksTag))
+    {
+      const NormalizedLandmarkList &landmarks =
+          cc->Inputs().Tag(kNormLandmarksTag).Get<NormalizedLandmarkList>();
+
+      for (int i = 0; i < landmarks.landmark_size(); ++i)
+      {
+        const NormalizedLandmark &landmark = landmarks.landmark(i);
+
+        if (!IsLandmarkVisibleAndPresent<NormalizedLandmark>(
+                landmark, false,
+                0.0, false,
+                0.0))
+        {
+          continue;
+        }
+
+        const auto &point = landmark;
+        int x = -1;
+        int y = -1;
+        CHECK(NormalizedtoPixelCoordinates(point.x(), point.y(), image_width_,
+                                           image_height_, &x, &y));
+        x_s.push_back(point.x());
+        y_s.push_back(point.y());
+      }
+      cv::minMaxLoc(y_s, &box_min_y, &box_max_y);
+      cv::minMaxLoc(x_s, &box_min_x, &box_max_x);
+      box_min_y = box_min_y * 0.9;
+      face_box = std::make_tuple(box_min_x, box_min_y, box_max_x, box_max_y);
+    }
+
+    return absl::OkStatus();
+  }
+
+  REGISTER_CALCULATOR(LandmarksToMaskCalculator);
+} // namespace mediapipe
diff --git a/mediapipe/landmarks/landmarks_to_mask_calculator.h b/mediapipe/landmarks/landmarks_to_mask_calculator.h
new file mode 100644
index 000000000..7454f206c
--- /dev/null
+++ b/mediapipe/landmarks/landmarks_to_mask_calculator.h
@@ -0,0 +1,87 @@
+// Copyright 2020 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#ifndef MEDIAPIPE_CALCULATORS_UTIL_LANDMARKS_TO_MASK_CALCULATOR_H_
+#define MEDIAPIPE_CALCULATORS_UTIL_LANDMARKS_TO_MASK_CALCULATOR_H_
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+
+#include "absl/memory/memory.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/str_join.h"
+#include "mediapipe/framework/calculator_framework.h"
+#include "mediapipe/framework/calculator_options.pb.h"
+#include "mediapipe/framework/formats/landmark.pb.h"
+#include "mediapipe/framework/formats/location_data.pb.h"
+#include "mediapipe/framework/port/ret_check.h"
+#include "mediapipe/util/color.pb.h"
+#include "mediapipe/util/render_data.pb.h"
+#include "mediapipe/framework/formats/image_format.pb.h"
+#include "mediapipe/framework/formats/image_frame.h"
+#include "mediapipe/framework/formats/image_frame_opencv.h"
+#include "mediapipe/framework/port/opencv_core_inc.h"
+#include "mediapipe/framework/port/opencv_imgproc_inc.h"
+#include "mediapipe/framework/port/logging.h"
+#include "mediapipe/framework/port/status.h"
+#include "mediapipe/framework/port/vector.h"
+
+namespace mediapipe
+{
+
+  // A calculator that converts face landmarks into per-region binary masks
+  // (one cv::Mat per region, keyed by region name) and a face bounding box.
+  // The input should be a LandmarkList or NormalizedLandmarkList proto,
+  // optionally together with the input image used to size the masks.
+  //
+  // Example config:
+  // node {
+  //   calculator: "LandmarksToMaskCalculator"
+  //   input_stream: "IMAGE:input_image"
+  //   input_stream: "NORM_LANDMARKS:face_landmarks"
+  //   output_stream: "MASK:mask"
+  //   output_stream: "FACEBOX:face_box"
+  // }
+  class LandmarksToMaskCalculator : public CalculatorBase
+  {
+  public:
+    LandmarksToMaskCalculator() = default;
+    ~LandmarksToMaskCalculator() override = default;
+    LandmarksToMaskCalculator(const LandmarksToMaskCalculator &) = delete;
+    LandmarksToMaskCalculator &operator=(
+        const LandmarksToMaskCalculator &) = delete;
+
+    static absl::Status GetContract(CalculatorContract *cc);
+
+    absl::Status Open(CalculatorContext *cc) override;
+
+    absl::Status Process(CalculatorContext *cc) override;
+
+  private:
+    absl::Status RenderToCpu(CalculatorContext *cc,
+                             std::unordered_map<std::string, cv::Mat> &all_masks);
+
+    absl::Status GetFaceBox(CalculatorContext *cc,
+                            std::unique_ptr<cv::Mat> &image_mat);
+
+    absl::Status GetMasks(CalculatorContext *cc,
+                          std::unordered_map<std::string, cv::Mat> &all_masks,
+                          std::unique_ptr<cv::Mat> &image_mat);
+
+    absl::Status CreateRenderTargetCpu(
+        CalculatorContext *cc, std::unique_ptr<cv::Mat> &image_mat,
+        ImageFormat::Format *target_format);
+  };
+
+} // namespace mediapipe
+#endif // MEDIAPIPE_CALCULATORS_UTIL_LANDMARKS_TO_MASK_CALCULATOR_H_
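Note (not part of the diff above): LandmarksToMaskCalculator emits one MASK map (std::unordered_map<std::string, cv::Mat>) and one FACEBOX tuple per face, and the new EndLoopMapMaskCalculator / EndLoopFaceBoxCalculator typedefs collect those packets into vectors at the end of a per-face loop. The following .pbtxt fragment is a minimal sketch of how that wiring could look; the stream names and the final SmoothFaceCalculator node are illustrative assumptions, not graphs included in this change.

node {
  calculator: "BeginLoopNormalizedLandmarkListVectorCalculator"
  input_stream: "ITERABLE:multi_face_landmarks"
  output_stream: "ITEM:face_landmarks"
  output_stream: "BATCH_END:landmark_timestamp"
}

# Builds per-region masks and a face box for one face (hypothetical wiring).
node {
  calculator: "LandmarksToMaskCalculator"
  input_stream: "IMAGE:input_image"
  input_stream: "NORM_LANDMARKS:face_landmarks"
  output_stream: "MASK:face_mask"
  output_stream: "FACEBOX:face_box"
}

# Collects the per-face masks and boxes into vectors at the BATCH_END timestamp.
node {
  calculator: "EndLoopMapMaskCalculator"
  input_stream: "ITEM:face_mask"
  input_stream: "BATCH_END:landmark_timestamp"
  output_stream: "ITERABLE:multi_mask"
}
node {
  calculator: "EndLoopFaceBoxCalculator"
  input_stream: "ITEM:face_box"
  input_stream: "BATCH_END:landmark_timestamp"
  output_stream: "ITERABLE:multi_face_box"
}

# The collected vectors can then feed the beauty effects, mirroring the
# MASK:0:/FACEBOX:0: inputs used in face_renderer_cpu.pbtxt above.
node {
  calculator: "SmoothFaceCalculator"
  input_stream: "IMAGE:input_image"
  input_stream: "MASK:0:multi_mask"
  input_stream: "FACEBOX:0:multi_face_box"
  output_stream: "IMAGE:output_image"
}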