diff --git a/mediapipe/calculators/util/BUILD b/mediapipe/calculators/util/BUILD index e759ff990..869b4387e 100644 --- a/mediapipe/calculators/util/BUILD +++ b/mediapipe/calculators/util/BUILD @@ -18,6 +18,20 @@ licenses(["notice"]) package(default_visibility = ["//visibility:public"]) +cc_library( + name = "counting_vector_size_calculator", + srcs = ["counting_vector_size_calculator.cc"], + hdrs = ["counting_vector_size_calculator.h"], + visibility = [ + "//visibility:public", + ], + deps = [ + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework/formats:landmark_cc_proto", + ], + alwayslink = 1, +) + cc_library( name = "alignment_points_to_rects_calculator", srcs = ["alignment_points_to_rects_calculator.cc"], diff --git a/mediapipe/calculators/util/counting_vector_size_calculator.cc b/mediapipe/calculators/util/counting_vector_size_calculator.cc new file mode 100644 index 000000000..c2203686f --- /dev/null +++ b/mediapipe/calculators/util/counting_vector_size_calculator.cc @@ -0,0 +1,26 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mediapipe/calculators/util/counting_vector_size_calculator.h" + +#include "mediapipe/framework/formats/landmark.pb.h" + +namespace mediapipe { + +typedef CountingVectorSizeCalculator< + std::vector<::mediapipe::NormalizedLandmarkList>> + CountingNormalizedLandmarkListVectorSizeCalculator; + +REGISTER_CALCULATOR(CountingNormalizedLandmarkListVectorSizeCalculator); +} // namespace mediapipe diff --git a/mediapipe/calculators/util/counting_vector_size_calculator.h b/mediapipe/calculators/util/counting_vector_size_calculator.h new file mode 100644 index 000000000..4921d3c27 --- /dev/null +++ b/mediapipe/calculators/util/counting_vector_size_calculator.h @@ -0,0 +1,79 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MEDIAPIPE_CALCULATORS_UTIL_COUNTING_VECTOR_SIZE_CALCULATOR_H +#define MEDIAPIPE_CALCULATORS_UTIL_COUNTING_VECTOR_SIZE_CALCULATOR_H + +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/formats/landmark.pb.h" + +namespace mediapipe { + +// A calculator that counts the size of the input vector. It was created to +// aid in polling packets in the output stream synchronously. If there is +// a clock stream, it will output a value of 0 even if the input vector stream +// is empty. If not, it will output some value only if there is an input vector. 
+// The clock stream must carry the same timestamps as the vector stream, and
+// it must be a stream on which packets are transmitted while the graph is
+// running (e.g., any input stream of the graph).
+//
+// Example config:
+// node {
+//   calculator: "CountingNormalizedLandmarkListVectorSizeCalculator"
+//   input_stream: "CLOCK:trigger_signal"
+//   input_stream: "VECTOR:input_vector"
+//   output_stream: "COUNT:vector_count"
+// }
+//
+// node {
+//   calculator: "CountingNormalizedLandmarkListVectorSizeCalculator"
+//   input_stream: "VECTOR:input_vector"
+//   output_stream: "COUNT:vector_count"
+// }
+
+template <typename TVectorT>
+class CountingVectorSizeCalculator : public CalculatorBase {
+public:
+  static ::mediapipe::Status GetContract(CalculatorContract *cc) {
+    if (cc->Inputs().HasTag("CLOCK")) {
+      cc->Inputs().Tag("CLOCK").SetAny();
+    }
+
+    RET_CHECK(cc->Inputs().HasTag("VECTOR"));
+    cc->Inputs().Tag("VECTOR").Set<TVectorT>();
+    RET_CHECK(cc->Outputs().HasTag("COUNT"));
+    cc->Outputs().Tag("COUNT").Set<int>();
+
+    return ::mediapipe::OkStatus();
+  }
+
+  ::mediapipe::Status Process(CalculatorContext *cc) {
+    std::unique_ptr<int> face_count;
+    if (!cc->Inputs().Tag("VECTOR").IsEmpty()) {
+      const auto &landmarks = cc->Inputs().Tag("VECTOR").Get<TVectorT>();
+      face_count = absl::make_unique<int>(landmarks.size());
+    } else {
+      face_count = absl::make_unique<int>(0);
+    }
+    cc->Outputs().Tag("COUNT").Add(face_count.release(), cc->InputTimestamp());
+
+    return ::mediapipe::OkStatus();
+  }
+};
+
+} // namespace mediapipe
+
+#endif // MEDIAPIPE_CALCULATORS_UTIL_COUNTING_VECTOR_SIZE_CALCULATOR_H
diff --git a/mediapipe/examples/desktop/face_mesh_dll/BUILD b/mediapipe/examples/desktop/face_mesh_dll/BUILD
new file mode 100644
index 000000000..8b6029790
--- /dev/null
+++ b/mediapipe/examples/desktop/face_mesh_dll/BUILD
@@ -0,0 +1,66 @@
+# Copyright 2019 The MediaPipe Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+load(":windows_dll_library.bzl", "windows_dll_library")
+
+licenses(["notice"])
+
+filegroup(
+    name = "srcs",
+    srcs = glob(["**"]),
+    visibility = ["//examples:__pkg__"],
+)
+
+package(default_visibility = ["//mediapipe/examples:__subpackages__"])
+
+# Define the shared library.
+windows_dll_library(
+    name = "face_mesh_lib",
+    srcs = ["face_mesh_lib.cpp"],
+    hdrs = ["face_mesh_lib.h"],
+    # Define COMPILING_DLL to export symbols when compiling the DLL.
+    copts = ["-DCOMPILING_DLL"],
+    deps = [
+        "//mediapipe/framework:calculator_framework",
+        "//mediapipe/framework/formats:image_frame",
+        "//mediapipe/framework/formats:image_frame_opencv",
+        "//mediapipe/framework/formats:landmark_cc_proto",
+        "//mediapipe/framework/formats:rect_cc_proto",
+        "//mediapipe/framework/port:file_helpers",
+        "//mediapipe/framework/port:opencv_highgui",
+        "//mediapipe/framework/port:opencv_imgproc",
+        "//mediapipe/framework/port:opencv_video",
+        "//mediapipe/framework/port:parse_text_proto",
+        "//mediapipe/framework/port:status",
+        "@com_google_absl//absl/flags:flag",
+        "@com_google_absl//absl/flags:parse",
+
+        "//mediapipe/calculators/core:constant_side_packet_calculator",
+        "//mediapipe/calculators/core:flow_limiter_calculator",
+        "//mediapipe/calculators/tflite:tflite_model_calculator",
+        "//mediapipe/calculators/util:local_file_contents_calculator",
+        "//mediapipe/modules/face_landmark:face_landmark_front_side_model_cpu_with_face_counter",
+    ]
+)
+
+# Link to face_mesh_lib.dll implicitly, through its import library.
+cc_binary(
+    name = "face_mesh_cpu",
+    srcs = ["face_mesh_cpu.cpp"],
+    deps = [
+        ":face_mesh_lib",
+    ],
+)
\ No newline at end of file
diff --git a/mediapipe/examples/desktop/face_mesh_dll/face_mesh_cpu.cpp b/mediapipe/examples/desktop/face_mesh_dll/face_mesh_cpu.cpp
new file mode 100644
index 000000000..83762a1a1
--- /dev/null
+++ b/mediapipe/examples/desktop/face_mesh_dll/face_mesh_cpu.cpp
@@ -0,0 +1,96 @@
+#include "face_mesh_lib.h"
+
+int main(int argc, char **argv) {
+  google::InitGoogleLogging(argv[0]);
+  absl::ParseCommandLine(argc, argv);
+
+  cv::VideoCapture capture;
+  capture.open(0);
+  if (!capture.isOpened()) {
+    return -1;
+  }
+
+  constexpr char kWindowName[] = "MediaPipe";
+
+  cv::namedWindow(kWindowName, /*flags=WINDOW_AUTOSIZE*/ 1);
+#if (CV_MAJOR_VERSION >= 3) && (CV_MINOR_VERSION >= 2)
+  capture.set(cv::CAP_PROP_FRAME_WIDTH, 640);
+  capture.set(cv::CAP_PROP_FRAME_HEIGHT, 480);
+  capture.set(cv::CAP_PROP_FPS, 30);
+#endif
+
+  LOG(INFO) << "VideoCapture initialized.";
+
+  // Maximum number of faces that can be detected.
+  constexpr int maxNumFaces = 1;
+  constexpr char face_detection_model_path[] =
+      "mediapipe/modules/face_detection/face_detection_short_range.tflite";
+  constexpr char face_landmark_model_path[] =
+      "mediapipe/modules/face_landmark/face_landmark.tflite";
+
+  MPFaceMeshDetector *faceMeshDetector = MPFaceMeshDetectorConstruct(
+      maxNumFaces, face_detection_model_path, face_landmark_model_path);
+
+  // Allocate memory for face landmarks.
+  auto multiFaceLandmarks = new cv::Point2f *[maxNumFaces];
+  for (int i = 0; i < maxNumFaces; ++i) {
+    multiFaceLandmarks[i] = new cv::Point2f[MPFaceMeshDetectorLandmarksNum];
+  }
+
+  std::vector<cv::Rect> multiFaceBoundingBoxes(maxNumFaces);
+
+  LOG(INFO) << "FaceMeshDetector constructed.";
+
+  LOG(INFO) << "Start grabbing and processing frames.";
+  bool grab_frames = true;
+
+  while (grab_frames) {
+    // Grab a frame from the OpenCV camera.
+ cv::Mat camera_frame_raw; + capture >> camera_frame_raw; + if (camera_frame_raw.empty()) { + LOG(INFO) << "Ignore empty frames from camera."; + continue; + } + + cv::Mat camera_frame; + cv::cvtColor(camera_frame_raw, camera_frame, cv::COLOR_BGR2RGB); + + int faceCount = 0; + + MPFaceMeshDetectorDetectFaces(faceMeshDetector, camera_frame, + multiFaceBoundingBoxes.data(), &faceCount); + + if (faceCount > 0) { + auto &face_bounding_box = multiFaceBoundingBoxes[0]; + + cv::rectangle(camera_frame_raw, face_bounding_box, cv::Scalar(0, 255, 0), + 3); + + int landmarksNum = 0; + MPFaceMeshDetectorDetect2DLandmarks(faceMeshDetector, multiFaceLandmarks, + &landmarksNum); + auto &face_landmarks = multiFaceLandmarks[0]; + auto &landmark = face_landmarks[0]; + + LOG(INFO) << "First landmark: x - " << landmark.x << ", y - " + << landmark.y; + } + + const int pressed_key = cv::waitKey(5); + if (pressed_key >= 0 && pressed_key != 255) + grab_frames = false; + + cv::imshow(kWindowName, camera_frame_raw); + } + + LOG(INFO) << "Shutting down."; + + // Deallocate memory for face landmarks. + for (int i = 0; i < maxNumFaces; ++i) { + delete[] multiFaceLandmarks[i]; + } + delete[] multiFaceLandmarks; + + MPFaceMeshDetectorDestruct(faceMeshDetector); +} \ No newline at end of file diff --git a/mediapipe/examples/desktop/face_mesh_dll/face_mesh_lib.cpp b/mediapipe/examples/desktop/face_mesh_dll/face_mesh_lib.cpp new file mode 100644 index 000000000..b3082e58c --- /dev/null +++ b/mediapipe/examples/desktop/face_mesh_dll/face_mesh_lib.cpp @@ -0,0 +1,398 @@ +#include "face_mesh_lib.h" + +MPFaceMeshDetector::MPFaceMeshDetector(int numFaces, + const char *face_detection_model_path, + const char *face_landmark_model_path) { + const auto status = InitFaceMeshDetector(numFaces, face_detection_model_path, + face_landmark_model_path); + if (!status.ok()) { + LOG(INFO) << "Failed constructing FaceMeshDetector."; + LOG(INFO) << status.message(); + } +} + +absl::Status +MPFaceMeshDetector::InitFaceMeshDetector(int numFaces, + const char *face_detection_model_path, + const char *face_landmark_model_path) { + numFaces = std::max(numFaces, 1); + + if (face_detection_model_path == nullptr) { + face_detection_model_path = + "mediapipe/modules/face_detection/face_detection_short_range.tflite"; + } + + if (face_landmark_model_path == nullptr) { + face_landmark_model_path = + "mediapipe/modules/face_landmark/face_landmark.tflite"; + } + + // Prepare graph config. 
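+  // The graphConfig string (defined at the bottom of this file) is a
+  // template: "$numFaces", "$faceDetectionModelPath" and
+  // "$faceLandmarkModelPath" are plain-text placeholders, and each
+  // absl::StrReplaceAll call below splices the runtime value in before the
+  // config is parsed as a text proto.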
+  auto preparedGraphConfig = absl::StrReplaceAll(
+      graphConfig, {{"$numFaces", std::to_string(numFaces)}});
+  preparedGraphConfig = absl::StrReplaceAll(
+      preparedGraphConfig,
+      {{"$faceDetectionModelPath", face_detection_model_path}});
+  preparedGraphConfig = absl::StrReplaceAll(
+      preparedGraphConfig,
+      {{"$faceLandmarkModelPath", face_landmark_model_path}});
+
+  LOG(INFO) << "Get calculator graph config contents: " << preparedGraphConfig;
+
+  mediapipe::CalculatorGraphConfig config =
+      mediapipe::ParseTextProtoOrDie<mediapipe::CalculatorGraphConfig>(
+          preparedGraphConfig);
+  LOG(INFO) << "Initialize the calculator graph.";
+
+  MP_RETURN_IF_ERROR(graph.Initialize(config));
+
+  LOG(INFO) << "Start running the calculator graph.";
+
+  ASSIGN_OR_RETURN(mediapipe::OutputStreamPoller landmarks_poller,
+                   graph.AddOutputStreamPoller(kOutputStream_landmarks));
+  ASSIGN_OR_RETURN(mediapipe::OutputStreamPoller face_count_poller,
+                   graph.AddOutputStreamPoller(kOutputStream_faceCount));
+  ASSIGN_OR_RETURN(
+      mediapipe::OutputStreamPoller face_rects_from_landmarks_poller,
+      graph.AddOutputStreamPoller(kOutputStream_face_rects_from_landmarks));
+
+  landmarks_poller_ptr = std::make_unique<mediapipe::OutputStreamPoller>(
+      std::move(landmarks_poller));
+  face_count_poller_ptr = std::make_unique<mediapipe::OutputStreamPoller>(
+      std::move(face_count_poller));
+  face_rects_from_landmarks_poller_ptr =
+      std::make_unique<mediapipe::OutputStreamPoller>(
+          std::move(face_rects_from_landmarks_poller));
+
+  MP_RETURN_IF_ERROR(graph.StartRun({}));
+
+  LOG(INFO) << "MPFaceMeshDetector constructed successfully.";
+
+  return absl::OkStatus();
+}
+
+absl::Status
+MPFaceMeshDetector::DetectFacesWithStatus(const cv::Mat &camera_frame,
+                                          cv::Rect *multi_face_bounding_boxes,
+                                          int *numFaces) {
+  if (!numFaces || !multi_face_bounding_boxes) {
+    return absl::InvalidArgumentError(
+        "MPFaceMeshDetector::DetectFacesWithStatus requires non-null pointers "
+        "for the output data.");
+  }
+
+  // Reset face counts.
+  *numFaces = 0;
+  face_count = 0;
+
+  // Wrap the cv::Mat into an ImageFrame.
+  auto input_frame = absl::make_unique<mediapipe::ImageFrame>(
+      mediapipe::ImageFormat::SRGB, camera_frame.cols, camera_frame.rows,
+      mediapipe::ImageFrame::kDefaultAlignmentBoundary);
+  cv::Mat input_frame_mat = mediapipe::formats::MatView(input_frame.get());
+  camera_frame.copyTo(input_frame_mat);
+
+  // Send the image packet into the graph.
+  size_t frame_timestamp_us = static_cast<double>(cv::getTickCount()) /
+                              static_cast<double>(cv::getTickFrequency()) * 1e6;
+  MP_RETURN_IF_ERROR(graph.AddPacketToInputStream(
+      kInputStream, mediapipe::Adopt(input_frame.release())
+                        .At(mediapipe::Timestamp(frame_timestamp_us))));
+
+  // Get the face count.
+  mediapipe::Packet face_count_packet;
+  if (!face_count_poller_ptr ||
+      !face_count_poller_ptr->Next(&face_count_packet)) {
+    return absl::CancelledError(
+        "Failed during getting next face_count_packet.");
+  }
+
+  auto &face_count_val = face_count_packet.Get<int>();
+
+  if (face_count_val <= 0) {
+    return absl::OkStatus();
+  }
+
+  // Get face bounding boxes.
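+  // NormalizedRect packets store a center point plus width/height, all
+  // normalized to [0, 1] relative to the image; the conversion loop below
+  // turns each one into OpenCV's top-left-anchored, pixel-space cv::Rect.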
+  mediapipe::Packet face_rects_from_landmarks_packet;
+  if (!face_rects_from_landmarks_poller_ptr ||
+      !face_rects_from_landmarks_poller_ptr->Next(
+          &face_rects_from_landmarks_packet)) {
+    return absl::CancelledError(
+        "Failed during getting next face_rects_from_landmarks_packet.");
+  }
+
+  auto &face_bounding_boxes =
+      face_rects_from_landmarks_packet
+          .Get<::std::vector<::mediapipe::NormalizedRect>>();
+
+  image_width = camera_frame.cols;
+  image_height = camera_frame.rows;
+  const auto image_width_f = static_cast<float>(image_width);
+  const auto image_height_f = static_cast<float>(image_height);
+
+  // Convert the std::vector<NormalizedRect> (center-based rects) to cv::Rect*
+  // (top-left-based rects).
+  for (int i = 0; i < face_count_val; ++i) {
+    const auto &normalized_bounding_box = face_bounding_boxes[i];
+    auto &bounding_box = multi_face_bounding_boxes[i];
+
+    const auto width =
+        static_cast<int>(normalized_bounding_box.width() * image_width_f);
+    const auto height =
+        static_cast<int>(normalized_bounding_box.height() * image_height_f);
+
+    bounding_box.x =
+        static_cast<int>(normalized_bounding_box.x_center() * image_width_f) -
+        (width >> 1);
+    bounding_box.y =
+        static_cast<int>(normalized_bounding_box.y_center() * image_height_f) -
+        (height >> 1);
+    bounding_box.width = width;
+    bounding_box.height = height;
+  }
+
+  // Get face landmarks.
+  if (!landmarks_poller_ptr ||
+      !landmarks_poller_ptr->Next(&face_landmarks_packet)) {
+    return absl::CancelledError("Failed during getting next landmarks_packet.");
+  }
+
+  *numFaces = face_count_val;
+  face_count = face_count_val;
+
+  return absl::OkStatus();
+}
+
+void MPFaceMeshDetector::DetectFaces(const cv::Mat &camera_frame,
+                                     cv::Rect *multi_face_bounding_boxes,
+                                     int *numFaces) {
+  const auto status =
+      DetectFacesWithStatus(camera_frame, multi_face_bounding_boxes, numFaces);
+  if (!status.ok()) {
+    LOG(INFO) << "MPFaceMeshDetector::DetectFaces failed: " << status.message();
+  }
+}
+
+absl::Status MPFaceMeshDetector::DetectLandmarksWithStatus(
+    cv::Point2f **multi_face_landmarks) {
+
+  if (face_landmarks_packet.IsEmpty()) {
+    return absl::CancelledError("Face landmarks packet is empty.");
+  }
+
+  auto &face_landmarks =
+      face_landmarks_packet
+          .Get<::std::vector<::mediapipe::NormalizedLandmarkList>>();
+
+  const auto image_width_f = static_cast<float>(image_width);
+  const auto image_height_f = static_cast<float>(image_height);
+
+  // Convert landmarks to cv::Point2f**.
+  for (int i = 0; i < face_count; ++i) {
+    const auto &normalized_landmark_list = face_landmarks[i];
+    const auto landmarks_num = normalized_landmark_list.landmark_size();
+
+    if (landmarks_num != kLandmarksNum) {
+      return absl::CancelledError("Detected unexpected landmarks number.");
+    }
+
+    auto &face_landmarks_out = multi_face_landmarks[i];
+
+    for (int j = 0; j < landmarks_num; ++j) {
+      const auto &landmark = normalized_landmark_list.landmark(j);
+      face_landmarks_out[j].x = landmark.x() * image_width_f;
+      face_landmarks_out[j].y = landmark.y() * image_height_f;
+    }
+  }
+
+  return absl::OkStatus();
+}
+
+absl::Status MPFaceMeshDetector::DetectLandmarksWithStatus(
+    cv::Point3f **multi_face_landmarks) {
+
+  if (face_landmarks_packet.IsEmpty()) {
+    return absl::CancelledError("Face landmarks packet is empty.");
+  }
+
+  auto &face_landmarks =
+      face_landmarks_packet
+          .Get<::std::vector<::mediapipe::NormalizedLandmarkList>>();
+
+  const auto image_width_f = static_cast<float>(image_width);
+  const auto image_height_f = static_cast<float>(image_height);
+
+  // Convert landmarks to cv::Point3f**.
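+  // x and y are normalized to [0, 1], so multiplying by the image dimensions
+  // yields pixel coordinates; z is forwarded as the model emits it, a
+  // relative depth value where smaller means closer to the camera.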
+  for (int i = 0; i < face_count; ++i) {
+    const auto &normalized_landmark_list = face_landmarks[i];
+    const auto landmarks_num = normalized_landmark_list.landmark_size();
+
+    if (landmarks_num != kLandmarksNum) {
+      return absl::CancelledError("Detected unexpected landmarks number.");
+    }
+
+    auto &face_landmarks_out = multi_face_landmarks[i];
+
+    for (int j = 0; j < landmarks_num; ++j) {
+      const auto &landmark = normalized_landmark_list.landmark(j);
+      face_landmarks_out[j].x = landmark.x() * image_width_f;
+      face_landmarks_out[j].y = landmark.y() * image_height_f;
+      face_landmarks_out[j].z = landmark.z();
+    }
+  }
+
+  return absl::OkStatus();
+}
+
+void MPFaceMeshDetector::DetectLandmarks(cv::Point2f **multi_face_landmarks,
+                                         int *numFaces) {
+  *numFaces = 0;
+  const auto status = DetectLandmarksWithStatus(multi_face_landmarks);
+  if (!status.ok()) {
+    LOG(INFO) << "MPFaceMeshDetector::DetectLandmarks failed: "
+              << status.message();
+  }
+  *numFaces = face_count;
+}
+
+void MPFaceMeshDetector::DetectLandmarks(cv::Point3f **multi_face_landmarks,
+                                         int *numFaces) {
+  *numFaces = 0;
+  const auto status = DetectLandmarksWithStatus(multi_face_landmarks);
+  if (!status.ok()) {
+    LOG(INFO) << "MPFaceMeshDetector::DetectLandmarks failed: "
+              << status.message();
+  }
+  *numFaces = face_count;
+}
+
+extern "C" {
+DLLEXPORT MPFaceMeshDetector *
+MPFaceMeshDetectorConstruct(int numFaces, const char *face_detection_model_path,
+                            const char *face_landmark_model_path) {
+  return new MPFaceMeshDetector(numFaces, face_detection_model_path,
+                                face_landmark_model_path);
+}
+
+DLLEXPORT void MPFaceMeshDetectorDestruct(MPFaceMeshDetector *detector) {
+  delete detector;
+}
+
+DLLEXPORT void MPFaceMeshDetectorDetectFaces(
+    MPFaceMeshDetector *detector, const cv::Mat &camera_frame,
+    cv::Rect *multi_face_bounding_boxes, int *numFaces) {
+  detector->DetectFaces(camera_frame, multi_face_bounding_boxes, numFaces);
+}
+
+DLLEXPORT void
+MPFaceMeshDetectorDetect2DLandmarks(MPFaceMeshDetector *detector,
+                                    cv::Point2f **multi_face_landmarks,
+                                    int *numFaces) {
+  detector->DetectLandmarks(multi_face_landmarks, numFaces);
+}
+
+DLLEXPORT void
+MPFaceMeshDetectorDetect3DLandmarks(MPFaceMeshDetector *detector,
+                                    cv::Point3f **multi_face_landmarks,
+                                    int *numFaces) {
+  detector->DetectLandmarks(multi_face_landmarks, numFaces);
+}
+
+DLLEXPORT const int MPFaceMeshDetectorLandmarksNum =
+    MPFaceMeshDetector::kLandmarksNum;
+}
+
+const std::string MPFaceMeshDetector::graphConfig = R"pb(
+# MediaPipe graph that performs face mesh with TensorFlow Lite on CPU.
+
+# Input image. (ImageFrame)
+input_stream: "input_video"
+
+# Collection of detected/processed faces, each represented as a list of
+# landmarks. (std::vector<NormalizedLandmarkList>)
+output_stream: "multi_face_landmarks"
+
+# Detected faces count. (int)
+output_stream: "face_count"
+
+# Regions of interest calculated based on landmarks.
+# (std::vector<NormalizedRect>)
+output_stream: "face_rects_from_landmarks"
+
+node {
+  calculator: "FlowLimiterCalculator"
+  input_stream: "input_video"
+  input_stream: "FINISHED:face_count"
+  input_stream_info: {
+    tag_index: "FINISHED"
+    back_edge: true
+  }
+  output_stream: "throttled_input_video"
+}
+
+# Defines side packets for further use in the graph.
+node {
+  calculator: "ConstantSidePacketCalculator"
+  output_side_packet: "PACKET:num_faces"
+  node_options: {
+    [type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
+      packet { int_value: $numFaces }
+    }
+  }
+}
+
+# Defines side packets for further use in the graph.
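+# NOTE: "$faceDetectionModelPath" below is not literal pbtxt; it is a
+# placeholder that MPFaceMeshDetector::InitFaceMeshDetector() replaces via
+# absl::StrReplaceAll before this config is parsed.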
+node {
+  calculator: "ConstantSidePacketCalculator"
+  output_side_packet: "PACKET:face_detection_model_path"
+  options: {
+    [mediapipe.ConstantSidePacketCalculatorOptions.ext]: {
+      packet { string_value: "$faceDetectionModelPath" }
+    }
+  }
+}
+
+# Defines side packets for further use in the graph.
+node {
+  calculator: "ConstantSidePacketCalculator"
+  output_side_packet: "PACKET:face_landmark_model_path"
+  node_options: {
+    [type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
+      packet { string_value: "$faceLandmarkModelPath" }
+    }
+  }
+}
+
+node {
+  calculator: "LocalFileContentsCalculator"
+  input_side_packet: "FILE_PATH:0:face_detection_model_path"
+  input_side_packet: "FILE_PATH:1:face_landmark_model_path"
+  output_side_packet: "CONTENTS:0:face_detection_model_blob"
+  output_side_packet: "CONTENTS:1:face_landmark_model_blob"
+}
+
+node {
+  calculator: "TfLiteModelCalculator"
+  input_side_packet: "MODEL_BLOB:face_detection_model_blob"
+  output_side_packet: "MODEL:face_detection_model"
+}
+
+node {
+  calculator: "TfLiteModelCalculator"
+  input_side_packet: "MODEL_BLOB:face_landmark_model_blob"
+  output_side_packet: "MODEL:face_landmark_model"
+}
+
+# Subgraph that detects faces and corresponding landmarks.
+node {
+  calculator: "FaceLandmarkFrontSideModelCpuWithFaceCounter"
+  input_stream: "IMAGE:throttled_input_video"
+  input_side_packet: "NUM_FACES:num_faces"
+  input_side_packet: "MODEL:0:face_detection_model"
+  input_side_packet: "MODEL:1:face_landmark_model"
+  output_stream: "LANDMARKS:multi_face_landmarks"
+  output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
+  output_stream: "DETECTIONS:face_detections"
+  output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
+  output_stream: "FACE_COUNT_FROM_LANDMARKS:face_count"
+}
+
+)pb";
diff --git a/mediapipe/examples/desktop/face_mesh_dll/face_mesh_lib.h b/mediapipe/examples/desktop/face_mesh_dll/face_mesh_lib.h
new file mode 100644
index 000000000..6705b42e6
--- /dev/null
+++ b/mediapipe/examples/desktop/face_mesh_dll/face_mesh_lib.h
@@ -0,0 +1,105 @@
+#ifndef FACE_MESH_LIBRARY_H
+#define FACE_MESH_LIBRARY_H
+
+#ifdef COMPILING_DLL
+#define DLLEXPORT __declspec(dllexport)
+#else
+#define DLLEXPORT __declspec(dllimport)
+#endif
+
+#include <cstdlib>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "absl/flags/flag.h"
+#include "absl/flags/parse.h"
+#include "absl/strings/str_replace.h"
+#include "mediapipe/framework/calculator_framework.h"
+#include "mediapipe/framework/calculator_graph.h"
+#include "mediapipe/framework/formats/image_frame.h"
+#include "mediapipe/framework/formats/image_frame_opencv.h"
+#include "mediapipe/framework/formats/landmark.pb.h"
+#include "mediapipe/framework/formats/rect.pb.h"
+#include "mediapipe/framework/output_stream_poller.h"
+#include "mediapipe/framework/port/file_helpers.h"
+#include "mediapipe/framework/port/opencv_highgui_inc.h"
+#include "mediapipe/framework/port/opencv_imgproc_inc.h"
+#include "mediapipe/framework/port/opencv_video_inc.h"
+#include "mediapipe/framework/port/parse_text_proto.h"
+#include "mediapipe/framework/port/status.h"
+
+class MPFaceMeshDetector {
+public:
+  MPFaceMeshDetector(int numFaces, const char *face_detection_model_path,
+                     const char *face_landmark_model_path);
+
+  void DetectFaces(const cv::Mat &camera_frame,
+                   cv::Rect *multi_face_bounding_boxes, int *numFaces);
+
+  void DetectLandmarks(cv::Point2f **multi_face_landmarks, int *numFaces);
+  void DetectLandmarks(cv::Point3f **multi_face_landmarks, int *numFaces);
+
+  static constexpr auto kLandmarksNum = 468;
+
+private:
+  absl::Status InitFaceMeshDetector(int numFaces,
+                                    const char *face_detection_model_path,
+                                    const char *face_landmark_model_path);
+  absl::Status DetectFacesWithStatus(const cv::Mat &camera_frame,
+                                     cv::Rect *multi_face_bounding_boxes,
+                                     int *numFaces);
+
+  absl::Status DetectLandmarksWithStatus(cv::Point2f **multi_face_landmarks);
+  absl::Status DetectLandmarksWithStatus(cv::Point3f **multi_face_landmarks);
+
+  static constexpr auto kInputStream = "input_video";
+  static constexpr auto kOutputStream_landmarks = "multi_face_landmarks";
+  static constexpr auto kOutputStream_faceCount = "face_count";
+  static constexpr auto kOutputStream_face_rects_from_landmarks =
+      "face_rects_from_landmarks";
+
+  static const std::string graphConfig;
+
+  mediapipe::CalculatorGraph graph;
+
+  std::unique_ptr<mediapipe::OutputStreamPoller> landmarks_poller_ptr;
+  std::unique_ptr<mediapipe::OutputStreamPoller> face_count_poller_ptr;
+  std::unique_ptr<mediapipe::OutputStreamPoller>
+      face_rects_from_landmarks_poller_ptr;
+
+  int face_count;
+  int image_width;
+  int image_height;
+  mediapipe::Packet face_landmarks_packet;
+};
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+DLLEXPORT MPFaceMeshDetector *
+MPFaceMeshDetectorConstruct(int numFaces, const char *face_detection_model_path,
+                            const char *face_landmark_model_path);
+
+DLLEXPORT void MPFaceMeshDetectorDestruct(MPFaceMeshDetector *detector);
+
+DLLEXPORT void MPFaceMeshDetectorDetectFaces(
+    MPFaceMeshDetector *detector, const cv::Mat &camera_frame,
+    cv::Rect *multi_face_bounding_boxes, int *numFaces);
+
+DLLEXPORT void
+MPFaceMeshDetectorDetect2DLandmarks(MPFaceMeshDetector *detector,
+                                    cv::Point2f **multi_face_landmarks,
+                                    int *numFaces);
+DLLEXPORT void
+MPFaceMeshDetectorDetect3DLandmarks(MPFaceMeshDetector *detector,
+                                    cv::Point3f **multi_face_landmarks,
+                                    int *numFaces);
+
+DLLEXPORT extern const int MPFaceMeshDetectorLandmarksNum;
+
+#ifdef __cplusplus
+}
+#endif
+#endif
\ No newline at end of file
diff --git a/mediapipe/examples/desktop/face_mesh_dll/windows_dll_library.bzl b/mediapipe/examples/desktop/face_mesh_dll/windows_dll_library.bzl
new file mode 100644
index 000000000..69c243d60
--- /dev/null
+++ b/mediapipe/examples/desktop/face_mesh_dll/windows_dll_library.bzl
@@ -0,0 +1,62 @@
+"""
+This is a simple windows_dll_library rule for building a Windows DLL
+that can be depended on by other cc rules.
+Example usage:
+  windows_dll_library(
+      name = "hellolib",
+      srcs = [
+          "hello-library.cpp",
+      ],
+      hdrs = ["hello-library.h"],
+      # Define COMPILING_DLL to export symbols when compiling the DLL.
+      copts = ["/DCOMPILING_DLL"],
+  )
+"""
+
+load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_import", "cc_library")
+
+def windows_dll_library(
+        name,
+        srcs = [],
+        deps = [],
+        hdrs = [],
+        visibility = None,
+        **kwargs):
+    """A simple windows_dll_library rule for building a Windows DLL."""
+    dll_name = name + ".dll"
+    import_lib_name = name + "_import_lib"
+    import_target_name = name + "_dll_import"
+
+    # Build the shared library.
+    cc_binary(
+        name = dll_name,
+        srcs = srcs + hdrs,
+        deps = deps,
+        linkshared = 1,
+        **kwargs
+    )
+
+    # Get the import library for the dll.
+    native.filegroup(
+        name = import_lib_name,
+        srcs = [":" + dll_name],
+        output_group = "interface_library",
+    )
+
+    # Because we cannot directly depend on cc_binary from other cc rules in deps attribute,
+    # we use cc_import as a bridge to depend on the dll.
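+    # Consumers link against the import library (.lib) that this target
+    # exposes, while the shared_library attribute makes the .dll itself
+    # available at run time alongside the consuming binary.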
+    cc_import(
+        name = import_target_name,
+        interface_library = ":" + import_lib_name,
+        shared_library = ":" + dll_name,
+    )
+
+    # Create a new cc_library that also includes the headers needed for the shared library.
+    cc_library(
+        name = name,
+        hdrs = hdrs,
+        visibility = visibility,
+        deps = deps + [
+            ":" + import_target_name,
+        ],
+    )
\ No newline at end of file
diff --git a/mediapipe/modules/face_detection/BUILD b/mediapipe/modules/face_detection/BUILD
index 839418c77..4a0b41544 100644
--- a/mediapipe/modules/face_detection/BUILD
+++ b/mediapipe/modules/face_detection/BUILD
@@ -57,6 +57,18 @@ mediapipe_simple_subgraph(
     ],
 )
 
+mediapipe_simple_subgraph(
+    name = "face_detection_short_range_side_model_cpu",
+    graph = "face_detection_short_range_side_model_cpu.pbtxt",
+    register_as = "FaceDetectionShortRangeSideModelCpu",
+    deps = [
+        ":face_detection_short_range_common",
+        "//mediapipe/calculators/tensor:image_to_tensor_calculator",
+        "//mediapipe/calculators/tensor:inference_calculator",
+        "//mediapipe/calculators/util:to_image_calculator",
+    ],
+)
+
 mediapipe_simple_subgraph(
     name = "face_detection_short_range_gpu",
     graph = "face_detection_short_range_gpu.pbtxt",
diff --git a/mediapipe/modules/face_detection/face_detection_short_range_side_model_cpu.pbtxt b/mediapipe/modules/face_detection/face_detection_short_range_side_model_cpu.pbtxt
new file mode 100644
index 000000000..57639bab2
--- /dev/null
+++ b/mediapipe/modules/face_detection/face_detection_short_range_side_model_cpu.pbtxt
@@ -0,0 +1,86 @@
+# MediaPipe graph to detect faces. (CPU input, and inference is executed on
+# CPU.)
+#
+# It is required that "face_detection_short_range.tflite" is available at
+# "mediapipe/modules/face_detection/face_detection_short_range.tflite"
+# path during execution.
+#
+# EXAMPLE:
+#   node {
+#     calculator: "FaceDetectionShortRangeSideModelCpu"
+#     input_stream: "IMAGE:image"
+#     input_side_packet: "MODEL:face_detection_model"
+#     output_stream: "DETECTIONS:face_detections"
+#   }
+
+type: "FaceDetectionShortRangeSideModelCpu"
+
+# CPU image. (ImageFrame)
+input_stream: "IMAGE:image"
+
+# TfLite model to detect faces.
+# (std::unique_ptr<tflite::FlatBufferModel, std::function<void(tflite::FlatBufferModel*)>>)
+# NOTE: only the mediapipe/modules/face_detection/face_detection_short_range.tflite
+# model can be passed here; otherwise, results are undefined.
+input_side_packet: "MODEL:face_detection_model"
+
+# Detected faces. (std::vector<Detection>)
+# NOTE: there will not be an output packet in the DETECTIONS stream for this
+# particular timestamp if no faces are detected. However, the MediaPipe
+# framework will internally inform the downstream calculators of the absence of
+# this packet so that they don't wait for it unnecessarily.
+output_stream: "DETECTIONS:detections"
+
+# Converts the input CPU image (ImageFrame) to the multi-backend image type
+# (Image).
+node: {
+  calculator: "ToImageCalculator"
+  input_stream: "IMAGE_CPU:image"
+  output_stream: "IMAGE:multi_backend_image"
+}
+
+# Transforms the input image into a 128x128 tensor while keeping the aspect
+# ratio (what is expected by the corresponding face detection model), resulting
+# in potential letterboxing in the transformed image.
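+# (The MATRIX output of the node below records that letterbox transform so
+# that FaceDetectionShortRangeCommon can project detections back onto the
+# original image coordinates.)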
+node: { + calculator: "ImageToTensorCalculator" + input_stream: "IMAGE:multi_backend_image" + output_stream: "TENSORS:input_tensors" + output_stream: "MATRIX:transform_matrix" + options: { + [mediapipe.ImageToTensorCalculatorOptions.ext] { + output_tensor_width: 128 + output_tensor_height: 128 + keep_aspect_ratio: true + output_tensor_float_range { + min: -1.0 + max: 1.0 + } + border_mode: BORDER_ZERO + } + } +} + +# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a +# vector of tensors representing, for instance, detection boxes/keypoints and +# scores. +node { + calculator: "InferenceCalculator" + input_stream: "TENSORS:input_tensors" + output_stream: "TENSORS:detection_tensors" + input_side_packet: "MODEL:face_detection_model" + options { + [mediapipe.InferenceCalculatorOptions.ext] { + delegate { tflite {} } + } + } +} + +# Performs tensor post processing to generate face detections. +node { + calculator: "FaceDetectionShortRangeCommon" + input_stream: "TENSORS:detection_tensors" + input_stream: "MATRIX:transform_matrix" + output_stream: "DETECTIONS:detections" +} diff --git a/mediapipe/modules/face_landmark/BUILD b/mediapipe/modules/face_landmark/BUILD index 77560022e..6e642d7fc 100644 --- a/mediapipe/modules/face_landmark/BUILD +++ b/mediapipe/modules/face_landmark/BUILD @@ -37,6 +37,22 @@ mediapipe_simple_subgraph( ], ) +mediapipe_simple_subgraph( + name = "face_landmark_side_model_cpu", + graph = "face_landmark_side_model_cpu.pbtxt", + register_as = "FaceLandmarkSideModelCpu", + deps = [ + "//mediapipe/calculators/core:gate_calculator", + "//mediapipe/calculators/core:split_vector_calculator", + "//mediapipe/calculators/tensor:image_to_tensor_calculator", + "//mediapipe/calculators/tensor:inference_calculator", + "//mediapipe/calculators/tensor:tensors_to_floats_calculator", + "//mediapipe/calculators/tensor:tensors_to_landmarks_calculator", + "//mediapipe/calculators/util:landmark_projection_calculator", + "//mediapipe/calculators/util:thresholding_calculator", + ], +) + mediapipe_simple_subgraph( name = "face_landmark_gpu", graph = "face_landmark_gpu.pbtxt", @@ -74,6 +90,50 @@ mediapipe_simple_subgraph( ], ) +mediapipe_simple_subgraph( + name = "face_landmark_front_cpu_with_face_counter", + graph = "face_landmark_front_cpu_with_face_counter.pbtxt", + register_as = "FaceLandmarkFrontCpuWithFaceCounter", + deps = [ + ":face_detection_front_detection_to_roi", + ":face_landmark_cpu", + ":face_landmark_landmarks_to_roi", + "//mediapipe/calculators/core:begin_loop_calculator", + "//mediapipe/calculators/core:clip_vector_size_calculator", + "//mediapipe/calculators/core:constant_side_packet_calculator", + "//mediapipe/calculators/core:end_loop_calculator", + "//mediapipe/calculators/core:gate_calculator", + "//mediapipe/calculators/core:previous_loopback_calculator", + "//mediapipe/calculators/image:image_properties_calculator", + "//mediapipe/calculators/util:association_norm_rect_calculator", + "//mediapipe/calculators/util:collection_has_min_size_calculator", + "//mediapipe/calculators/util:counting_vector_size_calculator", + "//mediapipe/modules/face_detection:face_detection_short_range_cpu", + ], +) + +mediapipe_simple_subgraph( + name = "face_landmark_front_side_model_cpu_with_face_counter", + graph = "face_landmark_front_side_model_cpu_with_face_counter.pbtxt", + register_as = "FaceLandmarkFrontSideModelCpuWithFaceCounter", + deps = [ + ":face_detection_front_detection_to_roi", + ":face_landmark_side_model_cpu", + ":face_landmark_landmarks_to_roi", + 
"//mediapipe/calculators/core:begin_loop_calculator", + "//mediapipe/calculators/core:clip_vector_size_calculator", + "//mediapipe/calculators/core:constant_side_packet_calculator", + "//mediapipe/calculators/core:end_loop_calculator", + "//mediapipe/calculators/core:gate_calculator", + "//mediapipe/calculators/core:previous_loopback_calculator", + "//mediapipe/calculators/image:image_properties_calculator", + "//mediapipe/calculators/util:association_norm_rect_calculator", + "//mediapipe/calculators/util:collection_has_min_size_calculator", + "//mediapipe/calculators/util:counting_vector_size_calculator", + "//mediapipe/modules/face_detection:face_detection_short_range_side_model_cpu", + ], +) + mediapipe_simple_subgraph( name = "face_landmark_front_gpu", graph = "face_landmark_front_gpu.pbtxt", diff --git a/mediapipe/modules/face_landmark/face_landmark_front_cpu_with_face_counter.pbtxt b/mediapipe/modules/face_landmark/face_landmark_front_cpu_with_face_counter.pbtxt new file mode 100644 index 000000000..5389a8293 --- /dev/null +++ b/mediapipe/modules/face_landmark/face_landmark_front_cpu_with_face_counter.pbtxt @@ -0,0 +1,249 @@ +# MediaPipe graph to detect/predict face landmarks. (CPU input, and inference is +# executed on CPU.) This graph tries to skip face detection as much as possible +# by using previously detected/predicted landmarks for new images. +# +# It is required that "face_detection_short_range.tflite" is available at +# "mediapipe/modules/face_detection/face_detection_short_range.tflite" +# path during execution. +# +# It is required that "face_landmark.tflite" is available at +# "mediapipe/modules/face_landmark/face_landmark.tflite" +# path during execution. +# +# EXAMPLE: +# node { +# calculator: "FaceLandmarkFrontCpu" +# input_stream: "IMAGE:image" +# input_side_packet: "NUM_FACES:num_faces" +# output_stream: "LANDMARKS:multi_face_landmarks" +# } + +type: "FaceLandmarkFrontCpu" + +# CPU image. (ImageFrame) +input_stream: "IMAGE:image" + +# Max number of faces to detect/track. (int) +input_side_packet: "NUM_FACES:num_faces" + +# Collection of detected/predicted faces, each represented as a list of 468 face +# landmarks. (std::vector) +# NOTE: there will not be an output packet in the LANDMARKS stream for this +# particular timestamp if none of faces detected. However, the MediaPipe +# framework will internally inform the downstream calculators of the absence of +# this packet so that they don't wait for it unnecessarily. +output_stream: "LANDMARKS:multi_face_landmarks" + +# Extra outputs (for debugging, for instance). +# Detected faces. (std::vector) +output_stream: "DETECTIONS:face_detections" +# Regions of interest calculated based on landmarks. +# (std::vector) +output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks" +# Regions of interest calculated based on face detections. +# (std::vector) +output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections" + +# (int) +output_stream: "FACE_COUNT_FROM_LANDMARKS:face_count" + + +# Defines whether landmarks on the previous image should be used to help +# localize landmarks on the current image. 
+node {
+  name: "ConstantSidePacketCalculator"
+  calculator: "ConstantSidePacketCalculator"
+  output_side_packet: "PACKET:use_prev_landmarks"
+  options: {
+    [mediapipe.ConstantSidePacketCalculatorOptions.ext]: {
+      packet { bool_value: true }
+    }
+  }
+}
+node {
+  calculator: "GateCalculator"
+  input_side_packet: "ALLOW:use_prev_landmarks"
+  input_stream: "prev_face_rects_from_landmarks"
+  output_stream: "gated_prev_face_rects_from_landmarks"
+}
+
+# Determines if an input vector of NormalizedRect has a size greater than or
+# equal to the provided num_faces.
+node {
+  calculator: "NormalizedRectVectorHasMinSizeCalculator"
+  input_stream: "ITERABLE:gated_prev_face_rects_from_landmarks"
+  input_side_packet: "num_faces"
+  output_stream: "prev_has_enough_faces"
+}
+
+# Drops the incoming image if enough faces have already been identified from the
+# previous image. Otherwise, passes the incoming image through to trigger a new
+# round of face detection.
+node {
+  calculator: "GateCalculator"
+  input_stream: "image"
+  input_stream: "DISALLOW:prev_has_enough_faces"
+  output_stream: "gated_image"
+  options: {
+    [mediapipe.GateCalculatorOptions.ext] {
+      empty_packets_as_allow: true
+    }
+  }
+}
+
+# Detects faces.
+node {
+  calculator: "FaceDetectionShortRangeCpu"
+  input_stream: "IMAGE:gated_image"
+  output_stream: "DETECTIONS:all_face_detections"
+}
+
+# Makes sure there are no more detections than the provided num_faces.
+node {
+  calculator: "ClipDetectionVectorSizeCalculator"
+  input_stream: "all_face_detections"
+  output_stream: "face_detections"
+  input_side_packet: "num_faces"
+}
+
+# Calculates the size of the image.
+node {
+  calculator: "ImagePropertiesCalculator"
+  input_stream: "IMAGE:gated_image"
+  output_stream: "SIZE:gated_image_size"
+}
+
+# Outputs each element of face_detections at a fake timestamp for the rest of
+# the graph to process. Clones the image size packet for each face_detection at
+# the fake timestamp. At the end of the loop, outputs the BATCH_END timestamp
+# for downstream calculators to inform them that all elements in the vector have
+# been processed.
+node {
+  calculator: "BeginLoopDetectionCalculator"
+  input_stream: "ITERABLE:face_detections"
+  input_stream: "CLONE:gated_image_size"
+  output_stream: "ITEM:face_detection"
+  output_stream: "CLONE:detections_loop_image_size"
+  output_stream: "BATCH_END:detections_loop_end_timestamp"
+}
+
+# Calculates a region of interest based on each face detection, so that it can
+# be used to detect landmarks.
+node {
+  calculator: "FaceDetectionFrontDetectionToRoi"
+  input_stream: "DETECTION:face_detection"
+  input_stream: "IMAGE_SIZE:detections_loop_image_size"
+  output_stream: "ROI:face_rect_from_detection"
+}
+
+# Counts the size of the multi_face_landmarks vector. The image stream acts as
+# the clock, so the calculator emits a count (zero included) even when there is
+# no input vector.
+node {
+  calculator: "CountingNormalizedLandmarkListVectorSizeCalculator"
+  input_stream: "CLOCK:image"
+  input_stream: "VECTOR:multi_face_landmarks"
+  output_stream: "COUNT:face_count"
+}
+
+# Collects a NormalizedRect for each face into a vector. Upon receiving the
+# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END
+# timestamp.
+node { + calculator: "EndLoopNormalizedRectCalculator" + input_stream: "ITEM:face_rect_from_detection" + input_stream: "BATCH_END:detections_loop_end_timestamp" + output_stream: "ITERABLE:face_rects_from_detections" +} + +# Performs association between NormalizedRect vector elements from previous +# image and rects based on face detections from the current image. This +# calculator ensures that the output face_rects vector doesn't contain +# overlapping regions based on the specified min_similarity_threshold. +node { + calculator: "AssociationNormRectCalculator" + input_stream: "face_rects_from_detections" + input_stream: "gated_prev_face_rects_from_landmarks" + output_stream: "face_rects" + options: { + [mediapipe.AssociationCalculatorOptions.ext] { + min_similarity_threshold: 0.5 + } + } +} + +# Calculate size of the image. +node { + calculator: "ImagePropertiesCalculator" + input_stream: "IMAGE:image" + output_stream: "SIZE:image_size" +} + +# Outputs each element of face_rects at a fake timestamp for the rest of the +# graph to process. Clones image and image size packets for each +# single_face_rect at the fake timestamp. At the end of the loop, outputs the +# BATCH_END timestamp for downstream calculators to inform them that all +# elements in the vector have been processed. +node { + calculator: "BeginLoopNormalizedRectCalculator" + input_stream: "ITERABLE:face_rects" + input_stream: "CLONE:0:image" + input_stream: "CLONE:1:image_size" + output_stream: "ITEM:face_rect" + output_stream: "CLONE:0:landmarks_loop_image" + output_stream: "CLONE:1:landmarks_loop_image_size" + output_stream: "BATCH_END:landmarks_loop_end_timestamp" +} + +# Detects face landmarks within specified region of interest of the image. +node { + calculator: "FaceLandmarkCpu" + input_stream: "IMAGE:landmarks_loop_image" + input_stream: "ROI:face_rect" + output_stream: "LANDMARKS:face_landmarks" +} + +# Calculates region of interest based on face landmarks, so that can be reused +# for subsequent image. +node { + calculator: "FaceLandmarkLandmarksToRoi" + input_stream: "LANDMARKS:face_landmarks" + input_stream: "IMAGE_SIZE:landmarks_loop_image_size" + output_stream: "ROI:face_rect_from_landmarks" +} + +# Collects a set of landmarks for each face into a vector. Upon receiving the +# BATCH_END timestamp, outputs the vector of landmarks at the BATCH_END +# timestamp. +node { + calculator: "EndLoopNormalizedLandmarkListVectorCalculator" + input_stream: "ITEM:face_landmarks" + input_stream: "BATCH_END:landmarks_loop_end_timestamp" + output_stream: "ITERABLE:multi_face_landmarks" +} + +# Collects a NormalizedRect for each face into a vector. Upon receiving the +# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END +# timestamp. +node { + calculator: "EndLoopNormalizedRectCalculator" + input_stream: "ITEM:face_rect_from_landmarks" + input_stream: "BATCH_END:landmarks_loop_end_timestamp" + output_stream: "ITERABLE:face_rects_from_landmarks" +} + +# Caches face rects calculated from landmarks, and upon the arrival of the next +# input image, sends out the cached rects with timestamps replaced by that of +# the input image, essentially generating a packet that carries the previous +# face rects. Note that upon the arrival of the very first input image, a +# timestamp bound update occurs to jump start the feedback loop. 
+node {
+  calculator: "PreviousLoopbackCalculator"
+  input_stream: "MAIN:image"
+  input_stream: "LOOP:face_rects_from_landmarks"
+  input_stream_info: {
+    tag_index: "LOOP"
+    back_edge: true
+  }
+  output_stream: "PREV_LOOP:prev_face_rects_from_landmarks"
+}
diff --git a/mediapipe/modules/face_landmark/face_landmark_front_side_model_cpu_with_face_counter.pbtxt b/mediapipe/modules/face_landmark/face_landmark_front_side_model_cpu_with_face_counter.pbtxt
new file mode 100644
index 000000000..dc83f17b7
--- /dev/null
+++ b/mediapipe/modules/face_landmark/face_landmark_front_side_model_cpu_with_face_counter.pbtxt
@@ -0,0 +1,256 @@
+# MediaPipe graph to detect/predict face landmarks. (CPU input, and inference is
+# executed on CPU.) This graph tries to skip face detection as much as possible
+# by using previously detected/predicted landmarks for new images.
+#
+# EXAMPLE:
+#   node {
+#     calculator: "FaceLandmarkFrontSideModelCpuWithFaceCounter"
+#     input_stream: "IMAGE:image"
+#     input_side_packet: "NUM_FACES:num_faces"
+#     input_side_packet: "MODEL:0:face_detection_model"
+#     input_side_packet: "MODEL:1:face_landmark_model"
+#     output_stream: "LANDMARKS:multi_face_landmarks"
+#   }
+
+type: "FaceLandmarkFrontSideModelCpuWithFaceCounter"
+
+# CPU image. (ImageFrame)
+input_stream: "IMAGE:image"
+
+# Max number of faces to detect/track. (int)
+input_side_packet: "NUM_FACES:num_faces"
+# TfLite model to detect faces.
+# (std::unique_ptr<tflite::FlatBufferModel, std::function<void(tflite::FlatBufferModel*)>>)
+# NOTE: only the mediapipe/modules/face_detection/face_detection_short_range.tflite
+# model can be passed here; otherwise, results are undefined.
+input_side_packet: "MODEL:0:face_detection_model"
+# TfLite model to detect face landmarks.
+# (std::unique_ptr<tflite::FlatBufferModel, std::function<void(tflite::FlatBufferModel*)>>)
+# NOTE: only the mediapipe/modules/face_landmark/face_landmark.tflite model
+# can be passed here; otherwise, results are undefined.
+input_side_packet: "MODEL:1:face_landmark_model"
+
+# Collection of detected/predicted faces, each represented as a list of 468 face
+# landmarks. (std::vector<NormalizedLandmarkList>)
+# NOTE: there will not be an output packet in the LANDMARKS stream for this
+# particular timestamp if no faces are detected. However, the MediaPipe
+# framework will internally inform the downstream calculators of the absence of
+# this packet so that they don't wait for it unnecessarily.
+output_stream: "LANDMARKS:multi_face_landmarks"
+
+# Extra outputs (for debugging, for instance).
+# Detected faces. (std::vector<Detection>)
+output_stream: "DETECTIONS:face_detections"
+# Regions of interest calculated based on landmarks.
+# (std::vector<NormalizedRect>)
+output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
+# Regions of interest calculated based on face detections.
+# (std::vector<NormalizedRect>)
+output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
+
+# Detected faces count. (int)
+output_stream: "FACE_COUNT_FROM_LANDMARKS:face_count"
+
+
+# Defines whether landmarks on the previous image should be used to help
+# localize landmarks on the current image.
+node {
+  name: "ConstantSidePacketCalculator"
+  calculator: "ConstantSidePacketCalculator"
+  output_side_packet: "PACKET:use_prev_landmarks"
+  options: {
+    [mediapipe.ConstantSidePacketCalculatorOptions.ext]: {
+      packet { bool_value: true }
+    }
+  }
+}
+node {
+  calculator: "GateCalculator"
+  input_side_packet: "ALLOW:use_prev_landmarks"
+  input_stream: "prev_face_rects_from_landmarks"
+  output_stream: "gated_prev_face_rects_from_landmarks"
+}
+
+# Determines if an input vector of NormalizedRect has a size greater than or
+# equal to the provided num_faces.
+node {
+  calculator: "NormalizedRectVectorHasMinSizeCalculator"
+  input_stream: "ITERABLE:gated_prev_face_rects_from_landmarks"
+  input_side_packet: "num_faces"
+  output_stream: "prev_has_enough_faces"
+}
+
+# Drops the incoming image if FaceLandmarkSideModelCpu was able to identify face
+# presence in the previous image. Otherwise, passes the incoming image through
+# to trigger a new round of face detection in FaceDetectionShortRangeSideModelCpu.
+node {
+  calculator: "GateCalculator"
+  input_stream: "image"
+  input_stream: "DISALLOW:prev_has_enough_faces"
+  output_stream: "gated_image"
+  options: {
+    [mediapipe.GateCalculatorOptions.ext] {
+      empty_packets_as_allow: true
+    }
+  }
+}
+
+# Detects faces.
+node {
+  calculator: "FaceDetectionShortRangeSideModelCpu"
+  input_stream: "IMAGE:gated_image"
+  input_side_packet: "MODEL:face_detection_model"
+  output_stream: "DETECTIONS:all_face_detections"
+}
+
+# Makes sure there are no more detections than the provided num_faces.
+node {
+  calculator: "ClipDetectionVectorSizeCalculator"
+  input_stream: "all_face_detections"
+  output_stream: "face_detections"
+  input_side_packet: "num_faces"
+}
+
+# Calculates the size of the image.
+node {
+  calculator: "ImagePropertiesCalculator"
+  input_stream: "IMAGE:gated_image"
+  output_stream: "SIZE:gated_image_size"
+}
+
+# Outputs each element of face_detections at a fake timestamp for the rest of
+# the graph to process. Clones the image size packet for each face_detection at
+# the fake timestamp. At the end of the loop, outputs the BATCH_END timestamp
+# for downstream calculators to inform them that all elements in the vector have
+# been processed.
+node {
+  calculator: "BeginLoopDetectionCalculator"
+  input_stream: "ITERABLE:face_detections"
+  input_stream: "CLONE:gated_image_size"
+  output_stream: "ITEM:face_detection"
+  output_stream: "CLONE:detections_loop_image_size"
+  output_stream: "BATCH_END:detections_loop_end_timestamp"
+}
+
+# Calculates a region of interest based on each face detection, so that it can
+# be used to detect landmarks.
+node {
+  calculator: "FaceDetectionFrontDetectionToRoi"
+  input_stream: "DETECTION:face_detection"
+  input_stream: "IMAGE_SIZE:detections_loop_image_size"
+  output_stream: "ROI:face_rect_from_detection"
+}
+
+# Counts the size of the multi_face_landmarks vector. The image stream acts as
+# the clock, so the calculator emits a count (zero included) even when there is
+# no input vector.
+node {
+  calculator: "CountingNormalizedLandmarkListVectorSizeCalculator"
+  input_stream: "CLOCK:image"
+  input_stream: "VECTOR:multi_face_landmarks"
+  output_stream: "COUNT:face_count"
+}
+
+# Collects a NormalizedRect for each face into a vector. Upon receiving the
+# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END
+# timestamp.
+node {
+  calculator: "EndLoopNormalizedRectCalculator"
+  input_stream: "ITEM:face_rect_from_detection"
+  input_stream: "BATCH_END:detections_loop_end_timestamp"
+  output_stream: "ITERABLE:face_rects_from_detections"
+}
+
+# Performs association between NormalizedRect vector elements from the previous
+# image and rects based on face detections from the current image. This
+# calculator ensures that the output face_rects vector doesn't contain
+# overlapping regions based on the specified min_similarity_threshold.
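+# Rects whose overlap exceeds min_similarity_threshold are treated as the same
+# face and merged, so a face that is both tracked and freshly detected is
+# processed only once downstream.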
+node {
+  calculator: "AssociationNormRectCalculator"
+  input_stream: "face_rects_from_detections"
+  input_stream: "gated_prev_face_rects_from_landmarks"
+  output_stream: "face_rects"
+  options: {
+    [mediapipe.AssociationCalculatorOptions.ext] {
+      min_similarity_threshold: 0.5
+    }
+  }
+}
+
+# Calculates the size of the image.
+node {
+  calculator: "ImagePropertiesCalculator"
+  input_stream: "IMAGE:image"
+  output_stream: "SIZE:image_size"
+}
+
+# Outputs each element of face_rects at a fake timestamp for the rest of the
+# graph to process. Clones image and image size packets for each
+# single_face_rect at the fake timestamp. At the end of the loop, outputs the
+# BATCH_END timestamp for downstream calculators to inform them that all
+# elements in the vector have been processed.
+node {
+  calculator: "BeginLoopNormalizedRectCalculator"
+  input_stream: "ITERABLE:face_rects"
+  input_stream: "CLONE:0:image"
+  input_stream: "CLONE:1:image_size"
+  output_stream: "ITEM:face_rect"
+  output_stream: "CLONE:0:landmarks_loop_image"
+  output_stream: "CLONE:1:landmarks_loop_image_size"
+  output_stream: "BATCH_END:landmarks_loop_end_timestamp"
+}
+
+# Detects face landmarks within the specified region of interest of the image.
+node {
+  calculator: "FaceLandmarkSideModelCpu"
+  input_stream: "IMAGE:landmarks_loop_image"
+  input_stream: "ROI:face_rect"
+  input_side_packet: "MODEL:face_landmark_model"
+  output_stream: "LANDMARKS:face_landmarks"
+}
+
+# Calculates a region of interest based on face landmarks, so that it can be
+# reused for the subsequent image.
+node {
+  calculator: "FaceLandmarkLandmarksToRoi"
+  input_stream: "LANDMARKS:face_landmarks"
+  input_stream: "IMAGE_SIZE:landmarks_loop_image_size"
+  output_stream: "ROI:face_rect_from_landmarks"
+}
+
+# Collects a set of landmarks for each face into a vector. Upon receiving the
+# BATCH_END timestamp, outputs the vector of landmarks at the BATCH_END
+# timestamp.
+node {
+  calculator: "EndLoopNormalizedLandmarkListVectorCalculator"
+  input_stream: "ITEM:face_landmarks"
+  input_stream: "BATCH_END:landmarks_loop_end_timestamp"
+  output_stream: "ITERABLE:multi_face_landmarks"
+}
+
+# Collects a NormalizedRect for each face into a vector. Upon receiving the
+# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END
+# timestamp.
+node {
+  calculator: "EndLoopNormalizedRectCalculator"
+  input_stream: "ITEM:face_rect_from_landmarks"
+  input_stream: "BATCH_END:landmarks_loop_end_timestamp"
+  output_stream: "ITERABLE:face_rects_from_landmarks"
+}
+
+# Caches face rects calculated from landmarks, and upon the arrival of the next
+# input image, sends out the cached rects with timestamps replaced by that of
+# the input image, essentially generating a packet that carries the previous
+# face rects. Note that upon the arrival of the very first input image, a
+# timestamp bound update occurs to jump start the feedback loop.
+node {
+  calculator: "PreviousLoopbackCalculator"
+  input_stream: "MAIN:image"
+  input_stream: "LOOP:face_rects_from_landmarks"
+  input_stream_info: {
+    tag_index: "LOOP"
+    back_edge: true
+  }
+  output_stream: "PREV_LOOP:prev_face_rects_from_landmarks"
+}
diff --git a/mediapipe/modules/face_landmark/face_landmark_side_model_cpu.pbtxt b/mediapipe/modules/face_landmark/face_landmark_side_model_cpu.pbtxt
new file mode 100644
index 000000000..d8537fd82
--- /dev/null
+++ b/mediapipe/modules/face_landmark/face_landmark_side_model_cpu.pbtxt
@@ -0,0 +1,143 @@
+# MediaPipe graph to detect/predict face landmarks. (CPU input, and inference is
+# executed on CPU.)
+#
+# It is required that "face_landmark.tflite" is available at
+# "mediapipe/modules/face_landmark/face_landmark.tflite"
+# path during execution.
+#
+# EXAMPLE:
+#   node {
+#     calculator: "FaceLandmarkSideModelCpu"
+#     input_stream: "IMAGE:image"
+#     input_stream: "ROI:face_roi"
+#     input_side_packet: "MODEL:face_landmark_model"
+#     output_stream: "LANDMARKS:face_landmarks"
+#   }
+
+type: "FaceLandmarkSideModelCpu"
+
+# CPU image. (ImageFrame)
+input_stream: "IMAGE:image"
+# ROI (region of interest) within the given image where a face is located.
+# (NormalizedRect)
+input_stream: "ROI:roi"
+
+# TfLite model to detect face landmarks.
+# (std::unique_ptr<tflite::FlatBufferModel, std::function<void(tflite::FlatBufferModel*)>>)
+# NOTE: only the mediapipe/modules/face_landmark/face_landmark.tflite model
+# can be passed here; otherwise, results are undefined.
+input_side_packet: "MODEL:face_landmark_model"
+
+
+# 468 face landmarks within the given ROI. (NormalizedLandmarkList)
+# NOTE: if a face is not present within the given ROI, for this particular
+# timestamp there will not be an output packet in the LANDMARKS stream. However,
+# the MediaPipe framework will internally inform the downstream calculators of
+# the absence of this packet so that they don't wait for it unnecessarily.
+output_stream: "LANDMARKS:face_landmarks"
+
+# Transforms the input image into a 192x192 tensor.
+node: {
+  calculator: "ImageToTensorCalculator"
+  input_stream: "IMAGE:image"
+  input_stream: "NORM_RECT:roi"
+  output_stream: "TENSORS:input_tensors"
+  options: {
+    [mediapipe.ImageToTensorCalculatorOptions.ext] {
+      output_tensor_width: 192
+      output_tensor_height: 192
+      output_tensor_float_range {
+        min: 0.0
+        max: 1.0
+      }
+    }
+  }
+}
+
+# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a
+# vector of tensors representing, for instance, detection boxes/keypoints and
+# scores.
+node {
+  calculator: "InferenceCalculator"
+  input_stream: "TENSORS:input_tensors"
+  output_stream: "TENSORS:output_tensors"
+  input_side_packet: "MODEL:face_landmark_model"
+  options {
+    [mediapipe.InferenceCalculatorOptions.ext] {
+      delegate { tflite {} }
+    }
+  }
+}
+
+# Splits a vector of tensors into multiple vectors.
+node {
+  calculator: "SplitTensorVectorCalculator"
+  input_stream: "output_tensors"
+  output_stream: "landmark_tensors"
+  output_stream: "face_flag_tensor"
+  options: {
+    [mediapipe.SplitVectorCalculatorOptions.ext] {
+      ranges: { begin: 0 end: 1 }
+      ranges: { begin: 1 end: 2 }
+    }
+  }
+}
+
+# Converts the face-flag tensor into a float that represents the confidence
+# score of face presence.
+node {
+  calculator: "TensorsToFloatsCalculator"
+  input_stream: "TENSORS:face_flag_tensor"
+  output_stream: "FLOAT:face_presence_score"
+  options {
+    [mediapipe.TensorsToFloatsCalculatorOptions.ext] {
+      activation: SIGMOID
+    }
+  }
+}
+
+# Applies a threshold to the confidence score to determine whether a face is
+# present.
+node {
+  calculator: "ThresholdingCalculator"
+  input_stream: "FLOAT:face_presence_score"
+  output_stream: "FLAG:face_presence"
+  options: {
+    [mediapipe.ThresholdingCalculatorOptions.ext] {
+      threshold: 0.5
+    }
+  }
+}
+
+# Drops the landmark tensors if a face is not present.
+node {
+  calculator: "GateCalculator"
+  input_stream: "landmark_tensors"
+  input_stream: "ALLOW:face_presence"
+  output_stream: "ensured_landmark_tensors"
+}
+
+# Decodes the landmark tensors into a vector of landmarks, where the landmark
+# coordinates are normalized by the size of the input image to the model.
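+# (468 landmarks x 3 coordinates = 1404 floats decoded from the model's
+# output tensor.)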
+node { + calculator: "TensorsToLandmarksCalculator" + input_stream: "TENSORS:ensured_landmark_tensors" + output_stream: "NORM_LANDMARKS:landmarks" + options: { + [mediapipe.TensorsToLandmarksCalculatorOptions.ext] { + num_landmarks: 468 + input_image_width: 192 + input_image_height: 192 + } + } +} + +# Projects the landmarks from the cropped face image to the corresponding +# locations on the full image before cropping (input to the graph). +node { + calculator: "LandmarkProjectionCalculator" + input_stream: "NORM_LANDMARKS:landmarks" + input_stream: "NORM_RECT:roi" + output_stream: "NORM_LANDMARKS:face_landmarks" +}
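
Side note: CountingVectorSizeCalculator is templated on the vector type, so the same header can back counters for other element types. A minimal sketch of such a registration (hypothetical, not included in this patch; the name CountingDetectionVectorSizeCalculator is illustrative):

#include "mediapipe/calculators/util/counting_vector_size_calculator.h"

#include "mediapipe/framework/formats/detection.pb.h"

namespace mediapipe {

// Hypothetical: counts std::vector<Detection> packets using the same
// CLOCK/VECTOR/COUNT tag contract as the landmark-list counter above.
typedef CountingVectorSizeCalculator<std::vector<::mediapipe::Detection>>
    CountingDetectionVectorSizeCalculator;

REGISTER_CALCULATOR(CountingDetectionVectorSizeCalculator);

}  // namespace mediapipe

Registering such a variant would also require adding "//mediapipe/framework/formats:detection_cc_proto" to the calculator's deps in mediapipe/calculators/util/BUILD.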