From 393ad8ffccea5020c7fec4429ee170a717acd903 Mon Sep 17 00:00:00 2001 From: dmaletskiy Date: Thu, 12 Aug 2021 17:40:07 +0300 Subject: [PATCH] Added possibility to get detected face bounding boxes --- .../examples/desktop/face_mesh_dll/BUILD | 1 + .../desktop/face_mesh_dll/face_mesh_cpu.cpp | 20 +- .../desktop/face_mesh_dll/face_mesh_lib.cpp | 189 +++++++++++++++--- .../desktop/face_mesh_dll/face_mesh_lib.h | 44 +++- 4 files changed, 213 insertions(+), 41 deletions(-) diff --git a/mediapipe/examples/desktop/face_mesh_dll/BUILD b/mediapipe/examples/desktop/face_mesh_dll/BUILD index 3a20d0f43..8b6029790 100644 --- a/mediapipe/examples/desktop/face_mesh_dll/BUILD +++ b/mediapipe/examples/desktop/face_mesh_dll/BUILD @@ -36,6 +36,7 @@ windows_dll_library( "//mediapipe/framework/formats:image_frame", "//mediapipe/framework/formats:image_frame_opencv", "//mediapipe/framework/formats:landmark_cc_proto", + "//mediapipe/framework/formats:rect_cc_proto", "//mediapipe/framework/port:file_helpers", "//mediapipe/framework/port:opencv_highgui", "//mediapipe/framework/port:opencv_imgproc", diff --git a/mediapipe/examples/desktop/face_mesh_dll/face_mesh_cpu.cpp b/mediapipe/examples/desktop/face_mesh_dll/face_mesh_cpu.cpp index 0d9011dd0..83762a1a1 100644 --- a/mediapipe/examples/desktop/face_mesh_dll/face_mesh_cpu.cpp +++ b/mediapipe/examples/desktop/face_mesh_dll/face_mesh_cpu.cpp @@ -36,7 +36,8 @@ int main(int argc, char **argv) { for (int i = 0; i < maxNumFaces; ++i) { multiFaceLandmarks[i] = new cv::Point2f[MPFaceMeshDetectorLandmarksNum]; } - const auto faceCount = std::make_unique(); + + std::vector multiFaceBoundingBoxes(maxNumFaces); LOG(INFO) << "FaceMeshDetector constructed."; @@ -54,14 +55,21 @@ int main(int argc, char **argv) { cv::Mat camera_frame; cv::cvtColor(camera_frame_raw, camera_frame, cv::COLOR_BGR2RGB); - cv::flip(camera_frame, camera_frame, /*flipcode=HORIZONTAL*/ 1); - MPFaceMeshDetectorProcessFrame2D(faceMeshDetector, camera_frame, - faceCount.get(), multiFaceLandmarks); + int faceCount = 0; - LOG(INFO) << "Detected faces num: " << *faceCount; + MPFaceMeshDetectorDetectFaces(faceMeshDetector, camera_frame, + multiFaceBoundingBoxes.data(), &faceCount); - if (*faceCount > 0) { + if (faceCount > 0) { + auto &face_bounding_box = multiFaceBoundingBoxes[0]; + + cv::rectangle(camera_frame_raw, face_bounding_box, cv::Scalar(0, 255, 0), + 3); + + int landmarksNum = 0; + MPFaceMeshDetectorDetect2DLandmarks(faceMeshDetector, multiFaceLandmarks, + &landmarksNum); auto &face_landmarks = multiFaceLandmarks[0]; auto &landmark = face_landmarks[0]; diff --git a/mediapipe/examples/desktop/face_mesh_dll/face_mesh_lib.cpp b/mediapipe/examples/desktop/face_mesh_dll/face_mesh_lib.cpp index 167a38f93..b3082e58c 100644 --- a/mediapipe/examples/desktop/face_mesh_dll/face_mesh_lib.cpp +++ b/mediapipe/examples/desktop/face_mesh_dll/face_mesh_lib.cpp @@ -52,11 +52,17 @@ MPFaceMeshDetector::InitFaceMeshDetector(int numFaces, graph.AddOutputStreamPoller(kOutputStream_landmarks)); ASSIGN_OR_RETURN(mediapipe::OutputStreamPoller face_count_poller, graph.AddOutputStreamPoller(kOutputStream_faceCount)); + ASSIGN_OR_RETURN( + mediapipe::OutputStreamPoller face_rects_from_landmarks_poller, + graph.AddOutputStreamPoller(kOutputStream_face_rects_from_landmarks)); landmarks_poller_ptr = std::make_unique( std::move(landmarks_poller)); face_count_poller_ptr = std::make_unique( std::move(face_count_poller)); + face_rects_from_landmarks_poller_ptr = + std::make_unique( + std::move(face_rects_from_landmarks_poller)); MP_RETURN_IF_ERROR(graph.StartRun({})); @@ -65,10 +71,19 @@ MPFaceMeshDetector::InitFaceMeshDetector(int numFaces, return absl::OkStatus(); } -absl::Status MPFaceMeshDetector::ProcessFrame2DWithStatus( - const cv::Mat &camera_frame, int *numFaces, - cv::Point2f **multi_face_landmarks) { +absl::Status +MPFaceMeshDetector::DetectFacesWithStatus(const cv::Mat &camera_frame, + cv::Rect *multi_face_bounding_boxes, + int *numFaces) { + if (!numFaces || !multi_face_bounding_boxes) { + return absl::InvalidArgumentError( + "MPFaceMeshDetector::DetectFacesWithStatus requires notnull pointer to " + "save results data."); + } + + // Reset face counts. *numFaces = 0; + face_count = 0; // Wrap Mat into an ImageFrame. auto input_frame = absl::make_unique( @@ -92,26 +107,89 @@ absl::Status MPFaceMeshDetector::ProcessFrame2DWithStatus( "Failed during getting next face_count_packet."); } - auto &face_count = face_count_packet.Get(); + auto &face_count_val = face_count_packet.Get(); - if (face_count <= 0) { + if (face_count_val <= 0) { return absl::OkStatus(); } + // Get face bounding boxes. + mediapipe::Packet face_rects_from_landmarks_packet; + if (!face_rects_from_landmarks_poller_ptr || + !face_rects_from_landmarks_poller_ptr->Next( + &face_rects_from_landmarks_packet)) { + return absl::CancelledError( + "Failed during getting next face_rects_from_landmarks_packet."); + } + + auto &face_bounding_boxes = + face_rects_from_landmarks_packet + .Get<::std::vector<::mediapipe::NormalizedRect>>(); + + image_width = camera_frame.cols; + image_height = camera_frame.rows; + const auto image_width_f = static_cast(image_width); + const auto image_height_f = static_cast(image_height); + + // Convert vector (center based Rects) to cv::Rect* + // (leftTop based Rects). + for (int i = 0; i < face_count_val; ++i) { + const auto &normalized_bounding_box = face_bounding_boxes[i]; + auto &bounding_box = multi_face_bounding_boxes[i]; + + const auto width = + static_cast(normalized_bounding_box.width() * image_width_f); + const auto height = + static_cast(normalized_bounding_box.height() * image_height_f); + + bounding_box.x = + static_cast(normalized_bounding_box.x_center() * image_width_f) - + (width >> 1); + bounding_box.y = + static_cast(normalized_bounding_box.y_center() * image_height_f) - + (height >> 1); + bounding_box.width = width; + bounding_box.height = height; + } + // Get face landmarks. - mediapipe::Packet face_landmarks_packet; if (!landmarks_poller_ptr || !landmarks_poller_ptr->Next(&face_landmarks_packet)) { return absl::CancelledError("Failed during getting next landmarks_packet."); } - auto &output_landmarks_vector = + *numFaces = face_count_val; + face_count = face_count_val; + + return absl::OkStatus(); +} + +void MPFaceMeshDetector::DetectFaces(const cv::Mat &camera_frame, + cv::Rect *multi_face_bounding_boxes, + int *numFaces) { + const auto status = + DetectFacesWithStatus(camera_frame, multi_face_bounding_boxes, numFaces); + if (!status.ok()) { + LOG(INFO) << "MPFaceMeshDetector::DetectFaces failed: " << status.message(); + } +} +absl::Status MPFaceMeshDetector::DetectLandmarksWithStatus( + cv::Point2f **multi_face_landmarks) { + + if (face_landmarks_packet.IsEmpty()) { + return absl::CancelledError("Face landmarks packet is empty."); + } + + auto &face_landmarks = face_landmarks_packet .Get<::std::vector<::mediapipe::NormalizedLandmarkList>>(); + const auto image_width_f = static_cast(image_width); + const auto image_height_f = static_cast(image_height); + // Convert landmarks to cv::Point2f**. for (int i = 0; i < face_count; ++i) { - const auto &normalizedLandmarkList = output_landmarks_vector[i]; + const auto &normalizedLandmarkList = face_landmarks[i]; const auto landmarks_num = normalizedLandmarkList.landmark_size(); if (landmarks_num != kLandmarksNum) { @@ -122,25 +200,70 @@ absl::Status MPFaceMeshDetector::ProcessFrame2DWithStatus( for (int j = 0; j < landmarks_num; ++j) { const auto &landmark = normalizedLandmarkList.landmark(j); - face_landmarks[j].x = landmark.x(); - face_landmarks[j].y = landmark.y(); + face_landmarks[j].x = landmark.x() * image_width_f; + face_landmarks[j].y = landmark.y() * image_height_f; } } - *numFaces = face_count; - return absl::OkStatus(); } -void MPFaceMeshDetector::ProcessFrame2D(const cv::Mat &camera_frame, - int *numFaces, - cv::Point2f **multi_face_landmarks) { - const auto status = - ProcessFrame2DWithStatus(camera_frame, numFaces, multi_face_landmarks); - if (!status.ok()) { - LOG(INFO) << "Failed ProcessFrame2D."; - LOG(INFO) << status.message(); +absl::Status MPFaceMeshDetector::DetectLandmarksWithStatus( + cv::Point3f **multi_face_landmarks) { + + if (face_landmarks_packet.IsEmpty()) { + return absl::CancelledError("Face landmarks packet is empty."); } + + auto &face_landmarks = + face_landmarks_packet + .Get<::std::vector<::mediapipe::NormalizedLandmarkList>>(); + + const auto image_width_f = static_cast(image_width); + const auto image_height_f = static_cast(image_height); + + // Convert landmarks to cv::Point3f**. + for (int i = 0; i < face_count; ++i) { + const auto &normalized_landmark_list = face_landmarks[i]; + const auto landmarks_num = normalized_landmark_list.landmark_size(); + + if (landmarks_num != kLandmarksNum) { + return absl::CancelledError("Detected unexpected landmarks number."); + } + + auto &face_landmarks = multi_face_landmarks[i]; + + for (int j = 0; j < landmarks_num; ++j) { + const auto &landmark = normalized_landmark_list.landmark(j); + face_landmarks[j].x = landmark.x() * image_width_f; + face_landmarks[j].y = landmark.y() * image_height_f; + face_landmarks[j].z = landmark.z(); + } + } + + return absl::OkStatus(); +} + +void MPFaceMeshDetector::DetectLandmarks(cv::Point2f **multi_face_landmarks, + int *numFaces) { + *numFaces = 0; + const auto status = DetectLandmarksWithStatus(multi_face_landmarks); + if (!status.ok()) { + LOG(INFO) << "MPFaceMeshDetector::DetectLandmarks failed: " + << status.message(); + } + *numFaces = face_count; +} + +void MPFaceMeshDetector::DetectLandmarks(cv::Point3f **multi_face_landmarks, + int *numFaces) { + *numFaces = 0; + const auto status = DetectLandmarksWithStatus(multi_face_landmarks); + if (!status.ok()) { + LOG(INFO) << "MPFaceMeshDetector::DetectLandmarks failed: " + << status.message(); + } + *numFaces = face_count; } extern "C" { @@ -155,14 +278,26 @@ DLLEXPORT void MPFaceMeshDetectorDestruct(MPFaceMeshDetector *detector) { delete detector; } +DLLEXPORT void MPFaceMeshDetectorDetectFaces( + MPFaceMeshDetector *detector, const cv::Mat &camera_frame, + cv::Rect *multi_face_bounding_boxes, int *numFaces) { + detector->DetectFaces(camera_frame, multi_face_bounding_boxes, numFaces); +} DLLEXPORT void -MPFaceMeshDetectorProcessFrame2D(MPFaceMeshDetector *detector, - const cv::Mat &camera_frame, int *numFaces, - cv::Point2f **multi_face_landmarks) { - detector->ProcessFrame2D(camera_frame, numFaces, multi_face_landmarks); +MPFaceMeshDetectorDetect2DLandmarks(MPFaceMeshDetector *detector, + cv::Point2f **multi_face_landmarks, + int *numFaces) { + detector->DetectLandmarks(multi_face_landmarks, numFaces); +} +DLLEXPORT void +MPFaceMeshDetectorDetect3DLandmarks(MPFaceMeshDetector *detector, + cv::Point3f **multi_face_landmarks, + int *numFaces) { + detector->DetectLandmarks(multi_face_landmarks, numFaces); } -DLLEXPORT const int MPFaceMeshDetectorLandmarksNum = MPFaceMeshDetector::kLandmarksNum; +DLLEXPORT const int MPFaceMeshDetectorLandmarksNum = + MPFaceMeshDetector::kLandmarksNum; } const std::string MPFaceMeshDetector::graphConfig = R"pb( @@ -178,6 +313,10 @@ output_stream: "multi_face_landmarks" # Detected faces count. (int) output_stream: "face_count" +# Regions of interest calculated based on landmarks. +# (std::vector) +output_stream: "face_rects_from_landmarks" + node { calculator: "FlowLimiterCalculator" input_stream: "input_video" diff --git a/mediapipe/examples/desktop/face_mesh_dll/face_mesh_lib.h b/mediapipe/examples/desktop/face_mesh_dll/face_mesh_lib.h index 4b40417bc..6705b42e6 100644 --- a/mediapipe/examples/desktop/face_mesh_dll/face_mesh_lib.h +++ b/mediapipe/examples/desktop/face_mesh_dll/face_mesh_lib.h @@ -20,6 +20,7 @@ #include "mediapipe/framework/formats/image_frame.h" #include "mediapipe/framework/formats/image_frame_opencv.h" #include "mediapipe/framework/formats/landmark.pb.h" +#include "mediapipe/framework/formats/rect.pb.h" #include "mediapipe/framework/output_stream_poller.h" #include "mediapipe/framework/port/file_helpers.h" #include "mediapipe/framework/port/opencv_highgui_inc.h" @@ -30,25 +31,33 @@ class MPFaceMeshDetector { public: - static constexpr auto kLandmarksNum = 468; - MPFaceMeshDetector(int numFaces, const char *face_detection_model_path, const char *face_landmark_model_path); - void ProcessFrame2D(const cv::Mat &camera_frame, int *numFaces, - cv::Point2f **multi_face_landmarks); + void DetectFaces(const cv::Mat &camera_frame, + cv::Rect *multi_face_bounding_boxes, int *numFaces); + + void DetectLandmarks(cv::Point2f **multi_face_landmarks, int *numFaces); + void DetectLandmarks(cv::Point3f **multi_face_landmarks, int *numFaces); + + static constexpr auto kLandmarksNum = 468; private: absl::Status InitFaceMeshDetector(int numFaces, const char *face_detection_model_path, const char *face_landmark_model_path); - absl::Status ProcessFrame2DWithStatus(const cv::Mat &camera_frame, - int *numFaces, - cv::Point2f **multi_face_landmarks); + absl::Status DetectFacesWithStatus(const cv::Mat &camera_frame, + cv::Rect *multi_face_bounding_boxes, + int *numFaces); + + absl::Status DetectLandmarksWithStatus(cv::Point2f **multi_face_landmarks); + absl::Status DetectLandmarksWithStatus(cv::Point3f **multi_face_landmarks); static constexpr auto kInputStream = "input_video"; static constexpr auto kOutputStream_landmarks = "multi_face_landmarks"; static constexpr auto kOutputStream_faceCount = "face_count"; + static constexpr auto kOutputStream_face_rects_from_landmarks = + "face_rects_from_landmarks"; static const std::string graphConfig; @@ -56,6 +65,13 @@ private: std::unique_ptr landmarks_poller_ptr; std::unique_ptr face_count_poller_ptr; + std::unique_ptr + face_rects_from_landmarks_poller_ptr; + + int face_count; + int image_width; + int image_height; + mediapipe::Packet face_landmarks_packet; }; #ifdef __cplusplus @@ -68,10 +84,18 @@ MPFaceMeshDetectorConstruct(int numFaces, const char *face_detection_model_path, DLLEXPORT void MPFaceMeshDetectorDestruct(MPFaceMeshDetector *detector); +DLLEXPORT void MPFaceMeshDetectorDetectFaces( + MPFaceMeshDetector *detector, const cv::Mat &camera_frame, + cv::Rect *multi_face_bounding_boxes, int *numFaces); + DLLEXPORT void -MPFaceMeshDetectorProcessFrame2D(MPFaceMeshDetector *detector, - const cv::Mat &camera_frame, int *numFaces, - cv::Point2f **multi_face_landmarks); +MPFaceMeshDetectorDetect2DLandmarks(MPFaceMeshDetector *detector, + cv::Point2f **multi_face_landmarks, + int *numFaces); +DLLEXPORT void +MPFaceMeshDetectorDetect3DLandmarks(MPFaceMeshDetector *detector, + cv::Point3f **multi_face_landmarks, + int *numFaces); DLLEXPORT extern const int MPFaceMeshDetectorLandmarksNum;