From d861abde7cf4656895d5950d2dbe40c7480c08c5 Mon Sep 17 00:00:00 2001 From: Pavlo-Ivan Mykhalevych Date: Mon, 13 Dec 2021 15:10:03 +0200 Subject: [PATCH] Add attention model --- .../desktop/face_mesh_dll/face_mesh_cpu.cpp | 9 ++- .../desktop/face_mesh_dll/face_mesh_lib.cpp | 42 ++++++++++--- .../desktop/face_mesh_dll/face_mesh_lib.h | 27 ++++++--- mediapipe/modules/face_landmark/BUILD | 4 ++ ...ont_side_model_cpu_with_face_counter.pbtxt | 9 ++- .../face_landmark_side_model_cpu.pbtxt | 60 ++++++++++++++----- 6 files changed, 118 insertions(+), 33 deletions(-) diff --git a/mediapipe/examples/desktop/face_mesh_dll/face_mesh_cpu.cpp b/mediapipe/examples/desktop/face_mesh_dll/face_mesh_cpu.cpp index 83762a1a1..a982c133d 100644 --- a/mediapipe/examples/desktop/face_mesh_dll/face_mesh_cpu.cpp +++ b/mediapipe/examples/desktop/face_mesh_dll/face_mesh_cpu.cpp @@ -27,9 +27,12 @@ int main(int argc, char **argv) { "mediapipe/modules/face_detection/face_detection_short_range.tflite"; constexpr char face_landmark_model_path[] = "mediapipe/modules/face_landmark/face_landmark.tflite"; + constexpr char face_landmark_with_attention_model_path[] = + "mediapipe/modules/face_landmark/face_landmark_with_attention.tflite"; + constexpr bool with_attention = true; MPFaceMeshDetector *faceMeshDetector = MPFaceMeshDetectorConstruct( - maxNumFaces, face_detection_model_path, face_landmark_model_path); + maxNumFaces, face_detection_model_path, face_landmark_model_path, with_attention, face_landmark_with_attention_model_path); // Allocate memory for face landmarks. 
auto multiFaceLandmarks = new cv::Point2f *[maxNumFaces]; @@ -73,6 +76,10 @@ int main(int argc, char **argv) { auto &face_landmarks = multiFaceLandmarks[0]; auto &landmark = face_landmarks[0]; + for (auto i = 0; i < 478; ++i) { + cv::circle(camera_frame_raw, face_landmarks[i], 1.2, cv::Scalar(0, 0, 255)); + } + LOG(INFO) << "First landmark: x - " << landmark.x << ", y - " << landmark.y; } diff --git a/mediapipe/examples/desktop/face_mesh_dll/face_mesh_lib.cpp b/mediapipe/examples/desktop/face_mesh_dll/face_mesh_lib.cpp index b3082e58c..0fe85a49e 100644 --- a/mediapipe/examples/desktop/face_mesh_dll/face_mesh_lib.cpp +++ b/mediapipe/examples/desktop/face_mesh_dll/face_mesh_lib.cpp @@ -1,20 +1,33 @@ #include "face_mesh_lib.h" +int MPFaceMeshDetector::kLandmarksNum = 468; + MPFaceMeshDetector::MPFaceMeshDetector(int numFaces, const char *face_detection_model_path, - const char *face_landmark_model_path) { - const auto status = InitFaceMeshDetector(numFaces, face_detection_model_path, - face_landmark_model_path); + const char *face_landmark_model_path, + bool with_attention, + const char *face_landmark_with_attention_model_path) { + const auto status = InitFaceMeshDetector( + numFaces, + face_detection_model_path, + face_landmark_model_path, + with_attention, + face_landmark_with_attention_model_path); if (!status.ok()) { LOG(INFO) << "Failed constructing FaceMeshDetector."; LOG(INFO) << status.message(); } + if (with_attention) { + kLandmarksNum = kLandmarksNumWithAttention; + } } absl::Status MPFaceMeshDetector::InitFaceMeshDetector(int numFaces, const char *face_detection_model_path, - const char *face_landmark_model_path) { + const char *face_landmark_model_path, + bool with_attention, + const char *face_landmark_with_attention_model_path) { numFaces = std::max(numFaces, 1); if (face_detection_model_path == nullptr) { @@ -22,6 +35,10 @@ MPFaceMeshDetector::InitFaceMeshDetector(int numFaces, "mediapipe/modules/face_detection/face_detection_short_range.tflite"; } + 
if (with_attention) { + face_landmark_model_path = face_landmark_with_attention_model_path; + } + if (face_landmark_model_path == nullptr) { face_landmark_model_path = "mediapipe/modules/face_landmark/face_landmark.tflite"; @@ -30,6 +47,8 @@ MPFaceMeshDetector::InitFaceMeshDetector(int numFaces, // Prepare graph config. auto preparedGraphConfig = absl::StrReplaceAll( graphConfig, {{"$numFaces", std::to_string(numFaces)}}); + preparedGraphConfig = with_attention ? absl::StrReplaceAll( preparedGraphConfig, { {"$with_attention", "true"} }) : + absl::StrReplaceAll( preparedGraphConfig, { {"$with_attention", "false"} }); preparedGraphConfig = absl::StrReplaceAll( preparedGraphConfig, {{"$faceDetectionModelPath", face_detection_model_path}}); @@ -268,10 +287,13 @@ void MPFaceMeshDetector::DetectLandmarks(cv::Point3f **multi_face_landmarks, extern "C" { DLLEXPORT MPFaceMeshDetector * -MPFaceMeshDetectorConstruct(int numFaces, const char *face_detection_model_path, - const char *face_landmark_model_path) { +MPFaceMeshDetectorConstruct(int numFaces, + const char* face_detection_model_path, + const char* face_landmark_model_path, + bool with_attention, + const char* face_landmark_model_with_attention_path){ return new MPFaceMeshDetector(numFaces, face_detection_model_path, - face_landmark_model_path); + face_landmark_model_path, with_attention, face_landmark_model_with_attention_path); } DLLEXPORT void MPFaceMeshDetectorDestruct(MPFaceMeshDetector *detector) { @@ -331,10 +353,12 @@ node { # Defines side packets for further use in the graph. 
node { calculator: "ConstantSidePacketCalculator" - output_side_packet: "PACKET:num_faces" + output_side_packet: "PACKET:0:num_faces" + output_side_packet: "PACKET:1:with_attention" node_options: { [type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: { packet { int_value: $numFaces } + packet { bool_value: $with_attention } } } } @@ -374,6 +398,7 @@ node { input_side_packet: "MODEL_BLOB:face_detection_model_blob" output_side_packet: "MODEL:face_detection_model" } + node { calculator: "TfLiteModelCalculator" input_side_packet: "MODEL_BLOB:face_landmark_model_blob" @@ -388,6 +413,7 @@ node { input_side_packet: "NUM_FACES:num_faces" input_side_packet: "MODEL:0:face_detection_model" input_side_packet: "MODEL:1:face_landmark_model" + input_side_packet: "WITH_ATTENTION:with_attention" output_stream: "LANDMARKS:multi_face_landmarks" output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks" output_stream: "DETECTIONS:face_detections" diff --git a/mediapipe/examples/desktop/face_mesh_dll/face_mesh_lib.h b/mediapipe/examples/desktop/face_mesh_dll/face_mesh_lib.h index 6705b42e6..84556fb4d 100644 --- a/mediapipe/examples/desktop/face_mesh_dll/face_mesh_lib.h +++ b/mediapipe/examples/desktop/face_mesh_dll/face_mesh_lib.h @@ -31,25 +31,32 @@ class MPFaceMeshDetector { public: - MPFaceMeshDetector(int numFaces, const char *face_detection_model_path, - const char *face_landmark_model_path); + MPFaceMeshDetector(int numFaces, + const char *face_detection_model_path, + const char *face_landmark_model_path, + bool with_attention, + const char* face_landmark_model_with_attention_path); void DetectFaces(const cv::Mat &camera_frame, cv::Rect *multi_face_bounding_boxes, int *numFaces); - + void DetectLandmarks(cv::Point2f **multi_face_landmarks, int *numFaces); void DetectLandmarks(cv::Point3f **multi_face_landmarks, int *numFaces); - static constexpr auto kLandmarksNum = 468; + static constexpr auto kLandmarksNumWithoutAttention = 468; + static constexpr auto 
kLandmarksNumWithAttention = 478; + static int kLandmarksNum; private: absl::Status InitFaceMeshDetector(int numFaces, const char *face_detection_model_path, - const char *face_landmark_model_path); + const char *face_landmark_model_path, + bool with_attention, + const char* face_landmark_model_with_attention_path); absl::Status DetectFacesWithStatus(const cv::Mat &camera_frame, cv::Rect *multi_face_bounding_boxes, int *numFaces); - + absl::Status DetectLandmarksWithStatus(cv::Point2f **multi_face_landmarks); absl::Status DetectLandmarksWithStatus(cv::Point3f **multi_face_landmarks); @@ -79,8 +86,12 @@ extern "C" { #endif DLLEXPORT MPFaceMeshDetector * -MPFaceMeshDetectorConstruct(int numFaces, const char *face_detection_model_path, - const char *face_landmark_model_path); +MPFaceMeshDetectorConstruct(int numFaces, + const char *face_detection_model_path, + const char *face_landmark_model_path, + bool with_attention = true, + const char* face_landmark_model_with_attention_path = "mediapipe/modules/face_landmark/face_landmark_with_attention.tflite" + ); DLLEXPORT void MPFaceMeshDetectorDestruct(MPFaceMeshDetector *detector); diff --git a/mediapipe/modules/face_landmark/BUILD b/mediapipe/modules/face_landmark/BUILD index ba2a53198..de16605b8 100644 --- a/mediapipe/modules/face_landmark/BUILD +++ b/mediapipe/modules/face_landmark/BUILD @@ -47,14 +47,18 @@ mediapipe_simple_subgraph( graph = "face_landmark_side_model_cpu.pbtxt", register_as = "FaceLandmarkSideModelCpu", deps = [ + ":tensors_to_face_landmarks", + ":tensors_to_face_landmarks_with_attention", "//mediapipe/calculators/core:gate_calculator", "//mediapipe/calculators/core:split_vector_calculator", "//mediapipe/calculators/tensor:image_to_tensor_calculator", "//mediapipe/calculators/tensor:inference_calculator", "//mediapipe/calculators/tensor:tensors_to_floats_calculator", "//mediapipe/calculators/tensor:tensors_to_landmarks_calculator", + "//mediapipe/calculators/tflite:tflite_custom_op_resolver_calculator",
"//mediapipe/calculators/util:landmark_projection_calculator", "//mediapipe/calculators/util:thresholding_calculator", + "//mediapipe/framework/tool:switch_container", ], ) diff --git a/mediapipe/modules/face_landmark/face_landmark_front_side_model_cpu_with_face_counter.pbtxt b/mediapipe/modules/face_landmark/face_landmark_front_side_model_cpu_with_face_counter.pbtxt index dc83f17b7..2d82b9883 100644 --- a/mediapipe/modules/face_landmark/face_landmark_front_side_model_cpu_with_face_counter.pbtxt +++ b/mediapipe/modules/face_landmark/face_landmark_front_side_model_cpu_with_face_counter.pbtxt @@ -31,8 +31,12 @@ input_side_packet: "MODEL:0:face_detection_model" # NOTE: mediapipe/modules/face_landmark/face_landmark.tflite model # only, can be passed here, otherwise - results are undefined. input_side_packet: "MODEL:1:face_landmark_model" - -# Collection of detected/predicted faces, each represented as a list of 468 face +# Whether to run face mesh model with attention on lips and eyes. (bool) +# Attention provides more accuracy on lips and eye regions as well as iris +# landmarks. +input_side_packet: "WITH_ATTENTION:with_attention" +# Collection of detected/predicted faces. If with_attention is true, each is represented as a list of 478 face +# landmarks; if false, each is represented as a list of 468 face # landmarks. (std::vector) # NOTE: there will not be an output packet in the LANDMARKS stream for this # particular timestamp if none of faces detected.
However, the MediaPipe @@ -207,6 +211,7 @@ node { input_stream: "IMAGE:landmarks_loop_image" input_stream: "ROI:face_rect" input_side_packet: "MODEL:face_landmark_model" + input_side_packet: "WITH_ATTENTION:with_attention" output_stream: "LANDMARKS:face_landmarks" } diff --git a/mediapipe/modules/face_landmark/face_landmark_side_model_cpu.pbtxt b/mediapipe/modules/face_landmark/face_landmark_side_model_cpu.pbtxt index d8537fd82..f1d8b81a6 100644 --- a/mediapipe/modules/face_landmark/face_landmark_side_model_cpu.pbtxt +++ b/mediapipe/modules/face_landmark/face_landmark_side_model_cpu.pbtxt @@ -29,7 +29,10 @@ input_stream: "ROI:roi" # only, can be passed here, otherwise - results are undefined. input_side_packet: "MODEL:face_landmark_model" - +# Whether to run face mesh model with attention on lips and eyes. (bool) +# Attention provides more accuracy on lips and eye regions as well as iris +# landmarks. +input_side_packet: "WITH_ATTENTION:with_attention" # 468 face landmarks within the given ROI. (NormalizedLandmarkList) # NOTE: if a face is not present within the given ROI, for this particular # timestamp there will not be an output packet in the LANDMARKS stream. However, @@ -55,31 +58,56 @@ node: { } } +# Generates a single side packet containing a TensorFlow Lite op resolver that +# supports custom ops needed by the model used in this graph. +node { + calculator: "TfLiteCustomOpResolverCalculator" + output_side_packet: "op_resolver" +} + # Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a # vector of tensors representing, for instance, detection boxes/keypoints and # scores. 
node { calculator: "InferenceCalculator" input_stream: "TENSORS:input_tensors" - output_stream: "TENSORS:output_tensors" input_side_packet: "MODEL:face_landmark_model" - options { + input_side_packet: "CUSTOM_OP_RESOLVER:op_resolver" + output_stream: "TENSORS:output_tensors" + options: { [mediapipe.InferenceCalculatorOptions.ext] { delegate { tflite {} } } } } -# Splits a vector of tensors into multiple vectors. +# Splits a vector of tensors into landmark tensors and face flag tensor. node { - calculator: "SplitTensorVectorCalculator" + calculator: "SwitchContainer" + input_side_packet: "ENABLE:with_attention" input_stream: "output_tensors" output_stream: "landmark_tensors" output_stream: "face_flag_tensor" options: { - [mediapipe.SplitVectorCalculatorOptions.ext] { - ranges: { begin: 0 end: 1 } - ranges: { begin: 1 end: 2 } + [mediapipe.SwitchContainerOptions.ext] { + contained_node: { + calculator: "SplitTensorVectorCalculator" + options: { + [mediapipe.SplitVectorCalculatorOptions.ext] { + ranges: { begin: 0 end: 1 } + ranges: { begin: 1 end: 2 } + } + } + } + contained_node: { + calculator: "SplitTensorVectorCalculator" + options: { + [mediapipe.SplitVectorCalculatorOptions.ext] { + ranges: { begin: 0 end: 6 } + ranges: { begin: 6 end: 7 } + } + } + } } } } @@ -121,14 +149,18 @@ node { # Decodes the landmark tensors into a vector of landmarks, where the landmark # coordinates are normalized by the size of the input image to the model. 
node { - calculator: "TensorsToLandmarksCalculator" + calculator: "SwitchContainer" + input_side_packet: "ENABLE:with_attention" input_stream: "TENSORS:ensured_landmark_tensors" - output_stream: "NORM_LANDMARKS:landmarks" + output_stream: "LANDMARKS:landmarks" options: { - [mediapipe.TensorsToLandmarksCalculatorOptions.ext] { - num_landmarks: 468 - input_image_width: 192 - input_image_height: 192 + [mediapipe.SwitchContainerOptions.ext] { + contained_node: { + calculator: "TensorsToFaceLandmarks" + } + contained_node: { + calculator: "TensorsToFaceLandmarksWithAttention" + } } } }