Add attention model

This commit is contained in:
Pavlo-Ivan Mykhalevych 2021-12-13 15:10:03 +02:00
parent 1cf04343bc
commit d861abde7c
6 changed files with 118 additions and 33 deletions

View File

@ -27,9 +27,12 @@ int main(int argc, char **argv) {
"mediapipe/modules/face_detection/face_detection_short_range.tflite"; "mediapipe/modules/face_detection/face_detection_short_range.tflite";
constexpr char face_landmark_model_path[] = constexpr char face_landmark_model_path[] =
"mediapipe/modules/face_landmark/face_landmark.tflite"; "mediapipe/modules/face_landmark/face_landmark.tflite";
constexpr char face_landmark_with_attention_model_path[] =
"mediapipe/modules/face_landmark/face_landmark_with_attention.tflite";
constexpr bool with_attention = true;
MPFaceMeshDetector *faceMeshDetector = MPFaceMeshDetectorConstruct( MPFaceMeshDetector *faceMeshDetector = MPFaceMeshDetectorConstruct(
maxNumFaces, face_detection_model_path, face_landmark_model_path); maxNumFaces, face_detection_model_path, face_landmark_model_path, with_attention, face_landmark_with_attention_model_path);
// Allocate memory for face landmarks. // Allocate memory for face landmarks.
auto multiFaceLandmarks = new cv::Point2f *[maxNumFaces]; auto multiFaceLandmarks = new cv::Point2f *[maxNumFaces];
@ -73,6 +76,10 @@ int main(int argc, char **argv) {
auto &face_landmarks = multiFaceLandmarks[0]; auto &face_landmarks = multiFaceLandmarks[0];
auto &landmark = face_landmarks[0]; auto &landmark = face_landmarks[0];
for (auto i = 0; i < 478; ++i) {
cv::circle(camera_frame_raw, face_landmarks[i], 1.2, cv::Scalar(0, 0, 255));
}
LOG(INFO) << "First landmark: x - " << landmark.x << ", y - " LOG(INFO) << "First landmark: x - " << landmark.x << ", y - "
<< landmark.y; << landmark.y;
} }

View File

@ -1,20 +1,33 @@
#include "face_mesh_lib.h" #include "face_mesh_lib.h"
int MPFaceMeshDetector::kLandmarksNum = 468;
MPFaceMeshDetector::MPFaceMeshDetector(int numFaces, MPFaceMeshDetector::MPFaceMeshDetector(int numFaces,
const char *face_detection_model_path, const char *face_detection_model_path,
const char *face_landmark_model_path) { const char *face_landmark_model_path,
const auto status = InitFaceMeshDetector(numFaces, face_detection_model_path, bool with_attention,
face_landmark_model_path); const char *face_landmark_with_attention_model_path) {
const auto status = InitFaceMeshDetector(
numFaces,
face_detection_model_path,
face_landmark_model_path,
with_attention,
face_landmark_with_attention_model_path);
if (!status.ok()) { if (!status.ok()) {
LOG(INFO) << "Failed constructing FaceMeshDetector."; LOG(INFO) << "Failed constructing FaceMeshDetector.";
LOG(INFO) << status.message(); LOG(INFO) << status.message();
} }
if (with_attention) {
kLandmarksNum = kLandmarksNumWithAttention;
}
} }
absl::Status absl::Status
MPFaceMeshDetector::InitFaceMeshDetector(int numFaces, MPFaceMeshDetector::InitFaceMeshDetector(int numFaces,
const char *face_detection_model_path, const char *face_detection_model_path,
const char *face_landmark_model_path) { const char *face_landmark_model_path,
bool with_attention,
const char *face_landmark_with_attention_model_path) {
numFaces = std::max(numFaces, 1); numFaces = std::max(numFaces, 1);
if (face_detection_model_path == nullptr) { if (face_detection_model_path == nullptr) {
@ -22,6 +35,10 @@ MPFaceMeshDetector::InitFaceMeshDetector(int numFaces,
"mediapipe/modules/face_detection/face_detection_short_range.tflite"; "mediapipe/modules/face_detection/face_detection_short_range.tflite";
} }
if (with_attention) {
face_landmark_model_path = face_landmark_with_attention_model_path;
}
if (face_landmark_model_path == nullptr) { if (face_landmark_model_path == nullptr) {
face_landmark_model_path = face_landmark_model_path =
"mediapipe/modules/face_landmark/face_landmark.tflite"; "mediapipe/modules/face_landmark/face_landmark.tflite";
@ -30,6 +47,8 @@ MPFaceMeshDetector::InitFaceMeshDetector(int numFaces,
// Prepare graph config. // Prepare graph config.
auto preparedGraphConfig = absl::StrReplaceAll( auto preparedGraphConfig = absl::StrReplaceAll(
graphConfig, {{"$numFaces", std::to_string(numFaces)}}); graphConfig, {{"$numFaces", std::to_string(numFaces)}});
preparedGraphConfig = with_attention ? absl::StrReplaceAll( preparedGraphConfig, { {"$with_attention", "true"} }) :
absl::StrReplaceAll( preparedGraphConfig, { {"$with_attention", "false"} });
preparedGraphConfig = absl::StrReplaceAll( preparedGraphConfig = absl::StrReplaceAll(
preparedGraphConfig, preparedGraphConfig,
{{"$faceDetectionModelPath", face_detection_model_path}}); {{"$faceDetectionModelPath", face_detection_model_path}});
@ -268,10 +287,13 @@ void MPFaceMeshDetector::DetectLandmarks(cv::Point3f **multi_face_landmarks,
extern "C" { extern "C" {
DLLEXPORT MPFaceMeshDetector * DLLEXPORT MPFaceMeshDetector *
MPFaceMeshDetectorConstruct(int numFaces, const char *face_detection_model_path, MPFaceMeshDetectorConstruct(int numFaces,
const char *face_landmark_model_path) { const char* face_detection_model_path,
const char* face_landmark_model_path,
bool with_attention,
const char* face_landmark_model_with_attention_path){
return new MPFaceMeshDetector(numFaces, face_detection_model_path, return new MPFaceMeshDetector(numFaces, face_detection_model_path,
face_landmark_model_path); face_landmark_model_path, with_attention, face_landmark_model_with_attention_path);
} }
DLLEXPORT void MPFaceMeshDetectorDestruct(MPFaceMeshDetector *detector) { DLLEXPORT void MPFaceMeshDetectorDestruct(MPFaceMeshDetector *detector) {
@ -331,10 +353,12 @@ node {
# Defines side packets for further use in the graph. # Defines side packets for further use in the graph.
node { node {
calculator: "ConstantSidePacketCalculator" calculator: "ConstantSidePacketCalculator"
output_side_packet: "PACKET:num_faces" output_side_packet: "PACKET:0:num_faces"
output_side_packet: "PACKET:1:with_attention"
node_options: { node_options: {
[type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: { [type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
packet { int_value: $numFaces } packet { int_value: $numFaces }
packet { bool_value: $with_attention }
} }
} }
} }
@ -374,6 +398,7 @@ node {
input_side_packet: "MODEL_BLOB:face_detection_model_blob" input_side_packet: "MODEL_BLOB:face_detection_model_blob"
output_side_packet: "MODEL:face_detection_model" output_side_packet: "MODEL:face_detection_model"
} }
node { node {
calculator: "TfLiteModelCalculator" calculator: "TfLiteModelCalculator"
input_side_packet: "MODEL_BLOB:face_landmark_model_blob" input_side_packet: "MODEL_BLOB:face_landmark_model_blob"
@ -388,6 +413,7 @@ node {
input_side_packet: "NUM_FACES:num_faces" input_side_packet: "NUM_FACES:num_faces"
input_side_packet: "MODEL:0:face_detection_model" input_side_packet: "MODEL:0:face_detection_model"
input_side_packet: "MODEL:1:face_landmark_model" input_side_packet: "MODEL:1:face_landmark_model"
input_side_packet: "WITH_ATTENTION:with_attention"
output_stream: "LANDMARKS:multi_face_landmarks" output_stream: "LANDMARKS:multi_face_landmarks"
output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks" output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
output_stream: "DETECTIONS:face_detections" output_stream: "DETECTIONS:face_detections"

View File

@ -31,25 +31,32 @@
class MPFaceMeshDetector { class MPFaceMeshDetector {
public: public:
MPFaceMeshDetector(int numFaces, const char *face_detection_model_path, MPFaceMeshDetector(int numFaces,
const char *face_landmark_model_path); const char *face_detection_model_path,
const char *face_landmark_model_path,
bool with_attention,
const char* face_landmark_model_with_attention_path);
void DetectFaces(const cv::Mat &camera_frame, void DetectFaces(const cv::Mat &camera_frame,
cv::Rect *multi_face_bounding_boxes, int *numFaces); cv::Rect *multi_face_bounding_boxes, int *numFaces);
void DetectLandmarks(cv::Point2f **multi_face_landmarks, int *numFaces); void DetectLandmarks(cv::Point2f **multi_face_landmarks, int *numFaces);
void DetectLandmarks(cv::Point3f **multi_face_landmarks, int *numFaces); void DetectLandmarks(cv::Point3f **multi_face_landmarks, int *numFaces);
static constexpr auto kLandmarksNum = 468; static constexpr auto kLandmarksNumWithoutAttention = 468;
static constexpr auto kLandmarksNumWithAttention = 478;
static int kLandmarksNum;
private: private:
absl::Status InitFaceMeshDetector(int numFaces, absl::Status InitFaceMeshDetector(int numFaces,
const char *face_detection_model_path, const char *face_detection_model_path,
const char *face_landmark_model_path); const char *face_landmark_model_path,
bool with_attention,
const char* face_landmark_model_with_attention_path);
absl::Status DetectFacesWithStatus(const cv::Mat &camera_frame, absl::Status DetectFacesWithStatus(const cv::Mat &camera_frame,
cv::Rect *multi_face_bounding_boxes, cv::Rect *multi_face_bounding_boxes,
int *numFaces); int *numFaces);
absl::Status DetectLandmarksWithStatus(cv::Point2f **multi_face_landmarks); absl::Status DetectLandmarksWithStatus(cv::Point2f **multi_face_landmarks);
absl::Status DetectLandmarksWithStatus(cv::Point3f **multi_face_landmarks); absl::Status DetectLandmarksWithStatus(cv::Point3f **multi_face_landmarks);
@ -79,8 +86,12 @@ extern "C" {
#endif #endif
DLLEXPORT MPFaceMeshDetector * DLLEXPORT MPFaceMeshDetector *
MPFaceMeshDetectorConstruct(int numFaces, const char *face_detection_model_path, MPFaceMeshDetectorConstruct(int numFaces,
const char *face_landmark_model_path); const char *face_detection_model_path,
const char *face_landmark_model_path,
bool with_attention = true,
const char* face_landmark_model_with_attention_path = "mediapipe/modules/face_landmark/face_landmark_with_attention.tflite"
);
DLLEXPORT void MPFaceMeshDetectorDestruct(MPFaceMeshDetector *detector); DLLEXPORT void MPFaceMeshDetectorDestruct(MPFaceMeshDetector *detector);

View File

@ -47,14 +47,18 @@ mediapipe_simple_subgraph(
graph = "face_landmark_side_model_cpu.pbtxt", graph = "face_landmark_side_model_cpu.pbtxt",
register_as = "FaceLandmarkSideModelCpu", register_as = "FaceLandmarkSideModelCpu",
deps = [ deps = [
":tensors_to_face_landmarks",
":tensors_to_face_landmarks_with_attention",
"//mediapipe/calculators/core:gate_calculator", "//mediapipe/calculators/core:gate_calculator",
"//mediapipe/calculators/core:split_vector_calculator", "//mediapipe/calculators/core:split_vector_calculator",
"//mediapipe/calculators/tensor:image_to_tensor_calculator", "//mediapipe/calculators/tensor:image_to_tensor_calculator",
"//mediapipe/calculators/tensor:inference_calculator", "//mediapipe/calculators/tensor:inference_calculator",
"//mediapipe/calculators/tensor:tensors_to_floats_calculator", "//mediapipe/calculators/tensor:tensors_to_floats_calculator",
"//mediapipe/calculators/tensor:tensors_to_landmarks_calculator", "//mediapipe/calculators/tensor:tensors_to_landmarks_calculator",
"//mediapipe/calculators/tflite:tflite_custom_op_resolver_calculator",
"//mediapipe/calculators/util:landmark_projection_calculator", "//mediapipe/calculators/util:landmark_projection_calculator",
"//mediapipe/calculators/util:thresholding_calculator", "//mediapipe/calculators/util:thresholding_calculator",
"//mediapipe/framework/tool:switch_container",
], ],
) )

View File

@ -31,8 +31,12 @@ input_side_packet: "MODEL:0:face_detection_model"
# NOTE: mediapipe/modules/face_landmark/face_landmark.tflite model # NOTE: mediapipe/modules/face_landmark/face_landmark.tflite model
# only, can be passed here, otherwise - results are undefined. # only, can be passed here, otherwise - results are undefined.
input_side_packet: "MODEL:1:face_landmark_model" input_side_packet: "MODEL:1:face_landmark_model"
# Whether to run face mesh model with attention on lips and eyes. (bool)
# Collection of detected/predicted faces, each represented as a list of 468 face # Attention provides more accuracy on lips and eye regions as well as iris
# landmarks.
input_side_packet: "WITH_ATTENTION:with_attention"
# Collection of detected/predicted faces; the landmark count depends on with_attention: if true, each face is represented as a list of 478 face
# landmarks; if false, each is represented as a list of 468 face
# landmarks. (std::vector<NormalizedLandmarkList>) # landmarks. (std::vector<NormalizedLandmarkList>)
# NOTE: there will not be an output packet in the LANDMARKS stream for this # NOTE: there will not be an output packet in the LANDMARKS stream for this
# particular timestamp if none of faces detected. However, the MediaPipe # particular timestamp if none of faces detected. However, the MediaPipe
@ -207,6 +211,7 @@ node {
input_stream: "IMAGE:landmarks_loop_image" input_stream: "IMAGE:landmarks_loop_image"
input_stream: "ROI:face_rect" input_stream: "ROI:face_rect"
input_side_packet: "MODEL:face_landmark_model" input_side_packet: "MODEL:face_landmark_model"
input_side_packet: "WITH_ATTENTION:with_attention"
output_stream: "LANDMARKS:face_landmarks" output_stream: "LANDMARKS:face_landmarks"
} }

View File

@ -29,7 +29,10 @@ input_stream: "ROI:roi"
# only, can be passed here, otherwise - results are undefined. # only, can be passed here, otherwise - results are undefined.
input_side_packet: "MODEL:face_landmark_model" input_side_packet: "MODEL:face_landmark_model"
# Whether to run face mesh model with attention on lips and eyes. (bool)
# Attention provides more accuracy on lips and eye regions as well as iris
# landmarks.
input_side_packet: "WITH_ATTENTION:with_attention"
# 468 face landmarks within the given ROI. (NormalizedLandmarkList) # 468 face landmarks within the given ROI. (NormalizedLandmarkList)
# NOTE: if a face is not present within the given ROI, for this particular # NOTE: if a face is not present within the given ROI, for this particular
# timestamp there will not be an output packet in the LANDMARKS stream. However, # timestamp there will not be an output packet in the LANDMARKS stream. However,
@ -55,31 +58,56 @@ node: {
} }
} }
# Generates a single side packet containing a TensorFlow Lite op resolver that
# supports custom ops needed by the model used in this graph.
node {
calculator: "TfLiteCustomOpResolverCalculator"
output_side_packet: "op_resolver"
}
# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a # Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a
# vector of tensors representing, for instance, detection boxes/keypoints and # vector of tensors representing, for instance, detection boxes/keypoints and
# scores. # scores.
node { node {
calculator: "InferenceCalculator" calculator: "InferenceCalculator"
input_stream: "TENSORS:input_tensors" input_stream: "TENSORS:input_tensors"
output_stream: "TENSORS:output_tensors"
input_side_packet: "MODEL:face_landmark_model" input_side_packet: "MODEL:face_landmark_model"
options { input_side_packet: "CUSTOM_OP_RESOLVER:op_resolver"
output_stream: "TENSORS:output_tensors"
options: {
[mediapipe.InferenceCalculatorOptions.ext] { [mediapipe.InferenceCalculatorOptions.ext] {
delegate { tflite {} } delegate { tflite {} }
} }
} }
} }
# Splits a vector of tensors into multiple vectors. # Splits a vector of tensors into landmark tensors and face flag tensor.
node { node {
calculator: "SplitTensorVectorCalculator" calculator: "SwitchContainer"
input_side_packet: "ENABLE:with_attention"
input_stream: "output_tensors" input_stream: "output_tensors"
output_stream: "landmark_tensors" output_stream: "landmark_tensors"
output_stream: "face_flag_tensor" output_stream: "face_flag_tensor"
options: { options: {
[mediapipe.SplitVectorCalculatorOptions.ext] { [mediapipe.SwitchContainerOptions.ext] {
ranges: { begin: 0 end: 1 } contained_node: {
ranges: { begin: 1 end: 2 } calculator: "SplitTensorVectorCalculator"
options: {
[mediapipe.SplitVectorCalculatorOptions.ext] {
ranges: { begin: 0 end: 1 }
ranges: { begin: 1 end: 2 }
}
}
}
contained_node: {
calculator: "SplitTensorVectorCalculator"
options: {
[mediapipe.SplitVectorCalculatorOptions.ext] {
ranges: { begin: 0 end: 6 }
ranges: { begin: 6 end: 7 }
}
}
}
} }
} }
} }
@ -121,14 +149,18 @@ node {
# Decodes the landmark tensors into a vector of landmarks, where the landmark # Decodes the landmark tensors into a vector of landmarks, where the landmark
# coordinates are normalized by the size of the input image to the model. # coordinates are normalized by the size of the input image to the model.
node { node {
calculator: "TensorsToLandmarksCalculator" calculator: "SwitchContainer"
input_side_packet: "ENABLE:with_attention"
input_stream: "TENSORS:ensured_landmark_tensors" input_stream: "TENSORS:ensured_landmark_tensors"
output_stream: "NORM_LANDMARKS:landmarks" output_stream: "LANDMARKS:landmarks"
options: { options: {
[mediapipe.TensorsToLandmarksCalculatorOptions.ext] { [mediapipe.SwitchContainerOptions.ext] {
num_landmarks: 468 contained_node: {
input_image_width: 192 calculator: "TensorsToFaceLandmarks"
input_image_height: 192 }
contained_node: {
calculator: "TensorsToFaceLandmarksWithAttention"
}
} }
} }
} }