Add attention model

Pavlo-Ivan Mykhalevych 2021-12-13 15:10:03 +02:00
parent 1cf04343bc
commit d861abde7c
6 changed files with 118 additions and 33 deletions

View File

@@ -27,9 +27,12 @@ int main(int argc, char **argv) {
"mediapipe/modules/face_detection/face_detection_short_range.tflite";
constexpr char face_landmark_model_path[] =
"mediapipe/modules/face_landmark/face_landmark.tflite";
constexpr char face_landmark_with_attention_model_path[] =
"mediapipe/modules/face_landmark/face_landmark_with_attention.tflite";
constexpr bool with_attention = true;
MPFaceMeshDetector *faceMeshDetector = MPFaceMeshDetectorConstruct(
maxNumFaces, face_detection_model_path, face_landmark_model_path);
maxNumFaces, face_detection_model_path, face_landmark_model_path, with_attention, face_landmark_with_attention_model_path);
// Allocate memory for face landmarks.
auto multiFaceLandmarks = new cv::Point2f *[maxNumFaces];
@@ -73,6 +76,10 @@ int main(int argc, char **argv) {
auto &face_landmarks = multiFaceLandmarks[0];
auto &landmark = face_landmarks[0];
for (auto i = 0; i < 478; ++i) {
cv::circle(camera_frame_raw, face_landmarks[i], 1.2, cv::Scalar(0, 0, 255));
}
LOG(INFO) << "First landmark: x - " << landmark.x << ", y - "
<< landmark.y;
}
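The drawing loop above hardcodes 478, which only matches the attention model. Since the detector exposes its landmark count as MPFaceMeshDetector::kLandmarksNum (see the header change below), a caller that toggles with_attention could size and iterate its buffers from that value instead. A minimal sketch of that pattern; the demo's actual allocation code sits outside this hunk, so this is illustrative only:

```cpp
// Size per-face landmark buffers from the detector's landmark count rather
// than a hardcoded 478. kLandmarksNum must be read after the detector is
// constructed, since the constructor raises it to 478 for attention models.
for (int i = 0; i < maxNumFaces; ++i) {
  multiFaceLandmarks[i] = new cv::Point2f[MPFaceMeshDetector::kLandmarksNum];
}
```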

View File

@@ -1,20 +1,33 @@
#include "face_mesh_lib.h"
int MPFaceMeshDetector::kLandmarksNum = 468;
MPFaceMeshDetector::MPFaceMeshDetector(int numFaces,
const char *face_detection_model_path,
const char *face_landmark_model_path) {
const auto status = InitFaceMeshDetector(numFaces, face_detection_model_path,
face_landmark_model_path);
const char *face_landmark_model_path,
bool with_attention,
const char *face_landmark_with_attention_model_path) {
const auto status = InitFaceMeshDetector(
numFaces,
face_detection_model_path,
face_landmark_model_path,
with_attention,
face_landmark_with_attention_model_path);
if (!status.ok()) {
LOG(INFO) << "Failed constructing FaceMeshDetector.";
LOG(INFO) << status.message();
}
// kLandmarksNum is a class-wide static; set it unconditionally so that a
// detector constructed without attention resets the count correctly even
// after an attention-enabled one.
kLandmarksNum = with_attention ? kLandmarksNumWithAttention
: kLandmarksNumWithoutAttention;
}
absl::Status
MPFaceMeshDetector::InitFaceMeshDetector(int numFaces,
const char *face_detection_model_path,
const char *face_landmark_model_path) {
const char *face_landmark_model_path,
bool with_attention,
const char *face_landmark_with_attention_model_path) {
numFaces = std::max(numFaces, 1);
if (face_detection_model_path == nullptr) {
@@ -22,6 +35,10 @@ MPFaceMeshDetector::InitFaceMeshDetector(int numFaces,
"mediapipe/modules/face_detection/face_detection_short_range.tflite";
}
if (with_attention) {
face_landmark_model_path = face_landmark_with_attention_model_path;
}
if (face_landmark_model_path == nullptr) {
face_landmark_model_path =
"mediapipe/modules/face_landmark/face_landmark.tflite";
@@ -30,6 +47,8 @@ MPFaceMeshDetector::InitFaceMeshDetector(int numFaces,
// Prepare graph config.
auto preparedGraphConfig = absl::StrReplaceAll(
graphConfig, {{"$numFaces", std::to_string(numFaces)}});
preparedGraphConfig = absl::StrReplaceAll(
preparedGraphConfig,
{{"$with_attention", with_attention ? "true" : "false"}});
preparedGraphConfig = absl::StrReplaceAll(
preparedGraphConfig,
{{"$faceDetectionModelPath", face_detection_model_path}});
@@ -268,10 +287,13 @@ void MPFaceMeshDetector::DetectLandmarks(cv::Point3f **multi_face_landmarks,
extern "C" {
DLLEXPORT MPFaceMeshDetector *
MPFaceMeshDetectorConstruct(int numFaces, const char *face_detection_model_path,
const char *face_landmark_model_path) {
MPFaceMeshDetectorConstruct(int numFaces,
const char* face_detection_model_path,
const char* face_landmark_model_path,
bool with_attention,
const char* face_landmark_model_with_attention_path) {
return new MPFaceMeshDetector(numFaces, face_detection_model_path,
face_landmark_model_path);
face_landmark_model_path, with_attention, face_landmark_model_with_attention_path);
}
DLLEXPORT void MPFaceMeshDetectorDestruct(MPFaceMeshDetector *detector) {
@@ -331,10 +353,12 @@ node {
# Defines side packets for further use in the graph.
node {
calculator: "ConstantSidePacketCalculator"
output_side_packet: "PACKET:num_faces"
output_side_packet: "PACKET:0:num_faces"
output_side_packet: "PACKET:1:with_attention"
node_options: {
[type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
packet { int_value: $numFaces }
packet { bool_value: $with_attention }
}
}
}
@@ -374,6 +398,7 @@ node {
input_side_packet: "MODEL_BLOB:face_detection_model_blob"
output_side_packet: "MODEL:face_detection_model"
}
node {
calculator: "TfLiteModelCalculator"
input_side_packet: "MODEL_BLOB:face_landmark_model_blob"
@@ -388,6 +413,7 @@ node {
input_side_packet: "NUM_FACES:num_faces"
input_side_packet: "MODEL:0:face_detection_model"
input_side_packet: "MODEL:1:face_landmark_model"
input_side_packet: "WITH_ATTENTION:with_attention"
output_stream: "LANDMARKS:multi_face_landmarks"
output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
output_stream: "DETECTIONS:face_detections"

View File

@@ -31,8 +31,11 @@
class MPFaceMeshDetector {
public:
MPFaceMeshDetector(int numFaces, const char *face_detection_model_path,
const char *face_landmark_model_path);
MPFaceMeshDetector(int numFaces,
const char *face_detection_model_path,
const char *face_landmark_model_path,
bool with_attention,
const char* face_landmark_model_with_attention_path);
void DetectFaces(const cv::Mat &camera_frame,
cv::Rect *multi_face_bounding_boxes, int *numFaces);
@@ -40,12 +43,16 @@ public:
void DetectLandmarks(cv::Point2f **multi_face_landmarks, int *numFaces);
void DetectLandmarks(cv::Point3f **multi_face_landmarks, int *numFaces);
static constexpr auto kLandmarksNum = 468;
static constexpr auto kLandmarksNumWithoutAttention = 468;
static constexpr auto kLandmarksNumWithAttention = 478;
static int kLandmarksNum;
private:
absl::Status InitFaceMeshDetector(int numFaces,
const char *face_detection_model_path,
const char *face_landmark_model_path);
const char *face_landmark_model_path,
bool with_attention,
const char* face_landmark_model_with_attention_path);
absl::Status DetectFacesWithStatus(const cv::Mat &camera_frame,
cv::Rect *multi_face_bounding_boxes,
int *numFaces);
@@ -79,8 +86,12 @@ extern "C" {
#endif
DLLEXPORT MPFaceMeshDetector *
MPFaceMeshDetectorConstruct(int numFaces, const char *face_detection_model_path,
const char *face_landmark_model_path);
MPFaceMeshDetectorConstruct(int numFaces,
const char *face_detection_model_path,
const char *face_landmark_model_path,
bool with_attention = true,
const char* face_landmark_model_with_attention_path =
"mediapipe/modules/face_landmark/face_landmark_with_attention.tflite");
DLLEXPORT void MPFaceMeshDetectorDestruct(MPFaceMeshDetector *detector);
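Worth noting: the default arguments above are a C++-only convenience; anything calling through a C ABI (e.g. P/Invoke bindings) must pass all five arguments explicitly. A minimal construction/teardown sketch against this header, with the model paths taken from the defaults above:

```cpp
#include "face_mesh_lib.h"

int main() {
  // All five arguments spelled out, as a C caller would have to do.
  MPFaceMeshDetector *detector = MPFaceMeshDetectorConstruct(
      /*numFaces=*/1,
      "mediapipe/modules/face_detection/face_detection_short_range.tflite",
      "mediapipe/modules/face_landmark/face_landmark.tflite",
      /*with_attention=*/true,
      "mediapipe/modules/face_landmark/face_landmark_with_attention.tflite");
  // ... feed frames and read landmarks here ...
  MPFaceMeshDetectorDestruct(detector);
}
```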

View File

@@ -47,14 +47,18 @@ mediapipe_simple_subgraph(
graph = "face_landmark_side_model_cpu.pbtxt",
register_as = "FaceLandmarkSideModelCpu",
deps = [
":tensors_to_face_landmarks",
":tensors_to_face_landmarks_with_attention",
"//mediapipe/calculators/core:gate_calculator",
"//mediapipe/calculators/core:split_vector_calculator",
"//mediapipe/calculators/tensor:image_to_tensor_calculator",
"//mediapipe/calculators/tensor:inference_calculator",
"//mediapipe/calculators/tensor:tensors_to_floats_calculator",
"//mediapipe/calculators/tensor:tensors_to_landmarks_calculator",
"//mediapipe/calculators/tflite:tflite_custom_op_resolver_calculator",
"//mediapipe/calculators/util:landmark_projection_calculator",
"//mediapipe/calculators/util:thresholding_calculator",
"//mediapipe/framework/tool:switch_container",
],
)

View File

@@ -31,8 +31,12 @@ input_side_packet: "MODEL:0:face_detection_model"
# NOTE: only the mediapipe/modules/face_landmark/face_landmark.tflite or
# face_landmark_with_attention.tflite models can be passed here; otherwise,
# results are undefined.
input_side_packet: "MODEL:1:face_landmark_model"
# Collection of detected/predicted faces, each represented as a list of 468 face
# Whether to run the face mesh model with attention on lips and eyes. (bool)
# Attention improves landmark accuracy in the lips and eye regions and adds
# iris landmarks.
input_side_packet: "WITH_ATTENTION:with_attention"
# Collection of detected/predicted faces, each represented as a list of face
# landmarks: 478 landmarks per face if with_attention is true, 468 if false.
# (std::vector<NormalizedLandmarkList>)
# NOTE: there will not be an output packet in the LANDMARKS stream for this
# particular timestamp if no faces are detected. However, the MediaPipe
@@ -207,6 +211,7 @@ node {
input_stream: "IMAGE:landmarks_loop_image"
input_stream: "ROI:face_rect"
input_side_packet: "MODEL:face_landmark_model"
input_side_packet: "WITH_ATTENTION:with_attention"
output_stream: "LANDMARKS:face_landmarks"
}
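The ten extra points in the attention output (478 vs. 468) are the iris landmarks; to the best of my knowledge they are appended after the 468 mesh points, so mesh indices are unchanged. A small sketch of that layout assumption:

```cpp
// Assumption (not stated in this commit): the attention model appends the
// 10 iris landmarks (5 per eye) after the 468 mesh landmarks, i.e. at
// indices 468..477.
constexpr int kMeshLandmarks = 468;
constexpr int kLandmarksWithAttention = 478;

inline bool IsIrisLandmark(int index) {
  return index >= kMeshLandmarks && index < kLandmarksWithAttention;
}
```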

View File

@@ -29,7 +29,10 @@ input_stream: "ROI:roi"
# only can be passed here; otherwise, results are undefined.
input_side_packet: "MODEL:face_landmark_model"
# Whether to run the face mesh model with attention on lips and eyes. (bool)
# Attention improves landmark accuracy in the lips and eye regions and adds
# iris landmarks.
input_side_packet: "WITH_ATTENTION:with_attention"
# 468 face landmarks (478 with attention) within the given ROI.
# (NormalizedLandmarkList)
# NOTE: if a face is not present within the given ROI, for this particular
# timestamp there will not be an output packet in the LANDMARKS stream. However,
@@ -55,33 +58,58 @@ node: {
}
}
# Generates a single side packet containing a TensorFlow Lite op resolver that
# supports custom ops needed by the model used in this graph.
node {
calculator: "TfLiteCustomOpResolverCalculator"
output_side_packet: "op_resolver"
}
# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a
# vector of tensors representing, for instance, detection boxes/keypoints and
# scores.
node {
calculator: "InferenceCalculator"
input_stream: "TENSORS:input_tensors"
output_stream: "TENSORS:output_tensors"
input_side_packet: "MODEL:face_landmark_model"
options {
input_side_packet: "CUSTOM_OP_RESOLVER:op_resolver"
output_stream: "TENSORS:output_tensors"
options: {
[mediapipe.InferenceCalculatorOptions.ext] {
delegate { tflite {} }
}
}
}
# Splits a vector of tensors into multiple vectors.
# Splits a vector of tensors into landmark tensors and face flag tensor.
node {
calculator: "SplitTensorVectorCalculator"
calculator: "SwitchContainer"
input_side_packet: "ENABLE:with_attention"
input_stream: "output_tensors"
output_stream: "landmark_tensors"
output_stream: "face_flag_tensor"
options: {
[mediapipe.SwitchContainerOptions.ext] {
contained_node: {
calculator: "SplitTensorVectorCalculator"
options: {
[mediapipe.SplitVectorCalculatorOptions.ext] {
ranges: { begin: 0 end: 1 }
ranges: { begin: 1 end: 2 }
}
}
}
contained_node: {
calculator: "SplitTensorVectorCalculator"
options: {
[mediapipe.SplitVectorCalculatorOptions.ext] {
ranges: { begin: 0 end: 6 }
ranges: { begin: 6 end: 7 }
}
}
}
}
}
}
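The two contained nodes encode how many tensors each model emits: the base model outputs two (one landmark tensor plus the face flag), while the attention model outputs seven (six landmark tensors plus the face flag); SwitchContainer picks the matching split according to the with_attention side packet. A C++ mirror of that selection, for illustration only:

```cpp
#include <utility>

// Each range is a half-open [begin, end) slice of the model's output tensor
// vector, matching the SplitVectorCalculator options above.
struct TensorSplit {
  std::pair<int, int> landmarks;  // landmark tensor(s)
  std::pair<int, int> face_flag;  // face presence flag tensor
};

TensorSplit SelectSplit(bool with_attention) {
  // Base model: 2 output tensors. Attention model: 7 output tensors.
  return with_attention ? TensorSplit{{0, 6}, {6, 7}}
                        : TensorSplit{{0, 1}, {1, 2}};
}
```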
# Converts the face-flag tensor into a float that represents the confidence
@@ -121,14 +149,18 @@ node {
# Decodes the landmark tensors into a vector of landmarks, where the landmark
# coordinates are normalized by the size of the input image to the model.
node {
calculator: "TensorsToLandmarksCalculator"
calculator: "SwitchContainer"
input_side_packet: "ENABLE:with_attention"
input_stream: "TENSORS:ensured_landmark_tensors"
output_stream: "NORM_LANDMARKS:landmarks"
output_stream: "LANDMARKS:landmarks"
options: {
[mediapipe.TensorsToLandmarksCalculatorOptions.ext] {
num_landmarks: 468
input_image_width: 192
input_image_height: 192
[mediapipe.SwitchContainerOptions.ext] {
contained_node: {
calculator: "TensorsToFaceLandmarks"
}
contained_node: {
calculator: "TensorsToFaceLandmarksWithAttention"
}
}
}
}
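Before this change the single TensorsToLandmarksCalculator carried its decode options inline (num_landmarks: 468, input size 192x192); with the SwitchContainer those now live inside the TensorsToFaceLandmarks and TensorsToFaceLandmarksWithAttention subgraphs. A sketch of the normalization those removed options described; the z scaling detail is an assumption, not taken from the commit:

```cpp
// Raw tensor coordinates are divided by the model input size (192x192 per
// the options removed above) to yield normalized values in [0, 1].
struct NormalizedLandmark {
  float x, y, z;
};

NormalizedLandmark Normalize(float raw_x, float raw_y, float raw_z) {
  constexpr float kInputWidth = 192.0f;
  constexpr float kInputHeight = 192.0f;
  // Assumption: z is scaled by the input width, since depth has no separate
  // image dimension to normalize against.
  return {raw_x / kInputWidth, raw_y / kInputHeight, raw_z / kInputWidth};
}
```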