Merge pull request #1 from maletsden/face_mesh_dll_example

Face mesh Windows DLL desktop example
Denys Maletskyy 2021-08-26 16:36:32 +03:00 committed by GitHub
commit 901543a837
14 changed files with 1652 additions and 0 deletions

View File

@@ -18,6 +18,20 @@ licenses(["notice"])
package(default_visibility = ["//visibility:public"])
cc_library(
name = "counting_vector_size_calculator",
srcs = ["counting_vector_size_calculator.cc"],
hdrs = ["counting_vector_size_calculator.h"],
visibility = [
"//visibility:public",
],
deps = [
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/formats:landmark_cc_proto",
],
alwayslink = 1,
)
cc_library(
name = "alignment_points_to_rects_calculator",
srcs = ["alignment_points_to_rects_calculator.cc"],

View File

@@ -0,0 +1,26 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/calculators/util/counting_vector_size_calculator.h"
#include "mediapipe/framework/formats/landmark.pb.h"
namespace mediapipe {
typedef CountingVectorSizeCalculator<
std::vector<::mediapipe::NormalizedLandmarkList>>
CountingNormalizedLandmarkListVectorSizeCalculator;
REGISTER_CALCULATOR(CountingNormalizedLandmarkListVectorSizeCalculator);
} // namespace mediapipe
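
The registration above binds the template to NormalizedLandmarkList vectors only. As a hedged illustration (not part of this change), the same pattern could register a counter for any other vector payload, e.g. detections; the calculator name below is hypothetical:

#include "mediapipe/calculators/util/counting_vector_size_calculator.h"
#include "mediapipe/framework/formats/detection.pb.h"

namespace mediapipe {
// Hypothetical instantiation: counts std::vector<Detection> packets.
typedef CountingVectorSizeCalculator<std::vector<::mediapipe::Detection>>
    CountingDetectionVectorSizeCalculator;
REGISTER_CALCULATOR(CountingDetectionVectorSizeCalculator);
}  // namespace mediapipe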

View File

@@ -0,0 +1,79 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MEDIAPIPE_CALCULATORS_UTIL_COUNTING_VECTOR_SIZE_CALCULATOR_H
#define MEDIAPIPE_CALCULATORS_UTIL_COUNTING_VECTOR_SIZE_CALCULATOR_H
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/landmark.pb.h"
namespace mediapipe {
// A calculator that outputs the size of the input vector. It was created to
// aid in polling packets from an output stream synchronously. If a clock
// stream is present, the calculator outputs 0 even when the input vector
// stream is empty; without one, it outputs a value only when an input vector
// arrives. The clock stream must carry the same timestamps as the vector
// stream and must be a stream on which packets are transmitted while the
// graph is running (e.g., any input stream of the graph).
//
// It is designed to be used like:
//
// Example config:
// node {
// calculator: "CountingWithVectorSizeCalculator"
// input_stream: "CLOCK:triger_signal"
// input_stream: "VECTOR:input_vector"
// output_stream: "COUNT:vector_count"
// }
//
// node {
// calculator: "CountingNormalizedLandmarkListVectorSizeCalculator"
// input_stream: "VECTOR:input_vector"
// output_stream: "COUNT:vector_count"
// }
template <typename VectorT>
class CountingVectorSizeCalculator : public CalculatorBase {
public:
static ::mediapipe::Status GetContract(CalculatorContract *cc) {
if (cc->Inputs().HasTag("CLOCK")) {
cc->Inputs().Tag("CLOCK").SetAny();
}
RET_CHECK(cc->Inputs().HasTag("VECTOR"));
cc->Inputs().Tag("VECTOR").Set<VectorT>();
RET_CHECK(cc->Outputs().HasTag("COUNT"));
cc->Outputs().Tag("COUNT").Set<int>();
return ::mediapipe::OkStatus();
}
::mediapipe::Status Process(CalculatorContext *cc) {
std::unique_ptr<int> vector_size;
if (!cc->Inputs().Tag("VECTOR").IsEmpty()) {
const auto &vector = cc->Inputs().Tag("VECTOR").Get<VectorT>();
vector_size = absl::make_unique<int>(vector.size());
} else {
vector_size = absl::make_unique<int>(0);
}
cc->Outputs().Tag("COUNT").Add(vector_size.release(), cc->InputTimestamp());
return ::mediapipe::OkStatus();
}
};
} // namespace mediapipe
#endif // MEDIAPIPE_CALCULATORS_UTIL_COUNTING_VECTOR_SIZE_CALCULATOR_H
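
To illustrate why the CLOCK input matters for synchronous polling, here is a minimal sketch (the stream name and surrounding graph are assumptions, not part of this change). With a clock stream wired, the COUNT stream carries a packet for every input timestamp, so the poller never stalls on frames without faces:

#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/status.h"

absl::Status PollFaceCount(mediapipe::CalculatorGraph &graph) {
  // "face_count" is an illustrative output stream name.
  ASSIGN_OR_RETURN(mediapipe::OutputStreamPoller poller,
                   graph.AddOutputStreamPoller("face_count"));
  MP_RETURN_IF_ERROR(graph.StartRun({}));
  // ... feed packets into the graph's input stream here ...
  mediapipe::Packet packet;
  while (poller.Next(&packet)) {
    // With CLOCK connected, empty frames yield 0 instead of no packet.
    LOG(INFO) << "count = " << packet.Get<int>();
  }
  return absl::OkStatus();
}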

View File

@@ -0,0 +1,66 @@
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
load("windows_dll_library.bzl", "windows_dll_library")
licenses(["notice"])
filegroup(
name = "srcs",
srcs = glob(["**"]),
visibility = ["//examples:__pkg__"],
)
package(default_visibility = ["//mediapipe/examples:__subpackages__"])
# Define the shared library
windows_dll_library(
name = "face_mesh_lib",
srcs = ["face_mesh_lib.cpp"],
hdrs = ["face_mesh_lib.h"],
# Define COMPILING_DLL to export symbols during compiling the DLL.
copts = ["-DCOMPILING_DLL"],
deps = [
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/formats:image_frame",
"//mediapipe/framework/formats:image_frame_opencv",
"//mediapipe/framework/formats:landmark_cc_proto",
"//mediapipe/framework/formats:rect_cc_proto",
"//mediapipe/framework/port:file_helpers",
"//mediapipe/framework/port:opencv_highgui",
"//mediapipe/framework/port:opencv_imgproc",
"//mediapipe/framework/port:opencv_video",
"//mediapipe/framework/port:parse_text_proto",
"//mediapipe/framework/port:status",
"@com_google_absl//absl/flags:flag",
"@com_google_absl//absl/flags:parse",
"//mediapipe/calculators/core:constant_side_packet_calculator",
"//mediapipe/calculators/core:flow_limiter_calculator",
"//mediapipe/calculators/tflite:tflite_model_calculator",
"//mediapipe/calculators/util:local_file_contents_calculator",
"//mediapipe/modules/face_landmark:face_landmark_front_side_model_cpu_with_face_counter",
],
)
# Link to face_mesh_lib.dll implicitly through its import library.
cc_binary(
name = "face_mesh_cpu",
srcs = ["face_mesh_cpu.cpp"],
deps = [
":face_mesh_lib",
],
)

View File

@@ -0,0 +1,96 @@
#include "face_mesh_lib.h"
int main(int argc, char **argv) {
google::InitGoogleLogging(argv[0]);
absl::ParseCommandLine(argc, argv);
cv::VideoCapture capture;
capture.open(0);
if (!capture.isOpened()) {
return -1;
}
constexpr char kWindowName[] = "MediaPipe";
cv::namedWindow(kWindowName, /*flags=WINDOW_AUTOSIZE*/ 1);
#if (CV_MAJOR_VERSION >= 3) && (CV_MINOR_VERSION >= 2)
capture.set(cv::CAP_PROP_FRAME_WIDTH, 640);
capture.set(cv::CAP_PROP_FRAME_HEIGHT, 480);
capture.set(cv::CAP_PROP_FPS, 30);
#endif
LOG(INFO) << "VideoCapture initialized.";
// Maximum number of faces that can be detected
constexpr int maxNumFaces = 1;
constexpr char face_detection_model_path[] =
"mediapipe/modules/face_detection/face_detection_short_range.tflite";
constexpr char face_landmark_model_path[] =
"mediapipe/modules/face_landmark/face_landmark.tflite";
MPFaceMeshDetector *faceMeshDetector = MPFaceMeshDetectorConstruct(
maxNumFaces, face_detection_model_path, face_landmark_model_path);
// Allocate memory for face landmarks.
auto multiFaceLandmarks = new cv::Point2f *[maxNumFaces];
for (int i = 0; i < maxNumFaces; ++i) {
multiFaceLandmarks[i] = new cv::Point2f[MPFaceMeshDetectorLandmarksNum];
}
std::vector<cv::Rect> multiFaceBoundingBoxes(maxNumFaces);
LOG(INFO) << "FaceMeshDetector constructed.";
LOG(INFO) << "Start grabbing and processing frames.";
bool grab_frames = true;
while (grab_frames) {
// Capture opencv camera.
cv::Mat camera_frame_raw;
capture >> camera_frame_raw;
if (camera_frame_raw.empty()) {
LOG(INFO) << "Ignore empty frames from camera.";
continue;
}
cv::Mat camera_frame;
cv::cvtColor(camera_frame_raw, camera_frame, cv::COLOR_BGR2RGB);
int faceCount = 0;
MPFaceMeshDetectorDetectFaces(faceMeshDetector, camera_frame,
multiFaceBoundingBoxes.data(), &faceCount);
if (faceCount > 0) {
auto &face_bounding_box = multiFaceBoundingBoxes[0];
cv::rectangle(camera_frame_raw, face_bounding_box, cv::Scalar(0, 255, 0),
3);
// The out-param receives the number of faces whose landmarks were written.
int landmarksFaceCount = 0;
MPFaceMeshDetectorDetect2DLandmarks(faceMeshDetector, multiFaceLandmarks,
&landmarksFaceCount);
auto &face_landmarks = multiFaceLandmarks[0];
auto &landmark = face_landmarks[0];
LOG(INFO) << "First landmark: x - " << landmark.x << ", y - "
<< landmark.y;
}
const int pressed_key = cv::waitKey(5);
if (pressed_key >= 0 && pressed_key != 255)
grab_frames = false;
cv::imshow(kWindowName, camera_frame_raw);
}
LOG(INFO) << "Shutting down.";
// Deallocate memory for face landmarks.
for (int i = 0; i < maxNumFaces; ++i) {
delete[] multiFaceLandmarks[i];
}
delete[] multiFaceLandmarks;
MPFaceMeshDetectorDestruct(faceMeshDetector);
}
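
The example above polls only 2D landmarks and bounding boxes. A hedged sketch of the matching 3D call, usable inside the same capture loop after MPFaceMeshDetectorDetectFaces (buffer names are illustrative):

// Allocate cv::Point3f buffers shaped like the 2D case.
auto multiFaceLandmarks3D = new cv::Point3f *[maxNumFaces];
for (int i = 0; i < maxNumFaces; ++i) {
  multiFaceLandmarks3D[i] = new cv::Point3f[MPFaceMeshDetectorLandmarksNum];
}
int landmarks3DFaceCount = 0;
MPFaceMeshDetectorDetect3DLandmarks(faceMeshDetector, multiFaceLandmarks3D,
                                    &landmarks3DFaceCount);
// x and y are scaled to pixels; z is left in the model's normalized depth
// units (DetectLandmarksWithStatus scales only x and y).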

View File

@@ -0,0 +1,398 @@
#include "face_mesh_lib.h"
MPFaceMeshDetector::MPFaceMeshDetector(int numFaces,
const char *face_detection_model_path,
const char *face_landmark_model_path) {
const auto status = InitFaceMeshDetector(numFaces, face_detection_model_path,
face_landmark_model_path);
if (!status.ok()) {
LOG(ERROR) << "Failed constructing FaceMeshDetector.";
LOG(ERROR) << status.message();
}
}
absl::Status
MPFaceMeshDetector::InitFaceMeshDetector(int numFaces,
const char *face_detection_model_path,
const char *face_landmark_model_path) {
numFaces = std::max(numFaces, 1);
if (face_detection_model_path == nullptr) {
face_detection_model_path =
"mediapipe/modules/face_detection/face_detection_short_range.tflite";
}
if (face_landmark_model_path == nullptr) {
face_landmark_model_path =
"mediapipe/modules/face_landmark/face_landmark.tflite";
}
// Prepare graph config.
auto preparedGraphConfig = absl::StrReplaceAll(
graphConfig, {{"$numFaces", std::to_string(numFaces)},
{"$faceDetectionModelPath", face_detection_model_path},
{"$faceLandmarkModelPath", face_landmark_model_path}});
LOG(INFO) << "Get calculator graph config contents: " << preparedGraphConfig;
mediapipe::CalculatorGraphConfig config =
mediapipe::ParseTextProtoOrDie<mediapipe::CalculatorGraphConfig>(
preparedGraphConfig);
LOG(INFO) << "Initialize the calculator graph.";
MP_RETURN_IF_ERROR(graph.Initialize(config));
LOG(INFO) << "Start running the calculator graph.";
ASSIGN_OR_RETURN(mediapipe::OutputStreamPoller landmarks_poller,
graph.AddOutputStreamPoller(kOutputStream_landmarks));
ASSIGN_OR_RETURN(mediapipe::OutputStreamPoller face_count_poller,
graph.AddOutputStreamPoller(kOutputStream_faceCount));
ASSIGN_OR_RETURN(
mediapipe::OutputStreamPoller face_rects_from_landmarks_poller,
graph.AddOutputStreamPoller(kOutputStream_face_rects_from_landmarks));
landmarks_poller_ptr = std::make_unique<mediapipe::OutputStreamPoller>(
std::move(landmarks_poller));
face_count_poller_ptr = std::make_unique<mediapipe::OutputStreamPoller>(
std::move(face_count_poller));
face_rects_from_landmarks_poller_ptr =
std::make_unique<mediapipe::OutputStreamPoller>(
std::move(face_rects_from_landmarks_poller));
MP_RETURN_IF_ERROR(graph.StartRun({}));
LOG(INFO) << "MPFaceMeshDetector constructed successfully.";
return absl::OkStatus();
}
absl::Status
MPFaceMeshDetector::DetectFacesWithStatus(const cv::Mat &camera_frame,
cv::Rect *multi_face_bounding_boxes,
int *numFaces) {
if (!numFaces || !multi_face_bounding_boxes) {
return absl::InvalidArgumentError(
"MPFaceMeshDetector::DetectFacesWithStatus requires non-null pointers "
"for the output data.");
}
// Reset face counts.
*numFaces = 0;
face_count = 0;
// Wrap Mat into an ImageFrame.
auto input_frame = absl::make_unique<mediapipe::ImageFrame>(
mediapipe::ImageFormat::SRGB, camera_frame.cols, camera_frame.rows,
mediapipe::ImageFrame::kDefaultAlignmentBoundary);
cv::Mat input_frame_mat = mediapipe::formats::MatView(input_frame.get());
camera_frame.copyTo(input_frame_mat);
// Send image packet into the graph.
const size_t frame_timestamp_us = static_cast<size_t>(
static_cast<double>(cv::getTickCount()) /
static_cast<double>(cv::getTickFrequency()) * 1e6);
MP_RETURN_IF_ERROR(graph.AddPacketToInputStream(
kInputStream, mediapipe::Adopt(input_frame.release())
.At(mediapipe::Timestamp(frame_timestamp_us))));
// Get face count.
mediapipe::Packet face_count_packet;
if (!face_count_poller_ptr ||
!face_count_poller_ptr->Next(&face_count_packet)) {
return absl::CancelledError("Failed to get next face_count_packet.");
}
auto &face_count_val = face_count_packet.Get<int>();
if (face_count_val <= 0) {
return absl::OkStatus();
}
// Get face bounding boxes.
mediapipe::Packet face_rects_from_landmarks_packet;
if (!face_rects_from_landmarks_poller_ptr ||
!face_rects_from_landmarks_poller_ptr->Next(
&face_rects_from_landmarks_packet)) {
return absl::CancelledError(
"Failed to get next face_rects_from_landmarks_packet.");
}
auto &face_bounding_boxes =
face_rects_from_landmarks_packet
.Get<::std::vector<::mediapipe::NormalizedRect>>();
image_width = camera_frame.cols;
image_height = camera_frame.rows;
const auto image_width_f = static_cast<float>(image_width);
const auto image_height_f = static_cast<float>(image_height);
// Convert vector<NormalizedRect> (center based Rects) to cv::Rect*
// (leftTop based Rects).
for (int i = 0; i < face_count_val; ++i) {
const auto &normalized_bounding_box = face_bounding_boxes[i];
auto &bounding_box = multi_face_bounding_boxes[i];
const auto width =
static_cast<int>(normalized_bounding_box.width() * image_width_f);
const auto height =
static_cast<int>(normalized_bounding_box.height() * image_height_f);
bounding_box.x =
static_cast<int>(normalized_bounding_box.x_center() * image_width_f) -
(width >> 1);
bounding_box.y =
static_cast<int>(normalized_bounding_box.y_center() * image_height_f) -
(height >> 1);
bounding_box.width = width;
bounding_box.height = height;
}
// Get face landmarks.
if (!landmarks_poller_ptr ||
!landmarks_poller_ptr->Next(&face_landmarks_packet)) {
return absl::CancelledError("Failed during getting next landmarks_packet.");
}
*numFaces = face_count_val;
face_count = face_count_val;
return absl::OkStatus();
}
void MPFaceMeshDetector::DetectFaces(const cv::Mat &camera_frame,
cv::Rect *multi_face_bounding_boxes,
int *numFaces) {
const auto status =
DetectFacesWithStatus(camera_frame, multi_face_bounding_boxes, numFaces);
if (!status.ok()) {
LOG(INFO) << "MPFaceMeshDetector::DetectFaces failed: " << status.message();
}
}
absl::Status MPFaceMeshDetector::DetectLandmarksWithStatus(
cv::Point2f **multi_face_landmarks) {
if (face_landmarks_packet.IsEmpty()) {
return absl::CancelledError("Face landmarks packet is empty.");
}
auto &face_landmarks =
face_landmarks_packet
.Get<::std::vector<::mediapipe::NormalizedLandmarkList>>();
const auto image_width_f = static_cast<float>(image_width);
const auto image_height_f = static_cast<float>(image_height);
// Convert landmarks to cv::Point2f**.
for (int i = 0; i < face_count; ++i) {
const auto &normalizedLandmarkList = face_landmarks[i];
const auto landmarks_num = normalizedLandmarkList.landmark_size();
if (landmarks_num != kLandmarksNum) {
return absl::CancelledError("Detected unexpected landmarks number.");
}
auto &face_landmarks = multi_face_landmarks[i];
for (int j = 0; j < landmarks_num; ++j) {
const auto &landmark = normalizedLandmarkList.landmark(j);
face_landmarks[j].x = landmark.x() * image_width_f;
face_landmarks[j].y = landmark.y() * image_height_f;
}
}
return absl::OkStatus();
}
absl::Status MPFaceMeshDetector::DetectLandmarksWithStatus(
cv::Point3f **multi_face_landmarks) {
if (face_landmarks_packet.IsEmpty()) {
return absl::CancelledError("Face landmarks packet is empty.");
}
auto &face_landmarks =
face_landmarks_packet
.Get<::std::vector<::mediapipe::NormalizedLandmarkList>>();
const auto image_width_f = static_cast<float>(image_width);
const auto image_height_f = static_cast<float>(image_height);
// Convert landmarks to cv::Point3f**.
for (int i = 0; i < face_count; ++i) {
const auto &normalized_landmark_list = face_landmarks[i];
const auto landmarks_num = normalized_landmark_list.landmark_size();
if (landmarks_num != kLandmarksNum) {
return absl::CancelledError("Detected unexpected landmarks number.");
}
auto &face_landmarks = multi_face_landmarks[i];
for (int j = 0; j < landmarks_num; ++j) {
const auto &landmark = normalized_landmark_list.landmark(j);
face_landmarks[j].x = landmark.x() * image_width_f;
face_landmarks[j].y = landmark.y() * image_height_f;
face_landmarks[j].z = landmark.z();
}
}
return absl::OkStatus();
}
void MPFaceMeshDetector::DetectLandmarks(cv::Point2f **multi_face_landmarks,
int *numFaces) {
*numFaces = 0;
const auto status = DetectLandmarksWithStatus(multi_face_landmarks);
if (!status.ok()) {
LOG(INFO) << "MPFaceMeshDetector::DetectLandmarks failed: "
<< status.message();
return;
}
*numFaces = face_count;
}
void MPFaceMeshDetector::DetectLandmarks(cv::Point3f **multi_face_landmarks,
int *numFaces) {
*numFaces = 0;
const auto status = DetectLandmarksWithStatus(multi_face_landmarks);
if (!status.ok()) {
LOG(INFO) << "MPFaceMeshDetector::DetectLandmarks failed: "
<< status.message();
return;
}
*numFaces = face_count;
}
extern "C" {
DLLEXPORT MPFaceMeshDetector *
MPFaceMeshDetectorConstruct(int numFaces, const char *face_detection_model_path,
const char *face_landmark_model_path) {
return new MPFaceMeshDetector(numFaces, face_detection_model_path,
face_landmark_model_path);
}
DLLEXPORT void MPFaceMeshDetectorDestruct(MPFaceMeshDetector *detector) {
delete detector;
}
DLLEXPORT void MPFaceMeshDetectorDetectFaces(
MPFaceMeshDetector *detector, const cv::Mat &camera_frame,
cv::Rect *multi_face_bounding_boxes, int *numFaces) {
detector->DetectFaces(camera_frame, multi_face_bounding_boxes, numFaces);
}
DLLEXPORT void
MPFaceMeshDetectorDetect2DLandmarks(MPFaceMeshDetector *detector,
cv::Point2f **multi_face_landmarks,
int *numFaces) {
detector->DetectLandmarks(multi_face_landmarks, numFaces);
}
DLLEXPORT void
MPFaceMeshDetectorDetect3DLandmarks(MPFaceMeshDetector *detector,
cv::Point3f **multi_face_landmarks,
int *numFaces) {
detector->DetectLandmarks(multi_face_landmarks, numFaces);
}
DLLEXPORT const int MPFaceMeshDetectorLandmarksNum =
MPFaceMeshDetector::kLandmarksNum;
}
const std::string MPFaceMeshDetector::graphConfig = R"pb(
# MediaPipe graph that performs face mesh with TensorFlow Lite on CPU.
# Input image. (ImageFrame)
input_stream: "input_video"
# Collection of detected/processed faces, each represented as a list of
# landmarks. (std::vector<NormalizedLandmarkList>)
output_stream: "multi_face_landmarks"
# Detected faces count. (int)
output_stream: "face_count"
# Regions of interest calculated based on landmarks.
# (std::vector<NormalizedRect>)
output_stream: "face_rects_from_landmarks"
node {
calculator: "FlowLimiterCalculator"
input_stream: "input_video"
input_stream: "FINISHED:face_count"
input_stream_info: {
tag_index: "FINISHED"
back_edge: true
}
output_stream: "throttled_input_video"
}
# Defines side packets for further use in the graph.
node {
calculator: "ConstantSidePacketCalculator"
output_side_packet: "PACKET:num_faces"
node_options: {
[type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
packet { int_value: $numFaces }
}
}
}
# Defines side packets for further use in the graph.
node {
calculator: "ConstantSidePacketCalculator"
output_side_packet: "PACKET:face_detection_model_path"
options: {
[mediapipe.ConstantSidePacketCalculatorOptions.ext]: {
packet { string_value: "$faceDetectionModelPath" }
}
}
}
# Defines side packets for further use in the graph.
node {
calculator: "ConstantSidePacketCalculator"
output_side_packet: "PACKET:face_landmark_model_path"
node_options: {
[type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
packet { string_value: "$faceLandmarkModelPath" }
}
}
}
node {
calculator: "LocalFileContentsCalculator"
input_side_packet: "FILE_PATH:0:face_detection_model_path"
input_side_packet: "FILE_PATH:1:face_landmark_model_path"
output_side_packet: "CONTENTS:0:face_detection_model_blob"
output_side_packet: "CONTENTS:1:face_landmark_model_blob"
}
node {
calculator: "TfLiteModelCalculator"
input_side_packet: "MODEL_BLOB:face_detection_model_blob"
output_side_packet: "MODEL:face_detection_model"
}
node {
calculator: "TfLiteModelCalculator"
input_side_packet: "MODEL_BLOB:face_landmark_model_blob"
output_side_packet: "MODEL:face_landmark_model"
}
# Subgraph that detects faces and corresponding landmarks.
node {
calculator: "FaceLandmarkFrontSideModelCpuWithFaceCounter"
input_stream: "IMAGE:throttled_input_video"
input_side_packet: "NUM_FACES:num_faces"
input_side_packet: "MODEL:0:face_detection_model"
input_side_packet: "MODEL:1:face_landmark_model"
output_stream: "LANDMARKS:multi_face_landmarks"
output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
output_stream: "DETECTIONS:face_detections"
output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
output_stream: "FACE_COUNT_FROM_LANDMARKS:face_count"
}
)pb";

View File

@@ -0,0 +1,105 @@
#ifndef FACE_MESH_LIBRARY_H
#define FACE_MESH_LIBRARY_H
#ifdef COMPILING_DLL
#define DLLEXPORT __declspec(dllexport)
#else
#define DLLEXPORT __declspec(dllimport)
#endif
#include <cstdlib>
#include <memory>
#include <string>
#include <windows.h>
#include "absl/flags/flag.h"
#include "absl/flags/parse.h"
#include "absl/strings/str_replace.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_graph.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/image_frame_opencv.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/formats/rect.pb.h"
#include "mediapipe/framework/output_stream_poller.h"
#include "mediapipe/framework/port/file_helpers.h"
#include "mediapipe/framework/port/opencv_highgui_inc.h"
#include "mediapipe/framework/port/opencv_imgproc_inc.h"
#include "mediapipe/framework/port/opencv_video_inc.h"
#include "mediapipe/framework/port/parse_text_proto.h"
#include "mediapipe/framework/port/status.h"
class MPFaceMeshDetector {
public:
MPFaceMeshDetector(int numFaces, const char *face_detection_model_path,
const char *face_landmark_model_path);
void DetectFaces(const cv::Mat &camera_frame,
cv::Rect *multi_face_bounding_boxes, int *numFaces);
void DetectLandmarks(cv::Point2f **multi_face_landmarks, int *numFaces);
void DetectLandmarks(cv::Point3f **multi_face_landmarks, int *numFaces);
static constexpr auto kLandmarksNum = 468;
private:
absl::Status InitFaceMeshDetector(int numFaces,
const char *face_detection_model_path,
const char *face_landmark_model_path);
absl::Status DetectFacesWithStatus(const cv::Mat &camera_frame,
cv::Rect *multi_face_bounding_boxes,
int *numFaces);
absl::Status DetectLandmarksWithStatus(cv::Point2f **multi_face_landmarks);
absl::Status DetectLandmarksWithStatus(cv::Point3f **multi_face_landmarks);
static constexpr auto kInputStream = "input_video";
static constexpr auto kOutputStream_landmarks = "multi_face_landmarks";
static constexpr auto kOutputStream_faceCount = "face_count";
static constexpr auto kOutputStream_face_rects_from_landmarks =
"face_rects_from_landmarks";
static const std::string graphConfig;
mediapipe::CalculatorGraph graph;
std::unique_ptr<mediapipe::OutputStreamPoller> landmarks_poller_ptr;
std::unique_ptr<mediapipe::OutputStreamPoller> face_count_poller_ptr;
std::unique_ptr<mediapipe::OutputStreamPoller>
face_rects_from_landmarks_poller_ptr;
int face_count;
int image_width;
int image_height;
mediapipe::Packet face_landmarks_packet;
};
#ifdef __cplusplus
extern "C" {
#endif
DLLEXPORT MPFaceMeshDetector *
MPFaceMeshDetectorConstruct(int numFaces, const char *face_detection_model_path,
const char *face_landmark_model_path);
DLLEXPORT void MPFaceMeshDetectorDestruct(MPFaceMeshDetector *detector);
DLLEXPORT void MPFaceMeshDetectorDetectFaces(
MPFaceMeshDetector *detector, const cv::Mat &camera_frame,
cv::Rect *multi_face_bounding_boxes, int *numFaces);
DLLEXPORT void
MPFaceMeshDetectorDetect2DLandmarks(MPFaceMeshDetector *detector,
cv::Point2f **multi_face_landmarks,
int *numFaces);
DLLEXPORT void
MPFaceMeshDetectorDetect3DLandmarks(MPFaceMeshDetector *detector,
cv::Point3f **multi_face_landmarks,
int *numFaces);
DLLEXPORT extern const int MPFaceMeshDetectorLandmarksNum;
#ifdef __cplusplus
}
#endif
#endif
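
Since the exports use extern "C", a consumer can also bind at run time instead of linking the import library. A minimal Windows-only sketch, assuming face_mesh_lib.dll sits next to the executable (the typedef names are illustrative, not part of this change):

#include <windows.h>

typedef MPFaceMeshDetector *(*MPConstructFn)(int, const char *, const char *);
typedef void (*MPDestructFn)(MPFaceMeshDetector *);

bool SmokeTestDll() {
  HMODULE dll = LoadLibraryA("face_mesh_lib.dll");
  if (!dll) return false;
  auto construct = reinterpret_cast<MPConstructFn>(
      GetProcAddress(dll, "MPFaceMeshDetectorConstruct"));
  auto destruct = reinterpret_cast<MPDestructFn>(
      GetProcAddress(dll, "MPFaceMeshDetectorDestruct"));
  const bool ok = construct && destruct;
  if (ok) {
    // Null model paths fall back to the defaults baked into the library.
    MPFaceMeshDetector *detector = construct(/*numFaces=*/1, nullptr, nullptr);
    destruct(detector);
  }
  FreeLibrary(dll);
  return ok;
}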

View File

@@ -0,0 +1,62 @@
"""
This is a simple windows_dll_library rule for building a Windows DLL
that can be depended on by other cc rules.
Example usage:
windows_dll_library(
name = "hellolib",
srcs = [
"hello-library.cpp",
],
hdrs = ["hello-library.h"],
# Define COMPILING_DLL to export symbols during compiling the DLL.
copts = ["/DCOMPILING_DLL"],
)
"""
load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_import", "cc_library")
def windows_dll_library(
name,
srcs = [],
deps = [],
hdrs = [],
visibility = None,
**kwargs):
"""A simple windows_dll_library rule for builing a DLL Windows."""
dll_name = name + ".dll"
import_lib_name = name + "_import_lib"
import_target_name = name + "_dll_import"
# Build the shared library
cc_binary(
name = dll_name,
srcs = srcs + hdrs,
deps = deps,
linkshared = 1,
**kwargs
)
# Get the import library for the dll
native.filegroup(
name = import_lib_name,
srcs = [":" + dll_name],
output_group = "interface_library",
)
# Because we cannot directly depend on cc_binary from other cc rules in deps attribute,
# we use cc_import as a bridge to depend on the dll.
cc_import(
name = import_target_name,
interface_library = ":" + import_lib_name,
shared_library = ":" + dll_name,
)
# Create a new cc_library to also include the headers needed for the shared library
cc_library(
name = name,
hdrs = hdrs,
visibility = visibility,
deps = deps + [
":" + import_target_name,
],
)

View File

@@ -57,6 +57,18 @@ mediapipe_simple_subgraph(
],
)
mediapipe_simple_subgraph(
name = "face_detection_short_range_side_model_cpu",
graph = "face_detection_short_range_side_model_cpu.pbtxt",
register_as = "FaceDetectionShortRangeSideModelCpu",
deps = [
":face_detection_short_range_common",
"//mediapipe/calculators/tensor:image_to_tensor_calculator",
"//mediapipe/calculators/tensor:inference_calculator",
"//mediapipe/calculators/util:to_image_calculator",
],
)
mediapipe_simple_subgraph(
name = "face_detection_short_range_gpu",
graph = "face_detection_short_range_gpu.pbtxt",

View File

@@ -0,0 +1,86 @@
# MediaPipe graph to detect faces. (CPU input, and inference is executed on
# CPU.)
#
# It is required that "face_detection_short_range.tflite" is available at
# "mediapipe/modules/face_detection/face_detection_short_range.tflite"
# path during execution.
#
# EXAMPLE:
# node {
# calculator: "FaceDetectionShortRangeCpu"
# input_stream: "IMAGE:image"
# input_side_packet: "MODEL:face_detection_model"
# output_stream: "DETECTIONS:face_detections"
# }
type: "FaceDetectionShortRangeCpu"
# CPU image. (ImageFrame)
input_stream: "IMAGE:image"
# TfLite model to detect faces.
# (std::unique_ptr<tflite::FlatBufferModel,
# std::function<void(tflite::FlatBufferModel*)>>)
# NOTE: only the mediapipe/modules/face_detection/face_detection_short_range.tflite
# model can be passed here; otherwise, results are undefined.
input_side_packet: "MODEL:face_detection_model"
# Detected faces. (std::vector<Detection>)
# NOTE: there will not be an output packet in the DETECTIONS stream for a
# particular timestamp if no faces are detected. However, the MediaPipe
# framework will internally inform the downstream calculators of the absence of
# this packet so that they don't wait for it unnecessarily.
output_stream: "DETECTIONS:detections"
# Converts the input CPU image (ImageFrame) to the multi-backend image type
# (Image).
node: {
calculator: "ToImageCalculator"
input_stream: "IMAGE_CPU:image"
output_stream: "IMAGE:multi_backend_image"
}
# Transforms the input image into a 128x128 tensor while keeping the aspect
# ratio (what is expected by the corresponding face detection model), resulting
# in potential letterboxing in the transformed image.
node: {
calculator: "ImageToTensorCalculator"
input_stream: "IMAGE:multi_backend_image"
output_stream: "TENSORS:input_tensors"
output_stream: "MATRIX:transform_matrix"
options: {
[mediapipe.ImageToTensorCalculatorOptions.ext] {
output_tensor_width: 128
output_tensor_height: 128
keep_aspect_ratio: true
output_tensor_float_range {
min: -1.0
max: 1.0
}
border_mode: BORDER_ZERO
}
}
}
# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a
# vector of tensors representing, for instance, detection boxes/keypoints and
# scores.
node {
calculator: "InferenceCalculator"
input_stream: "TENSORS:input_tensors"
output_stream: "TENSORS:detection_tensors"
input_side_packet: "MODEL:face_detection_model"
options {
[mediapipe.InferenceCalculatorOptions.ext] {
delegate { tflite {} }
}
}
}
# Performs tensor post processing to generate face detections.
node {
calculator: "FaceDetectionShortRangeCommon"
input_stream: "TENSORS:detection_tensors"
input_stream: "MATRIX:transform_matrix"
output_stream: "DETECTIONS:detections"
}

View File

@@ -37,6 +37,22 @@ mediapipe_simple_subgraph(
],
)
mediapipe_simple_subgraph(
name = "face_landmark_side_model_cpu",
graph = "face_landmark_side_model_cpu.pbtxt",
register_as = "FaceLandmarkSideModelCpu",
deps = [
"//mediapipe/calculators/core:gate_calculator",
"//mediapipe/calculators/core:split_vector_calculator",
"//mediapipe/calculators/tensor:image_to_tensor_calculator",
"//mediapipe/calculators/tensor:inference_calculator",
"//mediapipe/calculators/tensor:tensors_to_floats_calculator",
"//mediapipe/calculators/tensor:tensors_to_landmarks_calculator",
"//mediapipe/calculators/util:landmark_projection_calculator",
"//mediapipe/calculators/util:thresholding_calculator",
],
)
mediapipe_simple_subgraph(
name = "face_landmark_gpu",
graph = "face_landmark_gpu.pbtxt",
@@ -74,6 +90,50 @@ mediapipe_simple_subgraph(
],
)
mediapipe_simple_subgraph(
name = "face_landmark_front_cpu_with_face_counter",
graph = "face_landmark_front_cpu_with_face_counter.pbtxt",
register_as = "FaceLandmarkFrontCpuWithFaceCounter",
deps = [
":face_detection_front_detection_to_roi",
":face_landmark_cpu",
":face_landmark_landmarks_to_roi",
"//mediapipe/calculators/core:begin_loop_calculator",
"//mediapipe/calculators/core:clip_vector_size_calculator",
"//mediapipe/calculators/core:constant_side_packet_calculator",
"//mediapipe/calculators/core:end_loop_calculator",
"//mediapipe/calculators/core:gate_calculator",
"//mediapipe/calculators/core:previous_loopback_calculator",
"//mediapipe/calculators/image:image_properties_calculator",
"//mediapipe/calculators/util:association_norm_rect_calculator",
"//mediapipe/calculators/util:collection_has_min_size_calculator",
"//mediapipe/calculators/util:counting_vector_size_calculator",
"//mediapipe/modules/face_detection:face_detection_short_range_cpu",
],
)
mediapipe_simple_subgraph(
name = "face_landmark_front_side_model_cpu_with_face_counter",
graph = "face_landmark_front_side_model_cpu_with_face_counter.pbtxt",
register_as = "FaceLandmarkFrontSideModelCpuWithFaceCounter",
deps = [
":face_detection_front_detection_to_roi",
":face_landmark_side_model_cpu",
":face_landmark_landmarks_to_roi",
"//mediapipe/calculators/core:begin_loop_calculator",
"//mediapipe/calculators/core:clip_vector_size_calculator",
"//mediapipe/calculators/core:constant_side_packet_calculator",
"//mediapipe/calculators/core:end_loop_calculator",
"//mediapipe/calculators/core:gate_calculator",
"//mediapipe/calculators/core:previous_loopback_calculator",
"//mediapipe/calculators/image:image_properties_calculator",
"//mediapipe/calculators/util:association_norm_rect_calculator",
"//mediapipe/calculators/util:collection_has_min_size_calculator",
"//mediapipe/calculators/util:counting_vector_size_calculator",
"//mediapipe/modules/face_detection:face_detection_short_range_side_model_cpu",
],
)
mediapipe_simple_subgraph(
name = "face_landmark_front_gpu",
graph = "face_landmark_front_gpu.pbtxt",

View File

@@ -0,0 +1,249 @@
# MediaPipe graph to detect/predict face landmarks. (CPU input, and inference is
# executed on CPU.) This graph tries to skip face detection as much as possible
# by using previously detected/predicted landmarks for new images.
#
# It is required that "face_detection_short_range.tflite" is available at
# "mediapipe/modules/face_detection/face_detection_short_range.tflite"
# path during execution.
#
# It is required that "face_landmark.tflite" is available at
# "mediapipe/modules/face_landmark/face_landmark.tflite"
# path during execution.
#
# EXAMPLE:
# node {
# calculator: "FaceLandmarkFrontCpu"
# input_stream: "IMAGE:image"
# input_side_packet: "NUM_FACES:num_faces"
# output_stream: "LANDMARKS:multi_face_landmarks"
# }
type: "FaceLandmarkFrontCpu"
# CPU image. (ImageFrame)
input_stream: "IMAGE:image"
# Max number of faces to detect/track. (int)
input_side_packet: "NUM_FACES:num_faces"
# Collection of detected/predicted faces, each represented as a list of 468 face
# landmarks. (std::vector<NormalizedLandmarkList>)
# NOTE: there will not be an output packet in the LANDMARKS stream for a
# particular timestamp if no faces are detected. However, the MediaPipe
# framework will internally inform the downstream calculators of the absence of
# this packet so that they don't wait for it unnecessarily.
output_stream: "LANDMARKS:multi_face_landmarks"
# Extra outputs (for debugging, for instance).
# Detected faces. (std::vector<Detection>)
output_stream: "DETECTIONS:face_detections"
# Regions of interest calculated based on landmarks.
# (std::vector<NormalizedRect>)
output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
# Regions of interest calculated based on face detections.
# (std::vector<NormalizedRect>)
output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
# Detected faces count. (int)
output_stream: "FACE_COUNT_FROM_LANDMARKS:face_count"
# Defines whether landmarks on the previous image should be used to help
# localize landmarks on the current image.
node {
name: "ConstantSidePacketCalculator"
calculator: "ConstantSidePacketCalculator"
output_side_packet: "PACKET:use_prev_landmarks"
options: {
[mediapipe.ConstantSidePacketCalculatorOptions.ext]: {
packet { bool_value: true }
}
}
}
node {
calculator: "GateCalculator"
input_side_packet: "ALLOW:use_prev_landmarks"
input_stream: "prev_face_rects_from_landmarks"
output_stream: "gated_prev_face_rects_from_landmarks"
}
# Determines if an input vector of NormalizedRect has a size greater than or
# equal to the provided num_faces.
node {
calculator: "NormalizedRectVectorHasMinSizeCalculator"
input_stream: "ITERABLE:gated_prev_face_rects_from_landmarks"
input_side_packet: "num_faces"
output_stream: "prev_has_enough_faces"
}
# Drops the incoming image if enough faces have already been identified from the
# previous image. Otherwise, passes the incoming image through to trigger a new
# round of face detection.
node {
calculator: "GateCalculator"
input_stream: "image"
input_stream: "DISALLOW:prev_has_enough_faces"
output_stream: "gated_image"
options: {
[mediapipe.GateCalculatorOptions.ext] {
empty_packets_as_allow: true
}
}
}
# Detects faces.
node {
calculator: "FaceDetectionShortRangeCpu"
input_stream: "IMAGE:gated_image"
output_stream: "DETECTIONS:all_face_detections"
}
# Makes sure there are no more detections than the provided num_faces.
node {
calculator: "ClipDetectionVectorSizeCalculator"
input_stream: "all_face_detections"
output_stream: "face_detections"
input_side_packet: "num_faces"
}
# Calculate size of the image.
node {
calculator: "ImagePropertiesCalculator"
input_stream: "IMAGE:gated_image"
output_stream: "SIZE:gated_image_size"
}
# Outputs each element of face_detections at a fake timestamp for the rest of
# the graph to process. Clones the image size packet for each face_detection at
# the fake timestamp. At the end of the loop, outputs the BATCH_END timestamp
# for downstream calculators to inform them that all elements in the vector have
# been processed.
node {
calculator: "BeginLoopDetectionCalculator"
input_stream: "ITERABLE:face_detections"
input_stream: "CLONE:gated_image_size"
output_stream: "ITEM:face_detection"
output_stream: "CLONE:detections_loop_image_size"
output_stream: "BATCH_END:detections_loop_end_timestamp"
}
# Calculates region of interest based on face detections, so that it can be
# used to detect landmarks.
node {
calculator: "FaceDetectionFrontDetectionToRoi"
input_stream: "DETECTION:face_detection"
input_stream: "IMAGE_SIZE:detections_loop_image_size"
output_stream: "ROI:face_rect_from_detection"
}
# Counts the size of the multi_face_landmarks vector. The image stream serves
# only as a clock so the calculator produces output even when no vector arrives.
node {
calculator: "CountingNormalizedLandmarkListVectorSizeCalculator"
input_stream: "CLOCK:image"
input_stream: "VECTOR:multi_face_landmarks"
output_stream: "COUNT:face_count"
}
# Collects a NormalizedRect for each face into a vector. Upon receiving the
# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END
# timestamp.
node {
calculator: "EndLoopNormalizedRectCalculator"
input_stream: "ITEM:face_rect_from_detection"
input_stream: "BATCH_END:detections_loop_end_timestamp"
output_stream: "ITERABLE:face_rects_from_detections"
}
# Performs association between NormalizedRect vector elements from previous
# image and rects based on face detections from the current image. This
# calculator ensures that the output face_rects vector doesn't contain
# overlapping regions based on the specified min_similarity_threshold.
node {
calculator: "AssociationNormRectCalculator"
input_stream: "face_rects_from_detections"
input_stream: "gated_prev_face_rects_from_landmarks"
output_stream: "face_rects"
options: {
[mediapipe.AssociationCalculatorOptions.ext] {
min_similarity_threshold: 0.5
}
}
}
# Calculate size of the image.
node {
calculator: "ImagePropertiesCalculator"
input_stream: "IMAGE:image"
output_stream: "SIZE:image_size"
}
# Outputs each element of face_rects at a fake timestamp for the rest of the
# graph to process. Clones image and image size packets for each
# single_face_rect at the fake timestamp. At the end of the loop, outputs the
# BATCH_END timestamp for downstream calculators to inform them that all
# elements in the vector have been processed.
node {
calculator: "BeginLoopNormalizedRectCalculator"
input_stream: "ITERABLE:face_rects"
input_stream: "CLONE:0:image"
input_stream: "CLONE:1:image_size"
output_stream: "ITEM:face_rect"
output_stream: "CLONE:0:landmarks_loop_image"
output_stream: "CLONE:1:landmarks_loop_image_size"
output_stream: "BATCH_END:landmarks_loop_end_timestamp"
}
# Detects face landmarks within specified region of interest of the image.
node {
calculator: "FaceLandmarkCpu"
input_stream: "IMAGE:landmarks_loop_image"
input_stream: "ROI:face_rect"
output_stream: "LANDMARKS:face_landmarks"
}
# Calculates region of interest based on face landmarks, so that it can be
# reused for the subsequent image.
node {
calculator: "FaceLandmarkLandmarksToRoi"
input_stream: "LANDMARKS:face_landmarks"
input_stream: "IMAGE_SIZE:landmarks_loop_image_size"
output_stream: "ROI:face_rect_from_landmarks"
}
# Collects a set of landmarks for each face into a vector. Upon receiving the
# BATCH_END timestamp, outputs the vector of landmarks at the BATCH_END
# timestamp.
node {
calculator: "EndLoopNormalizedLandmarkListVectorCalculator"
input_stream: "ITEM:face_landmarks"
input_stream: "BATCH_END:landmarks_loop_end_timestamp"
output_stream: "ITERABLE:multi_face_landmarks"
}
# Collects a NormalizedRect for each face into a vector. Upon receiving the
# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END
# timestamp.
node {
calculator: "EndLoopNormalizedRectCalculator"
input_stream: "ITEM:face_rect_from_landmarks"
input_stream: "BATCH_END:landmarks_loop_end_timestamp"
output_stream: "ITERABLE:face_rects_from_landmarks"
}
# Caches face rects calculated from landmarks, and upon the arrival of the next
# input image, sends out the cached rects with timestamps replaced by that of
# the input image, essentially generating a packet that carries the previous
# face rects. Note that upon the arrival of the very first input image, a
# timestamp bound update occurs to jump start the feedback loop.
node {
calculator: "PreviousLoopbackCalculator"
input_stream: "MAIN:image"
input_stream: "LOOP:face_rects_from_landmarks"
input_stream_info: {
tag_index: "LOOP"
back_edge: true
}
output_stream: "PREV_LOOP:prev_face_rects_from_landmarks"
}

View File

@@ -0,0 +1,256 @@
# MediaPipe graph to detect/predict face landmarks. (CPU input, and inference is
# executed on CPU.) This graph tries to skip face detection as much as possible
# by using previously detected/predicted landmarks for new images.
#
# EXAMPLE:
# node {
# calculator: "FaceLandmarkFrontSideModelCpu"
# input_stream: "IMAGE:image"
# input_side_packet: "NUM_FACES:num_faces"
# input_side_packet: "MODEL:0:face_detection_model"
# input_side_packet: "MODEL:1:face_landmark_model"
# output_stream: "LANDMARKS:multi_face_landmarks"
# }
type: "FaceLandmarkFrontSideModelCpu"
# CPU image. (ImageFrame)
input_stream: "IMAGE:image"
# Max number of faces to detect/track. (int)
input_side_packet: "NUM_FACES:num_faces"
# TfLite model to detect faces.
# (std::unique_ptr<tflite::FlatBufferModel,
# std::function<void(tflite::FlatBufferModel*)>>)
# NOTE: only the mediapipe/modules/face_detection/face_detection_short_range.tflite
# model can be passed here; otherwise, results are undefined.
input_side_packet: "MODEL:0:face_detection_model"
# TfLite model to detect face landmarks.
# (std::unique_ptr<tflite::FlatBufferModel,
# std::function<void(tflite::FlatBufferModel*)>>)
# NOTE: only the mediapipe/modules/face_landmark/face_landmark.tflite model
# can be passed here; otherwise, results are undefined.
input_side_packet: "MODEL:1:face_landmark_model"
# Collection of detected/predicted faces, each represented as a list of 468 face
# landmarks. (std::vector<NormalizedLandmarkList>)
# NOTE: there will not be an output packet in the LANDMARKS stream for a
# particular timestamp if no faces are detected. However, the MediaPipe
# framework will internally inform the downstream calculators of the absence of
# this packet so that they don't wait for it unnecessarily.
output_stream: "LANDMARKS:multi_face_landmarks"
# Extra outputs (for debugging, for instance).
# Detected faces. (std::vector<Detection>)
output_stream: "DETECTIONS:face_detections"
# Regions of interest calculated based on landmarks.
# (std::vector<NormalizedRect>)
output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
# Regions of interest calculated based on face detections.
# (std::vector<NormalizedRect>)
output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
# Detected faces count. (int)
output_stream: "FACE_COUNT_FROM_LANDMARKS:face_count"
# Defines whether landmarks on the previous image should be used to help
# localize landmarks on the current image.
node {
name: "ConstantSidePacketCalculator"
calculator: "ConstantSidePacketCalculator"
output_side_packet: "PACKET:use_prev_landmarks"
options: {
[mediapipe.ConstantSidePacketCalculatorOptions.ext]: {
packet { bool_value: true }
}
}
}
node {
calculator: "GateCalculator"
input_side_packet: "ALLOW:use_prev_landmarks"
input_stream: "prev_face_rects_from_landmarks"
output_stream: "gated_prev_face_rects_from_landmarks"
}
# Determines if an input vector of NormalizedRect has a size greater than or
# equal to the provided num_faces.
node {
calculator: "NormalizedRectVectorHasMinSizeCalculator"
input_stream: "ITERABLE:prev_face_rects_from_landmarks"
input_side_packet: "num_faces"
output_stream: "prev_has_enough_faces"
}
# Drops the incoming image if enough faces have already been identified from the
# previous image. Otherwise, passes the incoming image through to trigger a new
# round of face detection.
node {
calculator: "GateCalculator"
input_stream: "image"
input_stream: "DISALLOW:prev_has_enough_faces"
output_stream: "gated_image"
options: {
[mediapipe.GateCalculatorOptions.ext] {
empty_packets_as_allow: true
}
}
}
# Detects faces.
node {
calculator: "FaceDetectionShortRangeSideModelCpu"
input_stream: "IMAGE:gated_image"
input_side_packet: "MODEL:face_detection_model"
output_stream: "DETECTIONS:all_face_detections"
}
# Makes sure there are no more detections than the provided num_faces.
node {
calculator: "ClipDetectionVectorSizeCalculator"
input_stream: "all_face_detections"
output_stream: "face_detections"
input_side_packet: "num_faces"
}
# Calculate size of the image.
node {
calculator: "ImagePropertiesCalculator"
input_stream: "IMAGE:gated_image"
output_stream: "SIZE:gated_image_size"
}
# Outputs each element of face_detections at a fake timestamp for the rest of
# the graph to process. Clones the image size packet for each face_detection at
# the fake timestamp. At the end of the loop, outputs the BATCH_END timestamp
# for downstream calculators to inform them that all elements in the vector have
# been processed.
node {
calculator: "BeginLoopDetectionCalculator"
input_stream: "ITERABLE:face_detections"
input_stream: "CLONE:gated_image_size"
output_stream: "ITEM:face_detection"
output_stream: "CLONE:detections_loop_image_size"
output_stream: "BATCH_END:detections_loop_end_timestamp"
}
# Calculates region of interest based on face detections, so that it can be
# used to detect landmarks.
node {
calculator: "FaceDetectionFrontDetectionToRoi"
input_stream: "DETECTION:face_detection"
input_stream: "IMAGE_SIZE:detections_loop_image_size"
output_stream: "ROI:face_rect_from_detection"
}
# Counts the size of the multi_face_landmarks vector. The image stream serves
# only as a clock so the calculator produces output even when no vector arrives.
node {
calculator: "CountingNormalizedLandmarkListVectorSizeCalculator"
input_stream: "CLOCK:image"
input_stream: "VECTOR:multi_face_landmarks"
output_stream: "COUNT:face_count"
}
# Collects a NormalizedRect for each face into a vector. Upon receiving the
# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END
# timestamp.
node {
calculator: "EndLoopNormalizedRectCalculator"
input_stream: "ITEM:face_rect_from_detection"
input_stream: "BATCH_END:detections_loop_end_timestamp"
output_stream: "ITERABLE:face_rects_from_detections"
}
# Performs association between NormalizedRect vector elements from previous
# image and rects based on face detections from the current image. This
# calculator ensures that the output face_rects vector doesn't contain
# overlapping regions based on the specified min_similarity_threshold.
node {
calculator: "AssociationNormRectCalculator"
input_stream: "face_rects_from_detections"
input_stream: "prev_face_rects_from_landmarks"
output_stream: "face_rects"
options: {
[mediapipe.AssociationCalculatorOptions.ext] {
min_similarity_threshold: 0.5
}
}
}
# Calculate size of the image.
node {
calculator: "ImagePropertiesCalculator"
input_stream: "IMAGE:image"
output_stream: "SIZE:image_size"
}
# Outputs each element of face_rects at a fake timestamp for the rest of the
# graph to process. Clones image and image size packets for each
# single_face_rect at the fake timestamp. At the end of the loop, outputs the
# BATCH_END timestamp for downstream calculators to inform them that all
# elements in the vector have been processed.
node {
calculator: "BeginLoopNormalizedRectCalculator"
input_stream: "ITERABLE:face_rects"
input_stream: "CLONE:0:image"
input_stream: "CLONE:1:image_size"
output_stream: "ITEM:face_rect"
output_stream: "CLONE:0:landmarks_loop_image"
output_stream: "CLONE:1:landmarks_loop_image_size"
output_stream: "BATCH_END:landmarks_loop_end_timestamp"
}
# Detects face landmarks within specified region of interest of the image.
node {
calculator: "FaceLandmarkSideModelCpu"
input_stream: "IMAGE:landmarks_loop_image"
input_stream: "ROI:face_rect"
input_side_packet: "MODEL:face_landmark_model"
output_stream: "LANDMARKS:face_landmarks"
}
# Calculates region of interest based on face landmarks, so that it can be
# reused for the subsequent image.
node {
calculator: "FaceLandmarkLandmarksToRoi"
input_stream: "LANDMARKS:face_landmarks"
input_stream: "IMAGE_SIZE:landmarks_loop_image_size"
output_stream: "ROI:face_rect_from_landmarks"
}
# Collects a set of landmarks for each face into a vector. Upon receiving the
# BATCH_END timestamp, outputs the vector of landmarks at the BATCH_END
# timestamp.
node {
calculator: "EndLoopNormalizedLandmarkListVectorCalculator"
input_stream: "ITEM:face_landmarks"
input_stream: "BATCH_END:landmarks_loop_end_timestamp"
output_stream: "ITERABLE:multi_face_landmarks"
}
# Collects a NormalizedRect for each face into a vector. Upon receiving the
# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END
# timestamp.
node {
calculator: "EndLoopNormalizedRectCalculator"
input_stream: "ITEM:face_rect_from_landmarks"
input_stream: "BATCH_END:landmarks_loop_end_timestamp"
output_stream: "ITERABLE:face_rects_from_landmarks"
}
# Caches face rects calculated from landmarks, and upon the arrival of the next
# input image, sends out the cached rects with timestamps replaced by that of
# the input image, essentially generating a packet that carries the previous
# face rects. Note that upon the arrival of the very first input image, a
# timestamp bound update occurs to jump start the feedback loop.
node {
calculator: "PreviousLoopbackCalculator"
input_stream: "MAIN:image"
input_stream: "LOOP:face_rects_from_landmarks"
input_stream_info: {
tag_index: "LOOP"
back_edge: true
}
output_stream: "PREV_LOOP:prev_face_rects_from_landmarks"
}

View File

@@ -0,0 +1,143 @@
# MediaPipe graph to detect/predict face landmarks. (CPU input, and inference is
# executed on CPU.)
#
# It is required that "face_landmark.tflite" is available at
# "mediapipe/modules/face_landmark/face_landmark.tflite"
# path during execution.
#
# EXAMPLE:
# node {
# calculator: "FaceLandmarkCpu"
# input_stream: "IMAGE:image"
# input_stream: "ROI:face_roi"
# input_side_packet: "MODEL:face_landmark_model"
# output_stream: "LANDMARKS:face_landmarks"
# }
type: "FaceLandmarkCpu"
# CPU image. (ImageFrame)
input_stream: "IMAGE:image"
# ROI (region of interest) within the given image where a face is located.
# (NormalizedRect)
input_stream: "ROI:roi"
# TfLite model to detect face landmarks.
# (std::unique_ptr<tflite::FlatBufferModel,
# std::function<void(tflite::FlatBufferModel*)>>)
# NOTE: only the mediapipe/modules/face_landmark/face_landmark.tflite model
# can be passed here; otherwise, results are undefined.
input_side_packet: "MODEL:face_landmark_model"
# 468 face landmarks within the given ROI. (NormalizedLandmarkList)
# NOTE: if a face is not present within the given ROI, for this particular
# timestamp there will not be an output packet in the LANDMARKS stream. However,
# the MediaPipe framework will internally inform the downstream calculators of
# the absence of this packet so that they don't wait for it unnecessarily.
output_stream: "LANDMARKS:face_landmarks"
# Transforms the input image into a 192x192 tensor.
node: {
calculator: "ImageToTensorCalculator"
input_stream: "IMAGE:image"
input_stream: "NORM_RECT:roi"
output_stream: "TENSORS:input_tensors"
options: {
[mediapipe.ImageToTensorCalculatorOptions.ext] {
output_tensor_width: 192
output_tensor_height: 192
output_tensor_float_range {
min: 0.0
max: 1.0
}
}
}
}
# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a
# vector of tensors representing, for instance, detection boxes/keypoints and
# scores.
node {
calculator: "InferenceCalculator"
input_stream: "TENSORS:input_tensors"
output_stream: "TENSORS:output_tensors"
input_side_packet: "MODEL:face_landmark_model"
options {
[mediapipe.InferenceCalculatorOptions.ext] {
delegate { tflite {} }
}
}
}
# Splits a vector of tensors into multiple vectors.
node {
calculator: "SplitTensorVectorCalculator"
input_stream: "output_tensors"
output_stream: "landmark_tensors"
output_stream: "face_flag_tensor"
options: {
[mediapipe.SplitVectorCalculatorOptions.ext] {
ranges: { begin: 0 end: 1 }
ranges: { begin: 1 end: 2 }
}
}
}
# Converts the face-flag tensor into a float that represents the confidence
# score of face presence.
node {
calculator: "TensorsToFloatsCalculator"
input_stream: "TENSORS:face_flag_tensor"
output_stream: "FLOAT:face_presence_score"
options {
[mediapipe.TensorsToFloatsCalculatorOptions.ext] {
activation: SIGMOID
}
}
}
# Applies a threshold to the confidence score to determine whether a face is
# present.
node {
calculator: "ThresholdingCalculator"
input_stream: "FLOAT:face_presence_score"
output_stream: "FLAG:face_presence"
options: {
[mediapipe.ThresholdingCalculatorOptions.ext] {
threshold: 0.5
}
}
}
# Drops landmark tensors if a face is not present.
node {
calculator: "GateCalculator"
input_stream: "landmark_tensors"
input_stream: "ALLOW:face_presence"
output_stream: "ensured_landmark_tensors"
}
# Decodes the landmark tensors into a vector of landmarks, where the landmark
# coordinates are normalized by the size of the input image to the model.
node {
calculator: "TensorsToLandmarksCalculator"
input_stream: "TENSORS:ensured_landmark_tensors"
output_stream: "NORM_LANDMARKS:landmarks"
options: {
[mediapipe.TensorsToLandmarksCalculatorOptions.ext] {
num_landmarks: 468
input_image_width: 192
input_image_height: 192
}
}
}
# Projects the landmarks from the cropped face image to the corresponding
# locations on the full image before cropping (input to the graph).
node {
calculator: "LandmarkProjectionCalculator"
input_stream: "NORM_LANDMARKS:landmarks"
input_stream: "NORM_RECT:roi"
output_stream: "NORM_LANDMARKS:face_landmarks"
}