Update TensorsToFaceLandmarksGraph to support face mesh v2 model.

PiperOrigin-RevId: 523814749
MediaPipe Team 2023-04-12 15:00:53 -07:00 committed by Copybara-Service
parent 468d10e947
commit 0179f0c456
16 changed files with 2560 additions and 4633 deletions
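In short: the face mesh v2 model (facemesh2_lite_iris_faceflag_2023_02_14.tflite) emits a single landmarks tensor covering all 478 points (the 468-point mesh plus five points per iris) next to a face-presence score, so the attention-specific tensor splitting and landmark refinement are deleted outright and is_attention_model disappears from the options proto. A minimal sketch of how the simplified subgraph options are now filled in (not part of the commit; ConfigureOptions is a hypothetical name, and the proto and ImageTensorSpecs types are taken from the diff below):

#include "mediapipe/tasks/cc/vision/face_landmarker/proto/tensors_to_face_landmarks_graph_options.pb.h"
#include "mediapipe/tasks/cc/vision/utils/image_tensor_specs.h"

// Hypothetical helper mirroring the post-change
// ConfigureTensorsToFaceLandmarksGraph in the diff below.
void ConfigureOptions(
    const mediapipe::tasks::vision::ImageTensorSpecs& specs,
    mediapipe::tasks::vision::face_landmarker::proto::
        TensorsToFaceLandmarksGraphOptions* options) {
  // No is_attention_model flag anymore: v2 always packs all 478 landmarks
  // into one tensor, so only the input geometry is needed to de-normalize
  // the landmark coordinates.
  options->set_input_image_width(specs.image_width);
  options->set_input_image_height(specs.image_height);
}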

mediapipe/tasks/cc/vision/face_landmarker/face_landmarker_graph_test.cc

@@ -68,19 +68,17 @@ using ::testing::proto::Approximately;
 using ::testing::proto::Partially;
 
 constexpr char kTestDataDirectory[] = "/mediapipe/tasks/testdata/vision/";
-constexpr char kFaceLandmarkerModelBundleName[] = "face_landmarker.task";
+constexpr char kFaceLandmarkerModelBundleName[] = "face_landmarker_v2.task";
 constexpr char kFaceLandmarkerWithBlendshapesModelBundleName[] =
-    "face_landmarker_with_blendshapes.task";
+    "face_landmarker_v2_with_blendshapes.task";
 constexpr char kPortraitImageName[] = "portrait.jpg";
 constexpr char kCatImageName[] = "cat.jpg";
-constexpr char kPortraitExpectedFaceLandamrksName[] =
+constexpr char kPortraitExpectedFaceLandmarksName[] =
     "portrait_expected_face_landmarks.pbtxt";
-constexpr char kPortraitExpectedFaceLandamrksWithAttentionName[] =
-    "portrait_expected_face_landmarks_with_attention.pbtxt";
 constexpr char kPortraitExpectedBlendshapesName[] =
-    "portrait_expected_blendshapes_with_attention.pbtxt";
+    "portrait_expected_blendshapes.pbtxt";
 constexpr char kPortraitExpectedFaceGeometryName[] =
-    "portrait_expected_face_geometry_with_attention.pbtxt";
+    "portrait_expected_face_geometry.pbtxt";
 constexpr char kImageTag[] = "IMAGE";
 constexpr char kImageName[] = "image";
@@ -95,7 +93,7 @@ constexpr char kFaceGeometryName[] = "face_geometry";
 constexpr float kLandmarksDiffMargin = 0.03;
 constexpr float kBlendshapesDiffMargin = 0.1;
-constexpr float kFaceGeometryDiffMargin = 0.01;
+constexpr float kFaceGeometryDiffMargin = 0.02;
 
 template <typename ProtoT>
 ProtoT GetExpectedProto(absl::string_view filename) {
@@ -250,7 +248,7 @@ INSTANTIATE_TEST_SUITE_P(
             /* test_image_name= */ kPortraitImageName,
             /* expected_landmarks_list= */
             {{GetExpectedProto<NormalizedLandmarkList>(
-                kPortraitExpectedFaceLandamrksName)}},
+                kPortraitExpectedFaceLandmarksName)}},
             /* expected_blendshapes= */ std::nullopt,
             /* expected_face_geometry= */ std::nullopt,
             /* landmarks_diff_threshold= */ kLandmarksDiffMargin,
@@ -275,7 +273,7 @@ INSTANTIATE_TEST_SUITE_P(
             /* test_image_name= */ kPortraitImageName,
             /* expected_landmarks_list= */
             {{GetExpectedProto<NormalizedLandmarkList>(
-                kPortraitExpectedFaceLandamrksWithAttentionName)}},
+                kPortraitExpectedFaceLandmarksName)}},
             /* expected_blendshapes= */
             {{GetExpectedProto<ClassificationList>(
                 kPortraitExpectedBlendshapesName)}},
@@ -290,7 +288,7 @@ INSTANTIATE_TEST_SUITE_P(
             /* test_image_name= */ kPortraitImageName,
             /* expected_landmarks_list= */
             {{GetExpectedProto<NormalizedLandmarkList>(
-                kPortraitExpectedFaceLandamrksWithAttentionName)}},
+                kPortraitExpectedFaceLandmarksName)}},
             /* expected_blendshapes= */
             {{GetExpectedProto<ClassificationList>(
                 kPortraitExpectedBlendshapesName)}},

mediapipe/tasks/cc/vision/face_landmarker/face_landmarker_test.cc

@@ -43,6 +43,7 @@ limitations under the License.
 #include "mediapipe/tasks/cc/components/processors/proto/classifier_options.pb.h"
 #include "mediapipe/tasks/cc/core/base_options.h"
 #include "mediapipe/tasks/cc/vision/core/image_processing_options.h"
+#include "mediapipe/tasks/cc/vision/face_geometry/proto/face_geometry.pb.h"
 #include "mediapipe/tasks/cc/vision/face_landmarker/face_landmarker_result.h"
 #include "mediapipe/tasks/cc/vision/utils/image_utils.h"
@@ -59,19 +60,18 @@ using ::testing::TestWithParam;
 using ::testing::Values;
 
 constexpr char kTestDataDirectory[] = "/mediapipe/tasks/testdata/vision/";
-constexpr char kFaceLandmarkerModelBundleName[] = "face_landmarker.task";
 constexpr char kFaceLandmarkerWithBlendshapesModelBundleName[] =
-    "face_landmarker_with_blendshapes.task";
+    "face_landmarker_v2_with_blendshapes.task";
 constexpr char kPortraitImageName[] = "portrait.jpg";
-constexpr char kPortraitExpectedFaceLandamrksName[] =
+constexpr char kPortraitExpectedFaceLandmarksName[] =
     "portrait_expected_face_landmarks.pbtxt";
-constexpr char kPortraitExpectedFaceLandmarksWithAttentionName[] =
-    "portrait_expected_face_landmarks_with_attention.pbtxt";
 constexpr char kPortraitExpectedBlendshapesName[] =
-    "portrait_expected_blendshapes_with_attention.pbtxt";
+    "portrait_expected_blendshapes.pbtxt";
+constexpr char kPortaitExpectedFaceGeomertyName[] =
+    "portrait_expected_face_geometry.pbtxt";
 constexpr float kLandmarksDiffMargin = 0.03;
-constexpr float kBlendshapesDiffMargin = 0.1;
+constexpr float kBlendshapesDiffMargin = 0.12;
 constexpr float kFacialTransformationMatrixDiffMargin = 0.02;
template <typename ProtoT>
@@ -99,13 +99,9 @@ struct FaceLandmarkerTestParams {
 };
 
 mediapipe::MatrixData MakePortraitExpectedFacialTransformationMatrix() {
-  const Matrix matrix{{0.9995292, -0.005092691, 0.030254554, -0.37340546},
-                      {0.0072318087, 0.99744856, -0.07102106, 22.212194},
-                      {-0.029815676, 0.07120642, 0.9970159, -64.76358},
-                      {0, 0, 0, 1}};
-  mediapipe::MatrixData matrix_data;
-  MatrixDataProtoFromMatrix(matrix, &matrix_data);
-  return matrix_data;
+  auto face_geometry = GetExpectedProto<face_geometry::proto::FaceGeometry>(
+      kPortaitExpectedFaceGeomertyName);
+  return face_geometry.pose_transform_matrix();
 }
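The hard-coded expected matrix above is replaced by reading pose_transform_matrix out of the expected FaceGeometry proto, which keeps the golden data in one file. That field is a mediapipe::MatrixData; a hedged sketch of turning it back into an Eigen matrix, assuming MatrixFromMatrixDataProto from mediapipe/framework/formats/matrix.h is the counterpart of the MatrixDataProtoFromMatrix call the old code used:

#include "mediapipe/framework/formats/matrix.h"
#include "mediapipe/framework/formats/matrix_data.pb.h"

// Sketch only: recover the 4x4 pose transform as an Eigen matrix.
mediapipe::Matrix ToEigenMatrix(const mediapipe::MatrixData& data) {
  mediapipe::Matrix m;
  // Assumed helper, the inverse of MatrixDataProtoFromMatrix used above.
  mediapipe::MatrixFromMatrixDataProto(data, &m);
  return m;
}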
testing::Matcher<components::containers::NormalizedLandmark> LandmarkIs(
@@ -232,17 +228,8 @@ TEST_P(ImageModeTest, Succeeds) {
 INSTANTIATE_TEST_SUITE_P(
     FaceLandmarkerTest, ImageModeTest,
-    Values(FaceLandmarkerTestParams{
-               /* test_name= */ "Portrait",
-               /* input_model_name= */ kFaceLandmarkerModelBundleName,
-               /* test_image_name= */ kPortraitImageName,
-               /* rotation= */ 0,
-               /* expected_result= */
-               ConvertToFaceLandmarkerResult(
-                   {GetExpectedProto<NormalizedLandmarkList>(
-                       kPortraitExpectedFaceLandamrksName)})},
-           FaceLandmarkerTestParams{
-               /* test_name= */ "PortraitWithAttention",
+    Values(
+        FaceLandmarkerTestParams{/* test_name= */ "PortraitV2",
                                  /* input_model_name= */
                                  kFaceLandmarkerWithBlendshapesModelBundleName,
                                  /* test_image_name= */ kPortraitImageName,
@@ -250,9 +237,8 @@ INSTANTIATE_TEST_SUITE_P(
                                  /* expected_result= */
                                  ConvertToFaceLandmarkerResult(
                                      {GetExpectedProto<NormalizedLandmarkList>(
-                                         kPortraitExpectedFaceLandmarksWithAttentionName)})},
-           FaceLandmarkerTestParams{
-               /* test_name= */ "PortraitWithBlendshapes",
+                                         kPortraitExpectedFaceLandmarksName)})},
+        FaceLandmarkerTestParams{/* test_name= */ "PortraitWithBlendshapes",
                                  /* input_model_name= */
                                  kFaceLandmarkerWithBlendshapesModelBundleName,
                                  /* test_image_name= */ kPortraitImageName,
@@ -260,7 +246,7 @@ INSTANTIATE_TEST_SUITE_P(
             /* expected_result= */
             ConvertToFaceLandmarkerResult(
                 {GetExpectedProto<NormalizedLandmarkList>(
-                    kPortraitExpectedFaceLandmarksWithAttentionName)},
+                    kPortraitExpectedFaceLandmarksName)},
                 {{GetExpectedProto<ClassificationList>(
                     kPortraitExpectedBlendshapesName)}})},
         FaceLandmarkerTestParams{
@@ -273,7 +259,7 @@ INSTANTIATE_TEST_SUITE_P(
             /* expected_result= */
             ConvertToFaceLandmarkerResult(
                 {GetExpectedProto<NormalizedLandmarkList>(
-                    kPortraitExpectedFaceLandmarksWithAttentionName)},
+                    kPortraitExpectedFaceLandmarksName)},
                 {{GetExpectedProto<ClassificationList>(
                     kPortraitExpectedBlendshapesName)}},
                 {{MakePortraitExpectedFacialTransformationMatrix()}})}),
@@ -318,17 +304,9 @@ TEST_P(VideoModeTest, Succeeds) {
 INSTANTIATE_TEST_SUITE_P(
     FaceLandmarkerTest, VideoModeTest,
-    Values(FaceLandmarkerTestParams{
-               /* test_name= */ "Portrait",
-               /* input_model_name= */ kFaceLandmarkerModelBundleName,
-               /* test_image_name= */ kPortraitImageName,
-               /* rotation= */ 0,
-               /* expected_result= */
-               ConvertToFaceLandmarkerResult(
-                   {GetExpectedProto<NormalizedLandmarkList>(
-                       kPortraitExpectedFaceLandamrksName)})},
-           FaceLandmarkerTestParams{
-               /* test_name= */ "PortraitWithAttention",
+    Values(
+        FaceLandmarkerTestParams{/* test_name= */ "Portrait",
                                  /* input_model_name= */
                                  kFaceLandmarkerWithBlendshapesModelBundleName,
                                  /* test_image_name= */ kPortraitImageName,
@@ -336,9 +314,8 @@ INSTANTIATE_TEST_SUITE_P(
                                  /* expected_result= */
                                  ConvertToFaceLandmarkerResult(
                                      {GetExpectedProto<NormalizedLandmarkList>(
-                                         kPortraitExpectedFaceLandmarksWithAttentionName)})},
-           FaceLandmarkerTestParams{
-               /* test_name= */ "PortraitWithBlendshapes",
+                                         kPortraitExpectedFaceLandmarksName)})},
+        FaceLandmarkerTestParams{/* test_name= */ "PortraitWithBlendshapes",
                                  /* input_model_name= */
                                  kFaceLandmarkerWithBlendshapesModelBundleName,
                                  /* test_image_name= */ kPortraitImageName,
@@ -346,7 +323,7 @@ INSTANTIATE_TEST_SUITE_P(
                                  /* expected_result= */
                                  ConvertToFaceLandmarkerResult(
                                      {GetExpectedProto<NormalizedLandmarkList>(
-                                         kPortraitExpectedFaceLandmarksWithAttentionName)},
+                                         kPortraitExpectedFaceLandmarksName)},
                                      {{GetExpectedProto<ClassificationList>(
                                          kPortraitExpectedBlendshapesName)}})}),
     [](const TestParamInfo<VideoModeTest::ParamType>& info) {
@@ -413,17 +390,8 @@ TEST_P(LiveStreamModeTest, Succeeds) {
 INSTANTIATE_TEST_SUITE_P(
     FaceLandmarkerTest, LiveStreamModeTest,
-    Values(FaceLandmarkerTestParams{
-               /* test_name= */ "Portrait",
-               /* input_model_name= */ kFaceLandmarkerModelBundleName,
-               /* test_image_name= */ kPortraitImageName,
-               /* rotation= */ 0,
-               /* expected_result= */
-               ConvertToFaceLandmarkerResult(
-                   {GetExpectedProto<NormalizedLandmarkList>(
-                       kPortraitExpectedFaceLandamrksName)})},
-           FaceLandmarkerTestParams{
-               /* test_name= */ "PortraitWithAttention",
+    Values(
+        FaceLandmarkerTestParams{/* test_name= */ "Portrait",
                                  /* input_model_name= */
                                  kFaceLandmarkerWithBlendshapesModelBundleName,
                                  /* test_image_name= */ kPortraitImageName,
@@ -431,9 +399,8 @@ INSTANTIATE_TEST_SUITE_P(
                                  /* expected_result= */
                                  ConvertToFaceLandmarkerResult(
                                      {GetExpectedProto<NormalizedLandmarkList>(
-                                         kPortraitExpectedFaceLandmarksWithAttentionName)})},
-           FaceLandmarkerTestParams{
-               /* test_name= */ "PortraitWithBlendshapes",
+                                         kPortraitExpectedFaceLandmarksName)})},
+        FaceLandmarkerTestParams{/* test_name= */ "PortraitWithBlendshapes",
                                  /* input_model_name= */
                                  kFaceLandmarkerWithBlendshapesModelBundleName,
                                  /* test_image_name= */ kPortraitImageName,
@@ -441,7 +408,7 @@ INSTANTIATE_TEST_SUITE_P(
                                  /* expected_result= */
                                  ConvertToFaceLandmarkerResult(
                                      {GetExpectedProto<NormalizedLandmarkList>(
-                                         kPortraitExpectedFaceLandmarksWithAttentionName)},
+                                         kPortraitExpectedFaceLandmarksName)},
                                      {{GetExpectedProto<ClassificationList>(
                                          kPortraitExpectedBlendshapesName)}})}),
     [](const TestParamInfo<LiveStreamModeTest::ParamType>& info) {

mediapipe/tasks/cc/vision/face_landmarker/face_landmarks_detector_graph.cc

@@ -82,8 +82,6 @@ constexpr char kBlendshapesTag[] = "BLENDSHAPES";
 // a landmarks tensor and a scores tensor
 constexpr int kFaceLandmarksOutputTensorsNum = 2;
-// 6 landmarks tensors and a scores tensor.
-constexpr int kAttentionMeshOutputTensorsNum = 7;
 
 struct SingleFaceLandmarksOutputs {
   Stream<NormalizedLandmarkList> landmarks;
@@ -116,15 +114,7 @@ absl::Status SanityCheckOptions(
 // Split face landmark detection model output tensor into two parts,
 // representing landmarks and face presence scores.
 void ConfigureSplitTensorVectorCalculator(
-    bool is_attention_model, mediapipe::SplitVectorCalculatorOptions* options) {
-  if (is_attention_model) {
-    auto* range = options->add_ranges();
-    range->set_begin(0);
-    range->set_end(kAttentionMeshOutputTensorsNum - 1);
-    range = options->add_ranges();
-    range->set_begin(kAttentionMeshOutputTensorsNum - 1);
-    range->set_end(kAttentionMeshOutputTensorsNum);
-  } else {
+    mediapipe::SplitVectorCalculatorOptions* options) {
   auto* range = options->add_ranges();
   range->set_begin(0);
   range->set_end(kFaceLandmarksOutputTensorsNum - 1);
@@ -132,12 +122,10 @@ void ConfigureSplitTensorVectorCalculator(
   range->set_begin(kFaceLandmarksOutputTensorsNum - 1);
   range->set_end(kFaceLandmarksOutputTensorsNum);
-  }
 }
 
 void ConfigureTensorsToFaceLandmarksGraph(
-    const ImageTensorSpecs& input_image_tensor_spec, bool is_attention_model,
+    const ImageTensorSpecs& input_image_tensor_spec,
     proto::TensorsToFaceLandmarksGraphOptions* options) {
-  options->set_is_attention_model(is_attention_model);
   options->set_input_image_height(input_image_tensor_spec.image_height);
   options->set_input_image_width(input_image_tensor_spec.image_width);
 }
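With the attention branch gone, the split above is unconditional: the v2 model's output vector always holds kFaceLandmarksOutputTensorsNum == 2 tensors. A standalone sketch spelling out the two ranges that configuration produces (restatement for clarity, not commit code; ConfigureSplitForV2 is a hypothetical name):

#include "mediapipe/calculators/core/split_vector_calculator.pb.h"

// Sketch: the two ranges the simplified configuration above emits.
void ConfigureSplitForV2(mediapipe::SplitVectorCalculatorOptions* options) {
  constexpr int kOutputTensorsNum = 2;  // kFaceLandmarksOutputTensorsNum
  auto* landmarks_range = options->add_ranges();
  landmarks_range->set_begin(0);
  landmarks_range->set_end(kOutputTensorsNum - 1);  // [0, 1): landmarks tensor
  auto* score_range = options->add_ranges();
  score_range->set_begin(kOutputTensorsNum - 1);
  score_range->set_end(kOutputTensorsNum);  // [1, 2): face presence score
}
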
@@ -160,12 +148,6 @@ void ConfigureFaceRectTransformationCalculator(
   options->set_square_long(true);
 }
 
-bool IsAttentionModel(const core::ModelResources& model_resources) {
-  const auto* model = model_resources.GetTfLiteModel();
-  const auto* primary_subgraph = (*model->subgraphs())[0];
-  return primary_subgraph->outputs()->size() == kAttentionMeshOutputTensorsNum;
-}
-
 }  // namespace
 
 // A "mediapipe.tasks.vision.face_landmarker.SingleFaceLandmarksDetectorGraph"
@@ -342,10 +324,8 @@ class SingleFaceLandmarksDetectorGraph : public core::ModelTaskGraph {
     auto output_tensors = inference.Out(kTensorsTag);
 
     // Split model output tensors to multiple streams.
-    bool is_attention_model = IsAttentionModel(model_resources);
     auto& split_tensors_vector = graph.AddNode("SplitTensorVectorCalculator");
     ConfigureSplitTensorVectorCalculator(
-        is_attention_model,
        &split_tensors_vector
            .GetOptions<mediapipe::SplitVectorCalculatorOptions>());
     output_tensors >> split_tensors_vector.In("");
@@ -359,7 +339,7 @@ class SingleFaceLandmarksDetectorGraph : public core::ModelTaskGraph {
     auto& tensors_to_face_landmarks = graph.AddNode(
         "mediapipe.tasks.vision.face_landmarker.TensorsToFaceLandmarksGraph");
     ConfigureTensorsToFaceLandmarksGraph(
-        image_tensor_specs, is_attention_model,
+        image_tensor_specs,
         &tensors_to_face_landmarks
             .GetOptions<proto::TensorsToFaceLandmarksGraphOptions>());
     landmark_tensors >> tensors_to_face_landmarks.In(kTensorsTag);

mediapipe/tasks/cc/vision/face_landmarker/face_landmarks_detector_graph_test.cc

@@ -65,18 +65,15 @@ using ::testing::proto::Approximately;
 using ::testing::proto::Partially;
 
 constexpr char kTestDataDirectory[] = "/mediapipe/tasks/testdata/vision/";
-constexpr char kFaceLandmarksDetectionModel[] = "face_landmark.tflite";
-constexpr char kFaceLandmarksDetectionWithAttentionModel[] =
-    "face_landmark_with_attention.tflite";
+constexpr char kFaceLandmarksV2Model[] =
+    "facemesh2_lite_iris_faceflag_2023_02_14.tflite";
 constexpr char kPortraitImageName[] = "portrait.jpg";
 constexpr char kCatImageName[] = "cat.jpg";
-constexpr char kPortraitExpectedFaceLandamrksName[] =
+constexpr char kPortraitExpectedFaceLandmarksName[] =
     "portrait_expected_face_landmarks.pbtxt";
-constexpr char kPortraitExpectedFaceLandamrksWithAttentionName[] =
-    "portrait_expected_face_landmarks_with_attention.pbtxt";
 constexpr char kFaceBlendshapesModel[] = "face_blendshapes.tflite";
 constexpr char kPortraitExpectedBlendshapesName[] =
-    "portrait_expected_blendshapes_with_attention.pbtxt";
+    "portrait_expected_blendshapes.pbtxt";
 constexpr char kImageTag[] = "IMAGE";
 constexpr char kImageName[] = "image";
@@ -363,35 +360,22 @@ TEST_P(MultiFaceLandmarksDetectionTest, Succeeds) {
 INSTANTIATE_TEST_SUITE_P(
     FaceLandmarksDetectionTest, SingleFaceLandmarksDetectionTest,
     Values(SingeFaceTestParams{
-               /* test_name= */ "Portrait",
-               /* landmarks_model_name= */ kFaceLandmarksDetectionModel,
+               /* test_name= */ "PortraitV2",
+               /* landmarks_model_name= */
+               kFaceLandmarksV2Model,
                /* blendshape_model_name= */ std::nullopt,
                /* test_image_name= */ kPortraitImageName,
                /* norm_rect= */ MakeNormRect(0.4987, 0.2211, 0.2877, 0.2303, 0),
                /* expected_presence= */ true,
                /* expected_landmarks= */
-               GetExpectedLandmarkList(kPortraitExpectedFaceLandamrksName),
+               GetExpectedLandmarkList(kPortraitExpectedFaceLandmarksName),
                /* expected_blendshapes= */ std::nullopt,
                /* landmarks_diff_threshold= */ kFractionDiff,
                /* blendshapes_diff_threshold= */ kBlendshapesDiffMargin},
            SingeFaceTestParams{
-               /* test_name= */ "PortraitWithAttention",
+               /* test_name= */ "PortraitV2WithBlendshapes",
                /* landmarks_model_name= */
-               kFaceLandmarksDetectionWithAttentionModel,
-               /* blendshape_model_name= */ std::nullopt,
-               /* test_image_name= */ kPortraitImageName,
-               /* norm_rect= */ MakeNormRect(0.4987, 0.2211, 0.2877, 0.2303, 0),
-               /* expected_presence= */ true,
-               /* expected_landmarks= */
-               GetExpectedLandmarkList(
-                   kPortraitExpectedFaceLandamrksWithAttentionName),
-               /* expected_blendshapes= */ std::nullopt,
-               /* landmarks_diff_threshold= */ kFractionDiff,
-               /* blendshapes_diff_threshold= */ kBlendshapesDiffMargin},
-           SingeFaceTestParams{
-               /* test_name= */ "PortraitWithAttentionWithBlendshapes",
-               /* landmarks_model_name= */
-               kFaceLandmarksDetectionWithAttentionModel,
+               kFaceLandmarksV2Model,
                /* blendshape_model_name= */ kFaceBlendshapesModel,
                /* test_image_name= */ kPortraitImageName,
                /* norm_rect= */
@@ -399,8 +383,7 @@ INSTANTIATE_TEST_SUITE_P(
                    0.008304443),
                /* expected_presence= */ true,
                /* expected_landmarks= */
-               GetExpectedLandmarkList(
-                   kPortraitExpectedFaceLandamrksWithAttentionName),
+               GetExpectedLandmarkList(kPortraitExpectedFaceLandmarksName),
                /* expected_blendshapes= */
                GetBlendshapes(kPortraitExpectedBlendshapesName),
                /* landmarks_diff_threshold= */ kFractionDiff,
@@ -414,35 +397,22 @@ INSTANTIATE_TEST_SUITE_P(
     FaceLandmarksDetectionTest, MultiFaceLandmarksDetectionTest,
     Values(
         MultiFaceTestParams{
-            /* test_name= */ "Portrait",
-            /* landmarks_model_name= */ kFaceLandmarksDetectionModel,
+            /* test_name= */ "PortraitWithV2",
+            /* landmarks_model_name= */
+            kFaceLandmarksV2Model,
             /* blendshape_model_name= */ std::nullopt,
             /* test_image_name= */ kPortraitImageName,
             /* norm_rects= */ {MakeNormRect(0.4987, 0.2211, 0.2877, 0.2303, 0)},
             /* expected_presence= */ {true},
             /* expected_landmarks_list= */
-            {{GetExpectedLandmarkList(kPortraitExpectedFaceLandamrksName)}},
+            {{GetExpectedLandmarkList(kPortraitExpectedFaceLandmarksName)}},
             /* expected_blendshapes= */ std::nullopt,
             /* landmarks_diff_threshold= */ kFractionDiff,
             /* blendshapes_diff_threshold= */ kBlendshapesDiffMargin},
         MultiFaceTestParams{
-            /* test_name= */ "PortraitWithAttention",
+            /* test_name= */ "PortraitWithV2WithBlendshapes",
             /* landmarks_model_name= */
-            kFaceLandmarksDetectionWithAttentionModel,
-            /* blendshape_model_name= */ std::nullopt,
-            /* test_image_name= */ kPortraitImageName,
-            /* norm_rects= */ {MakeNormRect(0.4987, 0.2211, 0.2877, 0.2303, 0)},
-            /* expected_presence= */ {true},
-            /* expected_landmarks_list= */
-            {{GetExpectedLandmarkList(
-                kPortraitExpectedFaceLandamrksWithAttentionName)}},
-            /* expected_blendshapes= */ std::nullopt,
-            /* landmarks_diff_threshold= */ kFractionDiff,
-            /* blendshapes_diff_threshold= */ kBlendshapesDiffMargin},
-        MultiFaceTestParams{
-            /* test_name= */ "PortraitWithAttentionWithBlendshapes",
-            /* landmarks_model_name= */
-            kFaceLandmarksDetectionWithAttentionModel,
+            kFaceLandmarksV2Model,
             /* blendshape_model_name= */ kFaceBlendshapesModel,
             /* test_image_name= */ kPortraitImageName,
             /* norm_rects= */
@@ -450,8 +420,7 @@ INSTANTIATE_TEST_SUITE_P(
                 0.008304443)},
             /* expected_presence= */ {true},
             /* expected_landmarks_list= */
-            {{GetExpectedLandmarkList(
-                kPortraitExpectedFaceLandamrksWithAttentionName)}},
+            {{GetExpectedLandmarkList(kPortraitExpectedFaceLandmarksName)}},
             /* expected_blendshapes= */
             {{GetBlendshapes(kPortraitExpectedBlendshapesName)}},
             /* landmarks_diff_threshold= */ kFractionDiff,
@@ -459,7 +428,7 @@ INSTANTIATE_TEST_SUITE_P(
         MultiFaceTestParams{
             /* test_name= */ "NoFace",
             /* landmarks_model_name= */
-            kFaceLandmarksDetectionModel,
+            kFaceLandmarksV2Model,
             /* blendshape_model_name= */ std::nullopt,
             /* test_image_name= */ kCatImageName,
             /* norm_rects= */ {MakeNormRect(0.5, 0.5, 1.0, 1.0, 0)},

mediapipe/tasks/cc/vision/face_landmarker/proto/tensors_to_face_landmarks_graph_options.proto

@@ -25,11 +25,7 @@ message TensorsToFaceLandmarksGraphOptions {
     optional TensorsToFaceLandmarksGraphOptions ext = 509621260;
   }
 
-  // Whether the landmarks model is with attention on lips and eyes. Attention
-  // provides more accuracy on lips and eye regions as well as iris landmarks.
-  optional bool is_attention_model = 1 [default = false];
-  optional int32 input_image_width = 2;
-  optional int32 input_image_height = 3;
+  optional int32 input_image_width = 1;
+  optional int32 input_image_height = 2;
 }
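Read as before/after: fields 1-3 (is_attention_model, width, height) become fields 1-2 (width, height). Renumbering proto fields breaks wire compatibility with any previously serialized options, which is presumably acceptable for an internal subgraph options message. A hedged sketch exercising the surviving fields through the generated C++ API (the 256x256 size is an arbitrary example value):

#include <iostream>
#include "google/protobuf/text_format.h"
#include "mediapipe/tasks/cc/vision/face_landmarker/proto/tensors_to_face_landmarks_graph_options.pb.h"

int main() {
  mediapipe::tasks::vision::face_landmarker::proto::
      TensorsToFaceLandmarksGraphOptions options;
  // Only the two renumbered fields remain; "is_attention_model: true"
  // would no longer parse.
  google::protobuf::TextFormat::ParseFromString(
      "input_image_width: 256 input_image_height: 256", &options);
  std::cout << options.input_image_width() << "x"
            << options.input_image_height() << std::endl;
  return 0;
}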

mediapipe/tasks/cc/vision/face_landmarker/tensors_to_face_landmarks_graph.cc

@@ -44,8 +44,8 @@ constexpr char kTensorsTag[] = "TENSORS";
 constexpr char kNormLandmarksTag[] = "NORM_LANDMARKS";
 constexpr char kLandmarksTag[] = "LANDMARKS";
 constexpr char kRefinedLandmarksTag[] = "REFINED_LANDMARKS";
-constexpr int kAttentionModelSplitNum = 6;
 constexpr int kMeshLandmarksNum = 468;
+constexpr int kMeshWithIrisLandmarksNum = 478;
 constexpr int kLipsLandmarksNum = 80;
 constexpr int kEyeLandmarksNum = 71;
 constexpr int kIrisLandmarksNum = 5;
@@ -183,15 +183,6 @@ constexpr std::array<int, kContoursNumForIrisAvg> kRightIrisAvgIndices = {
     // Upper contour (excluding corners).
     466, 388, 387, 386, 385, 384, 398};
 
-void ConfigureSplitTensorVectorCalculator(
-    mediapipe::SplitVectorCalculatorOptions* options) {
-  for (int i = 0; i < kAttentionModelSplitNum; ++i) {
-    auto* range = options->add_ranges();
-    range->set_begin(i);
-    range->set_end(i + 1);
-  }
-}
-
 Stream<NormalizedLandmarkList> ConvertTensorsToLandmarks(
     int landmarks_num, int input_image_width, int input_image_height,
     Stream<std::vector<Tensor>> tensors, Graph& graph) {
@@ -207,79 +198,9 @@ Stream<NormalizedLandmarkList> ConvertTensorsToLandmarks(
       .Cast<NormalizedLandmarkList>();
 }
 
-Stream<NormalizedLandmarkList> RefineFaceLandmarks(
-    Stream<NormalizedLandmarkList> mesh_landmarks,
-    Stream<NormalizedLandmarkList> lips_landmarks,
-    Stream<NormalizedLandmarkList> left_eye_landmarks,
-    Stream<NormalizedLandmarkList> right_eye_landmarks,
-    Stream<NormalizedLandmarkList> left_iris_landmarks,
-    Stream<NormalizedLandmarkList> right_iris_landmarks, Graph& graph) {
-  auto& refine_landmarks = graph.AddNode("LandmarksRefinementCalculator");
-  auto& refinement_options =
-      refine_landmarks
-          .GetOptions<mediapipe::LandmarksRefinementCalculatorOptions>();
-  // Face mesh landmarks.
-  auto* refinement_for_mesh = refinement_options.add_refinement();
-  refinement_for_mesh->mutable_indexes_mapping()->Assign(
-      kMeshLandmarksIndicesMapping.begin(), kMeshLandmarksIndicesMapping.end());
-  refinement_for_mesh->mutable_z_refinement()->mutable_copy();
-  // Lips landmarks.
-  auto* refinement_for_lips = refinement_options.add_refinement();
-  refinement_for_lips->mutable_indexes_mapping()->Assign(
-      kLipsLandmarksIndicesMapping.begin(), kLipsLandmarksIndicesMapping.end());
-  refinement_for_lips->mutable_z_refinement()->mutable_none();
-  // Left eye landmarks.
-  auto* refinement_for_left_eye = refinement_options.add_refinement();
-  refinement_for_left_eye->mutable_indexes_mapping()->Assign(
-      kLeftEyeLandmarksIndicesMapping.begin(),
-      kLeftEyeLandmarksIndicesMapping.end());
-  refinement_for_left_eye->mutable_z_refinement()->mutable_none();
-  // Right eye landmarks.
-  auto* refinement_for_right_eye = refinement_options.add_refinement();
-  refinement_for_right_eye->mutable_indexes_mapping()->Assign(
-      kRightEyeLandmarksIndicesMapping.begin(),
-      kRightEyeLandmarksIndicesMapping.end());
-  refinement_for_right_eye->mutable_z_refinement()->mutable_none();
-  // Left iris landmarks.
-  auto* refinement_for_left_iris = refinement_options.add_refinement();
-  refinement_for_left_iris->mutable_indexes_mapping()->Assign(
-      kLeftIrisLandmarksIndicesMapping.begin(),
-      kLeftIrisLandmarksIndicesMapping.end());
-  refinement_for_left_iris->mutable_z_refinement()
-      ->mutable_assign_average()
-      ->mutable_indexes_for_average()
-      ->Assign(kLeftIrisAvgIndices.begin(), kLeftIrisAvgIndices.end());
-  // Right iris landmarks.
-  auto* refinement_for_right_iris = refinement_options.add_refinement();
-  refinement_for_right_iris->mutable_indexes_mapping()->Assign(
-      kRightIrisLandmarksIndicesMapping.begin(),
-      kRightIrisLandmarksIndicesMapping.end());
-  refinement_for_right_iris->mutable_z_refinement()
-      ->mutable_assign_average()
-      ->mutable_indexes_for_average()
-      ->Assign(kRightIrisAvgIndices.begin(), kRightIrisAvgIndices.end());
-  mesh_landmarks >> refine_landmarks.In(kLandmarksTag)[0];
-  lips_landmarks >> refine_landmarks.In(kLandmarksTag)[1];
-  left_eye_landmarks >> refine_landmarks.In(kLandmarksTag)[2];
-  right_eye_landmarks >> refine_landmarks.In(kLandmarksTag)[3];
-  left_iris_landmarks >> refine_landmarks.In(kLandmarksTag)[4];
-  right_iris_landmarks >> refine_landmarks.In(kLandmarksTag)[5];
-  return refine_landmarks.Out(kRefinedLandmarksTag)
-      .Cast<NormalizedLandmarkList>();
-}
-
 }  // namespace
 
 // Graph to transform face landmarks model output tensors into landmarks.
-// The graph can support two types of model: regular and attention model with
-// refined lips, eye and irises.
 //
 // Inputs:
 //   TENSORS - std::vector<Tensor>
@@ -324,41 +245,9 @@ class TensorsToFaceLandmarksGraph : public Subgraph {
       Stream<std::vector<Tensor>> tensors, Graph& graph) {
     const int input_image_width = subgraph_options.input_image_width();
     const int input_image_height = subgraph_options.input_image_height();
-    if (subgraph_options.is_attention_model()) {
-      // Split tensors from attention model to 6 streams: mesh, lips, left_eye,
-      // right_eye, left_iris and right iris.
-      auto& split_tensors_vector = graph.AddNode("SplitTensorVectorCalculator");
-      ConfigureSplitTensorVectorCalculator(
-          &split_tensors_vector
-              .GetOptions<mediapipe::SplitVectorCalculatorOptions>());
-      tensors >> split_tensors_vector.In("");
-      auto mesh_landmarks = ConvertTensorsToLandmarks(
-          kMeshLandmarksNum, input_image_width, input_image_height,
-          split_tensors_vector.Out(0).Cast<std::vector<Tensor>>(), graph);
-      auto lips_landmarks = ConvertTensorsToLandmarks(
-          kLipsLandmarksNum, input_image_width, input_image_height,
-          split_tensors_vector.Out(1).Cast<std::vector<Tensor>>(), graph);
-      auto left_eye_landmarks = ConvertTensorsToLandmarks(
-          kEyeLandmarksNum, input_image_width, input_image_height,
-          split_tensors_vector.Out(2).Cast<std::vector<Tensor>>(), graph);
-      auto right_eye_landmarks = ConvertTensorsToLandmarks(
-          kEyeLandmarksNum, input_image_width, input_image_height,
-          split_tensors_vector.Out(3).Cast<std::vector<Tensor>>(), graph);
-      auto left_iris_landmarks = ConvertTensorsToLandmarks(
-          kIrisLandmarksNum, input_image_width, input_image_height,
-          split_tensors_vector.Out(4).Cast<std::vector<Tensor>>(), graph);
-      auto right_iris_landmarks = ConvertTensorsToLandmarks(
-          kIrisLandmarksNum, input_image_width, input_image_height,
-          split_tensors_vector.Out(5).Cast<std::vector<Tensor>>(), graph);
-      return RefineFaceLandmarks(mesh_landmarks, lips_landmarks,
-                                 left_eye_landmarks, right_eye_landmarks,
-                                 left_iris_landmarks, right_iris_landmarks,
-                                 graph);
-    } else {
-      return ConvertTensorsToLandmarks(kMeshLandmarksNum, input_image_width,
-                                       input_image_height, tensors, graph);
-    }
+    return ConvertTensorsToLandmarks(kMeshWithIrisLandmarksNum,
+                                     input_image_width, input_image_height,
+                                     tensors, graph);
   }
 };
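The subgraph body now reduces to a single TensorsToLandmarksCalculator pass over all 478 points. A sketch of the options that one remaining ConvertTensorsToLandmarks call applies, assuming the stock TensorsToLandmarksCalculator options proto (the helper name is illustrative):

#include "mediapipe/calculators/tensor/tensors_to_landmarks_calculator.pb.h"

// Sketch: options for the single remaining conversion step.
void ConfigureTensorsToLandmarks(
    int input_image_width, int input_image_height,
    mediapipe::TensorsToLandmarksCalculatorOptions* options) {
  // v2 packs the whole mesh plus both irises into one tensor:
  // 468 mesh points + 2 * 5 iris points = 478.
  options->set_num_landmarks(478);  // kMeshWithIrisLandmarksNum
  // Width/height de-normalize raw tensor values into normalized
  // landmark coordinates.
  options->set_input_image_width(input_image_width);
  options->set_input_image_height(input_image_height);
}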

mediapipe/tasks/javatests/com/google/mediapipe/tasks/vision/facelandmarker/FaceLandmarkerTest.java

@@ -50,19 +50,16 @@ import org.junit.runners.Suite.SuiteClasses;
 @SuiteClasses({FaceLandmarkerTest.General.class, FaceLandmarkerTest.RunningModeTest.class})
 public class FaceLandmarkerTest {
   private static final String FACE_LANDMARKER_BUNDLE_ASSET_FILE =
-      "face_landmarker_with_blendshapes.task";
+      "face_landmarker_v2_with_blendshapes.task";
   private static final String PORTRAIT_IMAGE = "portrait.jpg";
   private static final String CAT_IMAGE = "cat.jpg";
-  private static final String PORTRAIT_FACE_LANDMARKS =
-      "portrait_expected_face_landmarks_with_attention.pb";
-  private static final String PORTRAIT_FACE_BLENDSHAPES =
-      "portrait_expected_blendshapes_with_attention.pb";
-  private static final String PORTRAIT_FACE_GEOMETRY =
-      "portrait_expected_face_geometry_with_attention.pb";
+  private static final String PORTRAIT_FACE_LANDMARKS = "portrait_expected_face_landmarks.pb";
+  private static final String PORTRAIT_FACE_BLENDSHAPES = "portrait_expected_blendshapes.pb";
+  private static final String PORTRAIT_FACE_GEOMETRY = "portrait_expected_face_geometry.pb";
   private static final String TAG = "Face Landmarker Test";
   private static final float FACE_LANDMARKS_ERROR_TOLERANCE = 0.01f;
-  private static final float FACE_BLENDSHAPES_ERROR_TOLERANCE = 0.1f;
-  private static final float FACIAL_TRANSFORMATION_MATRIX_ERROR_TOLERANCE = 0.01f;
+  private static final float FACE_BLENDSHAPES_ERROR_TOLERANCE = 0.13f;
+  private static final float FACIAL_TRANSFORMATION_MATRIX_ERROR_TOLERANCE = 0.02f;
   private static final int IMAGE_WIDTH = 820;
   private static final int IMAGE_HEIGHT = 1024;

mediapipe/tasks/python/test/vision/face_landmarker_test.py

@@ -46,16 +46,11 @@ _FaceLandmarkerOptions = face_landmarker.FaceLandmarkerOptions
 _RUNNING_MODE = running_mode_module.VisionTaskRunningMode
 _ImageProcessingOptions = image_processing_options_module.ImageProcessingOptions
 
-_FACE_LANDMARKER_BUNDLE_ASSET_FILE = 'face_landmarker.task'
+_FACE_LANDMARKER_BUNDLE_ASSET_FILE = 'face_landmarker_v2.task'
 _PORTRAIT_IMAGE = 'portrait.jpg'
 _CAT_IMAGE = 'cat.jpg'
 _PORTRAIT_EXPECTED_FACE_LANDMARKS = 'portrait_expected_face_landmarks.pbtxt'
-_PORTRAIT_EXPECTED_FACE_LANDMARKS_WITH_ATTENTION = (
-    'portrait_expected_face_landmarks_with_attention.pbtxt'
-)
-_PORTRAIT_EXPECTED_BLENDSHAPES = (
-    'portrait_expected_blendshapes_with_attention.pbtxt'
-)
+_PORTRAIT_EXPECTED_BLENDSHAPES = 'portrait_expected_blendshapes.pbtxt'
 _LANDMARKS_DIFF_MARGIN = 0.03
 _BLENDSHAPES_DIFF_MARGIN = 0.13
 _FACIAL_TRANSFORMATION_MATRIX_DIFF_MARGIN = 0.02
@@ -94,9 +89,9 @@ def _get_expected_face_blendshapes(file_path: str):
 def _get_expected_facial_transformation_matrixes():
   matrix = np.array([
-      [0.9995292, -0.005092691, 0.030254554, -0.37340546],
-      [0.0072318087, 0.99744856, -0.07102106, 22.212194],
-      [-0.029815676, 0.07120642, 0.9970159, -64.76358],
+      [0.9995292, -0.01294756, 0.038823195, -0.3691378],
+      [0.0072318087, 0.9937692, -0.1101321, 22.75809],
+      [-0.03715533, 0.11070588, 0.99315894, -65.765925],
       [0, 0, 0, 1],
   ])
   facial_transformation_matrixes_results = []

mediapipe/tasks/testdata/vision/BUILD

@@ -45,8 +45,8 @@ mediapipe_files(srcs = [
     "face_detection_full_range_sparse.tflite",
     "face_detection_short_range.tflite",
     "face_landmark.tflite",
-    "face_landmark_with_attention.tflite",
     "face_landmarker.task",
+    "face_landmarker_v2.task",
     "fist.jpg",
     "fist.png",
     "hair_segmentation.tflite",
@@ -104,7 +104,7 @@ exports_files(
         "face_geometry_expected_out.pbtxt",
         "gesture_recognizer.task",
         "portrait_expected_detection.pbtxt",
-        "portrait_expected_face_geometry_with_attention.pbtxt",
+        "portrait_expected_face_geometry.pbtxt",
         "portrait_rotated_expected_detection.pbtxt",
     ],
 )
@@ -170,9 +170,8 @@ filegroup(
         "face_detection_full_range.tflite",
         "face_detection_full_range_sparse.tflite",
         "face_detection_short_range.tflite",
-        "face_landmark.tflite",
-        "face_landmark_with_attention.tflite",
         "face_landmarker.task",
+        "face_landmarker_v2.task",
         "hair_segmentation.tflite",
         "hand_landmark_full.tflite",
         "hand_landmark_lite.tflite",
@@ -214,9 +213,8 @@ filegroup(
         "pointing_up_landmarks.pbtxt",
         "pointing_up_rotated_landmarks.pbtxt",
         "portrait_expected_detection.pbtxt",
-        "portrait_expected_face_geometry_with_attention.pbtxt",
+        "portrait_expected_face_geometry.pbtxt",
         "portrait_expected_face_landmarks.pbtxt",
-        "portrait_expected_face_landmarks_with_attention.pbtxt",
         "portrait_rotated_expected_detection.pbtxt",
         "pose_expected_detection.pbtxt",
         "pose_expected_expanded_rect.pbtxt",

Binary file not shown.

File diff suppressed because it is too large

third_party/external_files.bzl

@@ -364,6 +364,18 @@ def external_files():
         urls = ["https://storage.googleapis.com/mediapipe-assets/face_landmarker.task?generation=1678323583183024"],
     )
 
+    http_file(
+        name = "com_google_mediapipe_face_landmarker_v2_task",
+        sha256 = "af23fc7c1ff21d034deaa2b7fc1d56bb670ce69a4cbdc9579b6f1afd680835f4",
+        urls = ["https://storage.googleapis.com/mediapipe-assets/face_landmarker_v2.task?generation=1681322464758457"],
+    )
+
+    http_file(
+        name = "com_google_mediapipe_face_landmarker_v2_with_blendshapes_task",
+        sha256 = "b261925d4aad812b47a0e8d58c1baa1223270a5d1f663d78338bc881c003879d",
+        urls = ["https://storage.googleapis.com/mediapipe-assets/face_landmarker_v2_with_blendshapes.task?generation=1681322467931433"],
+    )
+
     http_file(
         name = "com_google_mediapipe_face_landmarker_with_blendshapes_task",
         sha256 = "b44e4cae6f5822456d60f33e7c852640d78c7e342aee7eacc22589451a0b9dc2",
@@ -382,6 +394,12 @@ def external_files():
         urls = ["https://storage.googleapis.com/mediapipe-assets/face_landmark_with_attention.tflite?generation=1676415468821650"],
     )
 
+    http_file(
+        name = "com_google_mediapipe_facemesh2_lite_iris_faceflag_2023_02_14_tflite",
+        sha256 = "bc5ee5de06d8c3a5465c3155227615b164480a52105a2b3df5748250ab4d914f",
+        urls = ["https://storage.googleapis.com/mediapipe-assets/facemesh2_lite_iris_faceflag_2023_02_14.tflite?generation=1681322470818178"],
+    )
+
     http_file(
         name = "com_google_mediapipe_face_stylization_dummy_tflite",
         sha256 = "f57fd2d5638def25466f6fec142eb3397d8ad99a9bd0a9344b622bad7c3f0376",
@@ -863,9 +881,9 @@ def external_files():
     )
 
     http_file(
-        name = "com_google_mediapipe_portrait_expected_blendshapes_with_attention_pbtxt",
+        name = "com_google_mediapipe_portrait_expected_blendshapes_pbtxt",
         sha256 = "3f8f698d8ed81346c6f13d1cc85190fd4a58b021e664d336997d29818b8ffbb6",
-        urls = ["https://storage.googleapis.com/mediapipe-assets/portrait_expected_blendshapes_with_attention.pbtxt?generation=1678323598426417"],
+        urls = ["https://storage.googleapis.com/mediapipe-assets/portrait_expected_blendshapes.pbtxt?generation=1681322480981015"],
     )
http_file(
@@ -875,21 +893,15 @@ def external_files():
     )
 
     http_file(
-        name = "com_google_mediapipe_portrait_expected_face_geometry_with_attention_pbtxt",
-        sha256 = "7ed1eed98e61e0a10811bb611c895d87c8023f398a36db01b6d9ba2e1ab09e16",
-        urls = ["https://storage.googleapis.com/mediapipe-assets/portrait_expected_face_geometry_with_attention.pbtxt?generation=1678737486927530"],
+        name = "com_google_mediapipe_portrait_expected_face_geometry_pbtxt",
+        sha256 = "f1045ae7a479248d5c6729102401308c042068304f393934370be53587ccec9a",
+        urls = ["https://storage.googleapis.com/mediapipe-assets/portrait_expected_face_geometry.pbtxt?generation=1681322483632218"],
     )
 
     http_file(
         name = "com_google_mediapipe_portrait_expected_face_landmarks_pbtxt",
-        sha256 = "4ac8587379bd072c36cda0d7345f5e592fae51b30522475e0b49c18aab108ce7",
-        urls = ["https://storage.googleapis.com/mediapipe-assets/portrait_expected_face_landmarks.pbtxt?generation=1676316357333369"],
-    )
-
-    http_file(
-        name = "com_google_mediapipe_portrait_expected_face_landmarks_with_attention_pbtxt",
         sha256 = "dae959456f001015278f3a1535bd03c9fa0990a3df951135645ce23293be0613",
-        urls = ["https://storage.googleapis.com/mediapipe-assets/portrait_expected_face_landmarks_with_attention.pbtxt?generation=1678218367300928"],
+        urls = ["https://storage.googleapis.com/mediapipe-assets/portrait_expected_face_landmarks.pbtxt?generation=1681322486192872"],
     )
 
     http_file(