Add optional face blendshapes to face landmarks detector graph.

PiperOrigin-RevId: 513867488
2023-03-03 10:46:38 -08:00 · 2023-03-03 10:46:38 -08:00 · b7ec83efb5
commit b7ec83efb5
parent c9c1bf21ae
6 changed files with 575 additions and 48 deletions
--- a/mediapipe/tasks/cc/vision/face_landmarker/BUILD
+++ b/mediapipe/tasks/cc/vision/face_landmarker/BUILD
@ -69,12 +69,14 @@ cc_library(
    name = "face_landmarks_detector_graph",
    srcs = ["face_landmarks_detector_graph.cc"],
    deps = [
        ":face_blendshapes_graph",
        ":tensors_to_face_landmarks_graph",
        "//mediapipe/calculators/core:begin_loop_calculator",
        "//mediapipe/calculators/core:end_loop_calculator",
        "//mediapipe/calculators/core:split_vector_calculator",
        "//mediapipe/calculators/core:split_vector_calculator_cc_proto",
        "//mediapipe/calculators/image:image_properties_calculator",
        "//mediapipe/calculators/tensor:image_to_tensor_calculator_cc_proto",
        "//mediapipe/calculators/tensor:inference_calculator",
        "//mediapipe/calculators/tensor:tensors_to_floats_calculator",
        "//mediapipe/calculators/tensor:tensors_to_floats_calculator_cc_proto",
@ -89,8 +91,11 @@ cc_library(
        "//mediapipe/calculators/util:rect_transformation_calculator_cc_proto",
        "//mediapipe/calculators/util:thresholding_calculator",
        "//mediapipe/calculators/util:thresholding_calculator_cc_proto",
        "//mediapipe/framework:calculator_framework",
        "//mediapipe/framework:calculator_options_cc_proto",
        "//mediapipe/framework/api2:builder",
        "//mediapipe/framework/api2:port",
        "//mediapipe/framework/formats:classification_cc_proto",
        "//mediapipe/framework/formats:image",
        "//mediapipe/framework/formats:landmark_cc_proto",
        "//mediapipe/framework/formats:rect_cc_proto",
@ -101,9 +106,11 @@ cc_library(
        "//mediapipe/tasks/cc/core:model_resources",
        "//mediapipe/tasks/cc/core:model_task_graph",
        "//mediapipe/tasks/cc/core:utils",
        "//mediapipe/tasks/cc/vision/face_landmarker/proto:face_blendshapes_graph_options_cc_proto",
        "//mediapipe/tasks/cc/vision/face_landmarker/proto:face_landmarks_detector_graph_options_cc_proto",
        "//mediapipe/tasks/cc/vision/face_landmarker/proto:tensors_to_face_landmarks_graph_options_cc_proto",
        "//mediapipe/tasks/cc/vision/utils:image_tensor_specs",
        "//mediapipe/util:graph_builder_utils",
    ],
    alwayslink = 1,
 )
--- a/mediapipe/tasks/cc/vision/face_landmarker/face_landmarks_detector_graph.cc
+++ b/mediapipe/tasks/cc/vision/face_landmarker/face_landmarks_detector_graph.cc
@ -14,6 +14,7 @@ limitations under the License.
 ==============================================================================*/
 #include <memory>
 #include <optional>
 #include <type_traits>
 #include <utility>
 #include <vector>
@ -26,6 +27,9 @@ limitations under the License.
 #include "mediapipe/calculators/util/thresholding_calculator.pb.h"
 #include "mediapipe/framework/api2/builder.h"
 #include "mediapipe/framework/api2/port.h"
 #include "mediapipe/framework/calculator_framework.h"
 #include "mediapipe/framework/calculator_options.pb.h"
 #include "mediapipe/framework/formats/classification.pb.h"
 #include "mediapipe/framework/formats/image.h"
 #include "mediapipe/framework/formats/landmark.pb.h"
 #include "mediapipe/framework/formats/rect.pb.h"
@ -36,9 +40,11 @@ limitations under the License.
 #include "mediapipe/tasks/cc/core/model_resources.h"
 #include "mediapipe/tasks/cc/core/model_task_graph.h"
 #include "mediapipe/tasks/cc/core/utils.h"
 #include "mediapipe/tasks/cc/vision/face_landmarker/proto/face_blendshapes_graph_options.pb.h"
 #include "mediapipe/tasks/cc/vision/face_landmarker/proto/face_landmarks_detector_graph_options.pb.h"
 #include "mediapipe/tasks/cc/vision/face_landmarker/proto/tensors_to_face_landmarks_graph_options.pb.h"
 #include "mediapipe/tasks/cc/vision/utils/image_tensor_specs.h"
 #include "mediapipe/util/graph_builder_utils.h"
 namespace mediapipe {
 namespace tasks {
@ -72,6 +78,7 @@ constexpr char kIterableTag[] = "ITERABLE";
 constexpr char kBatchEndTag[] = "BATCH_END";
 constexpr char kItemTag[] = "ITEM";
 constexpr char kDetectionTag[] = "DETECTION";
 constexpr char kBlendshapesTag[] = "BLENDSHAPES";
 // a landmarks tensor and a scores tensor
 constexpr int kFaceLandmarksOutputTensorsNum = 2;
@ -83,6 +90,7 @@ struct SingleFaceLandmarksOutputs {
  Stream<NormalizedRect> rect_next_frame;
  Stream<bool> presence;
  Stream<float> presence_score;
  std::optional<Stream<ClassificationList>> face_blendshapes;
 };
 struct MultiFaceLandmarksOutputs {
@ -90,6 +98,7 @@ struct MultiFaceLandmarksOutputs {
  Stream<std::vector<NormalizedRect>> rects_next_frame;
  Stream<std::vector<bool>> presences;
  Stream<std::vector<float>> presence_scores;
  std::optional<Stream<std::vector<ClassificationList>>> face_blendshapes;
 };
 absl::Status SanityCheckOptions(
@ -180,6 +189,62 @@ bool IsAttentionModel(const core::ModelResources& model_resources) {
 //     Boolean value indicates whether the face is present.
 //   PRESENCE_SCORE - float
 //     Float value indicates the probability that the face is present.
 //   BLENDSHAPES - ClassificationList @optional
 //     Blendshape classification, available when face_blendshapes_graph_options
 //     is set.
 //     All 52 blendshape coefficients:
 //       0  - _neutral  (ignore it)
 //       1  - browDownLeft
 //       2  - browDownRight
 //       3  - browInnerUp
 //       4  - browOuterUpLeft
 //       5  - browOuterUpRight
 //       6  - cheekPuff
 //       7  - cheekSquintLeft
 //       8  - cheekSquintRight
 //       9  - eyeBlinkLeft
 //       10 - eyeBlinkRight
 //       11 - eyeLookDownLeft
 //       12 - eyeLookDownRight
 //       13 - eyeLookInLeft
 //       14 - eyeLookInRight
 //       15 - eyeLookOutLeft
 //       16 - eyeLookOutRight
 //       17 - eyeLookUpLeft
 //       18 - eyeLookUpRight
 //       19 - eyeSquintLeft
 //       20 - eyeSquintRight
 //       21 - eyeWideLeft
 //       22 - eyeWideRight
 //       23 - jawForward
 //       24 - jawLeft
 //       25 - jawOpen
 //       26 - jawRight
 //       27 - mouthClose
 //       28 - mouthDimpleLeft
 //       29 - mouthDimpleRight
 //       30 - mouthFrownLeft
 //       31 - mouthFrownRight
 //       32 - mouthFunnel
 //       33 - mouthLeft
 //       34 - mouthLowerDownLeft
 //       35 - mouthLowerDownRight
 //       36 - mouthPressLeft
 //       37 - mouthPressRight
 //       38 - mouthPucker
 //       39 - mouthRight
 //       40 - mouthRollLower
 //       41 - mouthRollUpper
 //       42 - mouthShrugLower
 //       43 - mouthShrugUpper
 //       44 - mouthSmileLeft
 //       45 - mouthSmileRight
 //       46 - mouthStretchLeft
 //       47 - mouthStretchRight
 //       48 - mouthUpperUpLeft
 //       49 - mouthUpperUpRight
 //       50 - noseSneerLeft
 //       51 - noseSneerRight
 //
 // Example:
 // node {
@ -191,6 +256,7 @@ bool IsAttentionModel(const core::ModelResources& model_resources) {
 //   output_stream: "FACE_RECT_NEXT_FRAME:face_rect_next_frame"
 //   output_stream: "PRESENCE:presence"
 //   output_stream: "PRESENCE_SCORE:presence_score"
 //   output_stream: "BLENDSHAPES:blendshapes"
 //   options {
 //     [mediapipe.tasks.vision.face_landmarker.proto.FaceLandmarksDetectorGraphOptions.ext]
 //     {
@ -200,6 +266,13 @@ bool IsAttentionModel(const core::ModelResources& model_resources) {
 //          }
 //       }
 //       min_detection_confidence: 0.5
 //       face_blendshapes_graph_options {
 //          base_options {
 //            model_asset {
 //              file_name: "face_blendshape.tflite"
 //            }
 //          }
 //       }
 //     }
 //   }
 // }
@ -214,7 +287,7 @@ class SingleFaceLandmarksDetectorGraph : public core::ModelTaskGraph {
    ASSIGN_OR_RETURN(
        auto outs,
        BuildSingleFaceLandmarksDetectorGraph(
-            sc->Options<proto::FaceLandmarksDetectorGraphOptions>(),
+            *sc->MutableOptions<proto::FaceLandmarksDetectorGraphOptions>(),
            *model_resources, graph[Input<Image>(kImageTag)],
            graph[Input<NormalizedRect>::Optional(kNormRectTag)], graph));
    outs.landmarks >>
@ -223,6 +296,10 @@ class SingleFaceLandmarksDetectorGraph : public core::ModelTaskGraph {
        graph.Out(kFaceRectNextFrameTag).Cast<NormalizedRect>();
    outs.presence >> graph.Out(kPresenceTag).Cast<bool>();
    outs.presence_score >> graph.Out(kPresenceScoreTag).Cast<float>();
    if (outs.face_blendshapes) {
      outs.face_blendshapes.value() >>
          graph.Out(kBlendshapesTag).Cast<ClassificationList>();
    }
    return graph.GetConfig();
  }
@ -239,7 +316,7 @@ class SingleFaceLandmarksDetectorGraph : public core::ModelTaskGraph {
  // graph: the mediapipe graph instance to be updated.
  absl::StatusOr<SingleFaceLandmarksOutputs>
  BuildSingleFaceLandmarksDetectorGraph(
-      const proto::FaceLandmarksDetectorGraphOptions& subgraph_options,
+      proto::FaceLandmarksDetectorGraphOptions& subgraph_options,
      const core::ModelResources& model_resources, Stream<Image> image_in,
      Stream<NormalizedRect> face_rect, Graph& graph) {
    MP_RETURN_IF_ERROR(SanityCheckOptions(subgraph_options));
@ -351,11 +428,26 @@ class SingleFaceLandmarksDetectorGraph : public core::ModelTaskGraph {
    auto face_rect_next_frame =
        AllowIf(face_rect_transformation.Out("").Cast<NormalizedRect>(),
                presence, graph);
    std::optional<Stream<ClassificationList>> face_blendshapes;
    if (subgraph_options.has_face_blendshapes_graph_options()) {
      auto& face_blendshapes_graph = graph.AddNode(
          "mediapipe.tasks.vision.face_landmarker.FaceBlendshapesGraph");
      face_blendshapes_graph.GetOptions<proto::FaceBlendshapesGraphOptions>()
          .Swap(subgraph_options.mutable_face_blendshapes_graph_options());
      projected_landmarks >> face_blendshapes_graph.In(kLandmarksTag);
      image_size >> face_blendshapes_graph.In(kImageSizeTag);
      face_blendshapes =
          std::make_optional(face_blendshapes_graph.Out(kBlendshapesTag)
                                 .Cast<ClassificationList>());
    }
    return {{
        /* landmarks= */ projected_landmarks,
        /* rect_next_frame= */ face_rect_next_frame,
        /* presence= */ presence,
        /* presence_score= */ presence_score,
        /* face_blendshapes= */ face_blendshapes,
    }};
  }
 };
@ -390,6 +482,9 @@ REGISTER_MEDIAPIPE_GRAPH(
 //     Vector of boolean value indicates whether the face is present.
 //   PRESENCE_SCORE - std::vector<float>
 //     Vector of float value indicates the probability that the face is present.
 //   BLENDSHAPES - std::vector<ClassificationList> @optional
 //     Vector of face blendshape classification, available when
 //     face_blendshapes_graph_options is set.
 //
 // Example:
 // node {
@ -401,6 +496,7 @@ REGISTER_MEDIAPIPE_GRAPH(
 //   output_stream: "FACE_RECTS_NEXT_FRAME:face_rects_next_frame"
 //   output_stream: "PRESENCE:presence"
 //   output_stream: "PRESENCE_SCORE:presence_score"
 //   output_stream: "BLENDSHAPES:blendshapes"
 //   options {
 //     [mediapipe.tasks.vision.face_landmarker.proto.FaceLandmarksDetectorGraphOptions.ext]
 //     {
@ -410,6 +506,13 @@ REGISTER_MEDIAPIPE_GRAPH(
 //          }
 //       }
 //       min_detection_confidence: 0.5
 //       face_blendshapes_graph_options {
 //          base_options {
 //            model_asset {
 //              file_name: "face_blendshape.tflite"
 //            }
 //          }
 //       }
 //     }
 //   }
 // }
@ -421,7 +524,7 @@ class MultiFaceLandmarksDetectorGraph : public core::ModelTaskGraph {
    ASSIGN_OR_RETURN(
        auto outs,
        BuildFaceLandmarksDetectorGraph(
-            sc->Options<proto::FaceLandmarksDetectorGraphOptions>(),
+            *sc->MutableOptions<proto::FaceLandmarksDetectorGraphOptions>(),
            graph[Input<Image>(kImageTag)],
            graph[Input<std::vector<NormalizedRect>>(kNormRectTag)], graph));
    outs.landmarks_lists >> graph.Out(kNormLandmarksTag)
@ -431,13 +534,16 @@ class MultiFaceLandmarksDetectorGraph : public core::ModelTaskGraph {
    outs.presences >> graph.Out(kPresenceTag).Cast<std::vector<bool>>();
    outs.presence_scores >>
        graph.Out(kPresenceScoreTag).Cast<std::vector<float>>();
-
+    if (outs.face_blendshapes) {
      outs.face_blendshapes.value() >>
          graph.Out(kBlendshapesTag).Cast<std::vector<ClassificationList>>();
    }
    return graph.GetConfig();
  }
 private:
  absl::StatusOr<MultiFaceLandmarksOutputs> BuildFaceLandmarksDetectorGraph(
-      const proto::FaceLandmarksDetectorGraphOptions& subgraph_options,
+      proto::FaceLandmarksDetectorGraphOptions& subgraph_options,
      Stream<Image> image_in,
      Stream<std::vector<NormalizedRect>> multi_face_rects, Graph& graph) {
    auto& face_landmark_subgraph = graph.AddNode(
@ -445,7 +551,7 @@ class MultiFaceLandmarksDetectorGraph : public core::ModelTaskGraph {
        "SingleFaceLandmarksDetectorGraph");
    face_landmark_subgraph
        .GetOptions<proto::FaceLandmarksDetectorGraphOptions>()
-        .CopyFrom(subgraph_options);
+        .Swap(&subgraph_options);
    auto& begin_loop_multi_face_rects =
        graph.AddNode("BeginLoopNormalizedRectCalculator");
@ -490,11 +596,27 @@ class MultiFaceLandmarksDetectorGraph : public core::ModelTaskGraph {
    auto face_rects_next_frame = end_loop_rects_next_frame.Out(kIterableTag)
                                     .Cast<std::vector<NormalizedRect>>();
    std::optional<Stream<std::vector<ClassificationList>>>
        face_blendshapes_vector;
    if (face_landmark_subgraph
            .GetOptions<proto::FaceLandmarksDetectorGraphOptions>()
            .has_face_blendshapes_graph_options()) {
      auto blendshapes = face_landmark_subgraph.Out(kBlendshapesTag);
      auto& end_loop_blendshapes =
          graph.AddNode("EndLoopClassificationListCalculator");
      batch_end >> end_loop_blendshapes.In(kBatchEndTag);
      blendshapes >> end_loop_blendshapes.In(kItemTag);
      face_blendshapes_vector =
          std::make_optional(end_loop_blendshapes.Out(kIterableTag)
                                 .Cast<std::vector<ClassificationList>>());
    }
    return {{
        /* landmarks_lists= */ landmark_lists,
        /* face_rects_next_frame= */ face_rects_next_frame,
        /* presences= */ presences,
        /* presence_scores= */ presence_scores,
        /* face_blendshapes= */ face_blendshapes_vector,
    }};
  }
 };
--- a/mediapipe/tasks/cc/vision/face_landmarker/face_landmarks_detector_graph_test.cc
+++ b/mediapipe/tasks/cc/vision/face_landmarker/face_landmarks_detector_graph_test.cc
@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 #include <optional>
 #include "absl/flags/flag.h"
 #include "absl/status/statusor.h"
 #include "absl/strings/str_format.h"
@ -21,6 +23,7 @@ limitations under the License.
 #include "mediapipe/framework/api2/port.h"
 #include "mediapipe/framework/calculator_framework.h"
 #include "mediapipe/framework/deps/file_path.h"
 #include "mediapipe/framework/formats/classification.pb.h"
 #include "mediapipe/framework/formats/image.h"
 #include "mediapipe/framework/formats/landmark.pb.h"
 #include "mediapipe/framework/formats/rect.pb.h"
@ -33,6 +36,7 @@ limitations under the License.
 #include "mediapipe/tasks/cc/core/proto/base_options.pb.h"
 #include "mediapipe/tasks/cc/core/proto/external_file.pb.h"
 #include "mediapipe/tasks/cc/core/task_runner.h"
 #include "mediapipe/tasks/cc/vision/face_landmarker/proto/face_blendshapes_graph_options.pb.h"
 #include "mediapipe/tasks/cc/vision/face_landmarker/proto/face_landmarks_detector_graph_options.pb.h"
 #include "mediapipe/tasks/cc/vision/utils/image_utils.h"
@ -70,6 +74,9 @@ constexpr char kPortraitExpectedFaceLandamrksName[] =
    "portrait_expected_face_landmarks.pbtxt";
 constexpr char kPortraitExpectedFaceLandamrksWithAttentionName[] =
    "portrait_expected_face_landmarks_with_attention.pbtxt";
 constexpr char kFaceBlendshapesModel[] = "face_blendshapes.tflite";
 constexpr char kPortraitExpectedBlendshapesName[] =
    "portrait_expected_blendshapes_with_attention.pbtxt";
 constexpr char kImageTag[] = "IMAGE";
 constexpr char kImageName[] = "image";
@ -86,13 +93,17 @@ constexpr char kPresenceTag[] = "PRESENCE";
 constexpr char kPresenceName[] = "presence";
 constexpr char kPresenceScoreTag[] = "PRESENCE_SCORE";
 constexpr char kPresenceScoreName[] = "presence_score";
 constexpr char kBlendshapesTag[] = "BLENDSHAPES";
 constexpr char kBlendshapesName[] = "blendshapes";
 constexpr float kFractionDiff = 0.05;  // percentage
 constexpr float kAbsMargin = 0.03;
 constexpr float kBlendshapesDiffMargin = 0.1;
 // Helper function to create a Single Face Landmark TaskRunner.
 absl::StatusOr<std::unique_ptr<TaskRunner>> CreateSingleFaceLandmarksTaskRunner(
-    absl::string_view model_name) {
+    absl::string_view landmarks_model_name,
    std::optional<absl::string_view> blendshapes_model_name) {
  Graph graph;
  auto& face_landmark_detection = graph.AddNode(
@ -101,8 +112,17 @@ absl::StatusOr<std::unique_ptr<TaskRunner>> CreateSingleFaceLandmarksTaskRunner(
  auto options = std::make_unique<proto::FaceLandmarksDetectorGraphOptions>();
  options->mutable_base_options()->mutable_model_asset()->set_file_name(
-      JoinPath("./", kTestDataDirectory, model_name));
+      JoinPath("./", kTestDataDirectory, landmarks_model_name));
  options->set_min_detection_confidence(0.5);
  if (blendshapes_model_name.has_value()) {
    options->mutable_face_blendshapes_graph_options()
        ->mutable_base_options()
        ->mutable_model_asset()
        ->set_file_name(
            JoinPath("./", kTestDataDirectory, *blendshapes_model_name));
  }
  face_landmark_detection.GetOptions<proto::FaceLandmarksDetectorGraphOptions>()
      .Swap(options.get());
@ -120,6 +140,10 @@ absl::StatusOr<std::unique_ptr<TaskRunner>> CreateSingleFaceLandmarksTaskRunner(
  face_landmark_detection.Out(kFaceRectNextFrameTag)
          .SetName(kFaceRectNextFrameName) >>
      graph[Output<NormalizedRect>(kFaceRectNextFrameTag)];
  if (blendshapes_model_name.has_value()) {
    face_landmark_detection.Out(kBlendshapesTag).SetName(kBlendshapesName) >>
        graph[Output<ClassificationList>(kBlendshapesTag)];
  }
  return TaskRunner::Create(
      graph.GetConfig(), absl::make_unique<core::MediaPipeBuiltinOpResolver>());
@ -127,7 +151,8 @@ absl::StatusOr<std::unique_ptr<TaskRunner>> CreateSingleFaceLandmarksTaskRunner(
 // Helper function to create a Multi Face Landmark TaskRunner.
 absl::StatusOr<std::unique_ptr<TaskRunner>> CreateMultiFaceLandmarksTaskRunner(
-    absl::string_view model_name) {
+    absl::string_view landmarks_model_name,
    std::optional<absl::string_view> blendshapes_model_name) {
  Graph graph;
  auto& face_landmark_detection = graph.AddNode(
@ -136,8 +161,15 @@ absl::StatusOr<std::unique_ptr<TaskRunner>> CreateMultiFaceLandmarksTaskRunner(
  auto options = std::make_unique<proto::FaceLandmarksDetectorGraphOptions>();
  options->mutable_base_options()->mutable_model_asset()->set_file_name(
-      JoinPath("./", kTestDataDirectory, model_name));
+      JoinPath("./", kTestDataDirectory, landmarks_model_name));
  options->set_min_detection_confidence(0.5);
  if (blendshapes_model_name.has_value()) {
    options->mutable_face_blendshapes_graph_options()
        ->mutable_base_options()
        ->mutable_model_asset()
        ->set_file_name(
            JoinPath("./", kTestDataDirectory, *blendshapes_model_name));
  }
  face_landmark_detection.GetOptions<proto::FaceLandmarksDetectorGraphOptions>()
      .Swap(options.get());
@ -156,6 +188,10 @@ absl::StatusOr<std::unique_ptr<TaskRunner>> CreateMultiFaceLandmarksTaskRunner(
  face_landmark_detection.Out(kFaceRectsNextFrameTag)
          .SetName(kFaceRectsNextFrameName) >>
      graph[Output<std::vector<NormalizedRect>>(kFaceRectsNextFrameTag)];
  if (blendshapes_model_name.has_value()) {
    face_landmark_detection.Out(kBlendshapesTag).SetName(kBlendshapesName) >>
        graph[Output<ClassificationList>(kBlendshapesTag)];
  }
  return TaskRunner::Create(
      graph.GetConfig(), absl::make_unique<core::MediaPipeBuiltinOpResolver>());
@ -168,6 +204,13 @@ NormalizedLandmarkList GetExpectedLandmarkList(absl::string_view filename) {
  return expected_landmark_list;
 }
 ClassificationList GetBlendshapes(absl::string_view filename) {
  ClassificationList blendshapes;
  MP_EXPECT_OK(GetTextProto(file::JoinPath("./", kTestDataDirectory, filename),
                            &blendshapes, Defaults()));
  return blendshapes;
 }
 // Helper function to construct NormalizeRect proto.
 NormalizedRect MakeNormRect(float x_center, float y_center, float width,
                            float height, float rotation) {
@ -185,8 +228,10 @@ NormalizedRect MakeNormRect(float x_center, float y_center, float width,
 struct SingeFaceTestParams {
  // The name of this test, for convenience when displaying test results.
  std::string test_name;
-  // The filename of the model to test.
+  // The filename of landmarks model name.
-  std::string input_model_name;
+  std::string landmarks_model_name;
  // The filename of blendshape model name.
  std::optional<std::string> blendshape_model_name;
  // The filename of the test image.
  std::string test_image_name;
  // RoI on image to detect hands.
@ -195,15 +240,22 @@ struct SingeFaceTestParams {
  bool expected_presence;
  // The expected output landmarks positions.
  NormalizedLandmarkList expected_landmarks;
  // The expected output blendshape classification;
  std::optional<ClassificationList> expected_blendshapes;
  // The max value difference between expected_positions and detected positions.
  float landmarks_diff_threshold;
  // The max value difference between expected blendshapes and actual
  // blendshapes.
  float blendshapes_diff_threshold;
 };
 struct MultiFaceTestParams {
  // The name of this test, for convenience when displaying test results.
  std::string test_name;
-  // The filename of the model to test.
+  // The filename of landmarks model name.
-  std::string input_model_name;
+  std::string landmarks_model_name;
  // The filename of blendshape model name.
  std::optional<std::string> blendshape_model_name;
  // The filename of the test image.
  std::string test_image_name;
  // RoI on image to detect hands.
@ -212,8 +264,13 @@ struct MultiFaceTestParams {
  std::vector<bool> expected_presence;
  // The expected output landmarks positions.
  std::optional<std::vector<NormalizedLandmarkList>> expected_landmarks_lists;
  // The expected output blendshape classification;
  std::optional<std::vector<ClassificationList>> expected_blendshapes;
  // The max value difference between expected_positions and detected positions.
  float landmarks_diff_threshold;
  // The max value difference between expected blendshapes and actual
  // blendshapes.
  float blendshapes_diff_threshold;
 };
 class SingleFaceLandmarksDetectionTest
@ -223,8 +280,10 @@ TEST_P(SingleFaceLandmarksDetectionTest, Succeeds) {
  MP_ASSERT_OK_AND_ASSIGN(
      Image image, DecodeImageFromFile(JoinPath("./", kTestDataDirectory,
                                                GetParam().test_image_name)));
-  MP_ASSERT_OK_AND_ASSIGN(auto task_runner, CreateSingleFaceLandmarksTaskRunner(
+  MP_ASSERT_OK_AND_ASSIGN(
-                                                GetParam().input_model_name));
+      auto task_runner,
      CreateSingleFaceLandmarksTaskRunner(GetParam().landmarks_model_name,
                                          GetParam().blendshape_model_name));
  auto output_packets = task_runner->Process(
      {{kImageName, MakePacket<Image>(std::move(image))},
@ -246,6 +305,15 @@ TEST_P(SingleFaceLandmarksDetectionTest, Succeeds) {
        Approximately(Partially(EqualsProto(expected_landmarks)),
                      /*margin=*/kAbsMargin,
                      /*fraction=*/GetParam().landmarks_diff_threshold));
    if (GetParam().expected_blendshapes) {
      const ClassificationList& actual_blendshapes =
          (*output_packets)[kBlendshapesName].Get<ClassificationList>();
      const ClassificationList& expected_blendshapes =
          *GetParam().expected_blendshapes;
      EXPECT_THAT(actual_blendshapes,
                  Approximately(EqualsProto(expected_blendshapes),
                                GetParam().blendshapes_diff_threshold));
    }
  }
 }
@ -256,8 +324,10 @@ TEST_P(MultiFaceLandmarksDetectionTest, Succeeds) {
  MP_ASSERT_OK_AND_ASSIGN(
      Image image, DecodeImageFromFile(JoinPath("./", kTestDataDirectory,
                                                GetParam().test_image_name)));
-  MP_ASSERT_OK_AND_ASSIGN(auto task_runner, CreateMultiFaceLandmarksTaskRunner(
+  MP_ASSERT_OK_AND_ASSIGN(
-                                                GetParam().input_model_name));
+      auto task_runner,
      CreateMultiFaceLandmarksTaskRunner(GetParam().landmarks_model_name,
                                         GetParam().blendshape_model_name));
  auto output_packets = task_runner->Process(
      {{kImageName, MakePacket<Image>(std::move(image))},
@ -278,29 +348,63 @@ TEST_P(MultiFaceLandmarksDetectionTest, Succeeds) {
                              /*fraction=*/GetParam().landmarks_diff_threshold),
                          *GetParam().expected_landmarks_lists));
  }
  if (GetParam().expected_blendshapes) {
    const std::vector<ClassificationList>& actual_blendshapes =
        (*output_packets)[kBlendshapesName]
            .Get<std::vector<ClassificationList>>();
    const std::vector<ClassificationList>& expected_blendshapes =
        *GetParam().expected_blendshapes;
    EXPECT_THAT(actual_blendshapes,
                Pointwise(Approximately(EqualsProto(),
                                        GetParam().blendshapes_diff_threshold),
                          expected_blendshapes));
  }
 }
 INSTANTIATE_TEST_SUITE_P(
    FaceLandmarksDetectionTest, SingleFaceLandmarksDetectionTest,
    Values(SingeFaceTestParams{
               /* test_name= */ "Portrait",
-               /*input_model_name= */ kFaceLandmarksDetectionModel,
+               /* landmarks_model_name= */ kFaceLandmarksDetectionModel,
-               /*test_image_name=*/kPortraitImageName,
+               /* blendshape_model_name= */ std::nullopt,
-               /*norm_rect= */ MakeNormRect(0.4987, 0.2211, 0.2877, 0.2303, 0),
+               /* test_image_name=*/kPortraitImageName,
-               /*expected_presence = */ true,
+               /* norm_rect= */ MakeNormRect(0.4987, 0.2211, 0.2877, 0.2303, 0),
-               /*expected_landmarks = */
+               /* expected_presence= */ true,
               /* expected_landmarks= */
               GetExpectedLandmarkList(kPortraitExpectedFaceLandamrksName),
-               /*landmarks_diff_threshold = */ kFractionDiff},
+               /* expected_blendshapes= */ std::nullopt,
               /* landmarks_diff_threshold= */ kFractionDiff,
               /* blendshapes_diff_threshold= */ kBlendshapesDiffMargin},
           SingeFaceTestParams{
               /* test_name= */ "PortraitWithAttention",
-               /*input_model_name= */ kFaceLandmarksDetectionWithAttentionModel,
+               /* landmarks_model_name= */
-               /*test_image_name=*/kPortraitImageName,
+               kFaceLandmarksDetectionWithAttentionModel,
-               /*norm_rect= */ MakeNormRect(0.4987, 0.2211, 0.2877, 0.2303, 0),
+               /* blendshape_model_name= */ std::nullopt,
-               /*expected_presence = */ true,
+               /* test_image_name= */ kPortraitImageName,
-               /*expected_landmarks = */
+               /* norm_rect= */ MakeNormRect(0.4987, 0.2211, 0.2877, 0.2303, 0),
               /* expected_presence= */ true,
               /* expected_landmarks= */
               GetExpectedLandmarkList(
                   kPortraitExpectedFaceLandamrksWithAttentionName),
-               /*landmarks_diff_threshold = */ kFractionDiff}),
+               /* expected_blendshapes= */ std::nullopt,
               /* landmarks_diff_threshold= */ kFractionDiff,
               /* blendshapes_diff_threshold= */ kBlendshapesDiffMargin},
           SingeFaceTestParams{
               /* test_name= */ "PortraitWithAttentionWithBlendshapes",
               /* landmarks_model_name= */
               kFaceLandmarksDetectionWithAttentionModel,
               /* blendshape_model_name= */ kFaceBlendshapesModel,
               /* test_image_name= */ kPortraitImageName,
               /* norm_rect= */ MakeNormRect(0.4987, 0.2211, 0.2877, 0.2303, 0),
               /* expected_presence= */ true,
               /* expected_landmarks= */
               GetExpectedLandmarkList(
                   kPortraitExpectedFaceLandamrksWithAttentionName),
               /* expected_blendshapes= */
               GetBlendshapes(kPortraitExpectedBlendshapesName),
               /* landmarks_diff_threshold= */ kFractionDiff,
               /* blendshapes_diff_threshold= */ kBlendshapesDiffMargin}),
    [](const TestParamInfo<SingleFaceLandmarksDetectionTest::ParamType>& info) {
      return info.param.test_name;
    });
@ -310,31 +414,57 @@ INSTANTIATE_TEST_SUITE_P(
    Values(
        MultiFaceTestParams{
            /* test_name= */ "Portrait",
-            /*input_model_name= */ kFaceLandmarksDetectionModel,
+            /* landmarks_model_name= */ kFaceLandmarksDetectionModel,
-            /*test_image_name=*/kPortraitImageName,
+            /* blendshape_model_name= */ std::nullopt,
-            /*norm_rects= */ {MakeNormRect(0.4987, 0.2211, 0.2877, 0.2303, 0)},
+            /* test_image_name= */ kPortraitImageName,
-            /*expected_presence = */ {true},
+            /* norm_rects= */ {MakeNormRect(0.4987, 0.2211, 0.2877, 0.2303, 0)},
-            /*expected_landmarks_list = */
+            /* expected_presence= */ {true},
            /* expected_landmarks_list= */
            {{GetExpectedLandmarkList(kPortraitExpectedFaceLandamrksName)}},
-            /*landmarks_diff_threshold = */ kFractionDiff},
+            /* expected_blendshapes= */ std::nullopt,
            /* landmarks_diff_threshold= */ kFractionDiff,
            /* blendshapes_diff_threshold= */ kBlendshapesDiffMargin},
        MultiFaceTestParams{
            /* test_name= */ "PortraitWithAttention",
-            /*input_model_name= */ kFaceLandmarksDetectionWithAttentionModel,
+            /* landmarks_model_name= */
-            /*test_image_name=*/kPortraitImageName,
+            kFaceLandmarksDetectionWithAttentionModel,
-            /*norm_rects= */ {MakeNormRect(0.4987, 0.2211, 0.2877, 0.2303, 0)},
+            /* blendshape_model_name= */ std::nullopt,
-            /*expected_presence = */ {true},
+            /* test_image_name= */ kPortraitImageName,
-            /*expected_landmarks_list = */
+            /* norm_rects= */ {MakeNormRect(0.4987, 0.2211, 0.2877, 0.2303, 0)},
            /* expected_presence= */ {true},
            /* expected_landmarks_list= */
            {{GetExpectedLandmarkList(
                kPortraitExpectedFaceLandamrksWithAttentionName)}},
-            /*landmarks_diff_threshold = */ kFractionDiff},
+            /* expected_blendshapes= */ std::nullopt,
            /* landmarks_diff_threshold= */ kFractionDiff,
            /* blendshapes_diff_threshold= */ kBlendshapesDiffMargin},
        MultiFaceTestParams{
            /* test_name= */ "PortraitWithAttentionWithBlendshapes",
            /* landmarks_model_name= */
            kFaceLandmarksDetectionWithAttentionModel,
            /* blendshape_model_name= */ kFaceBlendshapesModel,
            /* test_image_name= */ kPortraitImageName,
            /* norm_rects= */ {MakeNormRect(0.4987, 0.2211, 0.2877, 0.2303, 0)},
            /* expected_presence= */ {true},
            /* expected_landmarks_list= */
            {{GetExpectedLandmarkList(
                kPortraitExpectedFaceLandamrksWithAttentionName)}},
            /* expected_blendshapes= */
            {{GetBlendshapes(kPortraitExpectedBlendshapesName)}},
            /* landmarks_diff_threshold= */ kFractionDiff,
            /* blendshapes_diff_threshold= */ kBlendshapesDiffMargin},
        MultiFaceTestParams{
            /* test_name= */ "NoFace",
-            /*input_model_name= */ kFaceLandmarksDetectionModel,
+            /* landmarks_model_name= */
-            /*test_image_name=*/kCatImageName,
+            kFaceLandmarksDetectionModel,
-            /*norm_rects= */ {MakeNormRect(0.5, 0.5, 1.0, 1.0, 0)},
+            /* blendshape_model_name= */ std::nullopt,
-            /*expected_presence = */ {false},
+            /* test_image_name= */ kCatImageName,
-            /*expected_landmarks_list = */ std::nullopt,
+            /* norm_rects= */ {MakeNormRect(0.5, 0.5, 1.0, 1.0, 0)},
-            /*landmarks_diff_threshold = */ kFractionDiff}),
+            /* expected_presence= */ {false},
            /* expected_landmarks_list= */ std::nullopt,
            /* expected_blendshapes= */ std::nullopt,
            /*landmarks_diff_threshold= */ kFractionDiff,
            /*blendshapes_diff_threshold= */ kBlendshapesDiffMargin}),
    [](const TestParamInfo<MultiFaceLandmarksDetectionTest::ParamType>& info) {
      return info.param.test_name;
    });
--- a/mediapipe/tasks/cc/vision/face_landmarker/proto/BUILD
+++ b/mediapipe/tasks/cc/vision/face_landmarker/proto/BUILD
@ -34,6 +34,7 @@ mediapipe_proto_library(
    name = "face_landmarks_detector_graph_options_proto",
    srcs = ["face_landmarks_detector_graph_options.proto"],
    deps = [
        ":face_blendshapes_graph_options_proto",
        "//mediapipe/framework:calculator_options_proto",
        "//mediapipe/framework:calculator_proto",
        "//mediapipe/tasks/cc/core/proto:base_options_proto",
--- a/mediapipe/tasks/cc/vision/face_landmarker/proto/face_landmarks_detector_graph_options.proto
+++ b/mediapipe/tasks/cc/vision/face_landmarker/proto/face_landmarks_detector_graph_options.proto
@ -20,6 +20,7 @@ package mediapipe.tasks.vision.face_landmarker.proto;
 import "mediapipe/framework/calculator.proto";
 import "mediapipe/framework/calculator_options.proto";
 import "mediapipe/tasks/cc/core/proto/base_options.proto";
 import "mediapipe/tasks/cc/vision/face_landmarker/proto/face_blendshapes_graph_options.proto";
 option java_package = "com.google.mediapipe.tasks.vision.facelandmarker.proto";
 option java_outer_classname = "FaceLandmarksDetectorGraphOptionsProto";
@ -35,4 +36,8 @@ message FaceLandmarksDetectorGraphOptions {
  // Minimum confidence value ([0.0, 1.0]) for confidence score to be considered
  // successfully detecting a face in the image.
  optional float min_detection_confidence = 2 [default = 0.5];
  // Optional options for FaceBlendshapeGraph. If this options is set, the
  // FaceLandmarksDetectorGraph would output the face blendshapes.
  optional FaceBlendshapesGraphOptions face_blendshapes_graph_options = 3;
 }
--- a/mediapipe/tasks/testdata/vision/portrait_expected_blendshapes_with_attention.pbtxt
+++ b/mediapipe/tasks/testdata/vision/portrait_expected_blendshapes_with_attention.pbtxt
@ -0,0 +1,262 @@
 # proto-file: mediapipe/framework/formats/classification.proto
 # proto-message: ClassificationList
 classification {
  index: 0
  score: 4.9559007e-06
  label: "_neutral"
 }
 classification {
  index: 1
  score: 0.22943014
  label: "browDownLeft"
 }
 classification {
  index: 2
  score: 0.22297752
  label: "browDownRight"
 }
 classification {
  index: 3
  score: 0.015948873
  label: "browInnerUp"
 }
 classification {
  index: 4
  score: 0.006946607
  label: "browOuterUpLeft"
 }
 classification {
  index: 5
  score: 0.0070318673
  label: "browOuterUpRight"
 }
 classification {
  index: 6
  score: 0.0013679645
  label: "cheekPuff"
 }
 classification {
  index: 7
  score: 7.1003383e-06
  label: "cheekSquintLeft"
 }
 classification {
  index: 8
  score: 5.78299e-06
  label: "cheekSquintRight"
 }
 classification {
  index: 9
  score: 0.20132238
  label: "eyeBlinkLeft"
 }
 classification {
  index: 10
  score: 0.16521452
  label: "eyeBlinkRight"
 }
 classification {
  index: 11
  score: 0.03764786
  label: "eyeLookDownLeft"
 }
 classification {
  index: 12
  score: 0.04828824
  label: "eyeLookDownRight"
 }
 classification {
  index: 13
  score: 0.016539993
  label: "eyeLookInLeft"
 }
 classification {
  index: 14
  score: 0.20026363
  label: "eyeLookInRight"
 }
 classification {
  index: 15
  score: 0.21363346
  label: "eyeLookOutLeft"
 }
 classification {
  index: 16
  score: 0.024430025
  label: "eyeLookOutRight"
 }
 classification {
  index: 17
  score: 0.30147508
  label: "eyeLookUpLeft"
 }
 classification {
  index: 18
  score: 0.28701693
  label: "eyeLookUpRight"
 }
 classification {
  index: 19
  score: 0.67143106
  label: "eyeSquintLeft"
 }
 classification {
  index: 20
  score: 0.5306328
  label: "eyeSquintRight"
 }
 classification {
  index: 21
  score: 0.0041342233
  label: "eyeWideLeft"
 }
 classification {
  index: 22
  score: 0.005231879
  label: "eyeWideRight"
 }
 classification {
  index: 23
  score: 0.009427094
  label: "jawForward"
 }
 classification {
  index: 24
  score: 0.0015789346
  label: "jawLeft"
 }
 classification {
  index: 25
  score: 0.073719256
  label: "jawOpen"
 }
 classification {
  index: 26
  score: 0.00046979196
  label: "jawRight"
 }
 classification {
  index: 27
  score: 0.0011400756
  label: "mouthClose"
 }
 classification {
  index: 28
  score: 0.0060502808
  label: "mouthDimpleLeft"
 }
 classification {
  index: 29
  score: 0.013351685
  label: "mouthDimpleRight"
 }
 classification {
  index: 30
  score: 0.09859665
  label: "mouthFrownLeft"
 }
 classification {
  index: 31
  score: 0.08897466
  label: "mouthFrownRight"
 }
 classification {
  index: 32
  score: 0.0020718675
  label: "mouthFunnel"
 }
 classification {
  index: 33
  score: 6.42887e-06
  label: "mouthLeft"
 }
 classification {
  index: 34
  score: 0.68950605
  label: "mouthLowerDownLeft"
 }
 classification {
  index: 35
  score: 0.7864029
  label: "mouthLowerDownRight"
 }
 classification {
  index: 36
  score: 0.056456964
  label: "mouthPressLeft"
 }
 classification {
  index: 37
  score: 0.037348792
  label: "mouthPressRight"
 }
 classification {
  index: 38
  score: 0.00067001814
  label: "mouthPucker"
 }
 classification {
  index: 39
  score: 0.005189785
  label: "mouthRight"
 }
 classification {
  index: 40
  score: 0.018723497
  label: "mouthRollLower"
 }
 classification {
  index: 41
  score: 0.052819636
  label: "mouthRollUpper"
 }
 classification {
  index: 42
  score: 0.0033772716
  label: "mouthShrugLower"
 }
 classification {
  index: 43
  score: 0.0031609535
  label: "mouthShrugUpper"
 }
 classification {
  index: 44
  score: 0.49639142
  label: "mouthSmileLeft"
 }
 classification {
  index: 45
  score: 0.4014515
  label: "mouthSmileRight"
 }
 classification {
  index: 46
  score: 0.5825701
  label: "mouthStretchLeft"
 }
 classification {
  index: 47
  score: 0.73058575
  label: "mouthStretchRight"
 }
 classification {
  index: 48
  score: 0.13561466
  label: "mouthUpperUpLeft"
 }
 classification {
  index: 49
  score: 0.20078722
  label: "mouthUpperUpRight"
 }
 classification {
  index: 50
  score: 3.3396598e-06
  label: "noseSneerLeft"
 }
 classification {
  index: 51
  score: 1.3096546e-05
  label: "noseSneerRight"
 }