Add FaceLandmarker C++ API
PiperOrigin-RevId: 515912777
This commit is contained in:
parent
c94de4032d
commit
296ee33be5
|
@ -129,6 +129,37 @@ cc_library(
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
cc_library(
|
||||||
|
name = "face_landmarker",
|
||||||
|
srcs = ["face_landmarker.cc"],
|
||||||
|
hdrs = ["face_landmarker.h"],
|
||||||
|
deps = [
|
||||||
|
":face_landmarker_graph",
|
||||||
|
":face_landmarker_result",
|
||||||
|
"//mediapipe/framework/api2:builder",
|
||||||
|
"//mediapipe/framework/formats:classification_cc_proto",
|
||||||
|
"//mediapipe/framework/formats:image",
|
||||||
|
"//mediapipe/framework/formats:landmark_cc_proto",
|
||||||
|
"//mediapipe/framework/formats:matrix",
|
||||||
|
"//mediapipe/framework/formats:matrix_data_cc_proto",
|
||||||
|
"//mediapipe/framework/formats:rect_cc_proto",
|
||||||
|
"//mediapipe/tasks/cc/components/containers:classification_result",
|
||||||
|
"//mediapipe/tasks/cc/core:base_options",
|
||||||
|
"//mediapipe/tasks/cc/core:base_task_api",
|
||||||
|
"//mediapipe/tasks/cc/core:task_runner",
|
||||||
|
"//mediapipe/tasks/cc/core:utils",
|
||||||
|
"//mediapipe/tasks/cc/vision/core:base_vision_task_api",
|
||||||
|
"//mediapipe/tasks/cc/vision/core:image_processing_options",
|
||||||
|
"//mediapipe/tasks/cc/vision/core:running_mode",
|
||||||
|
"//mediapipe/tasks/cc/vision/core:vision_task_api_factory",
|
||||||
|
"//mediapipe/tasks/cc/vision/face_detector/proto:face_detector_graph_options_cc_proto",
|
||||||
|
"//mediapipe/tasks/cc/vision/face_geometry/proto:face_geometry_cc_proto",
|
||||||
|
"//mediapipe/tasks/cc/vision/face_landmarker/proto:face_landmarker_graph_options_cc_proto",
|
||||||
|
"//mediapipe/tasks/cc/vision/face_landmarker/proto:face_landmarks_detector_graph_options_cc_proto",
|
||||||
|
"@com_google_absl//absl/status:statusor",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
cc_library(
|
cc_library(
|
||||||
name = "face_landmarker_result_cc",
|
name = "face_landmarker_result_cc",
|
||||||
srcs = ["face_landmarker_result.cc"],
|
srcs = ["face_landmarker_result.cc"],
|
||||||
|
|
250
mediapipe/tasks/cc/vision/face_landmarker/face_landmarker.cc
Normal file
250
mediapipe/tasks/cc/vision/face_landmarker/face_landmarker.cc
Normal file
|
@ -0,0 +1,250 @@
|
||||||
|
/* Copyright 2023 The MediaPipe Authors. All Rights Reserved.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
==============================================================================*/
|
||||||
|
|
||||||
|
#include "mediapipe/tasks/cc/vision/face_landmarker/face_landmarker.h"
|
||||||
|
|
||||||
|
#include "mediapipe/framework/api2/builder.h"
|
||||||
|
#include "mediapipe/framework/formats/classification.pb.h"
|
||||||
|
#include "mediapipe/framework/formats/image.h"
|
||||||
|
#include "mediapipe/framework/formats/landmark.pb.h"
|
||||||
|
#include "mediapipe/framework/formats/matrix.h"
|
||||||
|
#include "mediapipe/framework/formats/matrix_data.pb.h"
|
||||||
|
#include "mediapipe/framework/formats/rect.pb.h"
|
||||||
|
#include "mediapipe/tasks/cc/components/containers/classification_result.h"
|
||||||
|
#include "mediapipe/tasks/cc/core/base_task_api.h"
|
||||||
|
#include "mediapipe/tasks/cc/core/task_runner.h"
|
||||||
|
#include "mediapipe/tasks/cc/core/utils.h"
|
||||||
|
#include "mediapipe/tasks/cc/vision/core/base_vision_task_api.h"
|
||||||
|
#include "mediapipe/tasks/cc/vision/core/image_processing_options.h"
|
||||||
|
#include "mediapipe/tasks/cc/vision/core/vision_task_api_factory.h"
|
||||||
|
#include "mediapipe/tasks/cc/vision/face_detector/proto/face_detector_graph_options.pb.h"
|
||||||
|
#include "mediapipe/tasks/cc/vision/face_geometry/proto/face_geometry.pb.h"
|
||||||
|
#include "mediapipe/tasks/cc/vision/face_landmarker/proto/face_landmarker_graph_options.pb.h"
|
||||||
|
#include "mediapipe/tasks/cc/vision/face_landmarker/proto/face_landmarks_detector_graph_options.pb.h"
|
||||||
|
|
||||||
|
namespace mediapipe {
|
||||||
|
namespace tasks {
|
||||||
|
namespace vision {
|
||||||
|
namespace face_landmarker {
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
using FaceLandmarkerGraphOptionsProto = ::mediapipe::tasks::vision::
|
||||||
|
face_landmarker::proto::FaceLandmarkerGraphOptions;
|
||||||
|
|
||||||
|
constexpr char kFaceLandmarkerGraphTypeName[] =
|
||||||
|
"mediapipe.tasks.vision.face_landmarker.FaceLandmarkerGraph";
|
||||||
|
|
||||||
|
constexpr char kImageTag[] = "IMAGE";
|
||||||
|
constexpr char kImageInStreamName[] = "image_in";
|
||||||
|
constexpr char kImageOutStreamName[] = "image_out";
|
||||||
|
constexpr char kNormRectTag[] = "NORM_RECT";
|
||||||
|
constexpr char kNormRectStreamName[] = "norm_rect_in";
|
||||||
|
constexpr char kNormLandmarksTag[] = "NORM_LANDMARKS";
|
||||||
|
constexpr char kNormLandmarksStreamName[] = "norm_landmarks";
|
||||||
|
constexpr char kBlendshapesTag[] = "BLENDSHAPES";
|
||||||
|
constexpr char kBlendshapesStreamName[] = "blendshapes";
|
||||||
|
constexpr char kFaceGeometryTag[] = "FACE_GEOMETRY";
|
||||||
|
constexpr char kFaceGeometryStreamName[] = "face_geometry";
|
||||||
|
constexpr int kMicroSecondsPerMilliSecond = 1000;
|
||||||
|
|
||||||
|
// Creates a MediaPipe graph config that contains a subgraph node of
|
||||||
|
// "mediapipe.tasks.vision.face_ladnamrker.FaceLandmarkerGraph". If the task is
|
||||||
|
// running in the live stream mode, a "FlowLimiterCalculator" will be added to
|
||||||
|
// limit the number of frames in flight.
|
||||||
|
CalculatorGraphConfig CreateGraphConfig(
|
||||||
|
std::unique_ptr<FaceLandmarkerGraphOptionsProto> options,
|
||||||
|
bool output_face_blendshapes, bool output_facial_transformation_matrixes,
|
||||||
|
bool enable_flow_limiting) {
|
||||||
|
api2::builder::Graph graph;
|
||||||
|
auto& subgraph = graph.AddNode(kFaceLandmarkerGraphTypeName);
|
||||||
|
subgraph.GetOptions<FaceLandmarkerGraphOptionsProto>().Swap(options.get());
|
||||||
|
graph.In(kImageTag).SetName(kImageInStreamName);
|
||||||
|
graph.In(kNormRectTag).SetName(kNormRectStreamName);
|
||||||
|
subgraph.Out(kNormLandmarksTag).SetName(kNormLandmarksStreamName) >>
|
||||||
|
graph.Out(kNormLandmarksTag);
|
||||||
|
subgraph.Out(kImageTag).SetName(kImageOutStreamName) >> graph.Out(kImageTag);
|
||||||
|
if (output_face_blendshapes) {
|
||||||
|
subgraph.Out(kBlendshapesTag).SetName(kBlendshapesStreamName) >>
|
||||||
|
graph.Out(kBlendshapesTag);
|
||||||
|
}
|
||||||
|
if (output_facial_transformation_matrixes) {
|
||||||
|
subgraph.Out(kFaceGeometryTag).SetName(kFaceGeometryStreamName) >>
|
||||||
|
graph.Out(kFaceGeometryTag);
|
||||||
|
}
|
||||||
|
if (enable_flow_limiting) {
|
||||||
|
return tasks::core::AddFlowLimiterCalculator(
|
||||||
|
graph, subgraph, {kImageTag, kNormRectTag}, kNormLandmarksTag);
|
||||||
|
}
|
||||||
|
graph.In(kImageTag) >> subgraph.In(kImageTag);
|
||||||
|
graph.In(kNormRectTag) >> subgraph.In(kNormRectTag);
|
||||||
|
return graph.GetConfig();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Converts the user-facing FaceLandmarkerOptions struct to the internal
|
||||||
|
// FaceLandmarkerGraphOptions proto.
|
||||||
|
std::unique_ptr<FaceLandmarkerGraphOptionsProto>
|
||||||
|
ConvertFaceLandmarkerGraphOptionsProto(FaceLandmarkerOptions* options) {
|
||||||
|
auto options_proto = std::make_unique<FaceLandmarkerGraphOptionsProto>();
|
||||||
|
auto base_options_proto = std::make_unique<tasks::core::proto::BaseOptions>(
|
||||||
|
tasks::core::ConvertBaseOptionsToProto(&(options->base_options)));
|
||||||
|
options_proto->mutable_base_options()->Swap(base_options_proto.get());
|
||||||
|
options_proto->mutable_base_options()->set_use_stream_mode(
|
||||||
|
options->running_mode != core::RunningMode::IMAGE);
|
||||||
|
|
||||||
|
// Configure face detector options.
|
||||||
|
auto* face_detector_graph_options =
|
||||||
|
options_proto->mutable_face_detector_graph_options();
|
||||||
|
face_detector_graph_options->set_num_faces(options->num_faces);
|
||||||
|
face_detector_graph_options->set_min_detection_confidence(
|
||||||
|
options->min_face_detection_confidence);
|
||||||
|
|
||||||
|
// Configure face landmark detector options.
|
||||||
|
options_proto->set_min_tracking_confidence(options->min_tracking_confidence);
|
||||||
|
auto* face_landmarks_detector_graph_options =
|
||||||
|
options_proto->mutable_face_landmarks_detector_graph_options();
|
||||||
|
face_landmarks_detector_graph_options->set_min_detection_confidence(
|
||||||
|
options->min_face_presence_confidence);
|
||||||
|
|
||||||
|
return options_proto;
|
||||||
|
}
|
||||||
|
|
||||||
|
FaceLandmarkerResult GetFaceLandmarkerResultFromPacketMap(
|
||||||
|
const tasks::core::PacketMap& packet_map) {
|
||||||
|
const auto& face_landmarks = packet_map.at(kNormLandmarksStreamName)
|
||||||
|
.Get<std::vector<NormalizedLandmarkList>>();
|
||||||
|
std::optional<std::vector<ClassificationList>> face_blendshapes;
|
||||||
|
if (packet_map.find(kBlendshapesStreamName) != packet_map.end()) {
|
||||||
|
face_blendshapes = packet_map.at(kBlendshapesStreamName)
|
||||||
|
.Get<std::vector<ClassificationList>>();
|
||||||
|
}
|
||||||
|
std::optional<std::vector<MatrixData>> matrix_data_list;
|
||||||
|
if (packet_map.find(kFaceGeometryStreamName) != packet_map.end()) {
|
||||||
|
const auto& face_geometry_list =
|
||||||
|
packet_map.at(kFaceGeometryStreamName)
|
||||||
|
.Get<std::vector<face_geometry::proto::FaceGeometry>>();
|
||||||
|
matrix_data_list = std::vector<MatrixData>(face_geometry_list.size());
|
||||||
|
std::transform(face_geometry_list.begin(), face_geometry_list.end(),
|
||||||
|
matrix_data_list->begin(),
|
||||||
|
[](const face_geometry::proto::FaceGeometry& face_geometry) {
|
||||||
|
return face_geometry.pose_transform_matrix();
|
||||||
|
});
|
||||||
|
}
|
||||||
|
return ConvertToFaceLandmarkerResult(
|
||||||
|
/* face_landmarks_proto = */ face_landmarks,
|
||||||
|
/* face_blendshapes_proto= */ face_blendshapes,
|
||||||
|
/* facial_transformation_matrixes_proto= */ matrix_data_list);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
absl::StatusOr<std::unique_ptr<FaceLandmarker>> FaceLandmarker::Create(
|
||||||
|
std::unique_ptr<FaceLandmarkerOptions> options) {
|
||||||
|
auto options_proto = ConvertFaceLandmarkerGraphOptionsProto(options.get());
|
||||||
|
tasks::core::PacketsCallback packets_callback = nullptr;
|
||||||
|
if (options->result_callback) {
|
||||||
|
auto result_callback = options->result_callback;
|
||||||
|
packets_callback = [=](absl::StatusOr<tasks::core::PacketMap> packet_map) {
|
||||||
|
if (!packet_map.ok()) {
|
||||||
|
Image image;
|
||||||
|
result_callback(packet_map.status(), image, Timestamp::Unset().Value());
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (packet_map->at(kImageOutStreamName).IsEmpty()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
Packet image_packet = packet_map->at(kImageOutStreamName);
|
||||||
|
if (packet_map->at(kNormLandmarksStreamName).IsEmpty()) {
|
||||||
|
Packet empty_packet = packet_map->at(kNormLandmarksStreamName);
|
||||||
|
result_callback(
|
||||||
|
{FaceLandmarkerResult()}, image_packet.Get<Image>(),
|
||||||
|
empty_packet.Timestamp().Value() / kMicroSecondsPerMilliSecond);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
result_callback(
|
||||||
|
GetFaceLandmarkerResultFromPacketMap(*packet_map),
|
||||||
|
image_packet.Get<Image>(),
|
||||||
|
packet_map->at(kNormLandmarksStreamName).Timestamp().Value() /
|
||||||
|
kMicroSecondsPerMilliSecond);
|
||||||
|
};
|
||||||
|
}
|
||||||
|
return core::VisionTaskApiFactory::Create<FaceLandmarker,
|
||||||
|
FaceLandmarkerGraphOptionsProto>(
|
||||||
|
CreateGraphConfig(
|
||||||
|
std::move(options_proto), options->output_face_blendshapes,
|
||||||
|
options->output_facial_transformation_matrixes,
|
||||||
|
options->running_mode == core::RunningMode::LIVE_STREAM),
|
||||||
|
std::move(options->base_options.op_resolver), options->running_mode,
|
||||||
|
std::move(packets_callback));
|
||||||
|
}
|
||||||
|
|
||||||
|
absl::StatusOr<FaceLandmarkerResult> FaceLandmarker::Detect(
|
||||||
|
mediapipe::Image image,
|
||||||
|
std::optional<core::ImageProcessingOptions> image_processing_options) {
|
||||||
|
ASSIGN_OR_RETURN(NormalizedRect norm_rect,
|
||||||
|
ConvertToNormalizedRect(image_processing_options,
|
||||||
|
/*roi_allowed=*/false));
|
||||||
|
ASSIGN_OR_RETURN(
|
||||||
|
auto output_packets,
|
||||||
|
ProcessImageData(
|
||||||
|
{{kImageInStreamName, MakePacket<Image>(std::move(image))},
|
||||||
|
{kNormRectStreamName,
|
||||||
|
MakePacket<NormalizedRect>(std::move(norm_rect))}}));
|
||||||
|
if (output_packets[kNormLandmarksStreamName].IsEmpty()) {
|
||||||
|
return {FaceLandmarkerResult()};
|
||||||
|
}
|
||||||
|
return GetFaceLandmarkerResultFromPacketMap(output_packets);
|
||||||
|
}
|
||||||
|
|
||||||
|
absl::StatusOr<FaceLandmarkerResult> FaceLandmarker::DetectForVideo(
|
||||||
|
mediapipe::Image image, int64_t timestamp_ms,
|
||||||
|
std::optional<core::ImageProcessingOptions> image_processing_options) {
|
||||||
|
ASSIGN_OR_RETURN(NormalizedRect norm_rect,
|
||||||
|
ConvertToNormalizedRect(image_processing_options,
|
||||||
|
/*roi_allowed=*/false));
|
||||||
|
ASSIGN_OR_RETURN(
|
||||||
|
auto output_packets,
|
||||||
|
ProcessVideoData(
|
||||||
|
{{kImageInStreamName,
|
||||||
|
MakePacket<Image>(std::move(image))
|
||||||
|
.At(Timestamp(timestamp_ms * kMicroSecondsPerMilliSecond))},
|
||||||
|
{kNormRectStreamName,
|
||||||
|
MakePacket<NormalizedRect>(std::move(norm_rect))
|
||||||
|
.At(Timestamp(timestamp_ms * kMicroSecondsPerMilliSecond))}}));
|
||||||
|
if (output_packets[kNormLandmarksStreamName].IsEmpty()) {
|
||||||
|
return {FaceLandmarkerResult()};
|
||||||
|
}
|
||||||
|
return GetFaceLandmarkerResultFromPacketMap(output_packets);
|
||||||
|
}
|
||||||
|
|
||||||
|
absl::Status FaceLandmarker::DetectAsync(
|
||||||
|
mediapipe::Image image, int64_t timestamp_ms,
|
||||||
|
std::optional<core::ImageProcessingOptions> image_processing_options) {
|
||||||
|
ASSIGN_OR_RETURN(NormalizedRect norm_rect,
|
||||||
|
ConvertToNormalizedRect(image_processing_options,
|
||||||
|
/*roi_allowed=*/false));
|
||||||
|
return SendLiveStreamData(
|
||||||
|
{{kImageInStreamName,
|
||||||
|
MakePacket<Image>(std::move(image))
|
||||||
|
.At(Timestamp(timestamp_ms * kMicroSecondsPerMilliSecond))},
|
||||||
|
{kNormRectStreamName,
|
||||||
|
MakePacket<NormalizedRect>(std::move(norm_rect))
|
||||||
|
.At(Timestamp(timestamp_ms * kMicroSecondsPerMilliSecond))}});
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace face_landmarker
|
||||||
|
} // namespace vision
|
||||||
|
} // namespace tasks
|
||||||
|
} // namespace mediapipe
|
198
mediapipe/tasks/cc/vision/face_landmarker/face_landmarker.h
Normal file
198
mediapipe/tasks/cc/vision/face_landmarker/face_landmarker.h
Normal file
|
@ -0,0 +1,198 @@
|
||||||
|
/* Copyright 2023 The MediaPipe Authors. All Rights Reserved.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
==============================================================================*/
|
||||||
|
|
||||||
|
#ifndef MEDIAPIPE_TASKS_CC_VISION_FACE_LANDMARKER_FACE_LANDMARKER_H_
|
||||||
|
#define MEDIAPIPE_TASKS_CC_VISION_FACE_LANDMARKER_FACE_LANDMARKER_H_
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
|
#include <optional>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "absl/status/statusor.h"
|
||||||
|
#include "mediapipe/framework/formats/image.h"
|
||||||
|
#include "mediapipe/tasks/cc/core/base_options.h"
|
||||||
|
#include "mediapipe/tasks/cc/vision/core/base_vision_task_api.h"
|
||||||
|
#include "mediapipe/tasks/cc/vision/core/image_processing_options.h"
|
||||||
|
#include "mediapipe/tasks/cc/vision/core/running_mode.h"
|
||||||
|
#include "mediapipe/tasks/cc/vision/face_landmarker/face_landmarker_result.h"
|
||||||
|
|
||||||
|
namespace mediapipe {
|
||||||
|
namespace tasks {
|
||||||
|
namespace vision {
|
||||||
|
namespace face_landmarker {
|
||||||
|
|
||||||
|
struct FaceLandmarkerOptions {
|
||||||
|
// Base options for configuring MediaPipe Tasks library, such as specifying
|
||||||
|
// the TfLite model bundle file with metadata, accelerator options, op
|
||||||
|
// resolver, etc.
|
||||||
|
tasks::core::BaseOptions base_options;
|
||||||
|
|
||||||
|
// The running mode of the task. Default to the image mode.
|
||||||
|
// FaceLandmarker has three running modes:
|
||||||
|
// 1) The image mode for detecting face landmarks on single image inputs.
|
||||||
|
// 2) The video mode for detecting face landmarks on the decoded frames of a
|
||||||
|
// video.
|
||||||
|
// 3) The live stream mode for detecting face landmarks on the live stream of
|
||||||
|
// input data, such as from camera. In this mode, the "result_callback"
|
||||||
|
// below must be specified to receive the detection results asynchronously.
|
||||||
|
core::RunningMode running_mode = core::RunningMode::IMAGE;
|
||||||
|
|
||||||
|
// The maximum number of faces that can be detected by the FaceLandmarker.
|
||||||
|
int num_faces = 1;
|
||||||
|
|
||||||
|
// The minimum confidence score for the face detection to be considered
|
||||||
|
// successful.
|
||||||
|
float min_face_detection_confidence = 0.5;
|
||||||
|
|
||||||
|
// The minimum confidence score of face presence score in the face landmark
|
||||||
|
// detection.
|
||||||
|
float min_face_presence_confidence = 0.5;
|
||||||
|
|
||||||
|
// The minimum confidence score for the face tracking to be considered
|
||||||
|
// successful.
|
||||||
|
float min_tracking_confidence = 0.5;
|
||||||
|
|
||||||
|
// Whether FaceLandmarker outputs face blendshapes classification. Face
|
||||||
|
// blendshapes are used for rendering the 3D face model.
|
||||||
|
bool output_face_blendshapes = false;
|
||||||
|
|
||||||
|
// Whether FaceLandmarker outputs facial transformation_matrix. Facial
|
||||||
|
// transformation matrix is used to transform the face landmarks in canonical
|
||||||
|
// face to the detected face, so that users can apply face effects on the
|
||||||
|
// detected landmarks.
|
||||||
|
bool output_facial_transformation_matrixes = false;
|
||||||
|
|
||||||
|
// The user-defined result callback for processing live stream data.
|
||||||
|
// The result callback should only be specified when the running mode is set
|
||||||
|
// to RunningMode::LIVE_STREAM.
|
||||||
|
std::function<void(absl::StatusOr<FaceLandmarkerResult>, const Image&,
|
||||||
|
int64_t)>
|
||||||
|
result_callback = nullptr;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Performs face landmarks detection on the given image.
|
||||||
|
//
|
||||||
|
// TODO add the link to DevSite.
|
||||||
|
// This API expects a pre-trained face landmarker model asset bundle.
|
||||||
|
//
|
||||||
|
// Inputs:
|
||||||
|
// Image
|
||||||
|
// - The image that face landmarks detection runs on.
|
||||||
|
// std::optional<NormalizedRect>
|
||||||
|
// - If provided, can be used to specify the rotation to apply to the image
|
||||||
|
// before performing face landmarks detection, by setting its 'rotation'
|
||||||
|
// field in radians (e.g. 'M_PI / 2' for a 90° anti-clockwise rotation).
|
||||||
|
// Note that specifying a region-of-interest using the 'x_center',
|
||||||
|
// 'y_center', 'width' and 'height' fields is NOT supported and will
|
||||||
|
// result in an invalid argument error being returned.
|
||||||
|
// Outputs:
|
||||||
|
// FaceLandmarkerResult
|
||||||
|
// - The face landmarks detection results.
|
||||||
|
class FaceLandmarker : tasks::vision::core::BaseVisionTaskApi {
|
||||||
|
public:
|
||||||
|
using BaseVisionTaskApi::BaseVisionTaskApi;
|
||||||
|
|
||||||
|
// Creates a FaceLandmarker from a FaceLandmarkerOptions to process image data
|
||||||
|
// or streaming data. Face landmarker can be created with one of the following
|
||||||
|
// three running modes:
|
||||||
|
// 1) Image mode for detecting face landmarks on single image inputs. Users
|
||||||
|
// provide mediapipe::Image to the `Detect` method, and will receive the
|
||||||
|
// deteced face landmarks results as the return value.
|
||||||
|
// 2) Video mode for detecting face landmarks on the decoded frames of a
|
||||||
|
// video. Users call `DetectForVideo` method, and will receive the detected
|
||||||
|
// face landmarks results as the return value.
|
||||||
|
// 3) Live stream mode for detecting face landmarks on the live stream of the
|
||||||
|
// input data, such as from camera. Users call `DetectAsync` to push the
|
||||||
|
// image data into the FaceLandmarker, the detected results along with the
|
||||||
|
// input timestamp and the image that face landmarker runs on will be
|
||||||
|
// available in the result callback when the face landmarker finishes the
|
||||||
|
// work.
|
||||||
|
static absl::StatusOr<std::unique_ptr<FaceLandmarker>> Create(
|
||||||
|
std::unique_ptr<FaceLandmarkerOptions> options);
|
||||||
|
|
||||||
|
// Performs face landmarks detection on the given image.
|
||||||
|
// Only use this method when the FaceLandmarker is created with the image
|
||||||
|
// running mode.
|
||||||
|
//
|
||||||
|
// The optional 'image_processing_options' parameter can be used to specify
|
||||||
|
// the rotation to apply to the image before performing detection, by setting
|
||||||
|
// its 'rotation_degrees' field. Note that specifying a region-of-interest
|
||||||
|
// using the 'region_of_interest' field is NOT supported and will result in an
|
||||||
|
// invalid argument error being returned.
|
||||||
|
//
|
||||||
|
// The image can be of any size with format RGB or RGBA.
|
||||||
|
// TODO: Describes how the input image will be preprocessed
|
||||||
|
// after the yuv support is implemented.
|
||||||
|
absl::StatusOr<FaceLandmarkerResult> Detect(
|
||||||
|
Image image,
|
||||||
|
std::optional<core::ImageProcessingOptions> image_processing_options =
|
||||||
|
std::nullopt);
|
||||||
|
|
||||||
|
// Performs face landmarks detection on the provided video frame.
|
||||||
|
// Only use this method when the FaceLandmarker is created with the video
|
||||||
|
// running mode.
|
||||||
|
//
|
||||||
|
// The optional 'image_processing_options' parameter can be used to specify
|
||||||
|
// the rotation to apply to the image before performing detection, by setting
|
||||||
|
// its 'rotation_degrees' field. Note that specifying a region-of-interest
|
||||||
|
// using the 'region_of_interest' field is NOT supported and will result in an
|
||||||
|
// invalid argument error being returned.
|
||||||
|
//
|
||||||
|
// The image can be of any size with format RGB or RGBA. It's required to
|
||||||
|
// provide the video frame's timestamp (in milliseconds). The input timestamps
|
||||||
|
// must be monotonically increasing.
|
||||||
|
absl::StatusOr<FaceLandmarkerResult> DetectForVideo(
|
||||||
|
Image image, int64_t timestamp_ms,
|
||||||
|
std::optional<core::ImageProcessingOptions> image_processing_options =
|
||||||
|
std::nullopt);
|
||||||
|
|
||||||
|
// Sends live image data to perform face landmarks detection, and the results
|
||||||
|
// will be available via the "result_callback" provided in the
|
||||||
|
// FaceLandmarkerOptions. Only use this method when the FaceLandmarker
|
||||||
|
// is created with the live stream running mode.
|
||||||
|
//
|
||||||
|
// The image can be of any size with format RGB or RGBA. It's required to
|
||||||
|
// provide a timestamp (in milliseconds) to indicate when the input image is
|
||||||
|
// sent to the face landmarker. The input timestamps must be monotonically
|
||||||
|
// increasing.
|
||||||
|
//
|
||||||
|
// The optional 'image_processing_options' parameter can be used to specify
|
||||||
|
// the rotation to apply to the image before performing detection, by setting
|
||||||
|
// its 'rotation_degrees' field. Note that specifying a region-of-interest
|
||||||
|
// using the 'region_of_interest' field is NOT supported and will result in an
|
||||||
|
// invalid argument error being returned.
|
||||||
|
//
|
||||||
|
// The "result_callback" provides
|
||||||
|
// - A vector of FaceLandmarkerResult, each is the detected results
|
||||||
|
// for a input frame.
|
||||||
|
// - The const reference to the corresponding input image that the face
|
||||||
|
// landmarker runs on. Note that the const reference to the image will no
|
||||||
|
// longer be valid when the callback returns. To access the image data
|
||||||
|
// outside of the callback, callers need to make a copy of the image.
|
||||||
|
// - The input timestamp in milliseconds.
|
||||||
|
absl::Status DetectAsync(Image image, int64_t timestamp_ms,
|
||||||
|
std::optional<core::ImageProcessingOptions>
|
||||||
|
image_processing_options = std::nullopt);
|
||||||
|
|
||||||
|
// Shuts down the FaceLandmarker when all works are done.
|
||||||
|
absl::Status Close() { return runner_->Close(); }
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace face_landmarker
|
||||||
|
} // namespace vision
|
||||||
|
} // namespace tasks
|
||||||
|
} // namespace mediapipe
|
||||||
|
|
||||||
|
#endif // MEDIAPIPE_TASKS_CC_VISION_FACE_LANDMARKER_FACE_LANDMARKER_H_
|
|
@ -34,7 +34,7 @@ FaceLandmarkerResult ConvertToFaceLandmarkerResult(
|
||||||
std::optional<std::vector<mediapipe::ClassificationList>>
|
std::optional<std::vector<mediapipe::ClassificationList>>
|
||||||
face_blendshapes_proto,
|
face_blendshapes_proto,
|
||||||
std::optional<std::vector<mediapipe::MatrixData>>
|
std::optional<std::vector<mediapipe::MatrixData>>
|
||||||
facial_transformation_matrix_proto) {
|
facial_transformation_matrixes_proto) {
|
||||||
FaceLandmarkerResult result;
|
FaceLandmarkerResult result;
|
||||||
result.face_landmarks.resize(face_landmarks_proto.size());
|
result.face_landmarks.resize(face_landmarks_proto.size());
|
||||||
std::transform(face_landmarks_proto.begin(), face_landmarks_proto.end(),
|
std::transform(face_landmarks_proto.begin(), face_landmarks_proto.end(),
|
||||||
|
@ -52,12 +52,12 @@ FaceLandmarkerResult ConvertToFaceLandmarkerResult(
|
||||||
classification_list);
|
classification_list);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
if (facial_transformation_matrix_proto.has_value()) {
|
if (facial_transformation_matrixes_proto.has_value()) {
|
||||||
result.facial_transformation_matrix =
|
result.facial_transformation_matrixes =
|
||||||
std::vector<Matrix>(facial_transformation_matrix_proto->size());
|
std::vector<Matrix>(facial_transformation_matrixes_proto->size());
|
||||||
std::transform(facial_transformation_matrix_proto->begin(),
|
std::transform(facial_transformation_matrixes_proto->begin(),
|
||||||
facial_transformation_matrix_proto->end(),
|
facial_transformation_matrixes_proto->end(),
|
||||||
result.facial_transformation_matrix->begin(),
|
result.facial_transformation_matrixes->begin(),
|
||||||
[](const mediapipe::MatrixData& matrix_proto) {
|
[](const mediapipe::MatrixData& matrix_proto) {
|
||||||
mediapipe::Matrix matrix;
|
mediapipe::Matrix matrix;
|
||||||
MatrixFromMatrixDataProto(matrix_proto, &matrix);
|
MatrixFromMatrixDataProto(matrix_proto, &matrix);
|
||||||
|
|
|
@ -40,7 +40,7 @@ struct FaceLandmarkerResult {
|
||||||
std::optional<std::vector<components::containers::Classifications>>
|
std::optional<std::vector<components::containers::Classifications>>
|
||||||
face_blendshapes;
|
face_blendshapes;
|
||||||
// Optional facial transformation matrix.
|
// Optional facial transformation matrix.
|
||||||
std::optional<std::vector<Matrix>> facial_transformation_matrix;
|
std::optional<std::vector<Matrix>> facial_transformation_matrixes;
|
||||||
};
|
};
|
||||||
|
|
||||||
// Convert face landmarks result from proto format to FaceLandmarkerResult.
|
// Convert face landmarks result from proto format to FaceLandmarkerResult.
|
||||||
|
@ -49,7 +49,7 @@ FaceLandmarkerResult ConvertToFaceLandmarkerResult(
|
||||||
std::optional<std::vector<mediapipe::ClassificationList>>
|
std::optional<std::vector<mediapipe::ClassificationList>>
|
||||||
face_blendshapes_proto = std::nullopt,
|
face_blendshapes_proto = std::nullopt,
|
||||||
std::optional<std::vector<mediapipe::MatrixData>>
|
std::optional<std::vector<mediapipe::MatrixData>>
|
||||||
facial_transformation_matrix_proto = std::nullopt);
|
facial_transformation_matrixes_proto = std::nullopt);
|
||||||
|
|
||||||
} // namespace face_landmarker
|
} // namespace face_landmarker
|
||||||
} // namespace vision
|
} // namespace vision
|
||||||
|
|
|
@ -73,9 +73,10 @@ TEST(FaceLandmarkerResultTest, Succeeds) {
|
||||||
std::nullopt));
|
std::nullopt));
|
||||||
|
|
||||||
Matrix expected_matrix{{0, 3, 6}, {1, 4, 7}, {2, 5, 8}};
|
Matrix expected_matrix{{0, 3, 6}, {1, 4, 7}, {2, 5, 8}};
|
||||||
ASSERT_TRUE(face_landmarker_result.facial_transformation_matrix.has_value());
|
ASSERT_TRUE(
|
||||||
EXPECT_EQ(face_landmarker_result.facial_transformation_matrix->size(), 1);
|
face_landmarker_result.facial_transformation_matrixes.has_value());
|
||||||
EXPECT_EQ(face_landmarker_result.facial_transformation_matrix->at(0),
|
EXPECT_EQ(face_landmarker_result.facial_transformation_matrixes->size(), 1);
|
||||||
|
EXPECT_EQ(face_landmarker_result.facial_transformation_matrixes->at(0),
|
||||||
expected_matrix);
|
expected_matrix);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,455 @@
|
||||||
|
/* Copyright 2023 The MediaPipe Authors. All Rights Reserved.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
==============================================================================*/
|
||||||
|
|
||||||
|
#include "mediapipe/tasks/cc/vision/face_landmarker/face_landmarker.h"
|
||||||
|
|
||||||
|
#include <cmath>
|
||||||
|
#include <cstdint>
|
||||||
|
#include <memory>
|
||||||
|
#include <optional>
|
||||||
|
#include <string>
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
|
#include "absl/flags/flag.h"
|
||||||
|
#include "absl/status/status.h"
|
||||||
|
#include "absl/status/statusor.h"
|
||||||
|
#include "absl/strings/string_view.h"
|
||||||
|
#include "mediapipe/framework/deps/file_path.h"
|
||||||
|
#include "mediapipe/framework/formats/classification.pb.h"
|
||||||
|
#include "mediapipe/framework/formats/image.h"
|
||||||
|
#include "mediapipe/framework/formats/landmark.pb.h"
|
||||||
|
#include "mediapipe/framework/formats/matrix.h"
|
||||||
|
#include "mediapipe/framework/formats/matrix_data.pb.h"
|
||||||
|
#include "mediapipe/framework/port/file_helpers.h"
|
||||||
|
#include "mediapipe/framework/port/gmock.h"
|
||||||
|
#include "mediapipe/framework/port/gtest.h"
|
||||||
|
#include "mediapipe/tasks/cc/common.h"
|
||||||
|
#include "mediapipe/tasks/cc/components/containers/category.h"
|
||||||
|
#include "mediapipe/tasks/cc/components/containers/classification_result.h"
|
||||||
|
#include "mediapipe/tasks/cc/components/containers/landmark.h"
|
||||||
|
#include "mediapipe/tasks/cc/components/containers/rect.h"
|
||||||
|
#include "mediapipe/tasks/cc/components/processors/proto/classifier_options.pb.h"
|
||||||
|
#include "mediapipe/tasks/cc/core/base_options.h"
|
||||||
|
#include "mediapipe/tasks/cc/vision/core/image_processing_options.h"
|
||||||
|
#include "mediapipe/tasks/cc/vision/face_landmarker/face_landmarker_result.h"
|
||||||
|
#include "mediapipe/tasks/cc/vision/utils/image_utils.h"
|
||||||
|
|
||||||
|
namespace mediapipe {
|
||||||
|
namespace tasks {
|
||||||
|
namespace vision {
|
||||||
|
namespace face_landmarker {
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
using ::file::Defaults;
using ::mediapipe::tasks::vision::core::ImageProcessingOptions;
using ::testing::TestParamInfo;
using ::testing::TestWithParam;
using ::testing::Values;

// Location of test assets, relative to the runfiles root.
constexpr char kTestDataDirectory[] = "/mediapipe/tasks/testdata/vision/";
// Model bundles under test: plain landmarks vs. landmarks + blendshapes.
constexpr char kFaceLandmarkerModelBundleName[] = "face_landmarker.task";
constexpr char kFaceLandmarkerWithBlendshapesModelBundleName[] =
    "face_landmarker_with_blendshapes.task";
constexpr char kPortraitImageName[] = "portrait.jpg";
// Golden text-proto results for the portrait image.
constexpr char kPortraitExpectedFaceLandamrksName[] =
    "portrait_expected_face_landmarks.pbtxt";
constexpr char kPortraitExpectedFaceLandamrksWithAttentionName[] =
    "portrait_expected_face_landmarks_with_attention.pbtxt";
constexpr char kPortraitExpectedBlendshapesName[] =
    "portrait_expected_blendshapes_with_attention.pbtxt";

// Absolute tolerances used when comparing actual outputs to the goldens.
constexpr float kLandmarksDiffMargin = 0.03;
constexpr float kBlendshapesDiffMargin = 0.1;
constexpr float kFacialTransformationMatrixDiffMargin = 0.02;
|
||||||
|
|
||||||
|
template <typename ProtoT>
|
||||||
|
ProtoT GetExpectedProto(absl::string_view filename) {
|
||||||
|
ProtoT expected_proto;
|
||||||
|
MP_EXPECT_OK(GetTextProto(file::JoinPath("./", kTestDataDirectory, filename),
|
||||||
|
&expected_proto, Defaults()));
|
||||||
|
return expected_proto;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Struct holding the parameters for the parameterized test fixtures below
// (ImageModeTest, VideoModeTest, LiveStreamModeTest).
struct FaceLandmarkerTestParams {
  // The name of this test, for convenience when displaying test results.
  std::string test_name;
  // The filename of the model bundle to test.
  std::string input_model_name;
  // The filename of the test image.
  std::string test_image_name;
  // The rotation to apply to the test image before processing, in degrees
  // clockwise.
  int rotation;
  // The expected output face landmarker result to compare against.
  FaceLandmarkerResult expected_result;
};
|
||||||
|
|
||||||
|
// Builds the golden 4x4 facial transformation matrix for the portrait image
// and returns it converted to a MatrixData proto.
mediapipe::MatrixData MakePortraitExpectedFacialTransformationMatrix() {
  const Matrix transform{{0.9995292, -0.005092691, 0.030254554, -0.37340546},
                         {0.0072318087, 0.99744856, -0.07102106, 22.212194},
                         {-0.029815676, 0.07120642, 0.9970159, -64.76358},
                         {0, 0, 0, 1}};
  mediapipe::MatrixData expected;
  MatrixDataProtoFromMatrix(transform, &expected);
  return expected;
}
|
||||||
|
|
||||||
|
// Returns a matcher accepting a NormalizedLandmark whose x and y coordinates
// are each within kLandmarksDiffMargin of `landmark`. Only x and y are
// compared here.
testing::Matcher<components::containers::NormalizedLandmark> LandmarkIs(
    const components::containers::NormalizedLandmark& landmark) {
  using components::containers::NormalizedLandmark;
  const auto x_near = testing::Field(
      &NormalizedLandmark::x,
      testing::FloatNear(landmark.x, kLandmarksDiffMargin));
  const auto y_near = testing::Field(
      &NormalizedLandmark::y,
      testing::FloatNear(landmark.y, kLandmarksDiffMargin));
  return testing::AllOf(x_near, y_near);
}
|
||||||
|
|
||||||
|
void ExpectLandmarksCorrect(
|
||||||
|
const std::vector<components::containers::NormalizedLandmarks>
|
||||||
|
actual_landmarks,
|
||||||
|
const std::vector<components::containers::NormalizedLandmarks>
|
||||||
|
expected_landmarks) {
|
||||||
|
ASSERT_EQ(actual_landmarks.size(), expected_landmarks.size());
|
||||||
|
for (int i = 0; i < actual_landmarks.size(); ++i) {
|
||||||
|
ASSERT_EQ(actual_landmarks[i].landmarks.size(),
|
||||||
|
expected_landmarks[i].landmarks.size());
|
||||||
|
for (int j = 0; j < actual_landmarks[i].landmarks.size(); ++j) {
|
||||||
|
EXPECT_THAT(actual_landmarks[i].landmarks[j],
|
||||||
|
LandmarkIs(expected_landmarks[i].landmarks[j]));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns a matcher accepting a blendshape Category whose index equals
// `category.index` exactly and whose score is within kBlendshapesDiffMargin
// of `category.score`.
testing::Matcher<components::containers::Category> CategoryIs(
    const components::containers::Category& category) {
  using components::containers::Category;
  return testing::AllOf(
      testing::Field(&Category::index, testing::Eq(category.index)),
      testing::Field(&Category::score,
                     testing::FloatNear(category.score,
                                        kBlendshapesDiffMargin)));
}
|
||||||
|
|
||||||
|
void ExpectBlendshapesCorrect(
|
||||||
|
const std::vector<components::containers::Classifications>&
|
||||||
|
actual_blendshapes,
|
||||||
|
const std::vector<components::containers::Classifications>&
|
||||||
|
expected_blendshapes) {
|
||||||
|
ASSERT_EQ(actual_blendshapes.size(), expected_blendshapes.size());
|
||||||
|
for (int i = 0; i < actual_blendshapes.size(); ++i) {
|
||||||
|
ASSERT_EQ(actual_blendshapes[i].categories.size(),
|
||||||
|
expected_blendshapes[i].categories.size());
|
||||||
|
for (int j = 0; j < actual_blendshapes[i].categories.size(); ++j) {
|
||||||
|
EXPECT_THAT(actual_blendshapes[i].categories[j],
|
||||||
|
CategoryIs(expected_blendshapes[i].categories[j]));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void ExpectFacialTransformationMatrixCorrect(
|
||||||
|
const std::vector<Matrix>& actual_matrix_list,
|
||||||
|
const std::vector<Matrix>& expected_matrix_list) {
|
||||||
|
ASSERT_EQ(actual_matrix_list.size(), expected_matrix_list.size());
|
||||||
|
for (int i = 0; i < actual_matrix_list.size(); ++i) {
|
||||||
|
const Matrix& actual_matrix = actual_matrix_list[i];
|
||||||
|
const Matrix& expected_matrix = expected_matrix_list[i];
|
||||||
|
ASSERT_EQ(actual_matrix.cols(), expected_matrix.cols());
|
||||||
|
ASSERT_EQ(actual_matrix.rows(), expected_matrix.rows());
|
||||||
|
for (int i = 0; i < actual_matrix.size(); ++i) {
|
||||||
|
EXPECT_NEAR(actual_matrix.data()[i], expected_matrix.data()[i],
|
||||||
|
kFacialTransformationMatrixDiffMargin);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void ExpectFaceLandmarkerResultCorrect(
|
||||||
|
const FaceLandmarkerResult& actual_result,
|
||||||
|
const FaceLandmarkerResult& expected_result) {
|
||||||
|
ExpectLandmarksCorrect(actual_result.face_landmarks,
|
||||||
|
expected_result.face_landmarks);
|
||||||
|
|
||||||
|
ASSERT_EQ(actual_result.face_blendshapes.has_value(),
|
||||||
|
expected_result.face_blendshapes.has_value());
|
||||||
|
if (expected_result.face_blendshapes.has_value()) {
|
||||||
|
ASSERT_TRUE(actual_result.face_blendshapes.has_value());
|
||||||
|
ExpectBlendshapesCorrect(*actual_result.face_blendshapes,
|
||||||
|
*expected_result.face_blendshapes);
|
||||||
|
}
|
||||||
|
|
||||||
|
ASSERT_EQ(actual_result.facial_transformation_matrixes.has_value(),
|
||||||
|
expected_result.facial_transformation_matrixes.has_value());
|
||||||
|
if (expected_result.facial_transformation_matrixes.has_value()) {
|
||||||
|
ASSERT_TRUE(actual_result.facial_transformation_matrixes.has_value());
|
||||||
|
ExpectFacialTransformationMatrixCorrect(
|
||||||
|
*actual_result.facial_transformation_matrixes,
|
||||||
|
*expected_result.facial_transformation_matrixes);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parameterized fixture exercising FaceLandmarker in IMAGE running mode.
class ImageModeTest : public TestWithParam<FaceLandmarkerTestParams> {};
|
||||||
|
|
||||||
|
// Runs FaceLandmarker in IMAGE mode on the parameterized test image (applying
// the requested rotation, if any) and checks the result against the golden
// expectation, then closes the task.
TEST_P(ImageModeTest, Succeeds) {
  const auto& params = GetParam();
  MP_ASSERT_OK_AND_ASSIGN(
      Image image, DecodeImageFromFile(file::JoinPath(
                       "./", kTestDataDirectory, params.test_image_name)));

  auto options = std::make_unique<FaceLandmarkerOptions>();
  options->base_options.model_asset_path =
      file::JoinPath("./", kTestDataDirectory, params.input_model_name);
  options->running_mode = core::RunningMode::IMAGE;
  // Only request the optional outputs that the golden result contains.
  options->output_face_blendshapes =
      params.expected_result.face_blendshapes.has_value();
  options->output_facial_transformation_matrixes =
      params.expected_result.facial_transformation_matrixes.has_value();

  MP_ASSERT_OK_AND_ASSIGN(std::unique_ptr<FaceLandmarker> face_landmarker,
                          FaceLandmarker::Create(std::move(options)));

  FaceLandmarkerResult actual_result;
  if (params.rotation == 0) {
    MP_ASSERT_OK_AND_ASSIGN(actual_result, face_landmarker->Detect(image));
  } else {
    ImageProcessingOptions image_processing_options;
    image_processing_options.rotation_degrees = params.rotation;
    MP_ASSERT_OK_AND_ASSIGN(
        actual_result, face_landmarker->Detect(image,
                                               image_processing_options));
  }
  ExpectFaceLandmarkerResultCorrect(actual_result, params.expected_result);
  MP_ASSERT_OK(face_landmarker->Close());
}
|
||||||
|
|
||||||
|
// IMAGE-mode cases: plain landmarks, the attention model's landmarks, added
// blendshapes, and blendshapes plus the facial transformation matrix.
INSTANTIATE_TEST_SUITE_P(
    FaceLandmarkerTest, ImageModeTest,
    Values(FaceLandmarkerTestParams{
               /* test_name= */ "Portrait",
               /* input_model_name= */ kFaceLandmarkerModelBundleName,
               /* test_image_name= */ kPortraitImageName,
               /* rotation= */ 0,
               /* expected_result= */
               ConvertToFaceLandmarkerResult(
                   {GetExpectedProto<NormalizedLandmarkList>(
                       kPortraitExpectedFaceLandamrksName)})},
           FaceLandmarkerTestParams{
               /* test_name= */ "PortraitWithAttention",
               /* input_model_name= */
               kFaceLandmarkerWithBlendshapesModelBundleName,
               /* test_image_name= */ kPortraitImageName,
               /* rotation= */ 0,
               /* expected_result= */
               ConvertToFaceLandmarkerResult(
                   {GetExpectedProto<NormalizedLandmarkList>(
                       kPortraitExpectedFaceLandamrksWithAttentionName)})},
           FaceLandmarkerTestParams{
               /* test_name= */ "PortraitWithBlendshapes",
               /* input_model_name= */
               kFaceLandmarkerWithBlendshapesModelBundleName,
               /* test_image_name= */ kPortraitImageName,
               /* rotation= */ 0,
               /* expected_result= */
               ConvertToFaceLandmarkerResult(
                   {GetExpectedProto<NormalizedLandmarkList>(
                        kPortraitExpectedFaceLandamrksWithAttentionName)},
                   {{GetExpectedProto<ClassificationList>(
                       kPortraitExpectedBlendshapesName)}})},
           FaceLandmarkerTestParams{
               /* test_name= */ "PortraitWithBlendshapesWithFacialTransformatio"
                                "nMatrix",
               /* input_model_name= */
               kFaceLandmarkerWithBlendshapesModelBundleName,
               /* test_image_name= */ kPortraitImageName,
               /* rotation= */ 0,
               /* expected_result= */
               ConvertToFaceLandmarkerResult(
                   {GetExpectedProto<NormalizedLandmarkList>(
                        kPortraitExpectedFaceLandamrksWithAttentionName)},
                   {{GetExpectedProto<ClassificationList>(
                        kPortraitExpectedBlendshapesName)}},
                   {{MakePortraitExpectedFacialTransformationMatrix()}})}),
    [](const TestParamInfo<ImageModeTest::ParamType>& info) {
      return info.param.test_name;
    });
|
||||||
|
|
||||||
|
// Parameterized fixture exercising FaceLandmarker in VIDEO running mode.
class VideoModeTest : public TestWithParam<FaceLandmarkerTestParams> {};
|
||||||
|
|
||||||
|
// Runs FaceLandmarker in VIDEO mode over three frames of the same test image
// (timestamps 0..2 ms) and verifies every per-frame result against the golden
// expectation, then closes the task.
TEST_P(VideoModeTest, Succeeds) {
  const auto& params = GetParam();
  MP_ASSERT_OK_AND_ASSIGN(
      Image image, DecodeImageFromFile(file::JoinPath(
                       "./", kTestDataDirectory, params.test_image_name)));

  auto options = std::make_unique<FaceLandmarkerOptions>();
  options->base_options.model_asset_path =
      file::JoinPath("./", kTestDataDirectory, params.input_model_name);
  options->running_mode = core::RunningMode::VIDEO;
  // Only request the optional outputs that the golden result contains.
  options->output_face_blendshapes =
      params.expected_result.face_blendshapes.has_value();
  options->output_facial_transformation_matrixes =
      params.expected_result.facial_transformation_matrixes.has_value();

  MP_ASSERT_OK_AND_ASSIGN(std::unique_ptr<FaceLandmarker> face_landmarker,
                          FaceLandmarker::Create(std::move(options)));

  for (int frame = 0; frame < 3; ++frame) {
    FaceLandmarkerResult actual_result;
    if (params.rotation == 0) {
      MP_ASSERT_OK_AND_ASSIGN(actual_result,
                              face_landmarker->DetectForVideo(image, frame));
    } else {
      ImageProcessingOptions image_processing_options;
      image_processing_options.rotation_degrees = params.rotation;
      MP_ASSERT_OK_AND_ASSIGN(
          actual_result,
          face_landmarker->DetectForVideo(image, frame,
                                          image_processing_options));
    }
    ExpectFaceLandmarkerResultCorrect(actual_result, params.expected_result);
  }
  MP_ASSERT_OK(face_landmarker->Close());
}
|
||||||
|
|
||||||
|
// VIDEO-mode cases: plain landmarks, the attention model's landmarks, and
// landmarks with blendshapes.
INSTANTIATE_TEST_SUITE_P(
    FaceLandmarkerTest, VideoModeTest,
    Values(FaceLandmarkerTestParams{
               /* test_name= */ "Portrait",
               /* input_model_name= */ kFaceLandmarkerModelBundleName,
               /* test_image_name= */ kPortraitImageName,
               /* rotation= */ 0,
               /* expected_result= */
               ConvertToFaceLandmarkerResult(
                   {GetExpectedProto<NormalizedLandmarkList>(
                       kPortraitExpectedFaceLandamrksName)})},
           FaceLandmarkerTestParams{
               /* test_name= */ "PortraitWithAttention",
               /* input_model_name= */
               kFaceLandmarkerWithBlendshapesModelBundleName,
               /* test_image_name= */ kPortraitImageName,
               /* rotation= */ 0,
               /* expected_result= */
               ConvertToFaceLandmarkerResult(
                   {GetExpectedProto<NormalizedLandmarkList>(
                       kPortraitExpectedFaceLandamrksWithAttentionName)})},
           FaceLandmarkerTestParams{
               /* test_name= */ "PortraitWithBlendshapes",
               /* input_model_name= */
               kFaceLandmarkerWithBlendshapesModelBundleName,
               /* test_image_name= */ kPortraitImageName,
               /* rotation= */ 0,
               /* expected_result= */
               ConvertToFaceLandmarkerResult(
                   {GetExpectedProto<NormalizedLandmarkList>(
                        kPortraitExpectedFaceLandamrksWithAttentionName)},
                   {{GetExpectedProto<ClassificationList>(
                       kPortraitExpectedBlendshapesName)}})}),
    [](const TestParamInfo<VideoModeTest::ParamType>& info) {
      return info.param.test_name;
    });
|
||||||
|
|
||||||
|
// Parameterized fixture exercising FaceLandmarker in LIVE_STREAM running
// mode, where results are delivered asynchronously via a callback.
class LiveStreamModeTest : public TestWithParam<FaceLandmarkerTestParams> {};
|
||||||
|
|
||||||
|
// Runs FaceLandmarker in LIVE_STREAM mode, feeding the same test image 100
// times with increasing timestamps, then verifies that every result delivered
// to the callback matches the golden expectation and that result timestamps
// are strictly increasing.
//
// Fixes: removed an unused `FaceLandmarkerResult actual_result;` local inside
// the dispatch loop (leftover from the sync-mode tests — DetectAsync returns
// only a status); made the size comparisons unsigned-safe; restored the
// mangled `&timestamps` lambda capture.
TEST_P(LiveStreamModeTest, Succeeds) {
  MP_ASSERT_OK_AND_ASSIGN(
      Image image, DecodeImageFromFile(file::JoinPath(
                       "./", kTestDataDirectory, GetParam().test_image_name)));
  auto options = std::make_unique<FaceLandmarkerOptions>();
  options->base_options.model_asset_path =
      file::JoinPath("./", kTestDataDirectory, GetParam().input_model_name);
  options->running_mode = core::RunningMode::LIVE_STREAM;
  // Only request the optional outputs that the golden result contains.
  options->output_face_blendshapes =
      GetParam().expected_result.face_blendshapes.has_value();
  options->output_facial_transformation_matrixes =
      GetParam().expected_result.facial_transformation_matrixes.has_value();

  // Collect every successful result and its timestamp for verification after
  // the task is closed.
  std::vector<FaceLandmarkerResult> face_landmarker_results;
  std::vector<int64_t> timestamps;
  options->result_callback = [&face_landmarker_results, &timestamps](
                                 absl::StatusOr<FaceLandmarkerResult> result,
                                 const Image& image, int64_t timestamp_ms) {
    MP_ASSERT_OK(result.status());
    face_landmarker_results.push_back(std::move(result.value()));
    timestamps.push_back(timestamp_ms);
  };

  MP_ASSERT_OK_AND_ASSIGN(std::unique_ptr<FaceLandmarker> face_landmarker,
                          FaceLandmarker::Create(std::move(options)));

  const int iterations = 100;
  for (int i = 0; i < iterations; ++i) {
    if (GetParam().rotation != 0) {
      ImageProcessingOptions image_processing_options;
      image_processing_options.rotation_degrees = GetParam().rotation;
      MP_ASSERT_OK(
          face_landmarker->DetectAsync(image, i, image_processing_options));
    } else {
      MP_ASSERT_OK(face_landmarker->DetectAsync(image, i));
    }
  }
  // Close() blocks until all pending results have been delivered.
  MP_ASSERT_OK(face_landmarker->Close());

  // Due to the flow limiter, the total of outputs will be smaller than the
  // number of iterations.
  ASSERT_LE(face_landmarker_results.size(),
            static_cast<size_t>(iterations));
  ASSERT_GT(face_landmarker_results.size(), 0);

  for (size_t i = 0; i < face_landmarker_results.size(); ++i) {
    ExpectFaceLandmarkerResultCorrect(face_landmarker_results[i],
                                      GetParam().expected_result);
  }
  // Result timestamps must arrive in strictly increasing order.
  int64_t timestamp_ms = -1;
  for (const auto& timestamp : timestamps) {
    EXPECT_GT(timestamp, timestamp_ms);
    timestamp_ms = timestamp;
  }
}
|
||||||
|
|
||||||
|
// LIVE_STREAM-mode cases: plain landmarks, the attention model's landmarks,
// and landmarks with blendshapes.
INSTANTIATE_TEST_SUITE_P(
    FaceLandmarkerTest, LiveStreamModeTest,
    Values(FaceLandmarkerTestParams{
               /* test_name= */ "Portrait",
               /* input_model_name= */ kFaceLandmarkerModelBundleName,
               /* test_image_name= */ kPortraitImageName,
               /* rotation= */ 0,
               /* expected_result= */
               ConvertToFaceLandmarkerResult(
                   {GetExpectedProto<NormalizedLandmarkList>(
                       kPortraitExpectedFaceLandamrksName)})},
           FaceLandmarkerTestParams{
               /* test_name= */ "PortraitWithAttention",
               /* input_model_name= */
               kFaceLandmarkerWithBlendshapesModelBundleName,
               /* test_image_name= */ kPortraitImageName,
               /* rotation= */ 0,
               /* expected_result= */
               ConvertToFaceLandmarkerResult(
                   {GetExpectedProto<NormalizedLandmarkList>(
                       kPortraitExpectedFaceLandamrksWithAttentionName)})},
           FaceLandmarkerTestParams{
               /* test_name= */ "PortraitWithBlendshapes",
               /* input_model_name= */
               kFaceLandmarkerWithBlendshapesModelBundleName,
               /* test_image_name= */ kPortraitImageName,
               /* rotation= */ 0,
               /* expected_result= */
               ConvertToFaceLandmarkerResult(
                   {GetExpectedProto<NormalizedLandmarkList>(
                        kPortraitExpectedFaceLandamrksWithAttentionName)},
                   {{GetExpectedProto<ClassificationList>(
                       kPortraitExpectedBlendshapesName)}})}),
    [](const TestParamInfo<LiveStreamModeTest::ParamType>& info) {
      return info.param.test_name;
    });
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
} // namespace face_landmarker
|
||||||
|
} // namespace vision
|
||||||
|
} // namespace tasks
|
||||||
|
} // namespace mediapipe
|
Loading…
Reference in New Issue
Block a user