mediapipe-rs/mediapipe/modules/face_geometry/libs/geometry_pipeline.cc

// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "mediapipe/modules/face_geometry/libs/geometry_pipeline.h"

#include <cmath>
#include <cstdint>
#include <memory>
#include <utility>
#include <vector>

#include "Eigen/Core"
#include "absl/memory/memory.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/formats/matrix.h"
#include "mediapipe/framework/formats/matrix_data.pb.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/framework/port/status_macros.h"
#include "mediapipe/framework/port/statusor.h"
#include "mediapipe/modules/face_geometry/libs/mesh_3d_utils.h"
#include "mediapipe/modules/face_geometry/libs/procrustes_solver.h"
#include "mediapipe/modules/face_geometry/libs/validation_utils.h"
#include "mediapipe/modules/face_geometry/protos/environment.pb.h"
#include "mediapipe/modules/face_geometry/protos/face_geometry.pb.h"
#include "mediapipe/modules/face_geometry/protos/geometry_pipeline_metadata.pb.h"
#include "mediapipe/modules/face_geometry/protos/mesh_3d.pb.h"

namespace mediapipe::face_geometry {
namespace {

struct PerspectiveCameraFrustum {
  // NOTE: all arguments must be validated prior to calling this constructor.
  PerspectiveCameraFrustum(const PerspectiveCamera& perspective_camera,
                           int frame_width, int frame_height) {
    static constexpr float kDegreesToRadians = 3.14159265358979323846f / 180.f;

    const float height_at_near =
        2.f * perspective_camera.near() *
        std::tan(0.5f * kDegreesToRadians *
                 perspective_camera.vertical_fov_degrees());

    const float width_at_near = frame_width * height_at_near / frame_height;

    left = -0.5f * width_at_near;
    right = 0.5f * width_at_near;
    bottom = -0.5f * height_at_near;
    top = 0.5f * height_at_near;
    near = perspective_camera.near();
    far = perspective_camera.far();
  }

  float left;
  float right;
  float bottom;
  float top;
  float near;
  float far;
};

class ScreenToMetricSpaceConverter {
 public:
  ScreenToMetricSpaceConverter(
      OriginPointLocation origin_point_location,      //
      InputSource input_source,                       //
      Eigen::Matrix3Xf&& canonical_metric_landmarks,  //
      Eigen::VectorXf&& landmark_weights,             //
      std::unique_ptr<ProcrustesSolver> procrustes_solver)
      : origin_point_location_(origin_point_location),
        input_source_(input_source),
        canonical_metric_landmarks_(std::move(canonical_metric_landmarks)),
        landmark_weights_(std::move(landmark_weights)),
        procrustes_solver_(std::move(procrustes_solver)) {}

  // Converts `screen_landmark_list` into `metric_landmark_list` and estimates
  // the `pose_transform_mat`.
  //
  // Here's the algorithm summary:
  //
  // (1) Project X- and Y- screen landmark coordinates at the Z near plane.
  //
  // (2) Estimate a canonical-to-runtime landmark set scale by running the
  //     Procrustes solver using the screen runtime landmarks.
  //
  //     On this iteration, screen landmarks are used instead of unprojected
  //     metric landmarks as it is not safe to unproject due to the relative
  //     nature of the input screen landmark Z coordinate.
  //
  // (3) Use the canonical-to-runtime scale from (2) to unproject the screen
  //     landmarks. The result is referenced as "intermediate landmarks" because
  //     they are the first estimation of the resuling metric landmarks, but are
  //     not quite there yet.
  //
  // (4) Estimate a canonical-to-runtime landmark set scale by running the
  //     Procrustes solver using the intermediate runtime landmarks.
  //
  // (5) Use the product of the scale factors from (2) and (4) to unproject
  //     the screen landmarks the second time. This is the second and the final
  //     estimation of the metric landmarks.
  //
  // (6) Multiply each of the metric landmarks by the inverse pose
  //     transformation matrix to align the runtime metric face landmarks with
  //     the canonical metric face landmarks.
  //
  // Note: the input screen landmarks are in the left-handed coordinate system,
  //       however any metric landmarks - including the canonical metric
  //       landmarks, the final runtime metric landmarks and any intermediate
  //       runtime metric landmarks - are in the right-handed coordinate system.
  //
  //       To keep the logic correct, the landmark set handedness is changed any
  //       time the screen-to-metric semantic barrier is passed.
  absl::Status Convert(const NormalizedLandmarkList& screen_landmark_list,  //
                       const PerspectiveCameraFrustum& pcf,                 //
                       LandmarkList& metric_landmark_list,                  //
                       Eigen::Matrix4f& pose_transform_mat) const {
    RET_CHECK_EQ(screen_landmark_list.landmark_size(),
                 canonical_metric_landmarks_.cols())
        << "The number of landmarks doesn't match the number passed upon "
           "initialization!";

    Eigen::Matrix3Xf screen_landmarks;
    ConvertLandmarkListToEigenMatrix(screen_landmark_list, screen_landmarks);

    ProjectXY(pcf, screen_landmarks);
    const float depth_offset = screen_landmarks.row(2).mean();

    // 1st iteration: don't unproject XY because it's unsafe to do so due to
    //                the relative nature of the Z coordinate. Instead, run the
    //                first estimation on the projected XY and use that scale to
    //                unproject for the 2nd iteration.
    Eigen::Matrix3Xf intermediate_landmarks(screen_landmarks);
    ChangeHandedness(intermediate_landmarks);

    ASSIGN_OR_RETURN(const float first_iteration_scale,
                     EstimateScale(intermediate_landmarks),
                     _ << "Failed to estimate first iteration scale!");

    // 2nd iteration: unproject XY using the scale from the 1st iteration.
    intermediate_landmarks = screen_landmarks;
    MoveAndRescaleZ(pcf, depth_offset, first_iteration_scale,
                    intermediate_landmarks);
    UnprojectXY(pcf, intermediate_landmarks);
    ChangeHandedness(intermediate_landmarks);

    // For face detection input landmarks, re-write Z-coord from the canonical
    // landmarks.
    if (input_source_ == InputSource::FACE_DETECTION_PIPELINE) {
      Eigen::Matrix4f intermediate_pose_transform_mat;
      MP_RETURN_IF_ERROR(procrustes_solver_->SolveWeightedOrthogonalProblem(
          canonical_metric_landmarks_, intermediate_landmarks,
          landmark_weights_, intermediate_pose_transform_mat))
          << "Failed to estimate pose transform matrix!";

      intermediate_landmarks.row(2) =
          (intermediate_pose_transform_mat *
           canonical_metric_landmarks_.colwise().homogeneous())
              .row(2);
    }
    ASSIGN_OR_RETURN(const float second_iteration_scale,
                     EstimateScale(intermediate_landmarks),
                     _ << "Failed to estimate second iteration scale!");

    // Use the total scale to unproject the screen landmarks.
    const float total_scale = first_iteration_scale * second_iteration_scale;
    MoveAndRescaleZ(pcf, depth_offset, total_scale, screen_landmarks);
    UnprojectXY(pcf, screen_landmarks);
    ChangeHandedness(screen_landmarks);

    // At this point, screen landmarks are converted into metric landmarks.
    Eigen::Matrix3Xf& metric_landmarks = screen_landmarks;

    MP_RETURN_IF_ERROR(procrustes_solver_->SolveWeightedOrthogonalProblem(
        canonical_metric_landmarks_, metric_landmarks, landmark_weights_,
        pose_transform_mat))
        << "Failed to estimate pose transform matrix!";

    // For face detection input landmarks, re-write Z-coord from the canonical
    // landmarks and run the pose transform estimation again.
    if (input_source_ == InputSource::FACE_DETECTION_PIPELINE) {
      metric_landmarks.row(2) =
          (pose_transform_mat *
           canonical_metric_landmarks_.colwise().homogeneous())
              .row(2);

      MP_RETURN_IF_ERROR(procrustes_solver_->SolveWeightedOrthogonalProblem(
          canonical_metric_landmarks_, metric_landmarks, landmark_weights_,
          pose_transform_mat))
          << "Failed to estimate pose transform matrix!";
    }

    // Multiply each of the metric landmarks by the inverse pose
    // transformation matrix to align the runtime metric face landmarks with
    // the canonical metric face landmarks.
    metric_landmarks = (pose_transform_mat.inverse() *
                        metric_landmarks.colwise().homogeneous())
                           .topRows(3);

    ConvertEigenMatrixToLandmarkList(metric_landmarks, metric_landmark_list);

    return absl::OkStatus();
  }

 private:
  void ProjectXY(const PerspectiveCameraFrustum& pcf,
                 Eigen::Matrix3Xf& landmarks) const {
    float x_scale = pcf.right - pcf.left;
    float y_scale = pcf.top - pcf.bottom;
    float x_translation = pcf.left;
    float y_translation = pcf.bottom;

    if (origin_point_location_ == OriginPointLocation::TOP_LEFT_CORNER) {
      landmarks.row(1) = 1.f - landmarks.row(1).array();
    }

    landmarks =
        landmarks.array().colwise() * Eigen::Array3f(x_scale, y_scale, x_scale);
    landmarks.colwise() += Eigen::Vector3f(x_translation, y_translation, 0.f);
  }

  absl::StatusOr<float> EstimateScale(Eigen::Matrix3Xf& landmarks) const {
    Eigen::Matrix4f transform_mat;
    MP_RETURN_IF_ERROR(procrustes_solver_->SolveWeightedOrthogonalProblem(
        canonical_metric_landmarks_, landmarks, landmark_weights_,
        transform_mat))
        << "Failed to estimate canonical-to-runtime landmark set transform!";

    return transform_mat.col(0).norm();
  }

  static void MoveAndRescaleZ(const PerspectiveCameraFrustum& pcf,
                              float depth_offset, float scale,
                              Eigen::Matrix3Xf& landmarks) {
    landmarks.row(2) =
        (landmarks.array().row(2) - depth_offset + pcf.near) / scale;
  }

  static void UnprojectXY(const PerspectiveCameraFrustum& pcf,
                          Eigen::Matrix3Xf& landmarks) {
    landmarks.row(0) =
        landmarks.row(0).cwiseProduct(landmarks.row(2)) / pcf.near;
    landmarks.row(1) =
        landmarks.row(1).cwiseProduct(landmarks.row(2)) / pcf.near;
  }

  static void ChangeHandedness(Eigen::Matrix3Xf& landmarks) {
    landmarks.row(2) *= -1.f;
  }

  static void ConvertLandmarkListToEigenMatrix(
      const NormalizedLandmarkList& landmark_list,
      Eigen::Matrix3Xf& eigen_matrix) {
    eigen_matrix = Eigen::Matrix3Xf(3, landmark_list.landmark_size());
    for (int i = 0; i < landmark_list.landmark_size(); ++i) {
      const auto& landmark = landmark_list.landmark(i);
      eigen_matrix(0, i) = landmark.x();
      eigen_matrix(1, i) = landmark.y();
      eigen_matrix(2, i) = landmark.z();
    }
  }

  static void ConvertEigenMatrixToLandmarkList(
      const Eigen::Matrix3Xf& eigen_matrix, LandmarkList& landmark_list) {
    landmark_list.Clear();

    for (int i = 0; i < eigen_matrix.cols(); ++i) {
      auto& landmark = *landmark_list.add_landmark();
      landmark.set_x(eigen_matrix(0, i));
      landmark.set_y(eigen_matrix(1, i));
      landmark.set_z(eigen_matrix(2, i));
    }
  }

  const OriginPointLocation origin_point_location_;
  const InputSource input_source_;
  Eigen::Matrix3Xf canonical_metric_landmarks_;
  Eigen::VectorXf landmark_weights_;

  std::unique_ptr<ProcrustesSolver> procrustes_solver_;
};

class GeometryPipelineImpl : public GeometryPipeline {
 public:
  GeometryPipelineImpl(
      const PerspectiveCamera& perspective_camera,  //
      const Mesh3d& canonical_mesh,                 //
      uint32_t canonical_mesh_vertex_size,          //
      uint32_t canonical_mesh_num_vertices,
      uint32_t canonical_mesh_vertex_position_offset,
      std::unique_ptr<ScreenToMetricSpaceConverter> space_converter)
      : perspective_camera_(perspective_camera),
        canonical_mesh_(canonical_mesh),
        canonical_mesh_vertex_size_(canonical_mesh_vertex_size),
        canonical_mesh_num_vertices_(canonical_mesh_num_vertices),
        canonical_mesh_vertex_position_offset_(
            canonical_mesh_vertex_position_offset),
        space_converter_(std::move(space_converter)) {}

  absl::StatusOr<std::vector<FaceGeometry>> EstimateFaceGeometry(
      const std::vector<NormalizedLandmarkList>& multi_face_landmarks,
      int frame_width, int frame_height) const override {
    MP_RETURN_IF_ERROR(ValidateFrameDimensions(frame_width, frame_height))
        << "Invalid frame dimensions!";

    // Create a perspective camera frustum to be shared for geometry estimation
    // per each face.
    PerspectiveCameraFrustum pcf(perspective_camera_, frame_width,
                                 frame_height);

    std::vector<FaceGeometry> multi_face_geometry;

    // From this point, the meaning of "face landmarks" is clarified further as
    // "screen face landmarks". This is done do distinguish from "metric face
    // landmarks" that are derived during the face geometry estimation process.
    for (const NormalizedLandmarkList& screen_face_landmarks :
         multi_face_landmarks) {
      // Having a too compact screen landmark list will result in numerical
      // instabilities, therefore such faces are filtered.
      if (IsScreenLandmarkListTooCompact(screen_face_landmarks)) {
        continue;
      }

      // Convert the screen landmarks into the metric landmarks and get the pose
      // transformation matrix.
      LandmarkList metric_face_landmarks;
      Eigen::Matrix4f pose_transform_mat;
      MP_RETURN_IF_ERROR(space_converter_->Convert(screen_face_landmarks, pcf,
                                                   metric_face_landmarks,
                                                   pose_transform_mat))
          << "Failed to convert landmarks from the screen to the metric space!";

      // Pack geometry data for this face.
      FaceGeometry face_geometry;
      Mesh3d* mutable_mesh = face_geometry.mutable_mesh();
      // Copy the canonical face mesh as the face geometry mesh.
      mutable_mesh->CopyFrom(canonical_mesh_);
      // Replace XYZ vertex mesh coodinates with the metric landmark positions.
      for (int i = 0; i < canonical_mesh_num_vertices_; ++i) {
        uint32_t vertex_buffer_offset = canonical_mesh_vertex_size_ * i +
                                        canonical_mesh_vertex_position_offset_;

        mutable_mesh->set_vertex_buffer(vertex_buffer_offset,
                                        metric_face_landmarks.landmark(i).x());
        mutable_mesh->set_vertex_buffer(vertex_buffer_offset + 1,
                                        metric_face_landmarks.landmark(i).y());
        mutable_mesh->set_vertex_buffer(vertex_buffer_offset + 2,
                                        metric_face_landmarks.landmark(i).z());
      }
      // Populate the face pose transformation matrix.
      mediapipe::MatrixDataProtoFromMatrix(
          pose_transform_mat, face_geometry.mutable_pose_transform_matrix());

      multi_face_geometry.push_back(face_geometry);
    }

    return multi_face_geometry;
  }

 private:
  static bool IsScreenLandmarkListTooCompact(
      const NormalizedLandmarkList& screen_landmarks) {
    float mean_x = 0.f;
    float mean_y = 0.f;
    for (int i = 0; i < screen_landmarks.landmark_size(); ++i) {
      const auto& landmark = screen_landmarks.landmark(i);
      mean_x += (landmark.x() - mean_x) / static_cast<float>(i + 1);
      mean_y += (landmark.y() - mean_y) / static_cast<float>(i + 1);
    }

    float max_sq_dist = 0.f;
    for (const auto& landmark : screen_landmarks.landmark()) {
      const float d_x = landmark.x() - mean_x;
      const float d_y = landmark.y() - mean_y;
      max_sq_dist = std::max(max_sq_dist, d_x * d_x + d_y * d_y);
    }

    static constexpr float kIsScreenLandmarkListTooCompactThreshold = 1e-3f;
    return std::sqrt(max_sq_dist) <= kIsScreenLandmarkListTooCompactThreshold;
  }

  const PerspectiveCamera perspective_camera_;
  const Mesh3d canonical_mesh_;
  const uint32_t canonical_mesh_vertex_size_;
  const uint32_t canonical_mesh_num_vertices_;
  const uint32_t canonical_mesh_vertex_position_offset_;

  std::unique_ptr<ScreenToMetricSpaceConverter> space_converter_;
};

}  // namespace

absl::StatusOr<std::unique_ptr<GeometryPipeline>> CreateGeometryPipeline(
    const Environment& environment, const GeometryPipelineMetadata& metadata) {
  MP_RETURN_IF_ERROR(ValidateEnvironment(environment))
      << "Invalid environment!";
  MP_RETURN_IF_ERROR(ValidateGeometryPipelineMetadata(metadata))
      << "Invalid geometry pipeline metadata!";

  const auto& canonical_mesh = metadata.canonical_mesh();
  RET_CHECK(HasVertexComponent(canonical_mesh.vertex_type(),
                               VertexComponent::POSITION))
      << "Canonical face mesh must have the `POSITION` vertex component!";
  RET_CHECK(HasVertexComponent(canonical_mesh.vertex_type(),
                               VertexComponent::TEX_COORD))
      << "Canonical face mesh must have the `TEX_COORD` vertex component!";

  uint32_t canonical_mesh_vertex_size =
      GetVertexSize(canonical_mesh.vertex_type());
  uint32_t canonical_mesh_num_vertices =
      canonical_mesh.vertex_buffer_size() / canonical_mesh_vertex_size;
  uint32_t canonical_mesh_vertex_position_offset =
      GetVertexComponentOffset(canonical_mesh.vertex_type(),
                               VertexComponent::POSITION)
          .value();

  // Put the Procrustes landmark basis into Eigen matrices for an easier access.
  Eigen::Matrix3Xf canonical_metric_landmarks =
      Eigen::Matrix3Xf::Zero(3, canonical_mesh_num_vertices);
  Eigen::VectorXf landmark_weights =
      Eigen::VectorXf::Zero(canonical_mesh_num_vertices);

  for (int i = 0; i < canonical_mesh_num_vertices; ++i) {
    uint32_t vertex_buffer_offset =
        canonical_mesh_vertex_size * i + canonical_mesh_vertex_position_offset;

    canonical_metric_landmarks(0, i) =
        canonical_mesh.vertex_buffer(vertex_buffer_offset);
    canonical_metric_landmarks(1, i) =
        canonical_mesh.vertex_buffer(vertex_buffer_offset + 1);
    canonical_metric_landmarks(2, i) =
        canonical_mesh.vertex_buffer(vertex_buffer_offset + 2);
  }

  for (const WeightedLandmarkRef& wlr : metadata.procrustes_landmark_basis()) {
    uint32_t landmark_id = wlr.landmark_id();
    landmark_weights(landmark_id) = wlr.weight();
  }

  std::unique_ptr<GeometryPipeline> result =
      absl::make_unique<GeometryPipelineImpl>(
          environment.perspective_camera(), canonical_mesh,
          canonical_mesh_vertex_size, canonical_mesh_num_vertices,
          canonical_mesh_vertex_position_offset,
          absl::make_unique<ScreenToMetricSpaceConverter>(
              environment.origin_point_location(),
              metadata.input_source() == InputSource::DEFAULT
                  ? InputSource::FACE_LANDMARK_PIPELINE
                  : metadata.input_source(),
              std::move(canonical_metric_landmarks),
              std::move(landmark_weights),
              CreateFloatPrecisionProcrustesSolver()));

  return result;
}

}  // namespace mediapipe::face_geometry
code fill 2022-03-01 13:04:01 +01:00			`// Copyright 2020 The MediaPipe Authors.`
			`//`
			`// Licensed under the Apache License, Version 2.0 (the "License");`
			`// you may not use this file except in compliance with the License.`
			`// You may obtain a copy of the License at`
			`//`
			`// http://www.apache.org/licenses/LICENSE-2.0`
			`//`
			`// Unless required by applicable law or agreed to in writing, software`
			`// distributed under the License is distributed on an "AS IS" BASIS,`
			`// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`// See the License for the specific language governing permissions and`
			`// limitations under the License.`

			`#include "mediapipe/modules/face_geometry/libs/geometry_pipeline.h"`

			`#include <cmath>`
			`#include <cstdint>`
			`#include <memory>`
			`#include <utility>`
			`#include <vector>`

			`#include "Eigen/Core"`
			`#include "absl/memory/memory.h"`
			`#include "mediapipe/framework/formats/landmark.pb.h"`
			`#include "mediapipe/framework/formats/matrix.h"`
			`#include "mediapipe/framework/formats/matrix_data.pb.h"`
			`#include "mediapipe/framework/port/ret_check.h"`
			`#include "mediapipe/framework/port/status.h"`
			`#include "mediapipe/framework/port/status_macros.h"`
			`#include "mediapipe/framework/port/statusor.h"`
			`#include "mediapipe/modules/face_geometry/libs/mesh_3d_utils.h"`
			`#include "mediapipe/modules/face_geometry/libs/procrustes_solver.h"`
			`#include "mediapipe/modules/face_geometry/libs/validation_utils.h"`
			`#include "mediapipe/modules/face_geometry/protos/environment.pb.h"`
			`#include "mediapipe/modules/face_geometry/protos/face_geometry.pb.h"`
			`#include "mediapipe/modules/face_geometry/protos/geometry_pipeline_metadata.pb.h"`
			`#include "mediapipe/modules/face_geometry/protos/mesh_3d.pb.h"`

			`namespace mediapipe::face_geometry {`
			`namespace {`

			`struct PerspectiveCameraFrustum {`
			`// NOTE: all arguments must be validated prior to calling this constructor.`
			`PerspectiveCameraFrustum(const PerspectiveCamera& perspective_camera,`
			`int frame_width, int frame_height) {`
			`static constexpr float kDegreesToRadians = 3.14159265358979323846f / 180.f;`

			`const float height_at_near =`
			`2.f * perspective_camera.near() *`
			`std::tan(0.5f * kDegreesToRadians *`
			`perspective_camera.vertical_fov_degrees());`

			`const float width_at_near = frame_width * height_at_near / frame_height;`

			`left = -0.5f * width_at_near;`
			`right = 0.5f * width_at_near;`
			`bottom = -0.5f * height_at_near;`
			`top = 0.5f * height_at_near;`
			`near = perspective_camera.near();`
			`far = perspective_camera.far();`
			`}`

			`float left;`
			`float right;`
			`float bottom;`
			`float top;`
			`float near;`
			`float far;`
			`};`

			`class ScreenToMetricSpaceConverter {`
			`public:`
			`ScreenToMetricSpaceConverter(`
			`OriginPointLocation origin_point_location, //`
			`InputSource input_source, //`
			`Eigen::Matrix3Xf&& canonical_metric_landmarks, //`
			`Eigen::VectorXf&& landmark_weights, //`
			`std::unique_ptr<ProcrustesSolver> procrustes_solver)`
			`: origin_point_location_(origin_point_location),`
			`input_source_(input_source),`
			`canonical_metric_landmarks_(std::move(canonical_metric_landmarks)),`
			`landmark_weights_(std::move(landmark_weights)),`
			`procrustes_solver_(std::move(procrustes_solver)) {}`

			// Converts `screen_landmark_list` into `metric_landmark_list` and estimates
			// the `pose_transform_mat`.
			`//`
			`// Here's the algorithm summary:`
			`//`
			`// (1) Project X- and Y- screen landmark coordinates at the Z near plane.`
			`//`
			`// (2) Estimate a canonical-to-runtime landmark set scale by running the`
			`// Procrustes solver using the screen runtime landmarks.`
			`//`
			`// On this iteration, screen landmarks are used instead of unprojected`
			`// metric landmarks as it is not safe to unproject due to the relative`
			`// nature of the input screen landmark Z coordinate.`
			`//`
			`// (3) Use the canonical-to-runtime scale from (2) to unproject the screen`
			`// landmarks. The result is referenced as "intermediate landmarks" because`
			`// they are the first estimation of the resuling metric landmarks, but are`
			`// not quite there yet.`
			`//`
			`// (4) Estimate a canonical-to-runtime landmark set scale by running the`
			`// Procrustes solver using the intermediate runtime landmarks.`
			`//`
			`// (5) Use the product of the scale factors from (2) and (4) to unproject`
			`// the screen landmarks the second time. This is the second and the final`
			`// estimation of the metric landmarks.`
			`//`
			`// (6) Multiply each of the metric landmarks by the inverse pose`
			`// transformation matrix to align the runtime metric face landmarks with`
			`// the canonical metric face landmarks.`
			`//`
			`// Note: the input screen landmarks are in the left-handed coordinate system,`
			`// however any metric landmarks - including the canonical metric`
			`// landmarks, the final runtime metric landmarks and any intermediate`
			`// runtime metric landmarks - are in the right-handed coordinate system.`
			`//`
			`// To keep the logic correct, the landmark set handedness is changed any`
			`// time the screen-to-metric semantic barrier is passed.`
			`absl::Status Convert(const NormalizedLandmarkList& screen_landmark_list, //`
			`const PerspectiveCameraFrustum& pcf, //`
			`LandmarkList& metric_landmark_list, //`
			`Eigen::Matrix4f& pose_transform_mat) const {`
			`RET_CHECK_EQ(screen_landmark_list.landmark_size(),`
			`canonical_metric_landmarks_.cols())`
			`<< "The number of landmarks doesn't match the number passed upon "`
			`"initialization!";`

			`Eigen::Matrix3Xf screen_landmarks;`
			`ConvertLandmarkListToEigenMatrix(screen_landmark_list, screen_landmarks);`

			`ProjectXY(pcf, screen_landmarks);`
			`const float depth_offset = screen_landmarks.row(2).mean();`

			`// 1st iteration: don't unproject XY because it's unsafe to do so due to`
			`// the relative nature of the Z coordinate. Instead, run the`
			`// first estimation on the projected XY and use that scale to`
			`// unproject for the 2nd iteration.`
			`Eigen::Matrix3Xf intermediate_landmarks(screen_landmarks);`
			`ChangeHandedness(intermediate_landmarks);`

			`ASSIGN_OR_RETURN(const float first_iteration_scale,`
			`EstimateScale(intermediate_landmarks),`
			`_ << "Failed to estimate first iteration scale!");`

			`// 2nd iteration: unproject XY using the scale from the 1st iteration.`
			`intermediate_landmarks = screen_landmarks;`
			`MoveAndRescaleZ(pcf, depth_offset, first_iteration_scale,`
			`intermediate_landmarks);`
			`UnprojectXY(pcf, intermediate_landmarks);`
			`ChangeHandedness(intermediate_landmarks);`

			`// For face detection input landmarks, re-write Z-coord from the canonical`
			`// landmarks.`
			`if (input_source_ == InputSource::FACE_DETECTION_PIPELINE) {`
			`Eigen::Matrix4f intermediate_pose_transform_mat;`
			`MP_RETURN_IF_ERROR(procrustes_solver_->SolveWeightedOrthogonalProblem(`
			`canonical_metric_landmarks_, intermediate_landmarks,`
			`landmark_weights_, intermediate_pose_transform_mat))`
			`<< "Failed to estimate pose transform matrix!";`

			`intermediate_landmarks.row(2) =`
			`(intermediate_pose_transform_mat *`
			`canonical_metric_landmarks_.colwise().homogeneous())`
			`.row(2);`
			`}`
			`ASSIGN_OR_RETURN(const float second_iteration_scale,`
			`EstimateScale(intermediate_landmarks),`
			`_ << "Failed to estimate second iteration scale!");`

			`// Use the total scale to unproject the screen landmarks.`
			`const float total_scale = first_iteration_scale * second_iteration_scale;`
			`MoveAndRescaleZ(pcf, depth_offset, total_scale, screen_landmarks);`
			`UnprojectXY(pcf, screen_landmarks);`
			`ChangeHandedness(screen_landmarks);`

			`// At this point, screen landmarks are converted into metric landmarks.`
			`Eigen::Matrix3Xf& metric_landmarks = screen_landmarks;`

			`MP_RETURN_IF_ERROR(procrustes_solver_->SolveWeightedOrthogonalProblem(`
			`canonical_metric_landmarks_, metric_landmarks, landmark_weights_,`
			`pose_transform_mat))`
			`<< "Failed to estimate pose transform matrix!";`

			`// For face detection input landmarks, re-write Z-coord from the canonical`
			`// landmarks and run the pose transform estimation again.`
			`if (input_source_ == InputSource::FACE_DETECTION_PIPELINE) {`
			`metric_landmarks.row(2) =`
			`(pose_transform_mat *`
			`canonical_metric_landmarks_.colwise().homogeneous())`
			`.row(2);`

			`MP_RETURN_IF_ERROR(procrustes_solver_->SolveWeightedOrthogonalProblem(`
			`canonical_metric_landmarks_, metric_landmarks, landmark_weights_,`
			`pose_transform_mat))`
			`<< "Failed to estimate pose transform matrix!";`
			`}`

			`// Multiply each of the metric landmarks by the inverse pose`
			`// transformation matrix to align the runtime metric face landmarks with`
			`// the canonical metric face landmarks.`
			`metric_landmarks = (pose_transform_mat.inverse() *`
			`metric_landmarks.colwise().homogeneous())`
			`.topRows(3);`

			`ConvertEigenMatrixToLandmarkList(metric_landmarks, metric_landmark_list);`

			`return absl::OkStatus();`
			`}`

			`private:`
			`void ProjectXY(const PerspectiveCameraFrustum& pcf,`
			`Eigen::Matrix3Xf& landmarks) const {`
			`float x_scale = pcf.right - pcf.left;`
			`float y_scale = pcf.top - pcf.bottom;`
			`float x_translation = pcf.left;`
			`float y_translation = pcf.bottom;`

			`if (origin_point_location_ == OriginPointLocation::TOP_LEFT_CORNER) {`
			`landmarks.row(1) = 1.f - landmarks.row(1).array();`
			`}`

			`landmarks =`
			`landmarks.array().colwise() * Eigen::Array3f(x_scale, y_scale, x_scale);`
			`landmarks.colwise() += Eigen::Vector3f(x_translation, y_translation, 0.f);`
			`}`

			`absl::StatusOr<float> EstimateScale(Eigen::Matrix3Xf& landmarks) const {`
			`Eigen::Matrix4f transform_mat;`
			`MP_RETURN_IF_ERROR(procrustes_solver_->SolveWeightedOrthogonalProblem(`
			`canonical_metric_landmarks_, landmarks, landmark_weights_,`
			`transform_mat))`
			`<< "Failed to estimate canonical-to-runtime landmark set transform!";`

			`return transform_mat.col(0).norm();`
			`}`

			`static void MoveAndRescaleZ(const PerspectiveCameraFrustum& pcf,`
			`float depth_offset, float scale,`
			`Eigen::Matrix3Xf& landmarks) {`
			`landmarks.row(2) =`
			`(landmarks.array().row(2) - depth_offset + pcf.near) / scale;`
			`}`

			`static void UnprojectXY(const PerspectiveCameraFrustum& pcf,`
			`Eigen::Matrix3Xf& landmarks) {`
			`landmarks.row(0) =`
			`landmarks.row(0).cwiseProduct(landmarks.row(2)) / pcf.near;`
			`landmarks.row(1) =`
			`landmarks.row(1).cwiseProduct(landmarks.row(2)) / pcf.near;`
			`}`

			`static void ChangeHandedness(Eigen::Matrix3Xf& landmarks) {`
			`landmarks.row(2) *= -1.f;`
			`}`

			`static void ConvertLandmarkListToEigenMatrix(`
			`const NormalizedLandmarkList& landmark_list,`
			`Eigen::Matrix3Xf& eigen_matrix) {`
			`eigen_matrix = Eigen::Matrix3Xf(3, landmark_list.landmark_size());`
			`for (int i = 0; i < landmark_list.landmark_size(); ++i) {`
			`const auto& landmark = landmark_list.landmark(i);`
			`eigen_matrix(0, i) = landmark.x();`
			`eigen_matrix(1, i) = landmark.y();`
			`eigen_matrix(2, i) = landmark.z();`
			`}`
			`}`

			`static void ConvertEigenMatrixToLandmarkList(`
			`const Eigen::Matrix3Xf& eigen_matrix, LandmarkList& landmark_list) {`
			`landmark_list.Clear();`

			`for (int i = 0; i < eigen_matrix.cols(); ++i) {`
			`auto& landmark = *landmark_list.add_landmark();`
			`landmark.set_x(eigen_matrix(0, i));`
			`landmark.set_y(eigen_matrix(1, i));`
			`landmark.set_z(eigen_matrix(2, i));`
			`}`
			`}`

			`const OriginPointLocation origin_point_location_;`
			`const InputSource input_source_;`
			`Eigen::Matrix3Xf canonical_metric_landmarks_;`
			`Eigen::VectorXf landmark_weights_;`

			`std::unique_ptr<ProcrustesSolver> procrustes_solver_;`
			`};`

			`class GeometryPipelineImpl : public GeometryPipeline {`
			`public:`
			`GeometryPipelineImpl(`
			`const PerspectiveCamera& perspective_camera, //`
			`const Mesh3d& canonical_mesh, //`
			`uint32_t canonical_mesh_vertex_size, //`
			`uint32_t canonical_mesh_num_vertices,`
			`uint32_t canonical_mesh_vertex_position_offset,`
			`std::unique_ptr<ScreenToMetricSpaceConverter> space_converter)`
			`: perspective_camera_(perspective_camera),`
			`canonical_mesh_(canonical_mesh),`
			`canonical_mesh_vertex_size_(canonical_mesh_vertex_size),`
			`canonical_mesh_num_vertices_(canonical_mesh_num_vertices),`
			`canonical_mesh_vertex_position_offset_(`
			`canonical_mesh_vertex_position_offset),`
			`space_converter_(std::move(space_converter)) {}`

			`absl::StatusOr<std::vector<FaceGeometry>> EstimateFaceGeometry(`
			`const std::vector<NormalizedLandmarkList>& multi_face_landmarks,`
			`int frame_width, int frame_height) const override {`
			`MP_RETURN_IF_ERROR(ValidateFrameDimensions(frame_width, frame_height))`
			`<< "Invalid frame dimensions!";`

			`// Create a perspective camera frustum to be shared for geometry estimation`
			`// per each face.`
			`PerspectiveCameraFrustum pcf(perspective_camera_, frame_width,`
			`frame_height);`

			`std::vector<FaceGeometry> multi_face_geometry;`

			`// From this point, the meaning of "face landmarks" is clarified further as`
			`// "screen face landmarks". This is done do distinguish from "metric face`
			`// landmarks" that are derived during the face geometry estimation process.`
			`for (const NormalizedLandmarkList& screen_face_landmarks :`
			`multi_face_landmarks) {`
			`// Having a too compact screen landmark list will result in numerical`
			`// instabilities, therefore such faces are filtered.`
			`if (IsScreenLandmarkListTooCompact(screen_face_landmarks)) {`
			`continue;`
			`}`

			`// Convert the screen landmarks into the metric landmarks and get the pose`
			`// transformation matrix.`
			`LandmarkList metric_face_landmarks;`
			`Eigen::Matrix4f pose_transform_mat;`
			`MP_RETURN_IF_ERROR(space_converter_->Convert(screen_face_landmarks, pcf,`
			`metric_face_landmarks,`
			`pose_transform_mat))`
			`<< "Failed to convert landmarks from the screen to the metric space!";`

			`// Pack geometry data for this face.`
			`FaceGeometry face_geometry;`
			`Mesh3d* mutable_mesh = face_geometry.mutable_mesh();`
			`// Copy the canonical face mesh as the face geometry mesh.`
			`mutable_mesh->CopyFrom(canonical_mesh_);`
			`// Replace XYZ vertex mesh coodinates with the metric landmark positions.`
			`for (int i = 0; i < canonical_mesh_num_vertices_; ++i) {`
			`uint32_t vertex_buffer_offset = canonical_mesh_vertex_size_ * i +`
			`canonical_mesh_vertex_position_offset_;`

			`mutable_mesh->set_vertex_buffer(vertex_buffer_offset,`
			`metric_face_landmarks.landmark(i).x());`
			`mutable_mesh->set_vertex_buffer(vertex_buffer_offset + 1,`
			`metric_face_landmarks.landmark(i).y());`
			`mutable_mesh->set_vertex_buffer(vertex_buffer_offset + 2,`
			`metric_face_landmarks.landmark(i).z());`
			`}`
			`// Populate the face pose transformation matrix.`
			`mediapipe::MatrixDataProtoFromMatrix(`
			`pose_transform_mat, face_geometry.mutable_pose_transform_matrix());`

			`multi_face_geometry.push_back(face_geometry);`
			`}`

			`return multi_face_geometry;`
			`}`

			`private:`
			`static bool IsScreenLandmarkListTooCompact(`
			`const NormalizedLandmarkList& screen_landmarks) {`
			`float mean_x = 0.f;`
			`float mean_y = 0.f;`
			`for (int i = 0; i < screen_landmarks.landmark_size(); ++i) {`
			`const auto& landmark = screen_landmarks.landmark(i);`
			`mean_x += (landmark.x() - mean_x) / static_cast<float>(i + 1);`
			`mean_y += (landmark.y() - mean_y) / static_cast<float>(i + 1);`
			`}`

			`float max_sq_dist = 0.f;`
			`for (const auto& landmark : screen_landmarks.landmark()) {`
			`const float d_x = landmark.x() - mean_x;`
			`const float d_y = landmark.y() - mean_y;`
			`max_sq_dist = std::max(max_sq_dist, d_x * d_x + d_y * d_y);`
			`}`

			`static constexpr float kIsScreenLandmarkListTooCompactThreshold = 1e-3f;`
			`return std::sqrt(max_sq_dist) <= kIsScreenLandmarkListTooCompactThreshold;`
			`}`

			`const PerspectiveCamera perspective_camera_;`
			`const Mesh3d canonical_mesh_;`
			`const uint32_t canonical_mesh_vertex_size_;`
			`const uint32_t canonical_mesh_num_vertices_;`
			`const uint32_t canonical_mesh_vertex_position_offset_;`

			`std::unique_ptr<ScreenToMetricSpaceConverter> space_converter_;`
			`};`

			`} // namespace`

			`absl::StatusOr<std::unique_ptr<GeometryPipeline>> CreateGeometryPipeline(`
			`const Environment& environment, const GeometryPipelineMetadata& metadata) {`
			`MP_RETURN_IF_ERROR(ValidateEnvironment(environment))`
			`<< "Invalid environment!";`
			`MP_RETURN_IF_ERROR(ValidateGeometryPipelineMetadata(metadata))`
			`<< "Invalid geometry pipeline metadata!";`

			`const auto& canonical_mesh = metadata.canonical_mesh();`
			`RET_CHECK(HasVertexComponent(canonical_mesh.vertex_type(),`
			`VertexComponent::POSITION))`
			<< "Canonical face mesh must have the `POSITION` vertex component!";
			`RET_CHECK(HasVertexComponent(canonical_mesh.vertex_type(),`
			`VertexComponent::TEX_COORD))`
			<< "Canonical face mesh must have the `TEX_COORD` vertex component!";

			`uint32_t canonical_mesh_vertex_size =`
			`GetVertexSize(canonical_mesh.vertex_type());`
			`uint32_t canonical_mesh_num_vertices =`
			`canonical_mesh.vertex_buffer_size() / canonical_mesh_vertex_size;`
			`uint32_t canonical_mesh_vertex_position_offset =`
			`GetVertexComponentOffset(canonical_mesh.vertex_type(),`
			`VertexComponent::POSITION)`
			`.value();`

			`// Put the Procrustes landmark basis into Eigen matrices for an easier access.`
			`Eigen::Matrix3Xf canonical_metric_landmarks =`
			`Eigen::Matrix3Xf::Zero(3, canonical_mesh_num_vertices);`
			`Eigen::VectorXf landmark_weights =`
			`Eigen::VectorXf::Zero(canonical_mesh_num_vertices);`

			`for (int i = 0; i < canonical_mesh_num_vertices; ++i) {`
			`uint32_t vertex_buffer_offset =`
			`canonical_mesh_vertex_size * i + canonical_mesh_vertex_position_offset;`

			`canonical_metric_landmarks(0, i) =`
			`canonical_mesh.vertex_buffer(vertex_buffer_offset);`
			`canonical_metric_landmarks(1, i) =`
			`canonical_mesh.vertex_buffer(vertex_buffer_offset + 1);`
			`canonical_metric_landmarks(2, i) =`
			`canonical_mesh.vertex_buffer(vertex_buffer_offset + 2);`
			`}`

			`for (const WeightedLandmarkRef& wlr : metadata.procrustes_landmark_basis()) {`
			`uint32_t landmark_id = wlr.landmark_id();`
			`landmark_weights(landmark_id) = wlr.weight();`
			`}`

			`std::unique_ptr<GeometryPipeline> result =`
			`absl::make_unique<GeometryPipelineImpl>(`
			`environment.perspective_camera(), canonical_mesh,`
			`canonical_mesh_vertex_size, canonical_mesh_num_vertices,`
			`canonical_mesh_vertex_position_offset,`
			`absl::make_unique<ScreenToMetricSpaceConverter>(`
			`environment.origin_point_location(),`
			`metadata.input_source() == InputSource::DEFAULT`
			`? InputSource::FACE_LANDMARK_PIPELINE`
			`: metadata.input_source(),`
			`std::move(canonical_metric_landmarks),`
			`std::move(landmark_weights),`
			`CreateFloatPrecisionProcrustesSolver()));`

			`return result;`
			`}`

			`} // namespace mediapipe::face_geometry`