// mediapipe-rs/mediapipe/modules/objectron/calculators/epnp.cc

// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "mediapipe/modules/objectron/calculators/epnp.h"

namespace mediapipe {

namespace {

// Number of keypoints handled by the solver: the object center (index 0)
// followed by the 8 box vertices.
constexpr int kNumKeypoints = 9;

// Bring the Eigen names used in this file into scope.
using Eigen::Map;
using Eigen::Matrix;
using Eigen::Matrix4f;
using Eigen::Vector2f;
using Eigen::Vector3f;

}  // namespace

// Lifts the 2D keypoints of a 3D bounding box to 3D points by solving a
// linear EPnP system built from the 8 box vertices.
//
// Args:
//   focal_x, focal_y: focal terms used for the x and y equations.
//   center_x, center_y: principal point terms used for the x and y equations.
//   portrait: if true, the x/y components of every input point are swapped
//     while converting to NDC (see the conversion inside the loop).
//   input_points_2d: exactly kNumKeypoints points. Index 0 is the object
//     center and is not used by the solver; indices 1..8 are the box
//     vertices. The points are presumably normalized to [0, 1] -- TODO
//     confirm against the callers.
//   output_points_3d: must not be null. kNumKeypoints points are appended to
//     it (the box center first, then the 8 vertices); elements already in the
//     vector are left untouched.
//
// Returns InvalidArgumentError if the number of input points is wrong or the
// output pointer is null, AbortedError if the eigen decomposition fails, and
// OkStatus otherwise.
absl::Status SolveEpnp(const float focal_x, const float focal_y,
                       const float center_x, const float center_y,
                       const bool portrait,
                       const std::vector<Vector2f>& input_points_2d,
                       std::vector<Vector3f>* output_points_3d) {
  if (input_points_2d.size() != kNumKeypoints) {
    return absl::InvalidArgumentError(
        absl::StrFormat("Input must has %d 2D points.", kNumKeypoints));
  }

  if (output_points_3d == nullptr) {
    return absl::InvalidArgumentError(
        "Output pointer output_points_3d is Null.");
  }

  // Number of box vertices, i.e. all keypoints except the object center.
  constexpr int kNumVertices = kNumKeypoints - 1;

  // Two equations (rows) per vertex, three unknowns for each of the four
  // control points (columns).
  Matrix<float, kNumVertices * 2, 12> m =
      Matrix<float, kNumVertices * 2, 12>::Zero();

  Matrix<float, kNumVertices, 4> epnp_alpha;
  // The epnp_alpha is the Nx4 weight matrix from the EPnP paper, which is used
  // to express the N box vertices as the weighted sum of 4 control points. The
  // value of epnp_alpha is dependent on the set of control points being used.
  // In our case we use the 4 control points below (coordinates are in the
  // world coordinate system):
  //     c0 = (0.0, 0.0, 0.0)  // Box center
  //     c1 = (1.0, 0.0, 0.0)  // Right face center
  //     c2 = (0.0, 1.0, 0.0)  // Top face center
  //     c3 = (0.0, 0.0, 1.0)  // Front face center
  //
  //       3 + + + + + + + + 7
  //       +\                +\          UP
  //       + \               + \
  //       +  \              +  \        |
  //       +   4 + + + + + + + + 8       | y
  //       +   +             +   +       |
  //       +   +             +   +       |
  //       +   +     (0)     +   +       .------- x
  //       +   +             +   +        \
  //       1 + + + + + + + + 5   +         \
  //        \  +              \  +          \ z
  //         \ +               \ +           \
  //          \+                \+
  //           2 + + + + + + + + 6
  //
  // For each box vertex shown above, we have the below weighted sum expression:
  //   v1 = c0 - (c1 - c0) - (c2 - c0) - (c3 - c0) = 4*c0 - c1 - c2 - c3;
  //   v2 = c0 - (c1 - c0) - (c2 - c0) + (c3 - c0) = 2*c0 - c1 - c2 + c3;
  //   v3 = c0 - (c1 - c0) + (c2 - c0) - (c3 - c0) = 2*c0 - c1 + c2 - c3;
  //   ...
  // Thus we can determine the value of epnp_alpha as used below.
  //
  // clang-format off
  epnp_alpha << 4.0f, -1.0f, -1.0f, -1.0f,
                2.0f, -1.0f, -1.0f,  1.0f,
                2.0f, -1.0f,  1.0f, -1.0f,
                0.0f, -1.0f,  1.0f,  1.0f,
                2.0f,  1.0f, -1.0f, -1.0f,
                0.0f,  1.0f, -1.0f,  1.0f,
                0.0f,  1.0f,  1.0f, -1.0f,
               -2.0f,  1.0f,  1.0f,  1.0f;
  // clang-format on

  // The input size was validated above, so iterate over the compile-time
  // vertex count instead of comparing a signed index with size() - 1.
  for (int i = 0; i < kNumVertices; ++i) {
    // Skip 0th landmark which is object center.
    const auto& point_2d = input_points_2d[i + 1];

    // Convert the 2d point to `NDC coordinates` ([-1, 1]) following the
    // definitions in:
    // https://google.github.io/mediapipe/solutions/objectron#ndc-space
    // In portrait mode the x and y components are swapped and y is not
    // flipped; otherwise x maps directly and y is flipped.
    float x_ndc, y_ndc;
    if (portrait) {
      x_ndc = point_2d.y() * 2 - 1;
      y_ndc = point_2d.x() * 2 - 1;
    } else {
      x_ndc = point_2d.x() * 2 - 1;
      y_ndc = 1 - point_2d.y() * 2;
    }

    for (int j = 0; j < 4; ++j) {
      // For each of the 4 control points, formulate two rows of the
      // m matrix (two equations).
      const float control_alpha = epnp_alpha(i, j);
      m(i * 2, j * 3) = focal_x * control_alpha;
      m(i * 2, j * 3 + 2) = (center_x + x_ndc) * control_alpha;
      m(i * 2 + 1, j * 3 + 1) = focal_y * control_alpha;
      m(i * 2 + 1, j * 3 + 2) = (center_y + y_ndc) * control_alpha;
    }
  }
  // This is a self adjoint matrix. Use SelfAdjointEigenSolver for a fast
  // and stable solution.
  Matrix<float, 12, 12> mt_m = m.transpose() * m;
  Eigen::SelfAdjointEigenSolver<Matrix<float, 12, 12>> eigen_solver(mt_m);
  if (eigen_solver.info() != Eigen::Success) {
    return absl::AbortedError("Eigen decomposition failed.");
  }
  CHECK_EQ(12, eigen_solver.eigenvalues().size());

  // Eigenvalues are sorted in increasing order for SelfAdjointEigenSolver
  // only! If you use other Eigen Solvers, it's not guaranteed to be in
  // increasing order. Here, we just take the eigen vector corresponding
  // to first/smallest eigen value, since we used SelfAdjointEigenSolver.
  // The vector always has 12 entries, so keep it in fixed-size storage rather
  // than a dynamically allocated VectorXf.
  Matrix<float, 12, 1> eigen_vec = eigen_solver.eigenvectors().col(0);
  // View the 12 entries as 4 control points (rows) with xyz columns. The map
  // aliases eigen_vec, so writes through it modify eigen_vec in place.
  Map<Matrix<float, 4, 3, Eigen::RowMajor>> control_matrix(eigen_vec.data());

  // All 3D points should be in front of camera (z < 0). An eigenvector is only
  // defined up to sign, so flip it when the box center has positive z.
  if (control_matrix(0, 2) > 0) {
    control_matrix = -control_matrix;
  }
  Matrix<float, kNumVertices, 3> vertices = epnp_alpha * control_matrix;

  // Fill the 0th 3D point (control point c0, the box center).
  output_points_3d->emplace_back(control_matrix(0, 0), control_matrix(0, 1),
                                 control_matrix(0, 2));
  // Fill the remaining 3D points (the 8 box vertices).
  for (int i = 0; i < kNumVertices; ++i) {
    output_points_3d->emplace_back(vertices(i, 0), vertices(i, 1),
                                   vertices(i, 2));
  }
  return absl::OkStatus();
}

// Convenience overload that takes the camera parameters from a 4x4 projection
// matrix: the focal terms are read from entries (0, 0) and (1, 1), the center
// terms from entries (0, 2) and (1, 2). Everything else is forwarded unchanged
// to the overload taking the four scalars, whose status is returned as is.
absl::Status SolveEpnp(const Eigen::Matrix4f& projection_matrix,
                       const bool portrait,
                       const std::vector<Vector2f>& input_points_2d,
                       std::vector<Vector3f>* output_points_3d) {
  return SolveEpnp(/*focal_x=*/projection_matrix(0, 0),
                   /*focal_y=*/projection_matrix(1, 1),
                   /*center_x=*/projection_matrix(0, 2),
                   /*center_y=*/projection_matrix(1, 2), portrait,
                   input_points_2d, output_points_3d);
}

}  // namespace mediapipe
code fill 2022-03-01 13:04:01 +01:00			`// Copyright 2021 The MediaPipe Authors.`
			`//`
			`// Licensed under the Apache License, Version 2.0 (the "License");`
			`// you may not use this file except in compliance with the License.`
			`// You may obtain a copy of the License at`
			`//`
			`// http://www.apache.org/licenses/LICENSE-2.0`
			`//`
			`// Unless required by applicable law or agreed to in writing, software`
			`// distributed under the License is distributed on an "AS IS" BASIS,`
			`// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`// See the License for the specific language governing permissions and`
			`// limitations under the License.`

			`#include "mediapipe/modules/objectron/calculators/epnp.h"`

			`namespace mediapipe {`

			`namespace {`

			`// NUmber of keypoints.`
			`constexpr int kNumKeypoints = 9;`

			`using Eigen::Map;`
			`using Eigen::Matrix;`
			`using Eigen::Matrix4f;`
			`using Eigen::Vector2f;`
			`using Eigen::Vector3f;`

			`} // namespace`

			`absl::Status SolveEpnp(const float focal_x, const float focal_y,`
			`const float center_x, const float center_y,`
			`const bool portrait,`
			`const std::vector<Vector2f>& input_points_2d,`
			`std::vector<Vector3f>* output_points_3d) {`
			`if (input_points_2d.size() != kNumKeypoints) {`
			`return absl::InvalidArgumentError(`
			`absl::StrFormat("Input must has %d 2D points.", kNumKeypoints));`
			`}`

			`if (output_points_3d == nullptr) {`
			`return absl::InvalidArgumentError(`
			`"Output pointer output_points_3d is Null.");`
			`}`

			`Matrix<float, (kNumKeypoints - 1) * 2, 12> m =`
			`Matrix<float, (kNumKeypoints - 1) * 2, 12>::Zero();`

			`Matrix<float, kNumKeypoints - 1, 4> epnp_alpha;`
			`// The epnp_alpha is the Nx4 weight matrix from the EPnP paper, which is used`
			`// to express the N box vertices as the weighted sum of 4 control points. The`
			`// value of epnp_alpha is depedent on the set of control points been used.`
			`// In our case we used the 4 control points as below (coordinates are in world`
			`// coordinate system):`
			`// c0 = (0.0, 0.0, 0.0) // Box center`
			`// c1 = (1.0, 0.0, 0.0) // Right face center`
			`// c2 = (0.0, 1.0, 0.0) // Top face center`
			`// c3 = (0.0, 0.0, 1.0) // Front face center`
			`//`
			`// 3 + + + + + + + + 7`
			`// +\ +\ UP`
			`// + \ + \`
			`// + \ + \ \|`
			`// + 4 + + + + + + + + 8 \| y`
			`// + + + + \|`
			`// + + + + \|`
			`// + + (0) + + .------- x`
			`// + + + + \`
			`// 1 + + + + + + + + 5 + \`
			`// \ + \ + \ z`
			`// \ + \ + \`
			`// \+ \+`
			`// 2 + + + + + + + + 6`
			`//`
			`// For each box vertex shown above, we have the below weighted sum expression:`
			`// v1 = c0 - (c1 - c0) - (c2 - c0) - (c3 - c0) = 4*c0 - c1 - c2 - c3;`
			`// v2 = c0 - (c1 - c0) - (c2 - c0) + (c3 - c0) = 2*c0 - c1 - c2 + c3;`
			`// v3 = c0 - (c1 - c0) + (c2 - c0) - (c3 - c0) = 2*c0 - c1 + c2 - c3;`
			`// ...`
			`// Thus we can determine the value of epnp_alpha as been used below.`
			`//`
			`// clang-format off`
			`epnp_alpha << 4.0f, -1.0f, -1.0f, -1.0f,`
			`2.0f, -1.0f, -1.0f, 1.0f,`
			`2.0f, -1.0f, 1.0f, -1.0f,`
			`0.0f, -1.0f, 1.0f, 1.0f,`
			`2.0f, 1.0f, -1.0f, -1.0f,`
			`0.0f, 1.0f, -1.0f, 1.0f,`
			`0.0f, 1.0f, 1.0f, -1.0f,`
			`-2.0f, 1.0f, 1.0f, 1.0f;`
			`// clang-format on`

			`for (int i = 0; i < input_points_2d.size() - 1; ++i) {`
			`// Skip 0th landmark which is object center.`
			`const auto& point_2d = input_points_2d[i + 1];`

			// Convert 2d point from `pixel coordinates` to `NDC coordinates`([-1, 1])
			`// following to the definitions in:`
			`// https://google.github.io/mediapipe/solutions/objectron#ndc-space`
			`// If portrait mode is been used, it's the caller's responsibility to`
			`// convert the input 2d points' coordinates.`
			`float x_ndc, y_ndc;`
			`if (portrait) {`
			`x_ndc = point_2d.y() * 2 - 1;`
			`y_ndc = point_2d.x() * 2 - 1;`
			`} else {`
			`x_ndc = point_2d.x() * 2 - 1;`
			`y_ndc = 1 - point_2d.y() * 2;`
			`}`

			`for (int j = 0; j < 4; ++j) {`
			`// For each of the 4 control points, formulate two rows of the`
			`// m matrix (two equations).`
			`const float control_alpha = epnp_alpha(i, j);`
			`m(i * 2, j * 3) = focal_x * control_alpha;`
			`m(i * 2, j * 3 + 2) = (center_x + x_ndc) * control_alpha;`
			`m(i * 2 + 1, j * 3 + 1) = focal_y * control_alpha;`
			`m(i * 2 + 1, j * 3 + 2) = (center_y + y_ndc) * control_alpha;`
			`}`
			`}`
			`// This is a self adjoint matrix. Use SelfAdjointEigenSolver for a fast`
			`// and stable solution.`
			`Matrix<float, 12, 12> mt_m = m.transpose() * m;`
			`Eigen::SelfAdjointEigenSolver<Matrix<float, 12, 12>> eigen_solver(mt_m);`
			`if (eigen_solver.info() != Eigen::Success) {`
			`return absl::AbortedError("Eigen decomposition failed.");`
			`}`
			`CHECK_EQ(12, eigen_solver.eigenvalues().size());`

			`// Eigenvalues are sorted in increasing order for SelfAdjointEigenSolver`
			`// only! If you use other Eigen Solvers, it's not guaranteed to be in`
			`// increasing order. Here, we just take the eigen vector corresponding`
			`// to first/smallest eigen value, since we used SelfAdjointEigenSolver.`
			`Eigen::VectorXf eigen_vec = eigen_solver.eigenvectors().col(0);`
			`Map<Matrix<float, 4, 3, Eigen::RowMajor>> control_matrix(eigen_vec.data());`

			`// All 3D points should be in front of camera (z < 0).`
			`if (control_matrix(0, 2) > 0) {`
			`control_matrix = -control_matrix;`
			`}`
			`Matrix<float, kNumKeypoints - 1, 3> vertices = epnp_alpha * control_matrix;`

			`// Fill 0th 3D points.`
			`output_points_3d->emplace_back(control_matrix(0, 0), control_matrix(0, 1),`
			`control_matrix(0, 2));`
			`// Fill the rest 3D points.`
			`for (int i = 0; i < kNumKeypoints - 1; ++i) {`
			`output_points_3d->emplace_back(vertices(i, 0), vertices(i, 1),`
			`vertices(i, 2));`
			`}`
			`return absl::OkStatus();`
			`}`

			`absl::Status SolveEpnp(const Eigen::Matrix4f& projection_matrix,`
			`const bool portrait,`
			`const std::vector<Vector2f>& input_points_2d,`
			`std::vector<Vector3f>* output_points_3d) {`
			`const float focal_x = projection_matrix(0, 0);`
			`const float focal_y = projection_matrix(1, 1);`
			`const float center_x = projection_matrix(0, 2);`
			`const float center_y = projection_matrix(1, 2);`
			`return SolveEpnp(focal_x, focal_y, center_x, center_y, portrait,`
			`input_points_2d, output_points_3d);`
			`}`

			`} // namespace mediapipe`