168 lines
6.6 KiB
C++
168 lines
6.6 KiB
C++
|
// Copyright 2021 The MediaPipe Authors.
|
||
|
//
|
||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||
|
// you may not use this file except in compliance with the License.
|
||
|
// You may obtain a copy of the License at
|
||
|
//
|
||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||
|
//
|
||
|
// Unless required by applicable law or agreed to in writing, software
|
||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
|
// See the License for the specific language governing permissions and
|
||
|
// limitations under the License.
|
||
|
|
||
|
#include "mediapipe/modules/objectron/calculators/epnp.h"
|
||
|
|
||
|
namespace mediapipe {
|
||
|
|
||
|
namespace {
|
||
|
|
||
|
// NUmber of keypoints.
|
||
|
constexpr int kNumKeypoints = 9;
|
||
|
|
||
|
using Eigen::Map;
|
||
|
using Eigen::Matrix;
|
||
|
using Eigen::Matrix4f;
|
||
|
using Eigen::Vector2f;
|
||
|
using Eigen::Vector3f;
|
||
|
|
||
|
} // namespace
|
||
|
|
||
|
absl::Status SolveEpnp(const float focal_x, const float focal_y,
|
||
|
const float center_x, const float center_y,
|
||
|
const bool portrait,
|
||
|
const std::vector<Vector2f>& input_points_2d,
|
||
|
std::vector<Vector3f>* output_points_3d) {
|
||
|
if (input_points_2d.size() != kNumKeypoints) {
|
||
|
return absl::InvalidArgumentError(
|
||
|
absl::StrFormat("Input must has %d 2D points.", kNumKeypoints));
|
||
|
}
|
||
|
|
||
|
if (output_points_3d == nullptr) {
|
||
|
return absl::InvalidArgumentError(
|
||
|
"Output pointer output_points_3d is Null.");
|
||
|
}
|
||
|
|
||
|
Matrix<float, (kNumKeypoints - 1) * 2, 12> m =
|
||
|
Matrix<float, (kNumKeypoints - 1) * 2, 12>::Zero();
|
||
|
|
||
|
Matrix<float, kNumKeypoints - 1, 4> epnp_alpha;
|
||
|
// The epnp_alpha is the Nx4 weight matrix from the EPnP paper, which is used
|
||
|
// to express the N box vertices as the weighted sum of 4 control points. The
|
||
|
// value of epnp_alpha is depedent on the set of control points been used.
|
||
|
// In our case we used the 4 control points as below (coordinates are in world
|
||
|
// coordinate system):
|
||
|
// c0 = (0.0, 0.0, 0.0) // Box center
|
||
|
// c1 = (1.0, 0.0, 0.0) // Right face center
|
||
|
// c2 = (0.0, 1.0, 0.0) // Top face center
|
||
|
// c3 = (0.0, 0.0, 1.0) // Front face center
|
||
|
//
|
||
|
// 3 + + + + + + + + 7
|
||
|
// +\ +\ UP
|
||
|
// + \ + \
|
||
|
// + \ + \ |
|
||
|
// + 4 + + + + + + + + 8 | y
|
||
|
// + + + + |
|
||
|
// + + + + |
|
||
|
// + + (0) + + .------- x
|
||
|
// + + + + \
|
||
|
// 1 + + + + + + + + 5 + \
|
||
|
// \ + \ + \ z
|
||
|
// \ + \ + \
|
||
|
// \+ \+
|
||
|
// 2 + + + + + + + + 6
|
||
|
//
|
||
|
// For each box vertex shown above, we have the below weighted sum expression:
|
||
|
// v1 = c0 - (c1 - c0) - (c2 - c0) - (c3 - c0) = 4*c0 - c1 - c2 - c3;
|
||
|
// v2 = c0 - (c1 - c0) - (c2 - c0) + (c3 - c0) = 2*c0 - c1 - c2 + c3;
|
||
|
// v3 = c0 - (c1 - c0) + (c2 - c0) - (c3 - c0) = 2*c0 - c1 + c2 - c3;
|
||
|
// ...
|
||
|
// Thus we can determine the value of epnp_alpha as been used below.
|
||
|
//
|
||
|
// clang-format off
|
||
|
epnp_alpha << 4.0f, -1.0f, -1.0f, -1.0f,
|
||
|
2.0f, -1.0f, -1.0f, 1.0f,
|
||
|
2.0f, -1.0f, 1.0f, -1.0f,
|
||
|
0.0f, -1.0f, 1.0f, 1.0f,
|
||
|
2.0f, 1.0f, -1.0f, -1.0f,
|
||
|
0.0f, 1.0f, -1.0f, 1.0f,
|
||
|
0.0f, 1.0f, 1.0f, -1.0f,
|
||
|
-2.0f, 1.0f, 1.0f, 1.0f;
|
||
|
// clang-format on
|
||
|
|
||
|
for (int i = 0; i < input_points_2d.size() - 1; ++i) {
|
||
|
// Skip 0th landmark which is object center.
|
||
|
const auto& point_2d = input_points_2d[i + 1];
|
||
|
|
||
|
// Convert 2d point from `pixel coordinates` to `NDC coordinates`([-1, 1])
|
||
|
// following to the definitions in:
|
||
|
// https://google.github.io/mediapipe/solutions/objectron#ndc-space
|
||
|
// If portrait mode is been used, it's the caller's responsibility to
|
||
|
// convert the input 2d points' coordinates.
|
||
|
float x_ndc, y_ndc;
|
||
|
if (portrait) {
|
||
|
x_ndc = point_2d.y() * 2 - 1;
|
||
|
y_ndc = point_2d.x() * 2 - 1;
|
||
|
} else {
|
||
|
x_ndc = point_2d.x() * 2 - 1;
|
||
|
y_ndc = 1 - point_2d.y() * 2;
|
||
|
}
|
||
|
|
||
|
for (int j = 0; j < 4; ++j) {
|
||
|
// For each of the 4 control points, formulate two rows of the
|
||
|
// m matrix (two equations).
|
||
|
const float control_alpha = epnp_alpha(i, j);
|
||
|
m(i * 2, j * 3) = focal_x * control_alpha;
|
||
|
m(i * 2, j * 3 + 2) = (center_x + x_ndc) * control_alpha;
|
||
|
m(i * 2 + 1, j * 3 + 1) = focal_y * control_alpha;
|
||
|
m(i * 2 + 1, j * 3 + 2) = (center_y + y_ndc) * control_alpha;
|
||
|
}
|
||
|
}
|
||
|
// This is a self adjoint matrix. Use SelfAdjointEigenSolver for a fast
|
||
|
// and stable solution.
|
||
|
Matrix<float, 12, 12> mt_m = m.transpose() * m;
|
||
|
Eigen::SelfAdjointEigenSolver<Matrix<float, 12, 12>> eigen_solver(mt_m);
|
||
|
if (eigen_solver.info() != Eigen::Success) {
|
||
|
return absl::AbortedError("Eigen decomposition failed.");
|
||
|
}
|
||
|
CHECK_EQ(12, eigen_solver.eigenvalues().size());
|
||
|
|
||
|
// Eigenvalues are sorted in increasing order for SelfAdjointEigenSolver
|
||
|
// only! If you use other Eigen Solvers, it's not guaranteed to be in
|
||
|
// increasing order. Here, we just take the eigen vector corresponding
|
||
|
// to first/smallest eigen value, since we used SelfAdjointEigenSolver.
|
||
|
Eigen::VectorXf eigen_vec = eigen_solver.eigenvectors().col(0);
|
||
|
Map<Matrix<float, 4, 3, Eigen::RowMajor>> control_matrix(eigen_vec.data());
|
||
|
|
||
|
// All 3D points should be in front of camera (z < 0).
|
||
|
if (control_matrix(0, 2) > 0) {
|
||
|
control_matrix = -control_matrix;
|
||
|
}
|
||
|
Matrix<float, kNumKeypoints - 1, 3> vertices = epnp_alpha * control_matrix;
|
||
|
|
||
|
// Fill 0th 3D points.
|
||
|
output_points_3d->emplace_back(control_matrix(0, 0), control_matrix(0, 1),
|
||
|
control_matrix(0, 2));
|
||
|
// Fill the rest 3D points.
|
||
|
for (int i = 0; i < kNumKeypoints - 1; ++i) {
|
||
|
output_points_3d->emplace_back(vertices(i, 0), vertices(i, 1),
|
||
|
vertices(i, 2));
|
||
|
}
|
||
|
return absl::OkStatus();
|
||
|
}
|
||
|
|
||
|
absl::Status SolveEpnp(const Eigen::Matrix4f& projection_matrix,
|
||
|
const bool portrait,
|
||
|
const std::vector<Vector2f>& input_points_2d,
|
||
|
std::vector<Vector3f>* output_points_3d) {
|
||
|
const float focal_x = projection_matrix(0, 0);
|
||
|
const float focal_y = projection_matrix(1, 1);
|
||
|
const float center_x = projection_matrix(0, 2);
|
||
|
const float center_y = projection_matrix(1, 2);
|
||
|
return SolveEpnp(focal_x, focal_y, center_x, center_y, portrait,
|
||
|
input_points_2d, output_points_3d);
|
||
|
}
|
||
|
|
||
|
} // namespace mediapipe
|