409 lines
14 KiB
C++
409 lines
14 KiB
C++
// Copyright 2019 The MediaPipe Authors.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
#include "mediapipe/util/tracking/camera_motion.h"
|
|
|
|
#include <numeric>
|
|
|
|
#include "absl/strings/str_format.h"
|
|
#include "mediapipe/util/tracking/region_flow.h"
|
|
|
|
namespace mediapipe {
|
|
|
|
void CameraMotionToTranslation(const CameraMotion& camera_motion,
|
|
TranslationModel* model) {
|
|
if (camera_motion.has_translation()) {
|
|
model->CopyFrom(camera_motion.translation());
|
|
} else {
|
|
model->Clear();
|
|
}
|
|
}
|
|
|
|
void CameraMotionToLinearSimilarity(const CameraMotion& camera_motion,
|
|
LinearSimilarityModel* model) {
|
|
if (camera_motion.has_linear_similarity()) {
|
|
model->CopyFrom(camera_motion.linear_similarity());
|
|
} else {
|
|
TranslationModel translation;
|
|
CameraMotionToTranslation(camera_motion, &translation);
|
|
model->CopyFrom(LinearSimilarityAdapter::Embed(translation));
|
|
}
|
|
}
|
|
|
|
void CameraMotionToAffine(const CameraMotion& camera_motion,
|
|
AffineModel* model) {
|
|
if (camera_motion.has_affine()) {
|
|
model->CopyFrom(camera_motion.affine());
|
|
} else {
|
|
LinearSimilarityModel similarity;
|
|
CameraMotionToLinearSimilarity(camera_motion, &similarity);
|
|
model->CopyFrom(AffineAdapter::Embed(similarity));
|
|
}
|
|
}
|
|
|
|
void CameraMotionToHomography(const CameraMotion& camera_motion,
|
|
Homography* model) {
|
|
if (camera_motion.has_homography()) {
|
|
model->CopyFrom(camera_motion.homography());
|
|
} else {
|
|
AffineModel affine;
|
|
CameraMotionToAffine(camera_motion, &affine);
|
|
model->CopyFrom(HomographyAdapter::Embed(affine));
|
|
}
|
|
}
|
|
|
|
void CameraMotionToMixtureHomography(const CameraMotion& camera_motion,
|
|
MixtureHomography* model) {
|
|
if (camera_motion.has_mixture_homography()) {
|
|
model->CopyFrom(camera_motion.mixture_homography());
|
|
} else {
|
|
Homography homography;
|
|
CameraMotionToHomography(camera_motion, &homography);
|
|
model->CopyFrom(MixtureHomographyAdapter::Embed(homography, 1));
|
|
}
|
|
}
|
|
|
|
CameraMotion ComposeCameraMotion(const CameraMotion& lhs,
|
|
const CameraMotion& rhs) {
|
|
CHECK_EQ(lhs.frame_width(), rhs.frame_width());
|
|
CHECK_EQ(lhs.frame_height(), rhs.frame_height());
|
|
|
|
CameraMotion result = rhs;
|
|
if (lhs.has_translation() || rhs.has_translation()) {
|
|
*result.mutable_translation() =
|
|
ModelCompose2(lhs.translation(), rhs.translation());
|
|
}
|
|
|
|
if (lhs.has_similarity() || rhs.has_similarity()) {
|
|
*result.mutable_similarity() =
|
|
ModelCompose2(lhs.similarity(), rhs.similarity());
|
|
}
|
|
|
|
if (lhs.has_linear_similarity() || rhs.has_linear_similarity()) {
|
|
*result.mutable_linear_similarity() =
|
|
ModelCompose2(lhs.linear_similarity(), rhs.linear_similarity());
|
|
}
|
|
|
|
if (lhs.has_affine() || rhs.has_affine()) {
|
|
*result.mutable_affine() = ModelCompose2(lhs.affine(), rhs.affine());
|
|
}
|
|
|
|
if (lhs.has_homography() || rhs.has_homography()) {
|
|
*result.mutable_homography() =
|
|
ModelCompose2(lhs.homography(), rhs.homography());
|
|
}
|
|
|
|
if (rhs.has_mixture_homography()) {
|
|
if (lhs.has_mixture_homography()) {
|
|
LOG(ERROR) << "Mixture homographies are not closed under composition, "
|
|
<< "Only rhs mixtures composed with lhs homographies "
|
|
<< "are supported.";
|
|
} else if (lhs.type() <= CameraMotion::UNSTABLE_SIM) {
|
|
// We only composit base model when stability is sufficient.
|
|
*result.mutable_mixture_homography() =
|
|
MixtureHomographyAdapter::ComposeLeft(rhs.mixture_homography(),
|
|
lhs.homography());
|
|
}
|
|
} else if (lhs.has_mixture_homography()) {
|
|
LOG(ERROR) << "Only rhs mixtures supported.";
|
|
}
|
|
|
|
// Select max unstable type.
|
|
result.set_type(std::max(lhs.type(), rhs.type()));
|
|
result.set_average_magnitude(lhs.average_magnitude() +
|
|
rhs.average_magnitude());
|
|
result.set_translation_variance(
|
|
std::max(lhs.translation_variance(), rhs.translation_variance()));
|
|
result.set_similarity_inlier_ratio(
|
|
std::min(lhs.similarity_inlier_ratio(), rhs.similarity_inlier_ratio()));
|
|
|
|
result.set_similarity_strict_inlier_ratio(
|
|
std::min(lhs.similarity_strict_inlier_ratio(),
|
|
rhs.similarity_strict_inlier_ratio()));
|
|
|
|
result.set_average_homography_error(
|
|
std::max(lhs.average_homography_error(), rhs.average_homography_error()));
|
|
result.set_homography_inlier_coverage(std::min(
|
|
lhs.homography_inlier_coverage(), rhs.homography_inlier_coverage()));
|
|
result.set_homography_strict_inlier_coverage(
|
|
std::min(lhs.homography_strict_inlier_coverage(),
|
|
rhs.homography_strict_inlier_coverage()));
|
|
|
|
// TODO: Overlay stuff.
|
|
|
|
result.set_flags(lhs.flags() | rhs.flags());
|
|
result.set_timestamp_usec(
|
|
std::max(lhs.timestamp_usec(), rhs.timestamp_usec()));
|
|
result.set_match_frame(lhs.match_frame() + rhs.match_frame());
|
|
|
|
// TODO: Rest.
|
|
return result;
|
|
}
|
|
|
|
CameraMotion InvertCameraMotion(const CameraMotion& motion) {
|
|
CameraMotion inverted = motion;
|
|
if (motion.has_translation()) {
|
|
*inverted.mutable_translation() = ModelInvert(motion.translation());
|
|
}
|
|
|
|
if (motion.has_similarity()) {
|
|
*inverted.mutable_similarity() = ModelInvert(motion.similarity());
|
|
}
|
|
|
|
if (motion.has_linear_similarity()) {
|
|
*inverted.mutable_linear_similarity() =
|
|
ModelInvert(motion.linear_similarity());
|
|
}
|
|
|
|
if (motion.has_affine()) {
|
|
*inverted.mutable_affine() = ModelInvert(motion.affine());
|
|
}
|
|
|
|
if (motion.has_homography()) {
|
|
*inverted.mutable_homography() = ModelInvert(motion.homography());
|
|
}
|
|
|
|
if (motion.has_mixture_homography()) {
|
|
LOG(ERROR) << "Mixture homographies are not closed under inversion.";
|
|
}
|
|
|
|
return inverted;
|
|
}
|
|
|
|
void SubtractCameraMotionFromFeatures(
|
|
const std::vector<CameraMotion>& camera_motions,
|
|
std::vector<RegionFlowFeatureList*>* feature_lists) {
|
|
CHECK(feature_lists != nullptr);
|
|
CHECK_GE(camera_motions.size(), feature_lists->size());
|
|
if (feature_lists->empty()) {
|
|
return;
|
|
}
|
|
|
|
bool use_mixtures = camera_motions[0].has_mixture_homography();
|
|
|
|
std::unique_ptr<MixtureRowWeights> row_weights;
|
|
if (use_mixtures) {
|
|
row_weights.reset(MixtureRowWeightsFromCameraMotion(
|
|
camera_motions[0], (*feature_lists)[0]->frame_height()));
|
|
}
|
|
|
|
for (int k = 0; k < feature_lists->size(); ++k) {
|
|
Homography background_model;
|
|
MixtureHomography background_model_mixture;
|
|
if (use_mixtures) {
|
|
CameraMotionToMixtureHomography(camera_motions[k],
|
|
&background_model_mixture);
|
|
} else {
|
|
CameraMotionToHomography(camera_motions[k], &background_model);
|
|
}
|
|
|
|
// Remove motion due to camera motion, leaving only foreground motion.
|
|
for (auto& feature : *(*feature_lists)[k]->mutable_feature()) {
|
|
const Vector2_f background_motion =
|
|
(!use_mixtures ? HomographyAdapter::TransformPoint(
|
|
background_model, FeatureLocation(feature))
|
|
: MixtureHomographyAdapter::TransformPoint(
|
|
background_model_mixture, *row_weights,
|
|
FeatureLocation(feature))) -
|
|
FeatureLocation(feature);
|
|
const Vector2_f object_motion = FeatureFlow(feature) - background_motion;
|
|
feature.set_dx(object_motion.x());
|
|
feature.set_dy(object_motion.y());
|
|
}
|
|
}
|
|
}
|
|
|
|
float ForegroundMotion(const CameraMotion& camera_motion,
|
|
const RegionFlowFeatureList& feature_list) {
|
|
if (camera_motion.has_mixture_homography()) {
|
|
LOG(WARNING) << "Mixture homographies are present but function is only "
|
|
<< "using homographies. Truncation error likely.";
|
|
}
|
|
|
|
Homography background_motion;
|
|
CameraMotionToHomography(camera_motion, &background_motion);
|
|
|
|
float foreground_motion = 0;
|
|
for (auto& feature : feature_list.feature()) {
|
|
const float error = (FeatureMatchLocation(feature) -
|
|
HomographyAdapter::TransformPoint(
|
|
background_motion, FeatureLocation(feature)))
|
|
.Norm();
|
|
foreground_motion += error;
|
|
}
|
|
|
|
if (feature_list.feature_size() > 0) {
|
|
foreground_motion *= 1.0f / feature_list.feature_size();
|
|
}
|
|
|
|
return foreground_motion;
|
|
}
|
|
|
|
void InitCameraMotionFromFeatureList(const RegionFlowFeatureList& feature_list,
|
|
CameraMotion* camera_motion) {
|
|
camera_motion->set_blur_score(feature_list.blur_score());
|
|
camera_motion->set_frac_long_features_rejected(
|
|
feature_list.frac_long_features_rejected());
|
|
camera_motion->set_timestamp_usec(feature_list.timestamp_usec());
|
|
camera_motion->set_match_frame(feature_list.match_frame());
|
|
camera_motion->set_frame_width(feature_list.frame_width());
|
|
camera_motion->set_frame_height(feature_list.frame_height());
|
|
}
|
|
|
|
std::string CameraMotionFlagToString(const CameraMotion& camera_motion) {
|
|
std::string text;
|
|
if (camera_motion.flags() & CameraMotion::FLAG_SHOT_BOUNDARY) {
|
|
text += "SHOT_BOUNDARY|";
|
|
}
|
|
|
|
if (camera_motion.flags() & CameraMotion::FLAG_BLURRY_FRAME) {
|
|
text += absl::StrFormat("BLURRY_FRAME %.2f|", camera_motion.bluriness());
|
|
}
|
|
|
|
if (camera_motion.flags() & CameraMotion::FLAG_MAJOR_OVERLAY) {
|
|
text += "MAJOR_OVERLAY|";
|
|
}
|
|
|
|
if (camera_motion.flags() & CameraMotion::FLAG_SHARP_FRAME) {
|
|
text += "SHARP_FRAME|";
|
|
}
|
|
|
|
if (camera_motion.flags() & CameraMotion::FLAG_SHOT_FADE) {
|
|
text += "SHOT_FADE|";
|
|
}
|
|
|
|
if (camera_motion.flags() & CameraMotion::FLAG_DUPLICATED) {
|
|
text += "DUPLICATED|";
|
|
}
|
|
return text;
|
|
}
|
|
|
|
std::string CameraMotionTypeToString(const CameraMotion& motion) {
|
|
switch (motion.type()) {
|
|
case CameraMotion::VALID:
|
|
return "VALID";
|
|
case CameraMotion::UNSTABLE_HOMOG:
|
|
return "UNSTABLE_HOMOG";
|
|
case CameraMotion::UNSTABLE_SIM:
|
|
return "UNSTABLE_SIM";
|
|
case CameraMotion::UNSTABLE:
|
|
return "UNSTABLE";
|
|
case CameraMotion::INVALID:
|
|
return "INVALID";
|
|
}
|
|
|
|
return "NEVER HAPPENS WITH CLANG";
|
|
}
|
|
|
|
float InlierCoverage(const CameraMotion& camera_motion,
|
|
bool use_homography_coverage) {
|
|
const int num_block_coverages = camera_motion.mixture_inlier_coverage_size();
|
|
if (num_block_coverages == 0 || use_homography_coverage) {
|
|
if (camera_motion.has_homography()) {
|
|
return camera_motion.homography_inlier_coverage();
|
|
} else {
|
|
return 1.0f;
|
|
}
|
|
} else {
|
|
return std::accumulate(camera_motion.mixture_inlier_coverage().begin(),
|
|
camera_motion.mixture_inlier_coverage().end(),
|
|
0.0f) *
|
|
(1.0f / num_block_coverages);
|
|
}
|
|
}
|
|
|
|
template <class CameraMotionContainer>
|
|
CameraMotion FirstCameraMotionForLooping(
|
|
const CameraMotionContainer& camera_motions) {
|
|
if (camera_motions.size() < 2) {
|
|
LOG(ERROR) << "Not enough camera motions for refinement.";
|
|
return CameraMotion();
|
|
}
|
|
|
|
CameraMotion loop_motion = camera_motions[0];
|
|
|
|
// Only update motions present in first camera motion.
|
|
bool has_translation = loop_motion.has_translation();
|
|
bool has_similarity = loop_motion.has_linear_similarity();
|
|
bool has_homography = loop_motion.has_homography();
|
|
|
|
TranslationModel translation;
|
|
LinearSimilarityModel similarity;
|
|
Homography homography;
|
|
|
|
for (int i = 1; i < camera_motions.size(); ++i) {
|
|
const CameraMotion& motion = camera_motions[i];
|
|
if (motion.has_mixture_homography()) {
|
|
// TODO: Implement
|
|
LOG(WARNING) << "This function does not validly apply mixtures; "
|
|
<< "which are currently not closed under composition. ";
|
|
}
|
|
|
|
switch (motion.type()) {
|
|
case CameraMotion::INVALID:
|
|
has_translation = false;
|
|
has_similarity = false;
|
|
has_homography = false;
|
|
break;
|
|
case CameraMotion::UNSTABLE:
|
|
has_similarity = false;
|
|
has_homography = false;
|
|
break;
|
|
case CameraMotion::UNSTABLE_SIM:
|
|
has_homography = false;
|
|
break;
|
|
case CameraMotion::VALID:
|
|
case CameraMotion::UNSTABLE_HOMOG:
|
|
break;
|
|
default:
|
|
LOG(FATAL) << "Unknown CameraMotion::type.";
|
|
}
|
|
|
|
// Only accumulate motions which are valid for the entire chain, otherwise
|
|
// keep the pre-initialized motions.
|
|
has_translation =
|
|
has_translation && motion.has_translation() &&
|
|
AccumulateInvertedModel(motion.translation(), &translation);
|
|
has_similarity =
|
|
has_similarity && motion.has_linear_similarity() &&
|
|
AccumulateInvertedModel(motion.linear_similarity(), &similarity);
|
|
has_homography = has_homography && motion.has_homography() &&
|
|
AccumulateInvertedModel(motion.homography(), &homography);
|
|
}
|
|
|
|
if (has_translation) {
|
|
*loop_motion.mutable_translation() = translation;
|
|
}
|
|
if (has_similarity && IsInverseStable(similarity)) {
|
|
*loop_motion.mutable_linear_similarity() = similarity;
|
|
}
|
|
if (has_homography && IsInverseStable(homography)) {
|
|
*loop_motion.mutable_homography() = homography;
|
|
}
|
|
|
|
VLOG(1) << "Looping camera motion refinement for "
|
|
<< " translation:" << (has_translation ? "successful" : "failed")
|
|
<< " similarity:" << (has_similarity ? "successful" : "failed")
|
|
<< " homography:" << (has_homography ? "successful" : "failed");
|
|
|
|
return loop_motion;
|
|
}
|
|
|
|
// Explicit instantiation.
|
|
template CameraMotion FirstCameraMotionForLooping<std::vector<CameraMotion>>(
|
|
const std::vector<CameraMotion>&);
|
|
template CameraMotion FirstCameraMotionForLooping<std::deque<CameraMotion>>(
|
|
const std::deque<CameraMotion>&);
|
|
} // namespace mediapipe
|