// Copyright 2019 The MediaPipe Authors. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "mediapipe/util/tracking/camera_motion.h" #include #include "absl/strings/str_format.h" #include "mediapipe/util/tracking/region_flow.h" namespace mediapipe { void CameraMotionToTranslation(const CameraMotion& camera_motion, TranslationModel* model) { if (camera_motion.has_translation()) { model->CopyFrom(camera_motion.translation()); } else { model->Clear(); } } void CameraMotionToLinearSimilarity(const CameraMotion& camera_motion, LinearSimilarityModel* model) { if (camera_motion.has_linear_similarity()) { model->CopyFrom(camera_motion.linear_similarity()); } else { TranslationModel translation; CameraMotionToTranslation(camera_motion, &translation); model->CopyFrom(LinearSimilarityAdapter::Embed(translation)); } } void CameraMotionToAffine(const CameraMotion& camera_motion, AffineModel* model) { if (camera_motion.has_affine()) { model->CopyFrom(camera_motion.affine()); } else { LinearSimilarityModel similarity; CameraMotionToLinearSimilarity(camera_motion, &similarity); model->CopyFrom(AffineAdapter::Embed(similarity)); } } void CameraMotionToHomography(const CameraMotion& camera_motion, Homography* model) { if (camera_motion.has_homography()) { model->CopyFrom(camera_motion.homography()); } else { AffineModel affine; CameraMotionToAffine(camera_motion, &affine); model->CopyFrom(HomographyAdapter::Embed(affine)); } } void CameraMotionToMixtureHomography(const CameraMotion& camera_motion, MixtureHomography* model) { if (camera_motion.has_mixture_homography()) { model->CopyFrom(camera_motion.mixture_homography()); } else { Homography homography; CameraMotionToHomography(camera_motion, &homography); model->CopyFrom(MixtureHomographyAdapter::Embed(homography, 1)); } } CameraMotion ComposeCameraMotion(const CameraMotion& lhs, const CameraMotion& rhs) { CHECK_EQ(lhs.frame_width(), rhs.frame_width()); CHECK_EQ(lhs.frame_height(), rhs.frame_height()); CameraMotion result = rhs; if (lhs.has_translation() || rhs.has_translation()) { *result.mutable_translation() = ModelCompose2(lhs.translation(), rhs.translation()); } if (lhs.has_similarity() || rhs.has_similarity()) { *result.mutable_similarity() = ModelCompose2(lhs.similarity(), rhs.similarity()); } if (lhs.has_linear_similarity() || rhs.has_linear_similarity()) { *result.mutable_linear_similarity() = ModelCompose2(lhs.linear_similarity(), rhs.linear_similarity()); } if (lhs.has_affine() || rhs.has_affine()) { *result.mutable_affine() = ModelCompose2(lhs.affine(), rhs.affine()); } if (lhs.has_homography() || rhs.has_homography()) { *result.mutable_homography() = ModelCompose2(lhs.homography(), rhs.homography()); } if (rhs.has_mixture_homography()) { if (lhs.has_mixture_homography()) { LOG(ERROR) << "Mixture homographies are not closed under composition, " << "Only rhs mixtures composed with lhs homographies " << "are supported."; } else if (lhs.type() <= CameraMotion::UNSTABLE_SIM) { // We only composit base model when stability is sufficient. *result.mutable_mixture_homography() = MixtureHomographyAdapter::ComposeLeft(rhs.mixture_homography(), lhs.homography()); } } else if (lhs.has_mixture_homography()) { LOG(ERROR) << "Only rhs mixtures supported."; } // Select max unstable type. result.set_type(std::max(lhs.type(), rhs.type())); result.set_average_magnitude(lhs.average_magnitude() + rhs.average_magnitude()); result.set_translation_variance( std::max(lhs.translation_variance(), rhs.translation_variance())); result.set_similarity_inlier_ratio( std::min(lhs.similarity_inlier_ratio(), rhs.similarity_inlier_ratio())); result.set_similarity_strict_inlier_ratio( std::min(lhs.similarity_strict_inlier_ratio(), rhs.similarity_strict_inlier_ratio())); result.set_average_homography_error( std::max(lhs.average_homography_error(), rhs.average_homography_error())); result.set_homography_inlier_coverage(std::min( lhs.homography_inlier_coverage(), rhs.homography_inlier_coverage())); result.set_homography_strict_inlier_coverage( std::min(lhs.homography_strict_inlier_coverage(), rhs.homography_strict_inlier_coverage())); // TODO: Overlay stuff. result.set_flags(lhs.flags() | rhs.flags()); result.set_timestamp_usec( std::max(lhs.timestamp_usec(), rhs.timestamp_usec())); result.set_match_frame(lhs.match_frame() + rhs.match_frame()); // TODO: Rest. return result; } CameraMotion InvertCameraMotion(const CameraMotion& motion) { CameraMotion inverted = motion; if (motion.has_translation()) { *inverted.mutable_translation() = ModelInvert(motion.translation()); } if (motion.has_similarity()) { *inverted.mutable_similarity() = ModelInvert(motion.similarity()); } if (motion.has_linear_similarity()) { *inverted.mutable_linear_similarity() = ModelInvert(motion.linear_similarity()); } if (motion.has_affine()) { *inverted.mutable_affine() = ModelInvert(motion.affine()); } if (motion.has_homography()) { *inverted.mutable_homography() = ModelInvert(motion.homography()); } if (motion.has_mixture_homography()) { LOG(ERROR) << "Mixture homographies are not closed under inversion."; } return inverted; } void SubtractCameraMotionFromFeatures( const std::vector& camera_motions, std::vector* feature_lists) { CHECK(feature_lists != nullptr); CHECK_GE(camera_motions.size(), feature_lists->size()); if (feature_lists->empty()) { return; } bool use_mixtures = camera_motions[0].has_mixture_homography(); std::unique_ptr row_weights; if (use_mixtures) { row_weights.reset(MixtureRowWeightsFromCameraMotion( camera_motions[0], (*feature_lists)[0]->frame_height())); } for (int k = 0; k < feature_lists->size(); ++k) { Homography background_model; MixtureHomography background_model_mixture; if (use_mixtures) { CameraMotionToMixtureHomography(camera_motions[k], &background_model_mixture); } else { CameraMotionToHomography(camera_motions[k], &background_model); } // Remove motion due to camera motion, leaving only foreground motion. for (auto& feature : *(*feature_lists)[k]->mutable_feature()) { const Vector2_f background_motion = (!use_mixtures ? HomographyAdapter::TransformPoint( background_model, FeatureLocation(feature)) : MixtureHomographyAdapter::TransformPoint( background_model_mixture, *row_weights, FeatureLocation(feature))) - FeatureLocation(feature); const Vector2_f object_motion = FeatureFlow(feature) - background_motion; feature.set_dx(object_motion.x()); feature.set_dy(object_motion.y()); } } } float ForegroundMotion(const CameraMotion& camera_motion, const RegionFlowFeatureList& feature_list) { if (camera_motion.has_mixture_homography()) { LOG(WARNING) << "Mixture homographies are present but function is only " << "using homographies. Truncation error likely."; } Homography background_motion; CameraMotionToHomography(camera_motion, &background_motion); float foreground_motion = 0; for (auto& feature : feature_list.feature()) { const float error = (FeatureMatchLocation(feature) - HomographyAdapter::TransformPoint( background_motion, FeatureLocation(feature))) .Norm(); foreground_motion += error; } if (feature_list.feature_size() > 0) { foreground_motion *= 1.0f / feature_list.feature_size(); } return foreground_motion; } void InitCameraMotionFromFeatureList(const RegionFlowFeatureList& feature_list, CameraMotion* camera_motion) { camera_motion->set_blur_score(feature_list.blur_score()); camera_motion->set_frac_long_features_rejected( feature_list.frac_long_features_rejected()); camera_motion->set_timestamp_usec(feature_list.timestamp_usec()); camera_motion->set_match_frame(feature_list.match_frame()); camera_motion->set_frame_width(feature_list.frame_width()); camera_motion->set_frame_height(feature_list.frame_height()); } std::string CameraMotionFlagToString(const CameraMotion& camera_motion) { std::string text; if (camera_motion.flags() & CameraMotion::FLAG_SHOT_BOUNDARY) { text += "SHOT_BOUNDARY|"; } if (camera_motion.flags() & CameraMotion::FLAG_BLURRY_FRAME) { text += absl::StrFormat("BLURRY_FRAME %.2f|", camera_motion.bluriness()); } if (camera_motion.flags() & CameraMotion::FLAG_MAJOR_OVERLAY) { text += "MAJOR_OVERLAY|"; } if (camera_motion.flags() & CameraMotion::FLAG_SHARP_FRAME) { text += "SHARP_FRAME|"; } if (camera_motion.flags() & CameraMotion::FLAG_SHOT_FADE) { text += "SHOT_FADE|"; } if (camera_motion.flags() & CameraMotion::FLAG_DUPLICATED) { text += "DUPLICATED|"; } return text; } std::string CameraMotionTypeToString(const CameraMotion& motion) { switch (motion.type()) { case CameraMotion::VALID: return "VALID"; case CameraMotion::UNSTABLE_HOMOG: return "UNSTABLE_HOMOG"; case CameraMotion::UNSTABLE_SIM: return "UNSTABLE_SIM"; case CameraMotion::UNSTABLE: return "UNSTABLE"; case CameraMotion::INVALID: return "INVALID"; } return "NEVER HAPPENS WITH CLANG"; } float InlierCoverage(const CameraMotion& camera_motion, bool use_homography_coverage) { const int num_block_coverages = camera_motion.mixture_inlier_coverage_size(); if (num_block_coverages == 0 || use_homography_coverage) { if (camera_motion.has_homography()) { return camera_motion.homography_inlier_coverage(); } else { return 1.0f; } } else { return std::accumulate(camera_motion.mixture_inlier_coverage().begin(), camera_motion.mixture_inlier_coverage().end(), 0.0f) * (1.0f / num_block_coverages); } } template CameraMotion FirstCameraMotionForLooping( const CameraMotionContainer& camera_motions) { if (camera_motions.size() < 2) { LOG(ERROR) << "Not enough camera motions for refinement."; return CameraMotion(); } CameraMotion loop_motion = camera_motions[0]; // Only update motions present in first camera motion. bool has_translation = loop_motion.has_translation(); bool has_similarity = loop_motion.has_linear_similarity(); bool has_homography = loop_motion.has_homography(); TranslationModel translation; LinearSimilarityModel similarity; Homography homography; for (int i = 1; i < camera_motions.size(); ++i) { const CameraMotion& motion = camera_motions[i]; if (motion.has_mixture_homography()) { // TODO: Implement LOG(WARNING) << "This function does not validly apply mixtures; " << "which are currently not closed under composition. "; } switch (motion.type()) { case CameraMotion::INVALID: has_translation = false; has_similarity = false; has_homography = false; break; case CameraMotion::UNSTABLE: has_similarity = false; has_homography = false; break; case CameraMotion::UNSTABLE_SIM: has_homography = false; break; case CameraMotion::VALID: case CameraMotion::UNSTABLE_HOMOG: break; default: LOG(FATAL) << "Unknown CameraMotion::type."; } // Only accumulate motions which are valid for the entire chain, otherwise // keep the pre-initialized motions. has_translation = has_translation && motion.has_translation() && AccumulateInvertedModel(motion.translation(), &translation); has_similarity = has_similarity && motion.has_linear_similarity() && AccumulateInvertedModel(motion.linear_similarity(), &similarity); has_homography = has_homography && motion.has_homography() && AccumulateInvertedModel(motion.homography(), &homography); } if (has_translation) { *loop_motion.mutable_translation() = translation; } if (has_similarity && IsInverseStable(similarity)) { *loop_motion.mutable_linear_similarity() = similarity; } if (has_homography && IsInverseStable(homography)) { *loop_motion.mutable_homography() = homography; } VLOG(1) << "Looping camera motion refinement for " << " translation:" << (has_translation ? "successful" : "failed") << " similarity:" << (has_similarity ? "successful" : "failed") << " homography:" << (has_homography ? "successful" : "failed"); return loop_motion; } // Explicit instantiation. template CameraMotion FirstCameraMotionForLooping>( const std::vector&); template CameraMotion FirstCameraMotionForLooping>( const std::deque&); } // namespace mediapipe