337 lines
12 KiB
C++
337 lines
12 KiB
C++
// Copyright 2019 The MediaPipe Authors.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
#ifndef MEDIAPIPE_UTIL_TRACKING_CAMERA_MOTION_H_
|
|
#define MEDIAPIPE_UTIL_TRACKING_CAMERA_MOTION_H_
|
|
|
|
#include <algorithm>
#include <string>
#include <vector>

#include "mediapipe/util/tracking/camera_motion.pb.h"
#include "mediapipe/util/tracking/motion_models.h"
#include "mediapipe/util/tracking/region_flow.pb.h"
|
|
|
|
namespace mediapipe {
|
|
|
|
// Helper functions to extract specific models from CameraMotion.
|
|
// Returned is always the requested model. In case the model is not present (i.e.
|
|
// has_<motion model> fails), the highest degree of freedom model
|
|
// (lower or equal to the requested model) that is present is embedded in the
|
|
// requested model.
|
|
// Presence of the model depends on which models were requested to be
|
|
// estimated (via MotionEstimationOptions, to initialize requested models to
|
|
// identity, use ResetMotionModels above). For example, assume linear similarity
|
|
// was not requested to be estimated, but affine was requested. If
|
|
// CameraMotionToLinearSimilarity is called, has_linear_similarity would be
|
|
// false and the function falls back to returning a translation model.
|
|
void CameraMotionToTranslation(const CameraMotion& camera_motion,
|
|
TranslationModel* model);
|
|
void CameraMotionToLinearSimilarity(const CameraMotion& camera_motion,
|
|
LinearSimilarityModel* model);
|
|
void CameraMotionToAffine(const CameraMotion& camera_motion,
|
|
AffineModel* model);
|
|
void CameraMotionToHomography(const CameraMotion& camera_motion,
|
|
Homography* homography);
|
|
void CameraMotionToMixtureHomography(const CameraMotion& camera_motion,
|
|
MixtureHomography* mixture);
|
|
|
|
// TODO: Under development ...
|
|
// Returns camera motion lhs * rhs. Initial camera motion is set to rhs
|
|
// before composition.
|
|
CameraMotion ComposeCameraMotion(const CameraMotion& lhs,
|
|
const CameraMotion& rhs);
|
|
|
|
// Inverts every motion model that is set in CameraMotion.
|
|
CameraMotion InvertCameraMotion(const CameraMotion& motion);
|
|
|
|
// Templated wrapper for above calls.
|
|
template <class Model>
|
|
Model CameraMotionToModel(const CameraMotion& camera_motion);
|
|
|
|
// Returns model from passed CameraMotion specified by unstable_type
|
|
// (which must name a type != VALID, CHECK-ed) and embeds it in the specified
|
|
// Model.
|
|
template <class Model>
|
|
Model UnstableCameraMotionToModel(const CameraMotion& camera_motion,
|
|
CameraMotion::Type unstable_type);
|
|
|
|
// Projects passed model to lower degree of freedom model (embedded in original
|
|
// type), as specified by type. In case type is valid, function is effectively
|
|
// identity function.
|
|
// Only implemented for the following models:
|
|
// - Translation
|
|
// - LinearSimilarity
|
|
// - AffineModel
|
|
template <class Model>
|
|
Model ProjectToTypeModel(const Model& model, float frame_width,
|
|
float frame_height, CameraMotion::Type type);
|
|
|
|
// Subtracts camera motion (specifically the highest degree of freedom model
|
|
// that has been estimated reliably) from feature lists. Operates on vectors
|
|
// for improved performance. Size of camera_motions can be larger than
|
|
// feature_lists, in this case last camera motions are ignored.
|
|
void SubtractCameraMotionFromFeatures(
|
|
const std::vector<CameraMotion>& camera_motions,
|
|
std::vector<RegionFlowFeatureList*>* feature_lists);
|
|
|
|
// Returns average motion magnitude after subtracting camera motion.
|
|
float ForegroundMotion(const CameraMotion& camera_motion,
|
|
const RegionFlowFeatureList& feature_list);
|
|
|
|
// Initializes a CameraMotion with its corresponding fields from a
|
|
// RegionFlowFeatureList.
|
|
void InitCameraMotionFromFeatureList(const RegionFlowFeatureList& feature_list,
|
|
CameraMotion* camera_motion);
|
|
|
|
// Converts Camera motion flag to std::string.
|
|
std::string CameraMotionFlagToString(const CameraMotion& motion);
|
|
|
|
// Converts Camera motion type to std::string. Used instead of builtin proto
|
|
// function for mobile support.
|
|
std::string CameraMotionTypeToString(const CameraMotion& motion);
|
|
|
|
// Returns inlier coverage either based on mixture (if present, in this case
|
|
// return mean of block coverages) or else homography.
|
|
// If neither is present, returns 0 to signal insufficient inliers.
|
|
// If use_homography_coverage is set, uses homography even when mixture is
|
|
// present.
|
|
float InlierCoverage(const CameraMotion& camera_motion,
|
|
bool use_homography_coverage);
|
|
|
|
// Downsamples passed motion models temporally by specified downsample_scale,
|
|
// i.e. for models F_0, F_1, F_2, F_3, F_4 and downsample_scale of 2, models:
|
|
// F_0 * F_1, F_2 * F_3 and F_4 are returned.
|
|
// Optionally also performs downsampling of corresponding model_type returning
|
|
// the least stable type (maximum Type value) for each composition.
|
|
template <class Model>
|
|
void DownsampleMotionModels(
|
|
const std::vector<Model>& models,
|
|
const std::vector<CameraMotion::Type>* model_type, // optional.
|
|
int downsample_scale, std::vector<Model>* downsampled_models,
|
|
std::vector<CameraMotion::Type>* downsampled_types);
|
|
|
|
// Compatible subsampling method to above DownsampleMotionModels.
|
|
// Note, when downsampling for example:
|
|
// F_0, F_1, F_2, F_3, F_4 by factor 3 via above function, downsampled result
|
|
// will be F_0 * F_1 * F_2, F_3 * F_4
|
|
// so we would need to pick entities at F_2 and F_4.
|
|
// Template class Container must be SequenceContainer, like
|
|
// std::vector, std::deque.
|
|
template <class Container>
|
|
void SubsampleEntities(const Container& input, int downsample_scale,
|
|
Container* output);
|
|
|
|
// For perfect looping, this function computes the motion in the first frame
|
|
// to be the inverse of the accumulated motion from frame 1 to N.
|
|
// If a particular motion type is not available or not invertible at any
|
|
// frame pair, the original motion for that type is retained.
|
|
// Does not work if mixtures are present.
|
|
template <class CameraMotionContainer> // STL container of CameraMotion's
|
|
CameraMotion FirstCameraMotionForLooping(
|
|
const CameraMotionContainer& container);
|
|
|
|
// Template implementation functions.
|
|
|
|
template <class Model>
|
|
Model UnstableCameraMotionToModel(const CameraMotion& camera_motion,
|
|
CameraMotion::Type unstable_type) {
|
|
switch (unstable_type) {
|
|
case CameraMotion::INVALID:
|
|
return Model(); // Identity.
|
|
|
|
case CameraMotion::UNSTABLE: {
|
|
return ModelAdapter<Model>::Embed(
|
|
CameraMotionToModel<TranslationModel>(camera_motion));
|
|
}
|
|
|
|
case CameraMotion::UNSTABLE_SIM: {
|
|
return ModelAdapter<Model>::Embed(
|
|
CameraMotionToModel<LinearSimilarityModel>(camera_motion));
|
|
}
|
|
|
|
case CameraMotion::UNSTABLE_HOMOG: {
|
|
return ModelAdapter<Model>::Embed(
|
|
CameraMotionToModel<Homography>(camera_motion));
|
|
}
|
|
|
|
case CameraMotion::VALID:
|
|
LOG(FATAL) << "Specify a type != VALID";
|
|
return Model();
|
|
}
|
|
}
|
|
|
|
template <>
|
|
inline TranslationModel ProjectToTypeModel(const TranslationModel& model,
|
|
float frame_width,
|
|
float frame_height,
|
|
CameraMotion::Type type) {
|
|
switch (type) {
|
|
case CameraMotion::INVALID:
|
|
return TranslationModel(); // Identity.
|
|
default:
|
|
return model;
|
|
}
|
|
}
|
|
|
|
template <>
|
|
inline LinearSimilarityModel ProjectToTypeModel(
|
|
const LinearSimilarityModel& model, float frame_width, float frame_height,
|
|
CameraMotion::Type type) {
|
|
switch (type) {
|
|
case CameraMotion::INVALID:
|
|
return LinearSimilarityModel(); // Identity.
|
|
|
|
case CameraMotion::UNSTABLE:
|
|
return LinearSimilarityAdapter::Embed(
|
|
TranslationAdapter::ProjectFrom(model, frame_width, frame_height));
|
|
|
|
default:
|
|
return model;
|
|
}
|
|
}
|
|
|
|
template <class Model>
|
|
Model ProjectToTypeModel(const Model& model, float frame_width,
|
|
float frame_height, CameraMotion::Type type) {
|
|
switch (type) {
|
|
case CameraMotion::INVALID:
|
|
return Model(); // Identity.
|
|
|
|
case CameraMotion::UNSTABLE:
|
|
return ModelAdapter<Model>::Embed(
|
|
TranslationAdapter::ProjectFrom(model, frame_width, frame_height));
|
|
|
|
case CameraMotion::UNSTABLE_SIM:
|
|
return ModelAdapter<Model>::Embed(LinearSimilarityAdapter::ProjectFrom(
|
|
model, frame_width, frame_height));
|
|
|
|
// case UNSTABLE_HOMOG does not occur except for mixtures.
|
|
|
|
default:
|
|
return model;
|
|
}
|
|
}
|
|
|
|
template <>
|
|
inline MixtureHomography ProjectToTypeModel(const MixtureHomography&, float,
|
|
float, CameraMotion::Type) {
|
|
LOG(FATAL) << "Projection not supported for mixtures.";
|
|
return MixtureHomography();
|
|
}
|
|
|
|
template <class Model>
|
|
void DownsampleMotionModels(
|
|
const std::vector<Model>& models,
|
|
const std::vector<CameraMotion::Type>* model_type, int downsample_scale,
|
|
std::vector<Model>* downsampled_models,
|
|
std::vector<CameraMotion::Type>* downsampled_types) {
|
|
if (model_type) {
|
|
CHECK_EQ(models.size(), model_type->size());
|
|
CHECK(downsampled_models) << "Expecting output models.";
|
|
}
|
|
|
|
CHECK(downsampled_models);
|
|
downsampled_models->clear();
|
|
if (downsampled_types) {
|
|
downsampled_types->clear();
|
|
}
|
|
|
|
const int num_models = models.size();
|
|
|
|
for (int model_idx = 0; model_idx < num_models;
|
|
model_idx += downsample_scale) {
|
|
const int last_idx =
|
|
std::min<int>(model_idx + downsample_scale, num_models) - 1;
|
|
|
|
CameraMotion::Type sampled_type = CameraMotion::VALID;
|
|
if (model_type) {
|
|
// Get least stable model within downsample window (max operation).
|
|
for (int i = model_idx; i <= last_idx; ++i) {
|
|
sampled_type = std::max(sampled_type, model_type->at(i));
|
|
}
|
|
downsampled_types->push_back(sampled_type);
|
|
}
|
|
|
|
// Concatenate models.
|
|
Model composed = models[last_idx];
|
|
|
|
for (int i = last_idx - 1; i >= model_idx; --i) {
|
|
composed = ModelCompose2(models[i], composed);
|
|
}
|
|
|
|
downsampled_models->push_back(composed);
|
|
}
|
|
}
|
|
|
|
template <class Container>
|
|
void SubsampleEntities(const Container& input, int downsample_factor,
|
|
Container* output) {
|
|
CHECK(output);
|
|
output->clear();
|
|
|
|
if (input.empty()) {
|
|
return;
|
|
}
|
|
|
|
for (int k = downsample_factor - 1; k < input.size();
|
|
k += downsample_factor) {
|
|
output->push_back(input[k]);
|
|
}
|
|
|
|
if (input.size() % downsample_factor != 0) {
|
|
// We need to add last constraint as termination.
|
|
output->push_back(input.back());
|
|
}
|
|
}
|
|
|
|
template <>
|
|
inline TranslationModel CameraMotionToModel(const CameraMotion& camera_motion) {
|
|
TranslationModel model;
|
|
CameraMotionToTranslation(camera_motion, &model);
|
|
return model;
|
|
}
|
|
|
|
template <>
|
|
inline LinearSimilarityModel CameraMotionToModel(
|
|
const CameraMotion& camera_motion) {
|
|
LinearSimilarityModel model;
|
|
CameraMotionToLinearSimilarity(camera_motion, &model);
|
|
return model;
|
|
}
|
|
|
|
template <>
|
|
inline AffineModel CameraMotionToModel(const CameraMotion& camera_motion) {
|
|
AffineModel model;
|
|
CameraMotionToAffine(camera_motion, &model);
|
|
return model;
|
|
}
|
|
|
|
template <>
|
|
inline Homography CameraMotionToModel(const CameraMotion& camera_motion) {
|
|
Homography model;
|
|
CameraMotionToHomography(camera_motion, &model);
|
|
return model;
|
|
}
|
|
|
|
template <>
|
|
inline MixtureHomography CameraMotionToModel(
|
|
const CameraMotion& camera_motion) {
|
|
MixtureHomography model;
|
|
CameraMotionToMixtureHomography(camera_motion, &model);
|
|
return model;
|
|
}
|
|
|
|
} // namespace mediapipe
|
|
|
|
#endif // MEDIAPIPE_UTIL_TRACKING_CAMERA_MOTION_H_
|