701 lines
24 KiB
C++
701 lines
24 KiB
C++
|
// Copyright 2019 The MediaPipe Authors.
|
||
|
//
|
||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||
|
// you may not use this file except in compliance with the License.
|
||
|
// You may obtain a copy of the License at
|
||
|
//
|
||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||
|
//
|
||
|
// Unless required by applicable law or agreed to in writing, software
|
||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
|
// See the License for the specific language governing permissions and
|
||
|
// limitations under the License.
|
||
|
|
||
|
#include "mediapipe/util/tracking/motion_saliency.h"
|
||
|
|
||
|
#include <math.h>
|
||
|
|
||
|
#include <algorithm>
|
||
|
#include <cmath>
|
||
|
#include <deque>
|
||
|
#include <iterator>
|
||
|
#include <list>
|
||
|
#include <memory>
|
||
|
#include <vector>
|
||
|
|
||
|
#include "mediapipe/framework/port/logging.h"
|
||
|
#include "mediapipe/util/tracking/camera_motion.h"
|
||
|
#include "mediapipe/util/tracking/measure_time.h"
|
||
|
#include "mediapipe/util/tracking/region_flow.h"
|
||
|
#include "mediapipe/util/tracking/region_flow.pb.h"
|
||
|
|
||
|
namespace mediapipe {
|
||
|
|
||
|
MotionSaliency::MotionSaliency(const MotionSaliencyOptions& options,
|
||
|
int frame_width, int frame_height)
|
||
|
: options_(options),
|
||
|
frame_width_(frame_width),
|
||
|
frame_height_(frame_height) {}
|
||
|
|
||
|
MotionSaliency::~MotionSaliency() {}
|
||
|
|
||
|
void MotionSaliency::SaliencyFromFeatures(
|
||
|
const RegionFlowFeatureList& feature_list,
|
||
|
std::vector<float>* irls_weights, // optional.
|
||
|
SalientPointFrame* salient_frame) {
|
||
|
CHECK(salient_frame);
|
||
|
CHECK_EQ(frame_width_, feature_list.frame_width());
|
||
|
CHECK_EQ(frame_height_, feature_list.frame_height());
|
||
|
|
||
|
if (irls_weights) {
|
||
|
CHECK_EQ(feature_list.feature_size(), irls_weights->size());
|
||
|
}
|
||
|
|
||
|
if (feature_list.feature_size() < 1) {
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
float max_irls_weight = 0;
|
||
|
if (irls_weights) {
|
||
|
max_irls_weight =
|
||
|
*std::max_element(irls_weights->begin(), irls_weights->end());
|
||
|
} else {
|
||
|
struct FeatureIRLSComparator {
|
||
|
bool operator()(const RegionFlowFeature& lhs,
|
||
|
const RegionFlowFeature& rhs) const {
|
||
|
return lhs.irls_weight() < rhs.irls_weight();
|
||
|
}
|
||
|
};
|
||
|
max_irls_weight =
|
||
|
std::max_element(feature_list.feature().begin(),
|
||
|
feature_list.feature().end(), FeatureIRLSComparator())
|
||
|
->irls_weight();
|
||
|
}
|
||
|
|
||
|
// Max weight is too small for meaningful mode finding, terminate.
|
||
|
if (max_irls_weight < 1e-2f) {
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
// Discard small weights that just slow clustering down.
|
||
|
const float irls_cutoff = max_irls_weight * 1e-2f;
|
||
|
|
||
|
int feat_idx = 0;
|
||
|
|
||
|
// Create SalientLocation's from input feature_list.
|
||
|
std::vector<SalientLocation> features;
|
||
|
for (const auto& src_feature : feature_list.feature()) {
|
||
|
const float weight =
|
||
|
irls_weights ? (*irls_weights)[feat_idx] : src_feature.irls_weight();
|
||
|
++feat_idx;
|
||
|
|
||
|
// Discard all features with small measure or zero weight from mode finding.
|
||
|
if (weight < irls_cutoff) {
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
features.push_back(SalientLocation(FeatureLocation(src_feature), weight));
|
||
|
}
|
||
|
|
||
|
DetermineSalientFrame(features, salient_frame);
|
||
|
}
|
||
|
|
||
|
void MotionSaliency::SaliencyFromPoints(const std::vector<Vector2_f>* points,
|
||
|
const std::vector<float>* weights,
|
||
|
SalientPointFrame* salient_frame) {
|
||
|
// TODO: Handle vectors of size zero.
|
||
|
CHECK(salient_frame);
|
||
|
CHECK_EQ(points->size(), weights->size());
|
||
|
|
||
|
float max_weight = *std::max_element(weights->begin(), weights->end());
|
||
|
|
||
|
// Max weight is too small for meaningful mode finding, terminate.
|
||
|
if (max_weight < 1e-2f) {
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
// Discard small weights that just slow clustering down.
|
||
|
const float weight_cutoff = max_weight * 1e-2f;
|
||
|
|
||
|
// Create SalientLocation's from input points.
|
||
|
std::vector<SalientLocation> features;
|
||
|
for (int point_idx = 0; point_idx < points->size(); ++point_idx) {
|
||
|
const float weight = (*weights)[point_idx];
|
||
|
// Discard all features with small measure or zero weight from mode finding.
|
||
|
if (weight < weight_cutoff) {
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
features.push_back(SalientLocation((*points)[point_idx], weight));
|
||
|
}
|
||
|
|
||
|
DetermineSalientFrame(features, salient_frame);
|
||
|
}
|
||
|
|
||
|
// We only keep those salient points that have neighbors along the temporal
|
||
|
// dimension.
|
||
|
void MotionSaliency::SelectSaliencyInliers(
|
||
|
std::vector<SalientPointFrame*>* motion_saliency,
|
||
|
bool rescale_to_median_saliency_weight) {
|
||
|
float scale = 1.0;
|
||
|
|
||
|
if (rescale_to_median_saliency_weight) {
|
||
|
// Compute median saliency weight across all frames, to rescale saliency.
|
||
|
std::vector<float> saliency_weights;
|
||
|
for (int i = 0; i < motion_saliency->size(); ++i) {
|
||
|
for (const auto& salient_point : (*motion_saliency)[i]->point()) {
|
||
|
saliency_weights.push_back(salient_point.weight());
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Nothing to filter in the frame chunk.
|
||
|
if (saliency_weights.empty()) {
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
auto median_iter = saliency_weights.begin() + saliency_weights.size() / 2;
|
||
|
std::nth_element(saliency_weights.begin(), median_iter,
|
||
|
saliency_weights.end());
|
||
|
|
||
|
const float median_weight = *median_iter;
|
||
|
if (median_weight > 0) {
|
||
|
scale = options_.saliency_weight() / median_weight;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
SaliencyPointList inlier_saliency(motion_saliency->size());
|
||
|
const float sq_support_distance = options_.selection_support_distance() *
|
||
|
options_.selection_support_distance();
|
||
|
|
||
|
// Test each point salient point for inlierness.
|
||
|
for (int i = 0; i < motion_saliency->size(); ++i) {
|
||
|
for (const auto& salient_point : (*motion_saliency)[i]->point()) {
|
||
|
int support = 0;
|
||
|
Vector2_f salient_location(salient_point.norm_point_x(),
|
||
|
salient_point.norm_point_y());
|
||
|
|
||
|
// Find supporting points (saliency points close enough to current one)
|
||
|
// in adjacent frames. Linear Complexity.
|
||
|
for (int j = std::max<int>(0, i - options_.selection_frame_radius()),
|
||
|
end_j = std::min<int>(i + options_.selection_frame_radius(),
|
||
|
motion_saliency->size() - 1);
|
||
|
|
||
|
j <= end_j; ++j) {
|
||
|
if (i == j) {
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
for (const auto& compare_point : (*motion_saliency)[j]->point()) {
|
||
|
Vector2_f compare_location(compare_point.norm_point_x(),
|
||
|
compare_point.norm_point_y());
|
||
|
|
||
|
if ((salient_location - compare_location).Norm2() <=
|
||
|
sq_support_distance) {
|
||
|
++support;
|
||
|
}
|
||
|
}
|
||
|
} // end neighbor frames iteration.
|
||
|
|
||
|
if (support >= options_.selection_minimum_support()) {
|
||
|
SalientPoint* scaled_point = inlier_saliency[i].add_point();
|
||
|
scaled_point->CopyFrom(salient_point);
|
||
|
scaled_point->set_weight(scaled_point->weight() * scale);
|
||
|
}
|
||
|
} // end point traversal.
|
||
|
} // end frame traversal.
|
||
|
|
||
|
for (int k = 0; k < motion_saliency->size(); ++k) {
|
||
|
(*motion_saliency)[k]->Swap(&inlier_saliency[k]);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void MotionSaliency::FilterMotionSaliency(
|
||
|
std::vector<SalientPointFrame*>* saliency_point_list) {
|
||
|
CHECK(saliency_point_list != nullptr);
|
||
|
const float sigma_time = options_.filtering_sigma_time();
|
||
|
const float sigma_space = options_.filtering_sigma_space();
|
||
|
|
||
|
const int time_radius = ceil(sigma_time * 1.5);
|
||
|
const int time_diameter = 2 * time_radius + 1;
|
||
|
|
||
|
// Create lookup table for weights.
|
||
|
std::vector<float> time_weights(time_diameter);
|
||
|
const float time_coeff = -0.5f / (sigma_time * sigma_time);
|
||
|
|
||
|
for (int i = -time_radius, time_idx = 0; i <= time_radius; ++i, ++time_idx) {
|
||
|
time_weights[time_idx] = std::exp(time_coeff * i * i);
|
||
|
}
|
||
|
|
||
|
// Ignore points further than 1.65 sigmas away (includes 90% of distribution).
|
||
|
const float space_cutoff = 1.65 * sigma_space;
|
||
|
const float space_exp_scale = -0.5f / (sigma_space * sigma_space);
|
||
|
|
||
|
// Copy saliency points.
|
||
|
const int num_frames = saliency_point_list->size();
|
||
|
std::vector<SalientPointFrame> points(num_frames + 2 * time_radius);
|
||
|
|
||
|
for (int k = 0; k < saliency_point_list->size(); ++k) {
|
||
|
points[time_radius + k].CopyFrom(*(*saliency_point_list)[k]);
|
||
|
}
|
||
|
|
||
|
// Copy border.
|
||
|
std::copy(points.rbegin() + time_radius, points.rbegin() + 2 * time_radius,
|
||
|
points.end() - time_radius);
|
||
|
std::copy(points.begin() + time_radius, points.begin() + 2 * time_radius,
|
||
|
points.rend() - time_radius);
|
||
|
|
||
|
// Apply filter.
|
||
|
for (int i = time_radius; i < num_frames + time_radius; ++i) {
|
||
|
const int frame_idx = i - time_radius;
|
||
|
for (auto& sample_point :
|
||
|
*(*saliency_point_list)[frame_idx]->mutable_point()) {
|
||
|
Vector2_f point_sum(0, 0);
|
||
|
// Sum for left, bottom, right, top tuple.
|
||
|
Vector4_f bound_sum;
|
||
|
Vector3_f ellipse_sum(0, 0, 0); // Captures major, minor and angle.
|
||
|
float weight_sum = 0;
|
||
|
float filter_sum = 0;
|
||
|
|
||
|
const float sample_angle = sample_point.angle();
|
||
|
for (int k = i - time_radius, time_idx = 0; k <= i + time_radius;
|
||
|
++k, ++time_idx) {
|
||
|
for (const auto& test_point : points[k].point()) {
|
||
|
const float diff = std::hypot(
|
||
|
test_point.norm_point_y() - sample_point.norm_point_y(),
|
||
|
test_point.norm_point_x() - sample_point.norm_point_x());
|
||
|
if (diff > space_cutoff) {
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
const float weight = time_weights[time_idx] * test_point.weight() *
|
||
|
std::exp(diff * diff * space_exp_scale);
|
||
|
|
||
|
filter_sum += weight;
|
||
|
point_sum +=
|
||
|
Vector2_f(test_point.norm_point_x(), test_point.norm_point_y()) *
|
||
|
weight;
|
||
|
bound_sum += Vector4_f(test_point.left(), test_point.bottom(),
|
||
|
test_point.right(), test_point.top()) *
|
||
|
weight;
|
||
|
weight_sum += test_point.weight() * weight;
|
||
|
|
||
|
// Ensure test_point and sample are less than pi / 2 apart.
|
||
|
float test_angle = test_point.angle();
|
||
|
if (fabs(test_angle - sample_angle) > M_PI / 2) {
|
||
|
if (sample_angle > M_PI / 2) {
|
||
|
test_angle += M_PI;
|
||
|
} else {
|
||
|
test_angle -= M_PI;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
ellipse_sum += Vector3_f(test_point.norm_major(),
|
||
|
test_point.norm_minor(), test_angle) *
|
||
|
weight;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (filter_sum > 0) {
|
||
|
const float inv_filter_sum = 1.0f / filter_sum;
|
||
|
point_sum *= inv_filter_sum;
|
||
|
bound_sum *= inv_filter_sum;
|
||
|
weight_sum *= inv_filter_sum;
|
||
|
ellipse_sum *= inv_filter_sum;
|
||
|
}
|
||
|
|
||
|
sample_point.set_norm_point_x(point_sum.x());
|
||
|
sample_point.set_norm_point_y(point_sum.y());
|
||
|
sample_point.set_left(bound_sum.x());
|
||
|
sample_point.set_bottom(bound_sum.y());
|
||
|
sample_point.set_right(bound_sum.z());
|
||
|
sample_point.set_top(bound_sum.w());
|
||
|
|
||
|
sample_point.set_weight(weight_sum);
|
||
|
sample_point.set_norm_major(ellipse_sum.x());
|
||
|
sample_point.set_norm_minor(ellipse_sum.y());
|
||
|
sample_point.set_angle(ellipse_sum.z());
|
||
|
|
||
|
if (sample_point.angle() > M_PI) {
|
||
|
sample_point.set_angle(sample_point.angle() - M_PI);
|
||
|
}
|
||
|
if (sample_point.angle() < 0) {
|
||
|
sample_point.set_angle(sample_point.angle() + M_PI);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void MotionSaliency::CollapseMotionSaliency(
|
||
|
const SaliencyPointList& input_saliency, const Vector4_f& bounds,
|
||
|
SaliencyPointList* output_saliency) {
|
||
|
CHECK(output_saliency);
|
||
|
output_saliency->clear();
|
||
|
output_saliency->resize(input_saliency.size());
|
||
|
|
||
|
for (int f = 0; f < input_saliency.size(); ++f) { // traverse frames.
|
||
|
Vector2_f mean_saliency(0, 0);
|
||
|
float weight_sum = 0;
|
||
|
for (const auto& salient_point : input_saliency[f].point()) {
|
||
|
mean_saliency +=
|
||
|
Vector2_f(salient_point.norm_point_x(), salient_point.norm_point_y());
|
||
|
weight_sum += 1;
|
||
|
}
|
||
|
|
||
|
if (weight_sum > 0) {
|
||
|
SalientPoint* collapsed = (*output_saliency)[f].add_point();
|
||
|
collapsed->set_norm_point_x(mean_saliency.x() / weight_sum);
|
||
|
collapsed->set_norm_point_y(mean_saliency.y() / weight_sum);
|
||
|
collapsed->set_left(bounds.x());
|
||
|
collapsed->set_bottom(bounds.y());
|
||
|
collapsed->set_right(bounds.z());
|
||
|
collapsed->set_top(bounds.w());
|
||
|
collapsed->set_weight(1.0f);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
namespace {
|
||
|
|
||
|
// Describes feature mode for a feature in a RegionFlowFeatureList stored
|
||
|
// at index feature_idx.
|
||
|
struct FeatureMode {
|
||
|
Vector2_f location;
|
||
|
float irls_weight;
|
||
|
int feature_idx;
|
||
|
int mode_bin;
|
||
|
};
|
||
|
|
||
|
// Determines mode for each feature in feature_view.
|
||
|
// Returns modes as list of pointers in mode_ptrs. Actual modes are stored
|
||
|
// binned into a grid of equal size as the passed FeatureGrid
|
||
|
// (bins of size grid_resolution x grid_resolution).
|
||
|
void DetermineFeatureModes(
|
||
|
const FeatureFrame<MotionSaliency::SalientLocation>& features,
|
||
|
float grid_resolution, const Vector2_i& grid_dims, float band_width,
|
||
|
const FeatureGrid<MotionSaliency::SalientLocation>& feature_grid,
|
||
|
const std::vector<std::vector<int>>& feature_taps,
|
||
|
const std::vector<float>& space_lut, float space_scale,
|
||
|
std::vector<std::list<FeatureMode>>* mode_grid,
|
||
|
std::vector<FeatureMode*>* mode_ptrs) {
|
||
|
CHECK(mode_grid);
|
||
|
CHECK(mode_ptrs);
|
||
|
const int num_features = features.size();
|
||
|
mode_ptrs->reserve(num_features);
|
||
|
|
||
|
const float grid_scale = 1.0f / grid_resolution;
|
||
|
int feature_idx = 0;
|
||
|
const int kMaxIter = 100;
|
||
|
// Set convergence radius to 0.1% of bandwidth.
|
||
|
const float sq_conv_radius = band_width * band_width * 1e-6f;
|
||
|
for (const auto& feature_ptr : features) {
|
||
|
Vector2_f center = feature_ptr->pt;
|
||
|
int iter = 0;
|
||
|
for (; iter < kMaxIter; ++iter) {
|
||
|
const int bin_x = center.x() * grid_scale;
|
||
|
const int bin_y = center.y() * grid_scale;
|
||
|
const int grid_loc = bin_y * grid_dims.x() + bin_x;
|
||
|
|
||
|
float sum_weight = 0;
|
||
|
Vector2_f new_center;
|
||
|
for (const auto& bin : feature_taps[grid_loc]) {
|
||
|
for (const auto& test_feat_ptr : feature_grid[bin]) {
|
||
|
const float dist = (test_feat_ptr->pt - center).Norm();
|
||
|
const float weight = space_lut[static_cast<int>(dist * space_scale)] *
|
||
|
test_feat_ptr->weight;
|
||
|
sum_weight += weight;
|
||
|
new_center += weight * test_feat_ptr->pt;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (sum_weight > 0) {
|
||
|
new_center *= (1.0f / sum_weight);
|
||
|
if ((center - new_center).Norm2() < sq_conv_radius) {
|
||
|
center = new_center;
|
||
|
break;
|
||
|
} else {
|
||
|
center = new_center;
|
||
|
}
|
||
|
} else {
|
||
|
LOG(WARNING) << "No features found in band_width radius, "
|
||
|
<< "should not happen. ";
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
const int mode_bin_x = center.x() * grid_scale;
|
||
|
const int mode_bin_y = center.y() * grid_scale;
|
||
|
const int mode_grid_loc = mode_bin_y * grid_dims.x() + mode_bin_x;
|
||
|
FeatureMode mode{center, feature_ptr->weight, feature_idx, mode_grid_loc};
|
||
|
|
||
|
(*mode_grid)[mode_grid_loc].push_back(mode);
|
||
|
FeatureMode* added_mode = &(*mode_grid)[mode_grid_loc].back();
|
||
|
(*mode_ptrs).push_back(added_mode);
|
||
|
++feature_idx;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
} // namespace.
|
||
|
|
||
|
void MotionSaliency::SalientModeFinding(std::vector<SalientLocation>* locations,
|
||
|
std::vector<SalientMode>* modes) {
|
||
|
CHECK(modes);
|
||
|
CHECK(locations);
|
||
|
if (locations->empty()) {
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
// Scale band_width to image domain.
|
||
|
const float band_width =
|
||
|
hypot(frame_width_, frame_height_) * options_.mode_band_width();
|
||
|
|
||
|
// Select all salient locations with non-zero weight.
|
||
|
FeatureFrame<SalientLocation> salient_features;
|
||
|
salient_features.reserve(locations->size());
|
||
|
for (auto& loc : *locations) {
|
||
|
if (loc.weight > 1e-6) {
|
||
|
salient_features.push_back(&loc);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
const int num_features = salient_features.size();
|
||
|
if (num_features == 0) {
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
// Build feature grid according to bandwith.
|
||
|
std::vector<FeatureGrid<SalientLocation>> feature_grids;
|
||
|
std::vector<std::vector<int>> feature_taps;
|
||
|
|
||
|
// Guarantee at least 1.5 sigmas in each direction are captured with
|
||
|
// tap 3 filtering (86 % of the data).
|
||
|
const float grid_resolution = 1.5f * band_width;
|
||
|
Vector2_i grid_dims;
|
||
|
BuildFeatureGrid(
|
||
|
frame_width_, frame_height_, grid_resolution, {salient_features},
|
||
|
[](const SalientLocation& l) -> Vector2_f { return l.pt; }, &feature_taps,
|
||
|
nullptr, &grid_dims, &feature_grids);
|
||
|
|
||
|
// Just one frame input, expect one grid as output.
|
||
|
CHECK_EQ(1, feature_grids.size());
|
||
|
const auto& feature_grid = feature_grids[0];
|
||
|
|
||
|
// Setup Gaussian LUT for smoothing in space, using 2^10 discretization bins.
|
||
|
const int lut_bins = 1 << 10;
|
||
|
std::vector<float> space_lut(lut_bins);
|
||
|
|
||
|
// Using 3 tap smoothing, max distance is 2 bin diagonals.
|
||
|
// We use maximum of 2 * sqrt(2) * bin_radius plus 1% room in case maximum
|
||
|
// value is attained.
|
||
|
const float max_space_diff = sqrt(2.0) * 2.f * grid_resolution * 1.01f;
|
||
|
|
||
|
const float space_bin_size = max_space_diff / lut_bins;
|
||
|
const float space_scale = 1.0f / space_bin_size;
|
||
|
const float space_coeff = -0.5f / (band_width * band_width);
|
||
|
for (int i = 0; i < lut_bins; ++i) {
|
||
|
const float value = i * space_bin_size;
|
||
|
space_lut[i] = std::exp(value * value * space_coeff);
|
||
|
}
|
||
|
|
||
|
// Store modes for each grid bin (to be averaged later).
|
||
|
std::vector<std::list<FeatureMode>> mode_grid(grid_dims.x() * grid_dims.y());
|
||
|
std::vector<FeatureMode*> mode_ptrs;
|
||
|
|
||
|
DetermineFeatureModes(salient_features, grid_resolution, grid_dims,
|
||
|
band_width, feature_grid, feature_taps, space_lut,
|
||
|
space_scale, &mode_grid, &mode_ptrs);
|
||
|
|
||
|
// Read out modes, ordered by decreasing weight.
|
||
|
struct FeatureModeComparator {
|
||
|
bool operator()(const FeatureMode* mode_lhs,
|
||
|
const FeatureMode* mode_rhs) const {
|
||
|
return mode_lhs->irls_weight > mode_rhs->irls_weight;
|
||
|
}
|
||
|
};
|
||
|
|
||
|
// Sort pointers, to keep order immutable during flagging operations.
|
||
|
std::sort(mode_ptrs.begin(), mode_ptrs.end(), FeatureModeComparator());
|
||
|
|
||
|
for (int m = 0; m < mode_ptrs.size(); ++m) {
|
||
|
// We mark a mode as processed by assigning -1 to its index.
|
||
|
if (mode_ptrs[m]->feature_idx < 0) {
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
FeatureMode* mode = mode_ptrs[m];
|
||
|
|
||
|
// Average modes within band_width based on irls_weight * spatial weight.
|
||
|
double sum_weight = mode->irls_weight;
|
||
|
double mode_x = sum_weight * mode->location.x();
|
||
|
double mode_y = sum_weight * mode->location.y();
|
||
|
|
||
|
const Vector2_f& feat_loc = salient_features[mode->feature_idx]->pt;
|
||
|
double feat_x = sum_weight * feat_loc.x();
|
||
|
double feat_y = sum_weight * feat_loc.y();
|
||
|
double feat_xx = sum_weight * feat_loc.x() * feat_loc.x();
|
||
|
double feat_xy = sum_weight * feat_loc.x() * feat_loc.y();
|
||
|
double feat_yy = sum_weight * feat_loc.y() * feat_loc.y();
|
||
|
|
||
|
mode->feature_idx = -1; // Flag as processed, does not change order
|
||
|
// of traversal.
|
||
|
|
||
|
for (const auto& bin : feature_taps[mode->mode_bin]) {
|
||
|
for (auto& test_mode : mode_grid[bin]) {
|
||
|
if (test_mode.feature_idx >= 0) {
|
||
|
const float dist = (test_mode.location - mode->location).Norm();
|
||
|
if (dist <= band_width) {
|
||
|
const Vector2_f test_loc =
|
||
|
salient_features[test_mode.feature_idx]->pt;
|
||
|
const float weight =
|
||
|
space_lut[static_cast<int>(dist * space_scale)] *
|
||
|
test_mode.irls_weight;
|
||
|
|
||
|
sum_weight += weight;
|
||
|
mode_x += weight * test_mode.location.x();
|
||
|
mode_y += weight * test_mode.location.y();
|
||
|
|
||
|
const float test_loc_x_w = weight * test_loc.x();
|
||
|
const float test_loc_y_w = weight * test_loc.y();
|
||
|
feat_x += test_loc_x_w;
|
||
|
feat_y += test_loc_y_w;
|
||
|
|
||
|
feat_xx += test_loc_x_w * test_loc.x();
|
||
|
feat_xy += test_loc_x_w * test_loc.y();
|
||
|
feat_yy += test_loc_y_w * test_loc.y();
|
||
|
|
||
|
// Flag as processed, does not change order of traversal.
|
||
|
test_mode.feature_idx = -1;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (sum_weight >= options_.min_irls_mode_weight()) {
|
||
|
double inv_sum_weight = 1.0f / sum_weight;
|
||
|
mode_x *= inv_sum_weight;
|
||
|
mode_y *= inv_sum_weight;
|
||
|
feat_x *= inv_sum_weight;
|
||
|
feat_y *= inv_sum_weight;
|
||
|
feat_xx *= inv_sum_weight;
|
||
|
feat_xy *= inv_sum_weight;
|
||
|
feat_yy *= inv_sum_weight;
|
||
|
|
||
|
// Covariance matrix.
|
||
|
const float a = feat_xx - 2.0 * feat_x * mode_x + mode_x * mode_x;
|
||
|
const float bc =
|
||
|
feat_xy - feat_x * mode_y - feat_y * mode_x + mode_x * mode_y;
|
||
|
const float d = feat_yy - 2.0 * feat_y * mode_y + mode_y * mode_y;
|
||
|
|
||
|
Vector2_f axis_magnitude;
|
||
|
float angle;
|
||
|
if (!EllipseFromCovariance(a, bc, d, &axis_magnitude, &angle)) {
|
||
|
angle = 0;
|
||
|
axis_magnitude = Vector2_f(1, 1);
|
||
|
} else {
|
||
|
if (angle < 0) {
|
||
|
angle += M_PI;
|
||
|
}
|
||
|
CHECK_GE(angle, 0);
|
||
|
CHECK_LE(angle, M_PI + 1e-3);
|
||
|
}
|
||
|
|
||
|
SalientMode irls_mode;
|
||
|
irls_mode.location = Vector2_f(mode_x, mode_y);
|
||
|
irls_mode.assignment_weight = sum_weight;
|
||
|
irls_mode.axis_magnitude = axis_magnitude;
|
||
|
irls_mode.angle = angle;
|
||
|
modes->push_back(irls_mode);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Sort modes by descreasing weight.
|
||
|
struct ModeWeightCompare {
|
||
|
bool operator()(const SalientMode& lhs, const SalientMode& rhs) const {
|
||
|
return lhs.assignment_weight > rhs.assignment_weight;
|
||
|
}
|
||
|
};
|
||
|
|
||
|
std::sort(modes->begin(), modes->end(), ModeWeightCompare());
|
||
|
}
|
||
|
|
||
|
// Determines the salient frame for a list of SalientLocations by performing
|
||
|
// mode finding and scales each point based on frame size.
|
||
|
void MotionSaliency::DetermineSalientFrame(
|
||
|
std::vector<SalientLocation> locations, SalientPointFrame* salient_frame) {
|
||
|
CHECK(salient_frame);
|
||
|
|
||
|
std::vector<SalientMode> modes;
|
||
|
{
|
||
|
MEASURE_TIME << "Mode finding";
|
||
|
SalientModeFinding(&locations, &modes);
|
||
|
}
|
||
|
|
||
|
const float denom_x = 1.0f / frame_width_;
|
||
|
const float denom_y = 1.0f / frame_height_;
|
||
|
|
||
|
// Convert to salient points.
|
||
|
for (int mode_idx = 0,
|
||
|
mode_sz = std::min<int>(modes.size(), options_.num_top_irls_modes());
|
||
|
mode_idx < mode_sz; ++mode_idx) {
|
||
|
SalientPoint* pt = salient_frame->add_point();
|
||
|
pt->set_norm_point_x(modes[mode_idx].location.x());
|
||
|
pt->set_norm_point_y(modes[mode_idx].location.y());
|
||
|
pt->set_left(options_.bound_left());
|
||
|
pt->set_bottom(options_.bound_bottom());
|
||
|
pt->set_right(options_.bound_right());
|
||
|
pt->set_top(options_.bound_top());
|
||
|
|
||
|
pt->set_norm_major(modes[mode_idx].axis_magnitude.x());
|
||
|
pt->set_norm_minor(modes[mode_idx].axis_magnitude.y());
|
||
|
pt->set_angle(modes[mode_idx].angle);
|
||
|
pt->set_weight(modes[mode_idx].assignment_weight *
|
||
|
options_.saliency_weight());
|
||
|
|
||
|
ScaleSalientPoint(denom_x, denom_y, pt);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void ForegroundWeightsFromFeatures(const RegionFlowFeatureList& feature_list,
|
||
|
float foreground_threshold,
|
||
|
float foreground_gamma,
|
||
|
const CameraMotion* camera_motion,
|
||
|
std::vector<float>* weights) {
|
||
|
CHECK(weights != nullptr);
|
||
|
weights->clear();
|
||
|
|
||
|
constexpr float kEpsilon = 1e-4f;
|
||
|
|
||
|
CHECK_GT(foreground_threshold, 0.0f);
|
||
|
if (camera_motion) {
|
||
|
foreground_threshold *=
|
||
|
std::max(kEpsilon, InlierCoverage(*camera_motion, false));
|
||
|
}
|
||
|
|
||
|
const float weight_denom = 1.0f / foreground_threshold;
|
||
|
|
||
|
// Map weights to foreground measure and determine minimum irls weight.
|
||
|
for (const auto& feature : feature_list.feature()) {
|
||
|
// Skip marked outliers.
|
||
|
if (feature.irls_weight() == 0) {
|
||
|
weights->push_back(0.0f);
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
// Maps an irls_weight of magnitude weight_denom (from above) to zero,
|
||
|
// with values below weight_denom assigned linearly mapped (zero is mapped
|
||
|
// ot 1). Avoid mapping to zero as it used to mark outliers.
|
||
|
const float foreground_measure =
|
||
|
std::max(0.0f, 1.0f - feature.irls_weight() * weight_denom);
|
||
|
|
||
|
if (std::abs(foreground_gamma - 1.0f) < 1e-3f) {
|
||
|
weights->push_back(std::max(kEpsilon, foreground_measure));
|
||
|
} else {
|
||
|
weights->push_back(
|
||
|
std::max(kEpsilon, std::pow(foreground_measure, foreground_gamma)));
|
||
|
}
|
||
|
}
|
||
|
CHECK_EQ(feature_list.feature_size(), weights->size());
|
||
|
}
|
||
|
|
||
|
} // namespace mediapipe
|