464 lines
20 KiB
C
464 lines
20 KiB
C
|
// Copyright 2019 The MediaPipe Authors.
|
||
|
//
|
||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||
|
// you may not use this file except in compliance with the License.
|
||
|
// You may obtain a copy of the License at
|
||
|
//
|
||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||
|
//
|
||
|
// Unless required by applicable law or agreed to in writing, software
|
||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
|
// See the License for the specific language governing permissions and
|
||
|
// limitations under the License.
|
||
|
//
|
||
|
// Computes the RegionFlow for a set of frames.
|
||
|
// Specifically, extracts Harris-like features from each frame, tracks these
|
||
|
// between frames and regularizes the tracked features locally (outlier
|
||
|
// rejection) by leveraging fast per-frame segmentation.
|
||
|
// Optionally, features can be assigned to either foreground or background based
|
||
|
// on the computation of the fundamental matrix for a pair of frames.
|
||
|
//
|
||
|
// Basic usage:
|
||
|
// RegionFlowComputation flow_computation(RegionFlowComputationOptions(),
|
||
|
// frame_width,
|
||
|
// frame_height);
|
||
|
//
|
||
|
// std::vector<cv::Mat> input_images; // Supplied by caller.
|
||
|
// for (int i = 0; i < num_frames; ++i) {
|
||
|
// flow_computation.AddImage(input_images[i], /*timestamp_usec=*/0);
|
||
|
//
|
||
|
// // Result is owned by this caller.
|
||
|
// std::unique_ptr<RegionFlowFrame> result(
|
||
|
// flow_computation.RetrieveRegionFlow());
|
||
|
//
|
||
|
// // OR
|
||
|
// std::unique_ptr<RegionFlowFeatureList> result(
|
||
|
// flow_computation.RetrieveRegionFlowFeatureList(
|
||
|
// true, // Compute feature descriptor.
|
||
|
// false, // no match descriptor for this example.
|
||
|
// &input_images[i],
|
||
|
// nullptr));
|
||
|
//
|
||
|
// // Do your custom processing or pass on to MotionEstimation.
|
||
|
//
|
||
|
// }
|
||
|
|
||
|
#ifndef MEDIAPIPE_UTIL_TRACKING_REGION_FLOW_COMPUTATION_H_
|
||
|
#define MEDIAPIPE_UTIL_TRACKING_REGION_FLOW_COMPUTATION_H_
|
||
|
|
||
|
#include <deque>
|
||
|
#include <memory>
|
||
|
#include <unordered_map>
|
||
|
#include <vector>
|
||
|
|
||
|
#include "mediapipe/framework/port/integral_types.h"
|
||
|
#include "mediapipe/framework/port/opencv_core_inc.h"
|
||
|
#include "mediapipe/util/tracking/motion_models.pb.h"
|
||
|
#include "mediapipe/util/tracking/region_flow.h"
|
||
|
#include "mediapipe/util/tracking/region_flow.pb.h"
|
||
|
#include "mediapipe/util/tracking/region_flow_computation.pb.h"
|
||
|
|
||
|
namespace mediapipe {
// Forward declarations of the region flow result types that
// RegionFlowComputation hands back to its callers.
class RegionFlowFeatureList;
class RegionFlowFrame;
}  // namespace mediapipe
|
||
|
|
||
|
namespace mediapipe {
|
||
|
|
||
|
// Forward declaration; only used by pointer / as container element type in
// this header.
struct TrackedFeature;
// Flat list of tracked features.
using TrackedFeatureList = std::vector<TrackedFeature>;
// Forward declaration; declared a friend of RegionFlowComputation.
class MotionAnalysis;
|
||
|
|
||
|
class RegionFlowComputation {
|
||
|
public:
|
||
|
RegionFlowComputation(const RegionFlowComputationOptions& options,
|
||
|
int frame_width, int frame_height);
|
||
|
virtual ~RegionFlowComputation();
|
||
|
RegionFlowComputation(const RegionFlowComputation&) = delete;
|
||
|
RegionFlowComputation& operator=(const RegionFlowComputation&) = delete;
|
||
|
|
||
|
// Performs motion analysis on source w.r.t. to source passed in previous
|
||
|
// call. Therefore, first call will compute empty flow. If
|
||
|
// RegionFlowComputationOptions::frame_to_track := ftt > 0, motion analysis
|
||
|
// performed w.r.t. the previous ftt source passed via AddImage.
|
||
|
// Motion analysis uses grid-based regions to enforce locally consistent flow.
|
||
|
// Source is expected to be 3-channel RGB 8bit image (24bit in total), OR
|
||
|
// 1-channel Grayscale 8bit image, compatible with
|
||
|
// RegionFlowComputationOptions::ImageFormat.
|
||
|
// Pass the frame's timestamp to have it stored in the result or zero if not
|
||
|
// needed.
|
||
|
// Returns true on success, false otherwise.
|
||
|
virtual bool AddImage(const cv::Mat& source, int64 timestamp_usec);
|
||
|
|
||
|
// Same as above, but seed initial feature position in the matching frame
|
||
|
// with initial_transform.
|
||
|
virtual bool AddImageWithSeed(const cv::Mat& source, int64 timestamp_usec,
|
||
|
const Homography& initial_transform);
|
||
|
|
||
|
// Same as AddImage but also accepts an optional source_mask (pass empty
|
||
|
// cv::Mat to get the same behavior as AddImage). If non-empty, features are
|
||
|
// only extracted in regions where the mask value is > 0. Mask should be 8-bit
|
||
|
// grayscale of the same size as source, unless empty.
|
||
|
virtual bool AddImageWithMask(const cv::Mat& source,
|
||
|
const cv::Mat& source_mask,
|
||
|
int64 timestamp_usec);
|
||
|
|
||
|
// Call after AddImage* to retrieve last downscaled, grayscale image.
|
||
|
cv::Mat GetGrayscaleFrameFromResults();
|
||
|
|
||
|
// Returns result as RegionFlowFrame. Result is owned by caller.
|
||
|
// Will return NULL if called twice without AddImage* call.
|
||
|
virtual RegionFlowFrame* RetrieveRegionFlow();
|
||
|
|
||
|
// Returns result as RegionFlowFeatureList. Result is owned by caller.
|
||
|
// Will return NULL if called twice without AddImage* call.
|
||
|
// Computes optionally feature descriptors (if compute_feature_descriptor
|
||
|
// is set, in that case curr_color_image must not be NULL)
|
||
|
// and additionally matching descriptor (if compute_match_descriptor is set,
|
||
|
// in this case prev_color_image must not be NULL).
|
||
|
// Passed images should be in sync with those passed to AddImage, i.e.
|
||
|
// source in AddImage and parameter curr_color_image should refer to the same
|
||
|
// image.
|
||
|
virtual RegionFlowFeatureList* RetrieveRegionFlowFeatureList(
|
||
|
bool compute_feature_descriptor, bool compute_match_descriptor,
|
||
|
const cv::Mat* curr_color_image, // optional.
|
||
|
const cv::Mat* prev_color_image); // optional.
|
||
|
|
||
|
// Same as above, but returns specific tracked result from current frame C
|
||
|
// to C - track_index - 1.
|
||
|
virtual RegionFlowFeatureList* RetrieveMultiRegionFlowFeatureList(
|
||
|
int track_index, bool compute_feature_descriptor,
|
||
|
bool compute_match_descriptor,
|
||
|
const cv::Mat* curr_color_image, // optional.
|
||
|
const cv::Mat* prev_color_image); // optional.
|
||
|
|
||
|
// Returns result of a specific RegionFlowFrame in case
|
||
|
// RegionFlowComputationOptions::frames_to_track() > 1. Result is owned by
|
||
|
// caller.
|
||
|
virtual RegionFlowFrame* RetrieveMultiRegionFlow(int frame);
|
||
|
|
||
|
// Resets computation to ignore any previously added frames. Next frame passed
|
||
|
// via AddImageXXX() routines will be treated as the first frame in the
|
||
|
// sequence.
|
||
|
virtual void Reset();
|
||
|
|
||
|
// Creates synthetic tracks with feature points in a grid with zero motion
|
||
|
// w.r.t. prev frame. Points are located at the center of each grid. Step size
|
||
|
// is fractional w.r.t. image size.
|
||
|
static void ZeroMotionGridFeatures(int frame_width, int frame_height,
|
||
|
float frac_grid_step_x,
|
||
|
float frac_grid_step_y,
|
||
|
RegionFlowFeatureList* result);
|
||
|
|
||
|
// Returns densly sampled motions zero motion features.
|
||
|
// Features are centered in a box of size frac_diameter that is shifted by
|
||
|
// frac_steps_x * frame_width and frac_steps_y * frame_height.
|
||
|
static void DenseZeroMotionSamples(int frame_width, int frame_height,
|
||
|
float frac_diameter, float frac_steps_x,
|
||
|
float frac_steps_y,
|
||
|
RegionFlowFeatureList* result);
|
||
|
|
||
|
private:
|
||
|
typedef std::vector<std::unique_ptr<RegionFlowFeatureList>>
|
||
|
RegionFlowFeatureListVector;
|
||
|
|
||
|
typedef std::vector<TrackedFeature*> TrackedFeatureView;
|
||
|
|
||
|
// Indexed via grid bin, each bin contains list of its corresponding features.
|
||
|
typedef std::vector<TrackedFeatureView> TrackedFeatureMap;
|
||
|
|
||
|
struct FrameTrackingData;
|
||
|
struct LongTrackData;
|
||
|
struct ORBFeatureDescriptors;
|
||
|
|
||
|
// Implementation function to retrieve the i-th RegionFlowFeatureList
|
||
|
// (specified track_index). Specifically, i-th feature list, denotes the flow
|
||
|
// from the current frame N to the previous frame N - 1 - track_index.
|
||
|
// Casts arguments to cv.
|
||
|
virtual std::unique_ptr<RegionFlowFeatureList>
|
||
|
RetrieveRegionFlowFeatureListImpl(int track_index,
|
||
|
bool compute_feature_descriptor,
|
||
|
bool compute_match_descriptor,
|
||
|
const cv::Mat* curr_color_image,
|
||
|
const cv::Mat* prev_color_image);
|
||
|
|
||
|
// Initializes the FrameTrackingData's members from source and source_mask.
|
||
|
// Returns true on success.
|
||
|
bool InitFrame(const cv::Mat& source, const cv::Mat& source_mask,
|
||
|
FrameTrackingData* data);
|
||
|
|
||
|
// Adds image to the current buffer and starts tracking.
|
||
|
bool AddImageAndTrack(const cv::Mat& source, const cv::Mat& source_mask,
|
||
|
int64 timestamp_usec,
|
||
|
const Homography& initial_transform);
|
||
|
|
||
|
// Computes *change* in visual difference between adjacent frames. Normalized
|
||
|
// w.r.t. number of channels and number of pixels. For this to be meaningful
|
||
|
// it is expected that passed FrameTrackingData's are exactly one frame apart
|
||
|
// (CHECKED).
|
||
|
float ComputeVisualConsistency(FrameTrackingData* previous,
|
||
|
FrameTrackingData* current) const;
|
||
|
|
||
|
// Computes flow regularized based on regions and other options, from frame
|
||
|
// index "from" to index "to", specified relative to current frame, i.e. index
|
||
|
// of current frame = 0, prev frame = -1, next frame = 1, etc. Set invert_flow
|
||
|
// to true if the flow should be inverted after tracking.
|
||
|
// Optionally, can input the previous result to link features via ids,
|
||
|
// effectively creating long feature tracks. In this case you usually want to
|
||
|
// request the current result (same as returned in feature_list) in form of
|
||
|
// a TrackedFeatureList.
|
||
|
void ComputeRegionFlow(int from, int to, bool synthetic_tracks,
|
||
|
bool invert_flow,
|
||
|
const TrackedFeatureList* prev_result, // optional.
|
||
|
TrackedFeatureList* curr_result, // optional.
|
||
|
RegionFlowFeatureList* feature_list);
|
||
|
|
||
|
// Gain corrects input frame w.r.t. reference frame. Returns true iff gain
|
||
|
// correction succeeds. If false, calibrated_frame is left untouched.
|
||
|
bool GainCorrectFrame(const cv::Mat& reference_frame,
|
||
|
const cv::Mat& input_frame, float reference_mean,
|
||
|
float input_mean, cv::Mat* calibrated_frame) const;
|
||
|
|
||
|
// Feature extraction method.
|
||
|
// Expects as input an image pyramid of gray scale image (each subsequent
|
||
|
// level should be downsampled by a factor of 2 (always rounding up), CHECKED
|
||
|
// against).
|
||
|
// For each level extracts corner features across a grid by considering all
|
||
|
// locations that have a corner response corner repsonse above
|
||
|
// options_.feature_quality_level() * maximum within the grid bin.
|
||
|
// Features with high corner response are output first (but corner response is
|
||
|
// not necessarily monotonic). Feature locations are binned into mask
|
||
|
// (via downscaling by mask_scale), using a 5x5 patch, to discarded features
|
||
|
// that are too close to each other.
|
||
|
// Features and corner responses are added to the corresponding vectors in
|
||
|
// data, i.e. passed data is not cleared and expected to be initialized.
|
||
|
virtual void AdaptiveGoodFeaturesToTrack(
|
||
|
const std::vector<cv::Mat>& extraction_pyramid, int max_features,
|
||
|
float mask_scale, cv::Mat* mask, FrameTrackingData* data);
|
||
|
|
||
|
// Uses prev_result to remove all features that are not present in data.
|
||
|
// Uses track_ids, i.e. only works with long feature processing.
|
||
|
void RemoveAbsentFeatures(const TrackedFeatureList& prev_result,
|
||
|
FrameTrackingData* data);
|
||
|
|
||
|
// Remove features in data that lie outside the feature extraction mask for
|
||
|
// that frame.
|
||
|
void RemoveFeaturesOutsideMask(FrameTrackingData* data);
|
||
|
|
||
|
// Extracts features for tracking from frame corresponding to data.
|
||
|
// Optionally, may reuse tracked features if available, based on options.
|
||
|
// Optionally, if new features are extracted, can use feature_list to mask out
|
||
|
// feature locations that should not be extracted again.
|
||
|
void ExtractFeatures(const TrackedFeatureList* prev_result,
|
||
|
FrameTrackingData* data);
|
||
|
|
||
|
// Performs inplace feature selection, by evaluating the range
|
||
|
// [0, data->features.size()] via an Evaluator implementing
|
||
|
// [](int) -> bool. Only feature indices for which eval returns true are kept
|
||
|
// (using in place moves) the remainder is discarded. Applies moves operation
|
||
|
// to FrameTrackingData's feature, track_idx, feature_source_map and
|
||
|
// neighborhoods. Also applies moves to any vector<int> and vector<float>
|
||
|
// that can be optionally supplied.
|
||
|
// Note: All vectors are assumed to of the same size (checked in debug
|
||
|
// mode).
|
||
|
template <class Evaluator>
|
||
|
int InplaceFeatureSelection(FrameTrackingData* data,
|
||
|
std::vector<std::vector<int>*> int_vecs,
|
||
|
std::vector<std::vector<float>*> float_vecs,
|
||
|
const Evaluator& eval);
|
||
|
|
||
|
// Tracks features between two frames (from -> to). Operates on internal data
|
||
|
// structure FrameTrackingData which stores all frame information relavant for
|
||
|
// tracking.
|
||
|
//
|
||
|
// If gain_correct is true, tracking is carried out between the "from" and the
|
||
|
// gain-corrected "to" image. It is also an output variable indicating whether
|
||
|
// gain correction succeeded or failed.
|
||
|
//
|
||
|
// Updates internal data structure, so any computation can be reused in
|
||
|
// successive calls to feature extraction or tracking.
|
||
|
void TrackFeatures(FrameTrackingData* from_data_ptr,
|
||
|
FrameTrackingData* to_data_ptr, bool* gain_correct,
|
||
|
float* frac_long_features_rejected,
|
||
|
TrackedFeatureList* results);
|
||
|
|
||
|
// Wide-baseline version of above function, using feature descriptor matching
|
||
|
// instead of tracking.
|
||
|
void WideBaselineMatchFeatures(FrameTrackingData* from_data_ptr,
|
||
|
FrameTrackingData* to_data_ptr,
|
||
|
TrackedFeatureList* results);
|
||
|
|
||
|
// Fits affine model to TrackedFeatureList via direct call of
|
||
|
// MotionEstimation::EstimateAffineModelIRLS.
|
||
|
AffineModel AffineModelFromFeatures(TrackedFeatureList* features) const;
|
||
|
|
||
|
// Creates synthetic tracks with feature points in a grid with zero motion
|
||
|
// w.r.t. prev frame. Points are located at the center of each grid. Step size
|
||
|
// is fractional w.r.t. image size.
|
||
|
// Returns minimum distance from border across all features.
|
||
|
static int ZeroMotionGridTracks(int frame_width, int frame_height,
|
||
|
float frac_grid_step_x,
|
||
|
float frac_grid_step_y,
|
||
|
TrackedFeatureList* results);
|
||
|
|
||
|
// Computes region flow using a rectangular grid of square regions.
|
||
|
void ComputeBlockBasedFlow(TrackedFeatureList* feature_list,
|
||
|
TrackedFeatureView* inlier_features) const;
|
||
|
|
||
|
// Initializes feature locations for FrameTrackingData at index to,
|
||
|
// from resulting tracks in from.
|
||
|
void InitializeFeatureLocationsFromPreviousResult(int from, int to);
|
||
|
|
||
|
// Initializes feature locations in "to" from initial transform by applying
|
||
|
// it to every feature of "from".
|
||
|
void InitializeFeatureLocationsFromTransform(int from, int to,
|
||
|
const Homography& transform);
|
||
|
|
||
|
// Enforces a translational model within each region, only retaining inliers
|
||
|
// that are output to inliers.
|
||
|
void DetermineRegionFlowInliers(const TrackedFeatureMap& region_feature_map,
|
||
|
TrackedFeatureView* inliers) const;
|
||
|
|
||
|
// Determines number of minimum inliers based on absolute and relative
|
||
|
// thresholds.
|
||
|
int GetMinNumFeatureInliers(
|
||
|
const TrackedFeatureMap& region_feature_map) const;
|
||
|
|
||
|
// Internal conversion function from a feature list to corresponding frame.
|
||
|
void RegionFlowFeatureListToRegionFlow(
|
||
|
const RegionFlowFeatureList& feature_list, RegionFlowFrame* frame) const;
|
||
|
|
||
|
// Initializes all members except actual features in a RegionFlowFeatureList.
|
||
|
void InitializeRegionFlowFeatureList(
|
||
|
RegionFlowFeatureList* region_flow_feature_list) const;
|
||
|
|
||
|
// Converts TrackedFeatureView to RegionFlowFeatureList, flattening over
|
||
|
// all bins. Returns average motion magnitude.
|
||
|
// Optionally TrackedFeature's corresponding to each feature output in
|
||
|
// region_flow_feature_list can be recorded via flattened_feature_list.
|
||
|
float TrackedFeatureViewToRegionFlowFeatureList(
|
||
|
const TrackedFeatureView& region_feature_view,
|
||
|
TrackedFeatureList* flattened_feature_list,
|
||
|
RegionFlowFeatureList* region_flow_feature_list) const;
|
||
|
|
||
|
// Determines if sufficient (spatially distributed) features are available.
|
||
|
bool HasSufficientFeatures(const RegionFlowFeatureList& feature_list);
|
||
|
|
||
|
// Returns number of required pyramid levels to track the specified distance.
|
||
|
int PyramidLevelsFromTrackDistance(float track_distance);
|
||
|
|
||
|
// Returns blur score (inverse of average corner measure) for input image.
|
||
|
// The higher the value the blurrier the frame.
|
||
|
float ComputeBlurScore(const cv::Mat& image);
|
||
|
|
||
|
// Computes binary mask of pixels, for which the corner score (passed in
|
||
|
// min_eig_vals) can be used to as a measure to quanity the amount of blur.
|
||
|
// For pixelx not part of the mask the corner score is not a reliable measure
|
||
|
// to quanity blur. For example, discards over-exposed regions and regions
|
||
|
// that do not have sufficient cornerness.
|
||
|
// Note: Modifies the corner values!
|
||
|
void ComputeBlurMask(const cv::Mat& input, cv::Mat* min_eig_vals,
|
||
|
cv::Mat* mask);
|
||
|
|
||
|
// Appends features in a sorted manner (by pointer location) while discarding
|
||
|
// duplicates.
|
||
|
void AppendUniqueFeaturesSorted(const TrackedFeatureView& to_be_added,
|
||
|
TrackedFeatureView* features) const;
|
||
|
|
||
|
void GetFeatureTrackInliers(bool skip_estimation,
|
||
|
TrackedFeatureList* features,
|
||
|
TrackedFeatureView* inliers) const;
|
||
|
|
||
|
bool IsVerifyLongFeatures() const {
|
||
|
return long_track_data_ != nullptr && options_.verify_long_features();
|
||
|
}
|
||
|
|
||
|
int DownsampleWidth() const { return frame_width_; }
|
||
|
int DownsampleHeight() const { return frame_height_; }
|
||
|
|
||
|
// Returns 1.0 / scale that is being applied to the features for downscaling.
|
||
|
float DownsampleScale() const { return downsample_scale_; }
|
||
|
|
||
|
private:
|
||
|
RegionFlowComputationOptions options_;
|
||
|
|
||
|
// Frame width and height after downsampling.
|
||
|
int frame_width_;
|
||
|
int frame_height_;
|
||
|
|
||
|
// Number of frames w.r.t each frame is tracked.
|
||
|
int frames_to_track_;
|
||
|
// Maximum length of long feature tracks in frames.
|
||
|
int max_long_track_length_;
|
||
|
|
||
|
// Original frame width and height.
|
||
|
int original_width_;
|
||
|
int original_height_;
|
||
|
|
||
|
// Scale and state of downsampling.
|
||
|
float downsample_scale_;
|
||
|
bool use_downsampling_;
|
||
|
|
||
|
int pyramid_levels_;
|
||
|
int extraction_levels_;
|
||
|
|
||
|
int frame_num_ = 0;
|
||
|
int max_features_ = 0;
|
||
|
float curr_blur_score_ = 0;
|
||
|
// Moving average of number of features across recently computed tracks.
|
||
|
float curr_num_features_avg_ = 0;
|
||
|
|
||
|
// Count used to generate unique feature ids.
|
||
|
int feature_count_ = 0;
|
||
|
|
||
|
// List of RegionFlow frames of size options_.frames_to_track.
|
||
|
RegionFlowFeatureListVector region_flow_results_;
|
||
|
|
||
|
// Gain adapted version.
|
||
|
std::unique_ptr<cv::Mat> gain_image_;
|
||
|
std::unique_ptr<cv::Mat> gain_pyramid_;
|
||
|
|
||
|
// Temporary buffers.
|
||
|
std::unique_ptr<cv::Mat> corner_values_;
|
||
|
std::unique_ptr<cv::Mat> corner_filtered_;
|
||
|
std::unique_ptr<cv::Mat> corner_mask_;
|
||
|
|
||
|
std::unique_ptr<cv::Mat> curr_color_image_;
|
||
|
|
||
|
// Temporary images for feature extraction.
|
||
|
std::unique_ptr<cv::Mat> feature_tmp_image_1_;
|
||
|
std::unique_ptr<cv::Mat> feature_tmp_image_2_;
|
||
|
|
||
|
std::vector<uint8> feature_status_; // Indicates if point could be
|
||
|
// tracked.
|
||
|
std::vector<float> feature_track_error_; // Patch-based error.
|
||
|
|
||
|
// Circular queue to buffer tracking data.
|
||
|
std::deque<std::unique_ptr<FrameTrackingData>> data_queue_;
|
||
|
|
||
|
// Global settings for block based flow.
|
||
|
int block_width_;
|
||
|
int block_height_;
|
||
|
int block_levels_;
|
||
|
|
||
|
// Stores average flow magnitudes for recently processed frames.
|
||
|
std::deque<float> flow_magnitudes_;
|
||
|
|
||
|
// Records data for long feature tracks.
|
||
|
std::unique_ptr<LongTrackData> long_track_data_;
|
||
|
|
||
|
bool use_cv_tracking_ = false;
|
||
|
|
||
|
// Counter used for controlling how ofter do we run descriptor extraction.
|
||
|
// Count from 0 to options_.extract_descriptor_every_n_frame() - 1.
|
||
|
// Extract descriptors only when counter == 0.
|
||
|
int cnt_extract_descriptors_ = 0;
|
||
|
|
||
|
friend class MotionAnalysis;
|
||
|
};
|
||
|
|
||
|
} // namespace mediapipe
|
||
|
|
||
|
#endif // MEDIAPIPE_UTIL_TRACKING_REGION_FLOW_COMPUTATION_H_
|