649 lines
		
	
	
		
			27 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			649 lines
		
	
	
		
			27 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| // Copyright 2019 The MediaPipe Authors.
 | |
| //
 | |
| // Licensed under the Apache License, Version 2.0 (the "License");
 | |
| // you may not use this file except in compliance with the License.
 | |
| // You may obtain a copy of the License at
 | |
| //
 | |
| //      http://www.apache.org/licenses/LICENSE-2.0
 | |
| //
 | |
| // Unless required by applicable law or agreed to in writing, software
 | |
| // distributed under the License is distributed on an "AS IS" BASIS,
 | |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | |
| // See the License for the specific language governing permissions and
 | |
| // limitations under the License.
 | |
| 
 | |
| #ifndef MEDIAPIPE_UTIL_TRACKING_TRACKING_H_
 | |
| #define MEDIAPIPE_UTIL_TRACKING_TRACKING_H_
 | |
| 
 | |
| // Performs tracking via rectangular regions (MotionBoxes) from pre-initialized
 | |
| // positions, using metadata from tracked features (TrackingData converted to
 | |
| // MotionVectorFrames), forward and backward in time.
 | |
| 
 | |
| #include <deque>
 | |
| #include <tuple>
 | |
| #include <unordered_map>
 | |
| #include <unordered_set>
 | |
| #include <vector>
 | |
| 
 | |
| #include "absl/container/flat_hash_set.h"
 | |
| #include "mediapipe/framework/port/vector.h"
 | |
| #include "mediapipe/util/tracking/flow_packager.pb.h"
 | |
| #include "mediapipe/util/tracking/motion_models.h"
 | |
| #include "mediapipe/util/tracking/motion_models.pb.h"
 | |
| #include "mediapipe/util/tracking/tracking.pb.h"
 | |
| 
 | |
| namespace mediapipe {
 | |
| 
 | |
| // Useful helper functions.
 | |
| //
 | |
| // Clamps values to be within interval [left, right].
 | |
| inline float Clamp(float value, float left, float right) {
 | |
|   return value < left ? left : (value > right ? right : value);
 | |
| }
 | |
| 
 | |
| // Standard linear interpolation function.
 | |
| template <class T>
 | |
| T Lerp(T a, T b, float alpha) {
 | |
|   return static_cast<T>(a * (1.0f - alpha) + b * alpha);
 | |
| }
 | |
| 
 | |
| // Approximates sigmoid function with a linear ramp, mapping
 | |
| // x <= lhs to 0, x >= rhs to 1 (for lhs < rhs) linear in between interval
 | |
| // [lhs, rhs]. If lhs > rhs, roles are reversed.
 | |
| inline float LinearRamp(float value, float lhs, float rhs) {
 | |
|   return Clamp((value - lhs) / (rhs - lhs), 0, 1);
 | |
| }
 | |
| 
 | |
| inline Vector2_f MotionBoxPosition(const MotionBoxState& state) {
 | |
|   return Vector2_f(state.pos_x(), state.pos_y());
 | |
| }
 | |
| 
 | |
| inline void SetMotionBoxPosition(const Vector2_f& pos, MotionBoxState* state) {
 | |
|   state->set_pos_x(pos.x());
 | |
|   state->set_pos_y(pos.y());
 | |
| }
 | |
| 
 | |
| // TODO: this needs to be changed for quad
 | |
| inline Vector2_f MotionBoxSize(const MotionBoxState& state) {
 | |
|   return Vector2_f(state.width(), state.height());
 | |
| }
 | |
| 
 | |
| inline void SetMotionBoxSize(const Vector2_f& size, MotionBoxState* state) {
 | |
|   state->set_width(size.x());
 | |
|   state->set_height(size.y());
 | |
| }
 | |
| 
 | |
| inline Vector2_f MotionBoxCenter(const MotionBoxState& state) {
 | |
|   return MotionBoxPosition(state) + 0.5f * MotionBoxSize(state);
 | |
| }
 | |
| 
 | |
| inline Vector2_f InlierCenter(const MotionBoxState& state) {
 | |
|   return Vector2_f(state.inlier_center_x(), state.inlier_center_y());
 | |
| }
 | |
| 
 | |
| inline Vector2_f MotionBoxVelocity(const MotionBoxState& state) {
 | |
|   return Vector2_f(state.dx(), state.dy());
 | |
| }
 | |
| 
 | |
| inline void SetMotionBoxVelocity(const Vector2_f& velo, MotionBoxState* state) {
 | |
|   state->set_dx(velo.x());
 | |
|   state->set_dy(velo.y());
 | |
| }
 | |
| 
 | |
| // Derive normalization factor from image aspect ratio so that the scale for the
 | |
| // longer edge is 1. scale will be reversed if `invert` is true.
 | |
| void ScaleFromAspect(float aspect, bool invert, float* scale_x, float* scale_y);
 | |
| 
 | |
| // Returns 4 corners of the MotionBox as top_left, bottom_left, bottom_right
 | |
| // and top_right.  Applies 2D scaling prior to rotation, which is necessary to
 | |
| // preserve orthogonality of the rotation if the scaling is not isotropic.
 | |
| std::array<Vector2_f, 4> MotionBoxCorners(
 | |
|     const MotionBoxState& state,
 | |
|     const Vector2_f& scaling = Vector2_f(1.0f, 1.0f));
 | |
| 
 | |
| // Computes corresponding line equations for MotionBoxCorners.
 | |
| // Output line equations on 4 sides.
 | |
| // Returns true if box is normal, false if we encounter abnormal box which
 | |
| // leads to numerical problems.
 | |
| // Applies 2D scaling prior to rotation, which is necessary to
 | |
| // preserve orthogonality of the rotation if the scaling is not isotropic.
 | |
| bool MotionBoxLines(const MotionBoxState& state, const Vector2_f& scaling,
 | |
|                     std::array<Vector3_f, 4>* box_lines);
 | |
| 
 | |
| // Returns top-left and bottom right corner of the bounding box
 | |
| // of the MotionBoxState.
 | |
| void MotionBoxBoundingBox(const MotionBoxState& state, Vector2_f* top_left,
 | |
|                           Vector2_f* bottom_right);
 | |
| 
 | |
| // Adds all inliers from state to the inlier map (as id, score) tuple.
 | |
| // If id already exist, score is updated to be the maximum of current and
 | |
| // existing score.
 | |
| inline void MotionBoxInliers(const MotionBoxState& state,
 | |
|                              std::unordered_map<int, int>* inliers) {
 | |
|   CHECK(inliers);
 | |
|   const int num_inliers = state.inlier_ids_size();
 | |
|   DCHECK_EQ(num_inliers, state.inlier_length_size());
 | |
| 
 | |
|   for (int k = 0; k < num_inliers; ++k) {
 | |
|     (*inliers)[state.inlier_ids(k)] =
 | |
|         std::max<int>((*inliers)[state.inlier_ids(k)], state.inlier_length(k));
 | |
|   }
 | |
| }
 | |
| 
 | |
| // Adds all outliers from state to the outlier map.
 | |
| inline void MotionBoxOutliers(const MotionBoxState& state,
 | |
|                               std::unordered_set<int>* outliers) {
 | |
|   for (int id : state.outlier_ids()) {
 | |
|     outliers->insert(id);
 | |
|   }
 | |
| }
 | |
| 
 | |
| // Returns inlier locations from state (normalized in [0, 1] domain).
 | |
| void MotionBoxInlierLocations(const MotionBoxState& state,
 | |
|                               std::vector<Vector2_f>* inlier_pos);
 | |
| // Same for outlier positions.
 | |
| void MotionBoxOutlierLocations(const MotionBoxState& state,
 | |
|                                std::vector<Vector2_f>* outlier_pos);
 | |
| 
 | |
| // Get corners of rotated rectangle. Note that the quad component in
 | |
| // MotionBoxState is not used in this function. Only the rotated rectangle
 | |
| // is used.
 | |
| // Inputs:
 | |
| //   -- state: the MotionBoxState where we extract the rotated rectangle
 | |
| //   -- scaling: additional scaling we apply on x and y axis
 | |
| // Output:
 | |
| //   corners in counter-clockwise order
 | |
| std::array<Vector2_f, 4> GetCornersOfRotatedRect(const MotionBoxState& state,
 | |
|                                                  const Vector2_f& scaling);
 | |
| 
 | |
| // Use the position, width, and height in MotionBoxState to initialize
 | |
| // the quad. Only use it when you want to get homography for tracking.
 | |
| void InitializeQuadInMotionBoxState(MotionBoxState* state);
 | |
| 
 | |
| // Initializes inliers and outliers related fields in MotionBoxState from
 | |
| // TrackingData. The box or quad position will be read from `state` so they need
 | |
| // to be set beforehand.
 | |
| void InitializeInliersOutliersInMotionBoxState(const TrackingData& tracking,
 | |
|                                                MotionBoxState* state);
 | |
| 
 | |
| // Initializes pnp_homography field in MotionBoxState using perspective
 | |
| // transform between a physical rectangle with specified aspect ratio and a
 | |
| // screen quad.
 | |
| void InitializePnpHomographyInMotionBoxState(
 | |
|     const TrackingData& tracking, const TrackStepOptions& track_step_options,
 | |
|     MotionBoxState* state);
 | |
| 
 | |
| // Represents the motion of a feature at pos between frames, differentiating
 | |
| // object from background motion (supplied via a MotionVectorFrame).
 | |
| struct MotionVector {
 | |
|   MotionVector() : pos(0, 0), background(0, 0), object(0, 0) {}
 | |
| 
 | |
|   MotionVector(const Vector2_f& pos_, const Vector2_f& background_,
 | |
|                const Vector2_f& object_)
 | |
|       : pos(pos_), background(background_), object(object_) {}
 | |
| 
 | |
|   Vector2_f Location() const { return pos; }
 | |
|   Vector2_f MatchLocation() const { return pos + background + object; }
 | |
|   Vector2_f Motion() const { return background + object; }
 | |
| 
 | |
|   // Position of the feature in normalized domain [0, 1].
 | |
|   Vector2_f pos;
 | |
|   // Motion due to background (i.e. camera motion).
 | |
|   Vector2_f background;
 | |
|   // Motion due to foreground (i.e. object motion in addition to background).
 | |
|   // If feature belong to background, object motion is nearly zero.
 | |
|   Vector2_f object;
 | |
| 
 | |
|   int track_id = -1;
 | |
| 
 | |
|   // Returns the MotionVector stored in the internal state at index.
 | |
|   static MotionVector FromInternalState(const MotionBoxInternalState& internal,
 | |
|                                         int index);
 | |
| };
 | |
| 
 | |
| constexpr float kTrackingDefaultFps = 30.0;
 | |
| 
 | |
| // Holds motion vectors and background model for each frame.
 | |
| // Note: Specified in the aspect preserving domain under uniform scaling,
 | |
| // longest dimension normalized to 1, i.e. if aspect_ratio >= 1, width is
 | |
| // normalized to 1 otherwise height is normalized to 1.
 | |
| struct MotionVectorFrame {
 | |
|   std::vector<MotionVector> motion_vectors;
 | |
| 
 | |
|   Homography background_model;
 | |
|   bool valid_background_model = true;
 | |
|   bool is_duplicated = false;      // Set if frame is duplicated w.r.t.
 | |
|                                    // previous one.
 | |
|   bool is_chunk_boundary = false;  // Set if this is the first frame in a chunk.
 | |
| 
 | |
|   float duration_ms = 1000.0f / kTrackingDefaultFps;
 | |
| 
 | |
|   // Aspect ratio (w/h) of the original frame.
 | |
|   float aspect_ratio = 1.0f;
 | |
| 
 | |
|   // Stores the tracked ids that have been discarded actively. This information
 | |
|   // will be used to avoid misjudgement on tracking continuity.
 | |
|   absl::flat_hash_set<int>* actively_discarded_tracked_ids = nullptr;
 | |
| };
 | |
| 
 | |
| // Transforms TrackingData to MotionVectorFrame, ready to be used by tracking
 | |
| // algorithm (so the MotionVectorFrame data is denormalized).
 | |
| void MotionVectorFrameFromTrackingData(const TrackingData& tracking_data,
 | |
|                                        MotionVectorFrame* motion_vector_frame);
 | |
| 
 | |
| // Transform TrackingData to feature positions and descriptors, ready to be used
 | |
| // by detection (re-acquisition) algorithm (so the "features" is denomalized).
 | |
| // Descriptors with all 0s will be discarded.
 | |
| void FeatureAndDescriptorFromTrackingData(
 | |
|     const TrackingData& tracking_data, std::vector<Vector2_f>* features,
 | |
|     std::vector<std::string>* descriptors);
 | |
| 
 | |
| // Inverts MotionVectorFrame (by default defined as motion from current to
 | |
| // previous frame) to hold motion from previous to current frame.
 | |
| void InvertMotionVectorFrame(const MotionVectorFrame& input,
 | |
|                              MotionVectorFrame* output);
 | |
| 
 | |
| // Returns duration in ms for this chunk item.
 | |
| float TrackingDataDurationMs(const TrackingDataChunk::Item& item);
 | |
| 
 | |
| // Returns feature indices that are within the given box. If the box size isn't
 | |
| // big enough to cover sufficient features (i.e., min_num_features), this will
 | |
| // iteratively enlarge the box size (up to max_enlarge_size) to include more
 | |
| // features. The argument box_scaling is used in MotionBoxLines() to get
 | |
| // properly scaled box corners. Note: box_scaling and max_enlarge_size need to
 | |
| // be in normalized image space.
 | |
| // TODO: Add unit test.
 | |
| void GetFeatureIndicesWithinBox(const std::vector<Vector2_f>& features,
 | |
|                                 const MotionBoxState& box_state,
 | |
|                                 const Vector2_f& box_scaling,
 | |
|                                 float max_enlarge_size, int min_num_features,
 | |
|                                 std::vector<int>* inlier_indices);
 | |
| 
 | |
| // Represents a moving box over time. Initial position is supplied via
 | |
| // ResetAtFrame, and subsequent positions for previous and next frames are
 | |
| // determined via tracking by TrackStep method.
 | |
| // Example usage:
 | |
| // // Assuming metadata is available: vector<MotionVectorFrame> mvf;
 | |
| // MotionBoxState box_state;
 | |
| // // Set to center 20%.
 | |
| // box_state.set_pos_x(0.4);
 | |
| // box_state.set_pos_y(0.4);
 | |
| // box_state.set_width(0.2);
 | |
| // box_state.set_height(0.2);
 | |
| //
 | |
| // // Initialize first position at frame 5.
 | |
| // MotionBox motion_box(TrackStepOptions());
 | |
| // motion_box.ResetAtFrame(4, box_state);
 | |
| // // Track 4 frames backward and forward in time.
 | |
| // for (int i = 0; i < 4; ++i) {
 | |
| //   // Tracking steps need to be called contiguously, as otherwise no
 | |
| //   // prior location for the track is present and TrackStep will fail.
 | |
| //   // Backward.
 | |
| //   motion_box.TrackStep(4 - i, mvf[4 -i], false, nullptr);
 | |
| //   // Get position -> consume for display, etc.
 | |
| //   motion_box.StateAtFrame(4 - i);
 | |
| //
 | |
| //   // Forward.
 | |
| //   motion_box.TrackStep(4 + i, mvf[4 -i], true, nullptr);
 | |
| //   // Get position -> consume.
 | |
| //   motion_box.StateAtFrame(4 + i);
 | |
| // }
 | |
| class MotionBox {
 | |
|  public:
 | |
|   explicit MotionBox(const TrackStepOptions& track_step_options)
 | |
|       : options_(track_step_options) {}
 | |
| 
 | |
|   MotionBox() = default;
 | |
| 
 | |
|   // Sets and overwrites MotionBoxState at specified frame. Use to supply
 | |
|   // initial position.
 | |
|   void ResetAtFrame(int frame, const MotionBoxState& state);
 | |
| 
 | |
|   // Tracks MotionBox from state at from_frame either forward or backward in
 | |
|   // time, based on the passed MotionVectorFrame. (MotionVectorFrame has to
 | |
|   // correspond to requested tracking direction, this is not checked against).
 | |
|   // Returns true if tracking was successful.
 | |
|   // Note: It is assumed that from_frame already has a valid location, either
 | |
|   // via ResetAtFrame or previous successful execution of TrackStep. That is
 | |
|   // TrackStep needs to be called contiguously from a initialized position
 | |
|   // via ResetFrame. Otherwise no prior location for the track is present (at
 | |
|   // from_frame) and TrackStep will fail (return false).
 | |
|   bool TrackStep(int from_frame, const MotionVectorFrame& motion_vectors,
 | |
|                  bool forward);
 | |
| 
 | |
|   MotionBoxState StateAtFrame(int frame) const {
 | |
|     if (frame < queue_start_ ||
 | |
|         frame >= queue_start_ + static_cast<int>(states_.size())) {
 | |
|       LOG(ERROR) << "Requesting state at unknown frame " << frame
 | |
|                  << ". Returning UNTRACKED.";
 | |
|       MotionBoxState invalid;
 | |
|       invalid.set_track_status(MotionBoxState::BOX_UNTRACKED);
 | |
|       return invalid;
 | |
|     } else {
 | |
|       MotionBoxState result = states_[frame - queue_start_];
 | |
|       if (!options_.return_internal_state()) {
 | |
|         result.clear_internal();
 | |
|       }
 | |
|       return result;
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   MotionBoxState* MutableStateAtFrame(int frame) {
 | |
|     if (frame < queue_start_ || frame >= queue_start_ + states_.size()) {
 | |
|       return NULL;
 | |
|     } else {
 | |
|       return &states_[frame - queue_start_];
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   bool TrackableFromFrame(int frame) const {
 | |
|     return StateAtFrame(frame).track_status() >= MotionBoxState::BOX_TRACKED;
 | |
|   }
 | |
| 
 | |
|   void set_start_track(int frame) { start_track_ = frame; }
 | |
|   int start_track() const { return start_track_; }
 | |
|   void set_end_track(int frame) { end_track_ = frame; }
 | |
|   int end_track() const { return end_track_; }
 | |
| 
 | |
|   void TrimFront(const int cache_size) {
 | |
|     int trim_count = states_.size() - cache_size;
 | |
|     if (trim_count > 0) {
 | |
|       queue_start_ += trim_count;
 | |
|       while (trim_count-- > 0) {
 | |
|         states_.pop_front();
 | |
|       }
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   void TrimBack(const int cache_size) {
 | |
|     int trim_count = states_.size() - cache_size;
 | |
|     if (trim_count > 0) {
 | |
|       while (trim_count-- > 0) {
 | |
|         states_.pop_back();
 | |
|       }
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   // If this variable is set to true, then TrackStep would print warning
 | |
|   // messages when tracking is failed.
 | |
|   // Default value is true and is set in tracking.cc.
 | |
|   static bool print_motion_box_warnings_;
 | |
| 
 | |
|  private:
 | |
|   // Determines next position from curr_pos based on tracking data in
 | |
|   // motion_vectors. Also receives history of the last N positions.
 | |
|   void TrackStepImplDeNormalized(
 | |
|       int frome_frame, const MotionBoxState& curr_pos,
 | |
|       const MotionVectorFrame& motion_vectors,
 | |
|       const std::vector<const MotionBoxState*>& history,
 | |
|       MotionBoxState* next_pos) const;
 | |
| 
 | |
|   // Pre-normalization wrapper for above function. De-normalizes domain
 | |
|   // to aspect preserving domain and velocity to current frame period.
 | |
|   void TrackStepImpl(int from_frame, const MotionBoxState& curr_pos,
 | |
|                      const MotionVectorFrame& motion_frame,
 | |
|                      const std::vector<const MotionBoxState*>& history,
 | |
|                      MotionBoxState* next_pos) const;
 | |
| 
 | |
|   // Implementation functions for above TrackStepImpl.
 | |
|   // Returns bounding box for start position and the expansion magnitude
 | |
|   // (normalized) that was applied.
 | |
|   void GetStartPosition(const MotionBoxState& curr_pos, float aspect_ratio,
 | |
|                         float* expand_mag, Vector2_f* top_left,
 | |
|                         Vector2_f* bottom_right) const;
 | |
| 
 | |
|   // Outputs spatial sigma in x and y for spatial weighting.
 | |
|   // Pass current box_state and inverse box domain size.
 | |
|   void GetSpatialGaussWeights(const MotionBoxState& box_state,
 | |
|                               const Vector2_f& inv_box_domain,
 | |
|                               float* spatial_gauss_x,
 | |
|                               float* spatial_gauss_y) const;
 | |
| 
 | |
|   // Outputs subset of motion_vectors that are within the specified domain
 | |
|   // (top_left to bottom_right). Only searches over the range specified via
 | |
|   // start and end idx.
 | |
|   // Each vector is weighted based on gaussian proximity, similar motion,
 | |
|   // track continuity, etc. which forms the prior weight of each feature.
 | |
|   // Features are binned into a grid of fixed dimension for density analysis.
 | |
|   // Also output number of vectors with good prior weights (> 0.1), and number
 | |
|   // of continued inliers.
 | |
|   // Returns true on success, false on failure. When it returns false, the
 | |
|   // output values are not reliable.
 | |
|   bool GetVectorsAndWeights(
 | |
|       const std::vector<MotionVector>& motion_vectors, int start_idx,
 | |
|       int end_idx, const Vector2_f& top_left, const Vector2_f& bottom_right,
 | |
|       const MotionBoxState& box_state, bool valid_background_model,
 | |
|       bool is_chunk_boundary,
 | |
|       float temporal_scale,  // Scale for velocity from standard frame period.
 | |
|       float expand_mag, const std::vector<const MotionBoxState*>& history,
 | |
|       std::vector<const MotionVector*>* vectors,
 | |
|       std::vector<float>* prior_weights, int* number_of_good_prior,
 | |
|       int* number_of_cont_inliers) const;
 | |
| 
 | |
|   // Initializes weights by performing multiple ransac rounds from vectors.
 | |
|   // Error is scaled by irls scale along parallel and orthogonal direction.
 | |
|   void TranslationIrlsInitialization(
 | |
|       const std::vector<const MotionVector*>& vectors,
 | |
|       const Vector2_f& irls_scale, std::vector<float>* weights) const;
 | |
| 
 | |
|   // Wrapper function, estimating object motion w.r.t. various degrees
 | |
|   // of freedom.
 | |
|   void EstimateObjectMotion(
 | |
|       const std::vector<const MotionVector*>& motion_vectors,
 | |
|       const std::vector<float>& prior_weights, int num_continued_inliers,
 | |
|       const Vector2_f& irls_scale, std::vector<float>* weights,
 | |
|       Vector2_f* object_translation, LinearSimilarityModel* object_similarity,
 | |
|       Homography* object_homography) const;
 | |
| 
 | |
|   // Perform IRLS estimation of the passed motion_vector's object motion.
 | |
|   // Each vector is weighted by original_weight / estimation_error, where
 | |
|   // estimation_error is refined in each estimation round.
 | |
|   // Outputs final translation and resulting irls weights (1.0 /
 | |
|   // estimation_error, i.e. with prior bias).
 | |
|   void EstimateTranslation(
 | |
|       const std::vector<const MotionVector*>& motion_vectors,
 | |
|       const std::vector<float>& orig_weights, const Vector2_f& irls_scale,
 | |
|       std::vector<float>* weights, Vector2_f* translation) const;
 | |
| 
 | |
|   // Same as above for similarity. Returns false on failure (numerical
 | |
|   // instability is most common case here).
 | |
|   bool EstimateSimilarity(
 | |
|       const std::vector<const MotionVector*>& motion_vectors,
 | |
|       const std::vector<float>& orig_weights, const Vector2_f& irls_scale,
 | |
|       std::vector<float>* weights, LinearSimilarityModel* lin_sim) const;
 | |
| 
 | |
|   // Same as above for homograph.
 | |
|   bool EstimateHomography(
 | |
|       const std::vector<const MotionVector*>& motion_vectors,
 | |
|       const std::vector<float>& prior_weights, const Vector2_f& irls_scale,
 | |
|       std::vector<float>* weights, Homography* object_homography) const;
 | |
| 
 | |
|   // Perform 6DoF perspective transform based homography estimation using
 | |
|   // motion_vector's object + background motion.
 | |
|   // weights are used to determine whether a vector is inlier or outliers.
 | |
|   // The perspective solver will exclude those vectors with weights smaller than
 | |
|   // kMaxOutlierWeight (0.1).
 | |
|   bool EstimatePnpHomography(
 | |
|       const MotionBoxState& curr_pos,
 | |
|       const std::vector<const MotionVector*>& motion_vectors,
 | |
|       const std::vector<float>& weights, float domain_x, float domain_y,
 | |
|       Homography* pnp_homography) const;
 | |
| 
 | |
|   // Apply pre-computed perspective transform based homography to the next pos.
 | |
|   void ApplyObjectMotionPerspectively(const MotionBoxState& curr_pos,
 | |
|                                       const Homography& pnp_homography,
 | |
|                                       float domain_x, float domain_y,
 | |
|                                       MotionBoxState* next_pos) const;
 | |
| 
 | |
|   // Scores every vector after translation estimation into inliers and outliers
 | |
|   // (based on post_estimation_weights, inlierness is a measure in [0, 1]),
 | |
|   // and records result in next_pos as well as returning inlierness per vector
 | |
|   // in inlier_weights.
 | |
|   // Also computes the following statistics:
 | |
|   // - inlier_density: Local density for each inlier, i.e. measure of how many
 | |
|   //                   other inliers are close to that point. In [0, 1].
 | |
|   // - continued_inliers: Number of inliers that continue to be present already
 | |
|   //                      in curr_pos (same track id)
 | |
|   // - swapped_inliers: Number of inliers that are outliers in curr_pos (same
 | |
|   //                    track id).
 | |
|   // - motion_inliers: Number of inliers of similar motion as previous state.
 | |
|   //                   This measure is complementary to above continued inliers
 | |
|   //                   in case object is moving significantly, in which case
 | |
|   //                   tracks tend to be short lived.
 | |
|   // - kinetic_average: Average object norm of all inliers weighted by
 | |
|   //                    pre_estimation_weights.
 | |
|   void ScoreAndRecordInliers(const MotionBoxState& curr_pos,
 | |
|                              const std::vector<const MotionVector*>& vectors,
 | |
|                              const std::vector<Vector2_f>& grid_positions,
 | |
|                              const std::vector<float>& pre_estimation_weights,
 | |
|                              const std::vector<float>& post_estimation_weights,
 | |
|                              float background_discrimination,
 | |
|                              MotionBoxState* next_pos,
 | |
|                              std::vector<float>* inlier_weights,
 | |
|                              std::vector<float>* inlier_density,
 | |
|                              int* continued_inliers, int* swapped_inliers,
 | |
|                              float* motion_inliers,
 | |
|                              float* kinetic_average) const;
 | |
| 
 | |
|   // Computes motion disparity (in [0, 1]), that is how well does the current
 | |
|   // object motion agree with the previous object motion.
 | |
|   // 0 indicates perfect match, 1 indicates signicant difference.
 | |
|   float ComputeMotionDisparity(const MotionBoxState& curr_pos,
 | |
|                                const Vector2_f& irls_scale,
 | |
|                                float continued_inliers, int num_inliers,
 | |
|                                const Vector2_f& object_motion) const;
 | |
| 
 | |
|   // Computes inlier center and extent (vector positions weighted by
 | |
|   // weights and density).
 | |
|   // Sets center inlier center, if inlier_weight is above min_inlier_sum,
 | |
|   // else to the Motion box center.
 | |
|   void ComputeInlierCenterAndExtent(
 | |
|       const std::vector<const MotionVector*>& motion_vectors,
 | |
|       const std::vector<float>& weights, const std::vector<float>& density,
 | |
|       const MotionBoxState& state, float* min_inlier_sum, Vector2_f* center,
 | |
|       Vector2_f* extent) const;
 | |
| 
 | |
|   float ScaleEstimate(const std::vector<const MotionVector*>& motion_vectors,
 | |
|                       const std::vector<float>& weights, float min_sum) const;
 | |
| 
 | |
|   // Applies spring force from box_state's position to center_of_interest, if
 | |
|   // difference is above rel_threshold. Correcting force equals difference
 | |
|   // above threshold times the spring_force coefficient.
 | |
|   void ApplySpringForce(const Vector2_f& center_of_interest,
 | |
|                         const float rel_threshold, const float spring_force,
 | |
|                         MotionBoxState* box_state) const;
 | |
| 
 | |
|   // Compute the tracking confidence and return the value.
 | |
|   // The confidence is a float value in [0, 1], with 0 being least confident,
 | |
|   // and 1 being most confident.
 | |
|   float ComputeTrackingConfidence(const MotionBoxState& motion_box_state) const;
 | |
| 
 | |
|  private:
 | |
|   class ObjectMotionValidator {
 | |
|    public:
 | |
|     static bool IsValidSimilarity(
 | |
|         const LinearSimilarityModel& linear_similarity_model, float max_scale,
 | |
|         float max_rotation) {
 | |
|       SimilarityModel similarity_model =
 | |
|           LinearSimilarityAdapter::ToSimilarity(linear_similarity_model);
 | |
| 
 | |
|       if (similarity_model.scale() < 1.0f / max_scale ||
 | |
|           similarity_model.scale() > max_scale ||
 | |
|           std::abs(similarity_model.rotation()) > max_rotation) {
 | |
|         return false;
 | |
|       }
 | |
|       return true;
 | |
|     }
 | |
| 
 | |
|     static bool IsValidHomography(const Homography& homography, float max_scale,
 | |
|                                   float max_rotation) {
 | |
|       // Filter out abnormal homography. Otherwise the determinant of
 | |
|       // projected affine matrix will be negative.
 | |
|       if (!IsInverseStable(homography)) {
 | |
|         LOG(WARNING) << "Homography matrix is not stable.";
 | |
|         return false;
 | |
|       }
 | |
| 
 | |
|       LinearSimilarityModel similarity_model =
 | |
|           LinearSimilarityAdapter::ProjectFrom(homography, 1.0f, 1.0f);
 | |
|       return IsValidSimilarity(similarity_model, max_scale, max_rotation);
 | |
|     }
 | |
| 
 | |
|     // Check if it is a convex quad.
 | |
|     static bool IsValidQuad(const MotionBoxState::Quad& quad) {
 | |
|       const int kQuadVerticesSize = 8;
 | |
|       CHECK_EQ(quad.vertices_size(), kQuadVerticesSize);
 | |
|       for (int a = 0; a < kQuadVerticesSize; a += 2) {
 | |
|         int b = (a + 2) % kQuadVerticesSize;
 | |
|         int c = (a - 2 + kQuadVerticesSize) % kQuadVerticesSize;
 | |
|         Vector2_f ab(quad.vertices(b) - quad.vertices(a),
 | |
|                      quad.vertices(b + 1) - quad.vertices(a + 1));
 | |
|         Vector2_f ac(quad.vertices(c) - quad.vertices(a),
 | |
|                      quad.vertices(c + 1) - quad.vertices(a + 1));
 | |
| 
 | |
|         // Since quad's vertices is defined in counter-clockwise manner, we only
 | |
|         // accept negative cross product.
 | |
|         if (ab.CrossProd(ac) >= 0) {
 | |
|           return false;
 | |
|         }
 | |
|       }
 | |
| 
 | |
|       return true;
 | |
|     }
 | |
| 
 | |
|     // Check if all the 4 corners of the quad are out of FOV.
 | |
|     static bool IsQuadOutOfFov(const MotionBoxState::Quad& quad,
 | |
|                                const Vector2_f& fov) {
 | |
|       const int kQuadVerticesSize = 8;
 | |
|       CHECK_EQ(quad.vertices_size(), kQuadVerticesSize);
 | |
|       bool too_far = true;
 | |
|       for (int j = 0; j < kQuadVerticesSize; j += 2) {
 | |
|         if (quad.vertices(j) < fov.x() && quad.vertices(j) > 0.0f &&
 | |
|             quad.vertices(j + 1) < fov.y() && quad.vertices(j + 1) > 0.0f) {
 | |
|           too_far = false;
 | |
|           break;
 | |
|         }
 | |
|       }
 | |
|       return too_far;
 | |
|     }
 | |
|   };
 | |
| 
 | |
|   class DistanceWeightsComputer {
 | |
|    public:
 | |
|     DistanceWeightsComputer(const MotionBoxState& initial_state,
 | |
|                             const MotionBoxState& current_state,
 | |
|                             const TrackStepOptions& options);
 | |
| 
 | |
|     // Compute distance weight based on input motion vector position.
 | |
|     float ComputeDistanceWeight(const MotionVector& test_vector);
 | |
| 
 | |
|    private:
 | |
|     Homography ComputeHomographyFromQuad(const MotionBoxState::Quad& src_quad,
 | |
|                                          const MotionBoxState::Quad& dst_quad);
 | |
| 
 | |
|     float cos_neg_a_;
 | |
|     float sin_neg_a_;
 | |
|     float spatial_gauss_x_;
 | |
|     float spatial_gauss_y_;
 | |
|     Vector2_f inv_box_domain_;
 | |
|     Vector2_f box_center_;
 | |
|     Vector2_f box_center_transformed_;
 | |
|     bool is_large_rotation_ = false;
 | |
|     Homography homography_;  // homography from current box to initial box
 | |
|     TrackStepOptions::TrackingDegrees tracking_degrees_;
 | |
|   };
 | |
| 
 | |
|   TrackStepOptions options_;
 | |
|   std::deque<MotionBoxState> states_;
 | |
|   int queue_start_;
 | |
| 
 | |
|   int start_track_;
 | |
|   int end_track_;
 | |
| 
 | |
|   MotionBoxState initial_state_;
 | |
| };
 | |
| 
 | |
| }  // namespace mediapipe.
 | |
| 
 | |
| #endif  // MEDIAPIPE_UTIL_TRACKING_TRACKING_H_
 |