// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Describes camera motion between two frames with various degree of freedom
// parametric motion models.
// In addition, stores features describing how reliable the estimated motion
// model is.
// Flags indicate several properties derived from the camera motion, e.g. if a
// frame is sharp, blurry or contains overlays.

syntax = "proto2";

package mediapipe;

import "mediapipe/util/tracking/motion_models.proto";

// Camera (background) motion between a pair of frames, together with
// reliability features and per-frame flags derived from that motion.
//
// Next tag: 33
// NOTE(review): tag 7 is not used by any field visible here and is not
// reserved; confirm whether it was retired before assigning it.
message CameraMotion {
  // Background motion expressed in various models.
  // These are per-frame pair motions (from current to previous frame).
  // Models are expressed in the un-normalized domain frame_width x frame_height
  // that is passed to MotionEstimation (stored below).
  optional TranslationModel translation = 1;
  optional SimilarityModel similarity = 2;
  optional LinearSimilarityModel linear_similarity = 3;
  optional AffineModel affine = 4;
  optional Homography homography = 5;

  optional MixtureHomography mixture_homography = 8;

  // Frame dimensions camera motion was computed over.
  optional float frame_width = 31;
  optional float frame_height = 32;

  // Mixture homographies computed w.r.t. exponentially increasing
  // regularizers. Above mixture_homography member is selected from spectrum
  // based on amount of rolling shutter present in the video.
  repeated MixtureHomography mixture_homography_spectrum = 23;

  // Relative row sigma w.r.t. frame_height for mixture models.
  optional float mixture_row_sigma = 10;

  // Average of all motion vector magnitudes (without accounting for any motion
  // model), within 10th to 90th percentile (to remove outliers).
  optional float average_magnitude = 24 [default = 0.0];

  // Inlier-weighted variance of the translation model.
  // Specified w.r.t. the unnormalized video domain that motion models
  // are computed for.
  optional float translation_variance = 25 [default = 0.0];

  // Ratio of inliers w.r.t. regular and stricter thresholds. In [0, 1].
  optional float similarity_inlier_ratio = 29 [default = 0];
  optional float similarity_strict_inlier_ratio = 30 [default = 0];

  // Average registration error of homography in pixels.
  // Note: These two parameters default to zero in case homographies have not
  // been estimated.
  optional float average_homography_error = 11;

  // Fraction, in [0,1], of homography inliers.
  optional float homography_inlier_coverage = 12;

  // Same as above but with stricter threshold.
  // (For details, see: MotionEstimationOptions::strict_coverage_scale).
  // Coverage is designed to measure the amount of significant outliers,
  // which can affect the validity of the estimated homography.
  // However, it does not discount small outliers, which occur in case
  // of small rolling shutter wobbles. For this a stricter version of coverage
  // is needed, which is essential for computing the rolling_shutter_guess,
  // i.e. the increase in coverage by using mixtures vs. homographies.
  optional float homography_strict_inlier_coverage = 22;

  // Per-block inlier fraction for mixtures.
  repeated float mixture_inlier_coverage = 13;

  // Set based on stability analysis indicating if frame is likely to originate
  // from a rolling shutter camera. (-1 is used to indicate frame was not
  // tested, e.g. due to mixture deemed unstable for analysis).
  // Guess is a scalar indicating by how much the mixture models (suitable for
  // rolling shutter distortions) increased inlier coverage compared to a single
  // homography. For example, a value of 1.3 indicates that the mixture models
  // increased inlier coverage by 30%.
  // If not -1, range is in [0, inf] (values slightly smaller than 1 are
  // possible due to suppression of noisy feature tracks during estimation).
  optional float rolling_shutter_guess = 14;

  // Indicating if CameraMotion is deemed to originate from rolling
  // shutter camera (index >= 0), and if so, denotes the index in the
  // mixture_homography_spectrum, where higher indices correspond to heavier
  // regularized motions. If motion is not deemed to originate from a rolling
  // shutter camera, index is set to -1.
  optional int32 rolling_shutter_motion_index = 16 [default = -1];

  // List of overlay indices (cell locations in column major format) over domain
  // of size overlay_domain x overlay_domain, where
  // overlay_domain is set by MotionEstimation to
  // MotionEstimationOptions::OverlayDetectionOptions::analysis_mask_size.
  // Overlay analysis is performed over chunk of frames, as specified by
  // MotionEstimationOptions::overlay_analysis_chunk_size, with the resulting
  // overlay indices being assigned to each frame of the chunk.
  // Consequently it suffices to store the result only for the first frame
  // of every chunk. Subsequent frames store a single negative index relative
  // to the first chunk frame indicating where to locate the overlay indices.
  // Specifically if for frame f, overlay_indices(0) == -2, overlay indices for
  // corresponding chunk can be found at frame f - 2.
  // For details about how overlay indices are used to flag a frame to contain
  // an overlay, see MotionFilterOptions::OverlayOptions.
  repeated int32 overlay_indices = 17;
  optional int32 overlay_domain = 18 [default = 10];

  // CameraMotion type indicates whether highest degree of freedom (DOF)
  // model estimation was deemed stable, in which case CameraMotion::Type is set
  // to VALID.
  // If a model was deemed not stable (according to *StabilityBounds in
  // MotionEstimationOptions), it is set to the lower DOF type which was deemed
  // stable.
  enum Type {
    VALID = 0;           // All requested motion models estimated reliably.
    UNSTABLE_HOMOG = 1;  // Fallback to homographies, mixture unreliable.
    UNSTABLE_SIM = 2;    // Fallback to similarity model, homography
                         // unreliable.
    UNSTABLE = 3;        // Fallback to translation model, similarity
                         // unreliable, legacy naming.
    INVALID = 4;         // Identity model, translation unreliable.
  }

  optional Type type = 6 [default = VALID];

  // If set, stores original type in case it was overridden (by filtering
  // functions, etc.).
  optional Type overridden_type = 15 [default = VALID];

  // Set of optional *bit* flags set for various purposes.
  // Each value occupies a distinct bit (1, 2, 4, ..., 128).
  enum Flags {
    FLAG_SHOT_BOUNDARY = 1;  // Set to indicate presence of a
                             // shot boundary.
    FLAG_BLURRY_FRAME = 2;
    FLAG_MAJOR_OVERLAY = 4;
    FLAG_SHARP_FRAME = 8;  // Set if frame is considered sharp
                           // in a neighborhood of frames.
    FLAG_SINGULAR_ESTIMATION = 16;  // Indicates that estimation resulted
                                    // in singular optimization problem.
                                    // Used internally by MotionEstimation.

    // Indicates if shot boundary is part of a fade. If so, all frames of the
    // fade will be labeled with the FLAG but only the begin and end of the fade
    // will have the FLAG_SHOT_BOUNDARY set.
    FLAG_SHOT_FADE = 32;

    FLAG_DUPLICATED = 64;  // Set if frame is exact duplicate of
                           // previous frame.

    FLAG_CENTER_FRAME = 128;  // Indicates this frame is at the
                              // center of the sequence. Currently
                              // used to constrain stabilizing crop
                              // transform.
  }

  // NOTE(review): presumably a bitwise OR of the Flags values above (the field
  // is a plain int32 rather than the enum type) -- confirm against callers.
  optional int32 flags = 19 [default = 0];

  // Same as in RegionFlowFeatureList (from region_flow.proto), measures blur
  // as average cornerness over textured areas. As it depends on the image
  // content, should only be used relative.
  optional float blur_score = 20;

  // Quantifies amount of blur. Specified as ratio w.r.t. sharpest matching
  // frame, i.e. 1 indicates no blur, values > 1 amount of blur w.r.t. sharpest
  // frame.
  optional float bluriness = 21 [default = 0.0];

  // Same as in RegionFlowFeatureList (from region_flow.proto). Stores fraction
  // of long feature tracks that got rejected for this frame.
  optional float frac_long_features_rejected = 26;

  // Same as in RegionFlowFeatureList (from region_flow.proto).
  // Timestamp in microseconds of the underlying frame.
  optional int64 timestamp_usec = 27 [default = 0];

  // Same as in RegionFlowFeatureList (from region_flow.proto).
  // Denotes frame that motion was computed w.r.t., locally to the current
  // frame. Values < 0 indicate backward tracking, while values > 0 indicate
  // forward tracking. For example, match_frame = -1, indicates tracking is
  // from current to previous frame.
  optional int32 match_frame = 28 [default = 0];

  // Deprecated fields.
  // Tag 9 is declared as an extension range, so it cannot be assigned to a
  // regular field of this message.
  // NOTE(review): confirm no extension of CameraMotion still uses tag 9 before
  // changing this declaration.
  extensions 9;
}