// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Describes camera motion between two frames using parametric motion models
// with various degrees of freedom.
// In addition, stores features describing how reliable the estimated motion
// model is.
// Flags indicate several properties derived from the camera motion, e.g. if a
// frame is sharp, blurry or contains overlays.
|
|
syntax = "proto2";

package mediapipe;

// Supplies the motion model message types used by CameraMotion below
// (TranslationModel, SimilarityModel, LinearSimilarityModel, AffineModel,
// Homography, MixtureHomography); none of them are defined in this file.
import "mediapipe/util/tracking/motion_models.proto";

// Next tag: 33
message CameraMotion {
  // Background motion expressed in various models.
  // These are per-frame pair motions (from current to previous frame).
  // Models are expressed in the un-normalized domain frame_width x frame_height
  // that is passed to MotionEstimation (stored below).
  optional TranslationModel translation = 1;
  optional SimilarityModel similarity = 2;
  optional LinearSimilarityModel linear_similarity = 3;
  optional AffineModel affine = 4;
  optional Homography homography = 5;

  // Mixture model of the background motion. Selected from
  // mixture_homography_spectrum (see below).
  optional MixtureHomography mixture_homography = 8;

  // Frame dimensions camera motion was computed over.
  optional float frame_width = 31;
  optional float frame_height = 32;

  // Mixture homographies computed w.r.t. exponentially increasing
  // regularizers. Above mixture_homography member is selected from spectrum
  // based on amount of rolling shutter present in the video.
  repeated MixtureHomography mixture_homography_spectrum = 23;

  // Relative row sigma w.r.t. frame_height for mixture models.
  optional float mixture_row_sigma = 10;

  // Average of all motion vector magnitudes (without accounting for any motion
  // model), within 10th to 90th percentile (to remove outliers).
  optional float average_magnitude = 24 [default = 0.0];

  // Inlier-weighted variance of the translation model.
  // Specified w.r.t. the unnormalized video domain that motion models
  // are computed for.
  optional float translation_variance = 25 [default = 0.0];

  // Ratio of inliers w.r.t. regular and stricter thresholds. In [0, 1].
  optional float similarity_inlier_ratio = 29 [default = 0];
  optional float similarity_strict_inlier_ratio = 30 [default = 0];

  // Average registration error of homography in pixels.
  // Note: These two parameters default to zero in case homographies have not
  // been estimated.
  optional float average_homography_error = 11;

  // Fraction, in [0,1], of homography inliers.
  optional float homography_inlier_coverage = 12;

  // Same as above but with stricter threshold.
  // (For details, see: MotionEstimationOptions::strict_coverage_scale).
  // Coverage is designed to measure the amount of significant outliers,
  // which can affect the validity of the estimated homography.
  // However, it does not discount small outliers, which occur in case
  // of small rolling shutter wobbles. For this a stricter version of coverage
  // is needed, which is essential for computing the rolling_shutter_guess,
  // i.e. the increase in coverage by using mixtures vs. homographies.
  optional float homography_strict_inlier_coverage = 22;

  // Per-block inlier fraction for mixtures.
  repeated float mixture_inlier_coverage = 13;

  // Set based on stability analysis indicating if frame is likely to originate
  // from a rolling shutter camera. (-1 is used to indicate frame was not
  // tested, e.g. due to mixture deemed unstable for analysis).
  // Guess is a scalar indicating by how much the mixture models (suitable for
  // rolling shutter distortions) increased inlier coverage compared to a single
  // homography. For example, a value of 1.3 indicates that the mixture models
  // increased inlier coverage by 30%.
  // If not -1, range is in [0, inf] (values slightly smaller than 1 are
  // possible due to suppression of noisy feature tracks during estimation).
  optional float rolling_shutter_guess = 14;

  // Indicating if CameraMotion is deemed to originate from rolling
  // shutter camera (index >= 0), and if so, denotes the index in the
  // mixture_homography_spectrum, where higher indices correspond to heavier
  // regularized motions. If motion is not deemed to originate from a rolling
  // shutter camera, index is set to -1.
  optional int32 rolling_shutter_motion_index = 16 [default = -1];

  // List of overlay indices (cell locations in column major format) over domain
  // of size overlay_domain x overlay_domain, where
  // overlay_domain is set by MotionEstimation to
  // MotionEstimationOptions::OverlayDetectionOptions::analysis_mask_size.
  // Overlay analysis is performed over chunk of frames, as specified by
  // MotionEstimationOptions::overlay_analysis_chunk_size, with the resulting
  // overlay indices being assigned to each frame of the chunk.
  // Consequently it suffices to store the result only for the first frame
  // of every chunk. Subsequent frames store a single negative index relative
  // to the first chunk frame indicating where to locate the overlay indices.
  // Specifically if for frame f, overlay_indices(0) == -2, overlay indices for
  // corresponding chunk can be found at frame f - 2.
  // For details about how overlay indices are used to flag a frame to contain
  // an overlay, see MotionFilterOptions::OverlayOptions.
  repeated int32 overlay_indices = 17;

  // Side length of the square domain that overlay_indices refer to.
  optional int32 overlay_domain = 18 [default = 10];

  // CameraMotion type indicates whether highest degree of freedom (DOF)
  // model estimation was deemed stable, in which case CameraMotion::Type is set
  // to VALID.
  // If a model was deemed not stable (according to *StabilityBounds in
  // MotionEstimationOptions), it is set to the lower dof type which was deemed
  // stable.
  enum Type {
    // All requested motion models estimated reliably.
    VALID = 0;

    // Fallback to homographies, mixture unreliable.
    UNSTABLE_HOMOG = 1;

    // Fallback to similarity model, homography unreliable.
    UNSTABLE_SIM = 2;

    // Fallback to translation model, similarity unreliable, legacy naming.
    UNSTABLE = 3;

    // Identity model, translation unreliable.
    INVALID = 4;
  }

  // Stability classification of this CameraMotion, see Type above.
  optional Type type = 6 [default = VALID];

  // If set, stores original type in case it was overridden (by filtering
  // functions, etc.).
  optional Type overridden_type = 15 [default = VALID];

  // Set of optional *bit* flags set for various purposes.
  // Values are distinct powers of two so that several of them can be combined
  // in the single integer field `flags` below.
  enum Flags {
    // Set to indicate presence of a shot boundary.
    FLAG_SHOT_BOUNDARY = 1;

    // Frame is flagged as blurry.
    FLAG_BLURRY_FRAME = 2;

    // Frame is flagged to contain a major overlay (see overlay_indices above).
    FLAG_MAJOR_OVERLAY = 4;

    // Set if frame is considered sharp in a neighborhood of frames.
    FLAG_SHARP_FRAME = 8;

    // Indicates that estimation resulted in singular optimization problem.
    // Used internally by MotionEstimation.
    FLAG_SINGULAR_ESTIMATION = 16;

    // Indicates if shot boundary is part of a fade. If so, all frames of the
    // fade will be labeled with the FLAG but only the begin and end of the fade
    // will have the FLAG_SHOT_BOUNDARY set.
    FLAG_SHOT_FADE = 32;

    // Set if frame is exact duplicate of previous frame.
    FLAG_DUPLICATED = 64;

    // Indicates this frame is at the center of the sequence. Currently used to
    // constrain stabilizing crop transform.
    FLAG_CENTER_FRAME = 128;
  }

  // Bit mask holding a combination of the Flags values above.
  optional int32 flags = 19 [default = 0];

  // Same as in RegionFlowFeatureList (from region_flow.proto), measures blur
  // as average cornerness over textured areas. As it depends on the image
  // content, should only be used relative.
  optional float blur_score = 20;

  // Quantifies amount of blur. Specified as ratio w.r.t. sharpest matching
  // frame, i.e. 1 indicates no blur, values > 1 amount of blur w.r.t. sharpest
  // frame.
  optional float bluriness = 21 [default = 0.0];

  // Same as in RegionFlowFeatureList (from region_flow.proto). Stores fraction
  // of long feature tracks that got rejected for this frame.
  optional float frac_long_features_rejected = 26;

  // Same as in RegionFlowFeatureList (from region_flow.proto).
  // Timestamp in microseconds of the underlying frame.
  optional int64 timestamp_usec = 27 [default = 0];

  // Same as in RegionFlowFeatureList (from region_flow.proto).
  // Denotes frame that motion was computed w.r.t., locally to the current
  // frame. Values < 0 indicate backward tracking, while values > 0 indicate
  // forward tracking. For example, match_frame = -1, indicates tracking is
  // from current to previous frame.
  optional int32 match_frame = 28 [default = 0];

  // Deprecated fields.
  extensions 9;
}
|