// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto2";

package mediapipe;

option java_package = "com.google.mediapipe.tracking";
option java_multiple_files = true;

// Additional information about the patch surrounding a RegionFlowFeature.
//
// When produced via MotionEstimation::RetrieveRegionFlowFeatureList or
// ComputeRegionFlowFeatureDescriptors, the patch descriptor has the following
// 9 dimensional layout: 3 mean intensities, then the 3x3 covariance matrix
// (only the upper half, 6 elements, is stored in column major order).
// Indices into the descriptor data refer to:
//   mean:       0 1 2
//   covariance: 3 4 5
//                 6 7
//                   8
message PatchDescriptor {
  // The actual feature descriptor.
  repeated float data = 1;
}

// Binary feature descriptor for a particular feature.
// For example: orb
// http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.370.4395&rep=rep1&type=pdf
message BinaryFeatureDescriptor {
  optional bytes data = 1;
}

// Internal data structure used during temporal IRLS smoothing.
message TemporalIRLSSmoothing {
  optional float weight_sum = 1 [default = 0];
  optional float value_sum = 2 [default = 0];
}

// Tracked feature at location (x,y) with flow (dx, dy) and patch based
// error (sum of absolute value of intensity difference).
// Next tag: 19
message RegionFlowFeature {
  optional float x = 1 [default = 0];
  optional float y = 2 [default = 0];
  optional float dx = 3 [default = 0];
  optional float dy = 4 [default = 0];

  // Id shared by all features that belong to the same feature track; the
  // track is identified via this id. The default of -1 means "no id".
  // Note: the id is only unique within the lifetime of one
  // RegionFlowComputation object. If work was distributed or parallelized
  // across multiple instances, ids are only unique within each instance.
  optional int32 track_id = 13 [default = -1];

  // Tracking error as patch intensity residual (SSD).
  optional float tracking_error = 5 [default = 0];

  // Inverse of the registration error (in pixels) after parametric motion
  // model fitting. Values are in [0, 1e6]; low values correspond to outliers,
  // high values to inliers.
  // Set by MotionEstimation::EstimateMotions*
  optional float irls_weight = 6 [default = 1.0];

  // Corner response, computed as the minimum eigenvalue of the block filtered
  // 2nd moment matrix.
  optional float corner_response = 11 [default = 0.0];

  // Patch feature descriptors. *For internal use only*. External clients
  // should not rely on their contents.
  optional PatchDescriptor feature_descriptor = 7;
  optional PatchDescriptor feature_match_descriptor = 8;

  // Internal data structure used temporarily during temporal IRLS smoothing.
  optional TemporalIRLSSmoothing internal_irls = 10;

  // Optional label for debugging purposes.
  optional string label = 14;

  // Flags indicating specific statuses.
  enum Flags {
    // Used for long feature tracks if the track id was reset.
    FLAG_BROKEN_TRACK = 1;
  }
  optional int32 flags = 15;

  // Unique feature id per RegionFlowComputation object.
  optional int32 feature_id = 16;

  // Octave (pyramid layer) from which the keypoint has been extracted.
  optional int32 octave = 17 [default = 0];

  // Feature descriptor for the current feature.
  optional BinaryFeatureDescriptor binary_feature_descriptor = 18;

  // Deprecated fields.
  extensions 9, 12;
}

// RegionFlowFrame is an optical flow representation where each region has a
// consistent optical flow (adheres to a local translational model).
// Regions are arranged in a regular grid according to BlockDescriptor.
// Next tag: 11.
message RegionFlowFrame {
  // Next tag: 8
  message RegionFlow {
    required int32 region_id = 1;

    // Mean anchor point (centroid) of the flow vectors, and the mean flow.
    optional float centroid_x = 2 [default = 0];
    optional float centroid_y = 3 [default = 0];
    optional float flow_x = 4 [default = 0];
    optional float flow_y = 5 [default = 0];

    repeated RegionFlowFeature feature = 7;

    // Deprecated fields.
    extensions 6;
  }

  // Sorted by id for quick lookup.
  repeated RegionFlow region_flow = 1;

  // Total number of features in all RegionFlow's.
  optional int32 num_total_features = 2 [default = 0];

  // If set, indicates that the frame's region flow is unstable
  // (not enough features or coverage too low).
  optional bool unstable_frame = 4 [default = false];

  // Blur score of the current frame, defined as the n-th percentile of the
  // cornerness of the input frame evaluated over regions of high cornerness.
  // For details see BlurScoreOptions in region_flow_computation.proto.
  // The absolute value is pretty meaningless, but relative to the blur score
  // of other frames one can detect blurry frames, e.g. by a 'significant'
  // local maximum in a sequence of blur_scores.
  optional float blur_score = 7;

  optional int32 frame_width = 8;
  optional int32 frame_height = 9;

  // Region flow is estimated using a grid of equal sized bins as regions.
  // BlockDescriptor specifies the size of the bins/blocks.
  message BlockDescriptor {
    optional int32 block_width = 1;
    optional int32 block_height = 2;
    optional int32 num_blocks_x = 3 [default = 0];
    optional int32 num_blocks_y = 4 [default = 0];
  }
  optional BlockDescriptor block_descriptor = 10;

  // Deprecated fields.
  extensions 3, 5, 6;
}

// Encapsulates a list of features with associated flow.
// Can be extracted from RegionFlow via GetRegionFlowFeatureList
// declared in region_flow.h. This is the essential (additional) information
// required by Cropper using wobble_suppression with displacements.
// Next tag: 14
message RegionFlowFeatureList {
  repeated RegionFlowFeature feature = 1;
  optional int32 frame_width = 2;
  optional int32 frame_height = 3;

  // Set from the corresponding RegionFlowFrame field.
  optional bool unstable = 4 [default = false];

  // Minimum distance from the image border for each feature and matching
  // feature (if enforced > 0).
  optional int32 distance_from_border = 5 [default = 0];

  // Set from the corresponding RegionFlowFrame field.
  optional float blur_score = 6;

  // If set, indicates that the features represent long tracks, i.e. each
  // feature has a valid track_id() >= 0.
  optional bool long_tracks = 7 [default = false];

  // If long_tracks, stores number of long feature tracks that got rejected in
  // this frame, as their patches were deemed inconsistent with the track's
  // very first extracted patch.
  // NOTE(review): the field name suggests a fraction rather than a count --
  // confirm against the code that sets it.
  optional float frac_long_features_rejected = 8 [default = 0];

  // Measures visual consistency between adjacent frames. In particular,
  // stores the absolute *change* in visual difference between two adjacent
  // frame pairs, i.e. the modulus of the 2nd derivative of the frame
  // appearance. Normalized w.r.t. number of channels and total pixels of the
  // underlying frame.
  // For sudden changes (e.g. shot boundaries) this value will be
  // significantly non-zero (> 0.05).
  // The negative default indicates that no consistency has been computed.
  optional float visual_consistency = 9 [default = -1];

  // Timestamp in microseconds of the underlying frame, that is the frame
  // for which the source features (not matching features) were computed.
  optional int64 timestamp_usec = 10 [default = 0];

  // Denotes the frame that flow was computed w.r.t., locally to the current
  // frame. For example, if the current frame is N, N + match_frame is the
  // matching frame that flow was computed to.
  // Values < 0 indicate backward tracking, while values > 0 indicate forward
  // tracking. By default, for empty feature lists, the matching frame is the
  // same as the current frame, i.e. match_frame = 0.
  optional int32 match_frame = 11 [default = 0];

  // Set, if frame is estimated to be an exact duplicate of the previous frame.
  optional bool is_duplicated = 12 [default = false];

  // Stores all the tracked ids that have been discarded actively in this
  // frame. This information will be populated via RegionFlowFeatureList, so
  // that downstream modules can receive it and use it to avoid misjudgement
  // on tracking continuity.
  // Discard reason:
  // (1) A tracked feature has too long a track, which might create drift.
  // (2) A tracked feature is in a highly dense area, which provides little
  //     value.
  repeated int32 actively_discarded_tracked_ids = 13;
}

// Salient point location (normalized w.r.t. frame_width and frame_height, i.e.
// specified in the domain [0, 1] x [0, 1]).
// For TYPE_INCLUDE:
// During retargeting and stabilization salient points introduce constraints
// that will try to keep the normalized location in the rectangle
// frame_size - normalized bounds.
// For this soft constraints are used, therefore the weight specifies
// how "important" the salient point is (higher is better).
// In particular for each point p the retargeter introduces two pairs of
// constraints of the form:
//    x - slack < width - right
// and x + slack > 0 + left,    with slack > 0
// where the weight specifies the importance of the slack.
//
// For TYPE_EXCLUDE_*:
// Similar to above, but constraints are introduced to keep
// the point to the left of the left bound OR the right of the right bound.
// In particular:
//   x - slack < left OR
//   x + slack >= right
// Similar to above, the weight specifies the importance of the slack.
//
// Note: Choosing a too high weight can lead to
// jerkiness as the stabilization essentially starts tracking the salient point.
message SalientPoint {
  // Normalized location of the point (within domain [0, 1] x [0, 1]).
  optional float norm_point_x = 1 [default = 0.0];
  optional float norm_point_y = 2 [default = 0.0];

  enum SalientPointType {
    TYPE_INCLUDE = 1;
    TYPE_EXCLUDE_LEFT = 2;
    TYPE_EXCLUDE_RIGHT = 3;
  }

  // Salient point type. By default we try to frame the salient point within
  // the bounding box specified by left, bottom, right, top. Alternatively,
  // one can choose to exclude the point. For details, see the discussion in
  // the comment preceding this message.
  optional SalientPointType type = 11 [default = TYPE_INCLUDE];

  // Bounds are specified in normalized coordinates [0, 1], measured FROM the
  // specified border. Opposing bounds (e.g. left and right) may not add up to
  // values larger than 1.
  // The default bounds center the salient point within the centering third of
  // the frame.
  optional float left = 3 [default = 0.3];
  optional float bottom = 4 [default = 0.3];
  optional float right = 9 [default = 0.3];
  optional float top = 10 [default = 0.3];

  optional float weight = 5 [default = 15];

  // In addition, a salient point can represent a region of interest, defined
  // as an ellipse of size norm_major x norm_minor (normalized to the [0, 1]
  // domain) whose orientation is given by angle (in radians in [0, pi]).
  // Due to the aspect ratio change of the normalized domain, it is recommended
  // that transformations to other domains are done via the ScaleSalientPoint
  // function.
  optional float norm_major = 6;
  optional float norm_minor = 7;

  // Angle of major axis with x-axis (counter-clockwise, in radians).
  optional float angle = 8;

  extensions 20000 to max;
}

// Aggregates SalientPoint's for a frame.
message SalientPointFrame {
  repeated SalientPoint point = 1;

  extensions 20000 to max;
}