// 2f86a459b6
// GitOrigin-RevId: 5b23708185311ae39a8605b0c2eff721e7b4939f
// 305 lines, 12 KiB, Protocol Buffer
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto2";

package mediapipe;

// Java code generation: one top-level class per message, placed in the
// tracking package.
option java_package = "com.google.mediapipe.tracking";
option java_multiple_files = true;

// Captures additional information about a RegionFlowFeature's
// surrounding patch.
// Using MotionEstimation::RetrieveRegionFlowFeatureList or
// ComputeRegionFlowFeatureDescriptors the patch descriptor has the following
// layout:
// 9 dimensional: 3 mean intensities followed by a 3x3 covariance matrix, of
// which only the upper half (6 elements) is stored in column major order,
// i.e. indices for data in patch descriptor refer to:
//   mean: 0 1 2,   covariance: 3 4 5
//                                6 7
//                                  8
message PatchDescriptor {
  // The actual feature descriptor.
  repeated float data = 1;
}

// Binary feature descriptor for a particular feature.
// For example: orb
// http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.370.4395&rep=rep1&type=pdf
message BinaryFeatureDescriptor {
  // Descriptor payload, stored as opaque bytes.
  optional bytes data = 1;
}

// Internal data structure used during temporal IRLS smoothing.
// NOTE(review): the two fields look like running accumulators (sum of
// weights and sum of values) -- confirm against the smoothing code.
message TemporalIRLSSmoothing {
  optional float weight_sum = 1 [default = 0];
  optional float value_sum = 2 [default = 0];
}

// Tracked feature at location (x,y) with flow (dx, dy) and patch based
// error (sum of absolute value of intensity difference).
// NOTE(review): the tracking_error field below is documented as SSD, which
// disagrees with "sum of absolute value" here -- confirm which one applies.
// Next tag: 19
message RegionFlowFeature {
  // Feature location.
  optional float x = 1 [default = 0];
  optional float y = 2 [default = 0];

  // Flow vector of the feature.
  optional float dx = 3 [default = 0];
  optional float dy = 4 [default = 0];

  // Features that belong to the same feature track are assigned a unique id
  // and are identified via it.
  // Note, this id is only unique within the lifetime of a RegionFlowComputation
  // object. That is, if distribution or parallelization using multiple
  // instances was used, the ids are only unique within that instance context.
  // The default of -1 denotes "no id".
  optional int32 track_id = 13 [default = -1];

  // Tracking error as patch intensity residual (SSD).
  optional float tracking_error = 5 [default = 0];

  // Inverse of registration error (in pixels), after parametric motion model
  // fitting. Values are in [0, 1e6].
  // Low values correspond to outliers, high values to inliers.
  // Set by MotionEstimation::EstimateMotions*
  optional float irls_weight = 6 [default = 1.0];

  // Corner response (computed as minimum eigenvalue of
  // block filtered 2nd moment matrix).
  optional float corner_response = 11 [default = 0.0];

  // Patch feature descriptors. *For internal use only*. External clients should
  // not rely on their contents.
  optional PatchDescriptor feature_descriptor = 7;
  optional PatchDescriptor feature_match_descriptor = 8;

  // Internal data structure used temporarily during temporal IRLS smoothing.
  optional TemporalIRLSSmoothing internal_irls = 10;

  // Optional label for debugging purposes.
  optional string label = 14;

  // Flags indicating specific statuses.
  enum Flags {
    // Used for long feature tracks if track id was reset.
    FLAG_BROKEN_TRACK = 1;
  }

  // Status flags of this feature.
  // NOTE(review): presumably a combination of Flags values stored as a plain
  // integer -- confirm against the code that sets it.
  optional int32 flags = 15;

  // Unique feature id per RegionFlowComputation object.
  optional int32 feature_id = 16;

  // Octave (pyramid layer) from which the keypoint has been extracted.
  optional int32 octave = 17 [default = 0];

  // Feature descriptor for the current feature.
  optional BinaryFeatureDescriptor binary_feature_descriptor = 18;

  // Deprecated fields. Their tag numbers stay declared as an extension range,
  // so they cannot be assigned to new regular fields.
  extensions 9, 12;
}

// RegionFlowFrame is an optical flow representation where each region has a
// consistent optical flow (adheres to local translational model).
// Regions are arranged in a regular grid according to BlockDescriptor.
// Next tag: 11.
message RegionFlowFrame {
  // Flow of a single region together with the features inside it.
  // Next tag: 8
  message RegionFlow {
    // Identifier of the region; the region_flow list below is sorted by it.
    required int32 region_id = 1;

    // Mean anchor point (centroid) of flow vector and mean flow.
    optional float centroid_x = 2 [default = 0];
    optional float centroid_y = 3 [default = 0];
    optional float flow_x = 4 [default = 0];
    optional float flow_y = 5 [default = 0];

    // Features belonging to this region.
    repeated RegionFlowFeature feature = 7;

    // Deprecated fields. The tag number stays declared as an extension range,
    // so it cannot be assigned to a new regular field.
    extensions 6;
  }

  // Sorted by id for quick lookup.
  repeated RegionFlow region_flow = 1;

  // Total number of features in all RegionFlow's.
  optional int32 num_total_features = 2 [default = 0];

  // If set, indicates that the frame's region flow is unstable.
  // (not enough features or coverage too low).
  optional bool unstable_frame = 4 [default = false];

  // Blur score of the current frame is defined as the n-th percentile
  // of the cornerness of the input frame evaluated over regions of high
  // cornerness. For details see BlurScoreOptions in
  // region_flow_computation.proto.
  // The actual value is pretty meaningless, but relative to the blur score
  // of other frames one can detect blurry frames, e.g. by a 'significant'
  // local maximum in a sequence of blur_scores.
  optional float blur_score = 7;

  // Dimensions of the underlying frame.
  optional int32 frame_width = 8;
  optional int32 frame_height = 9;

  // Region flow is estimated using a grid of equal sized bins as regions.
  // BlockDescriptor specifies size of bins/blocks.
  message BlockDescriptor {
    optional int32 block_width = 1;
    optional int32 block_height = 2;
    optional int32 num_blocks_x = 3 [default = 0];
    optional int32 num_blocks_y = 4 [default = 0];
  }
  optional BlockDescriptor block_descriptor = 10;

  // Deprecated fields. Their tag numbers stay declared as an extension range,
  // so they cannot be assigned to new regular fields.
  extensions 3, 5, 6;
}

// Encapsulates a list of features with associated flow.
// Can be extracted from RegionFlow via GetRegionFlowFeatureList
// declared in region_flow.h. This is the essential (additional) information
// required by Cropper using wobble_suppression with displacements.
// Next tag: 14
message RegionFlowFeatureList {
  // The features with their associated flow.
  repeated RegionFlowFeature feature = 1;

  // Dimensions of the underlying frame.
  optional int32 frame_width = 2;
  optional int32 frame_height = 3;

  // Set from corresponding RegionFlowFrame field.
  optional bool unstable = 4 [default = false];

  // Records the minimum distance from the image border for each feature and
  // matching feature (if enforced > 0).
  optional int32 distance_from_border = 5 [default = 0];

  // Set from corresponding RegionFlowFrame field.
  optional float blur_score = 6;

  // If set, indicates that features represent long tracks, i.e. each feature
  // has a valid track_id() >= 0.
  optional bool long_tracks = 7 [default = false];

  // If long_tracks, stores number of long feature tracks that got rejected in
  // this frame, as their patches were deemed inconsistent with the track's very
  // first extracted patch.
  // NOTE(review): the field name and float type suggest this is a fraction
  // rather than a count -- confirm against the code that sets it.
  optional float frac_long_features_rejected = 8 [default = 0];

  // Measures visual consistency between adjacent frames. In particular, stores
  // the absolute *change* in visual difference between two adjacent frame
  // pairs, i.e. the modulus of the 2nd derivative of the frame appearance.
  // Normalized w.r.t. number of channels and total pixels of the underlying
  // frame.
  // In particular for sudden changes (e.g. shot boundaries) this value will
  // be significantly non-zero (> 0.05).
  // Negative value per default indicates no consistency has been computed.
  optional float visual_consistency = 9 [default = -1];

  // Timestamp in microseconds of the underlying frame, that is the frame
  // for which the source features (not matching features) were computed.
  optional int64 timestamp_usec = 10 [default = 0];

  // Denotes the frame that flow was computed w.r.t., locally to the current
  // frame. For example, if current frame is N, N + match_frame is the matching
  // frame that flow was computed to.
  // Values < 0 indicate backward tracking, while values > 0 indicate forward
  // tracking. By default, for empty feature lists, matching frame is the
  // same as current frame, i.e. match_frame = 0.
  optional int32 match_frame = 11 [default = 0];

  // Set, if frame is estimated to be an exact duplicate of the previous frame.
  optional bool is_duplicated = 12 [default = false];

  // Stores all the tracked ids that have been discarded actively in this frame.
  // This information will be populated via RegionFlowFeatureList, so that the
  // downstream modules can receive it and use it to avoid misjudgement on
  // tracking continuity.
  // Discard reason:
  // (1) A tracked feature has too long a track, which might create drift.
  // (2) A tracked feature is in a highly dense area, which provides little
  //     value.
  repeated int32 actively_discarded_tracked_ids = 13;
}

// Salient point location (normalized w.r.t. frame_width and frame_height, i.e.
// specified in the domain [0, 1] x [0, 1]).
//
// For TYPE_INCLUDE:
// During retargeting and stabilization salient points introduce constraints
// that will try to keep the normalized location in the rectangle
// frame_size - normalized bounds.
// For this soft constraints are used, therefore the weight specifies
// how "important" the salient point is (higher is better).
// In particular for each point p the retargeter introduces two pairs of
// constraints of the form:
//        x - slack < width - right
//   and  x + slack > 0 + left,      with slack > 0
// where the weight specifies the importance of the slack.
//
// For TYPE_EXCLUDE_*:
// Similar to above, but constraints are introduced to keep
// the point to the left of the left bound OR the right of the right bound.
// In particular:
//   x - slack < left  OR
//   x + slack >= right
// Similar to above, the weight specifies the importance of the slack.
//
// Note: Choosing a too high weight can lead to
// jerkiness as the stabilization essentially starts tracking the salient point.
message SalientPoint {
  // Normalized location of the point (within domain [0, 1] x [0, 1]).
  optional float norm_point_x = 1 [default = 0.0];
  optional float norm_point_y = 2 [default = 0.0];

  // How the salient point constrains the framing; see the message comment.
  enum SalientPointType {
    TYPE_INCLUDE = 1;
    TYPE_EXCLUDE_LEFT = 2;
    TYPE_EXCLUDE_RIGHT = 3;
  }

  // Salient point type. By default we try to frame the salient point within
  // the bounding box specified by left, bottom, right, top. Alternatively, one
  // can choose to exclude the point. For details, see discussion above.
  optional SalientPointType type = 11 [default = TYPE_INCLUDE];

  // Bounds are specified in normalized coordinates [0, 1], FROM the specified
  // border. Opposing bounds (e.g. left and right) may not add to values
  // larger than 1.
  // Default bounds center salient point within the center third of the frame.
  optional float left = 3 [default = 0.3];
  optional float bottom = 4 [default = 0.3];
  optional float right = 9 [default = 0.3];
  optional float top = 10 [default = 0.3];

  // Importance of the soft constraints introduced by this point (higher means
  // more important); see the message comment.
  optional float weight = 5 [default = 15];

  // In addition salient point can represent a region of interest (defined as
  // ellipse of size norm_major x norm_minor (normalized to [0, 1] domain)
  // whose orientation is given by angle (in radians in [0, pi])).
  // Due to aspect ratio change of the normalized domain, it is recommended that
  // transformations to other domains are done via the ScaleSalientPoint
  // function.
  optional float norm_major = 6;
  optional float norm_minor = 7;

  // Angle of major axis with x-axis (counter-clockwise, in radians).
  optional float angle = 8;

  // Tag numbers from 20000 upwards are available for extensions.
  extensions 20000 to max;
}

// Aggregates SalientPoint's for a frame.
message SalientPointFrame {
  // Salient points of the frame.
  repeated SalientPoint point = 1;

  // Tag numbers from 20000 upwards are available for extensions.
  extensions 20000 to max;
}