// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto2";

package mediapipe;

import "mediapipe/util/tracking/motion_models.proto";
import "mediapipe/util/tracking/region_flow.proto";

// Messages encapsulating compressed and uncompressed TrackingData.
//
// Uncompressed tracking data can be aggregated via a TrackingDataChunk
// (e.g. to be cached to file per chunk). The whole chunk persists in memory
// after reading.
//
// Compressed tracking data can be aggregated as binary encoded TrackingData
// messages into two container formats (with support for random seeking):
// 1) TrackingContainerProto:
//    Encoding using proto buffer wire format, using default proto
//    serialization and de-serialization to binary string.
//    The container uses the MetaData message to store the stream offsets and
//    milliseconds for each frame of TrackingData. TrackingData itself is custom
//    encoded to binary using FlowPackager::EncodeTrackingData and the resulting
//    binary blob wrapped in a BinaryTrackingData message.
// 2) TrackingContainerFormat:
//    Encoding without any dependencies to protobuffers, for clients without
//    proto buffer support.
//    Encoding is based on encoding binary blobs of data wrapped into repeated
//    containers. The layout of a container is described by the message
//    TrackingContainer and serialized to binary data as described below
//    (without using proto encoding).
//    Therefore, message TrackingContainer is mostly for documentation purposes
//    rather than for direct use.
//    The format is described by the proto message TrackingContainerFormat (used
//    internally by FlowPackager), however serialization and de-serialization
//    to binary string is performed using custom methods supplied by
//    FlowPackager (TrackingContainerFormatToBinary and
//    TrackingContainerFormatFromBinary).
//    The format stores the MetaData first as above, although using custom
//    encoding. TrackingData is encoded to binary as above using
//    FlowPackager::EncodeTrackingData and the resulting binary blob is stored
//    within a TrackingContainer.

// Uncompressed tracking data for a single frame: frame flags, the domain the
// data is specified in, a background model and the sparse motion vectors.
//
// Next tag: 9 (field numbers 1-8 are in use).
message TrackingData {
  enum FrameFlags {
    FLAG_PROFILE_BASELINE = 0;
    FLAG_PROFILE_HIGH = 1;
    FLAG_HIGH_FIDELITY_VECTORS = 2;

    // Background model could not be estimated.
    FLAG_BACKGROUND_UNSTABLE = 4;

    // Frame is duplicated, i.e. identical to previous one.
    FLAG_DUPLICATED = 8;

    // Indicates the beginning of a new chunk. In this case the track_id's
    // are not compatible w.r.t. previous one.
    FLAG_CHUNK_BOUNDARY = 16;
  }

  // NOTE(review): presumably a bitwise OR of FrameFlags values (the non-zero
  // values are distinct powers of two); stored as int32 rather than as the
  // enum type. Confirm against FlowPackager.
  optional int32 frame_flags = 1 [default = 0];

  // Tracking data is resolution independent specified w.r.t.
  // specified domain.
  optional int32 domain_width = 2;
  optional int32 domain_height = 3;

  // Aspect ratio (w/h) of the original frame tracking data was computed from.
  optional float frame_aspect = 6 [default = 1.0];

  // Background motion model. Homography is not declared in this file;
  // presumably it comes from the imported motion_models.proto.
  optional Homography background_model = 4;

  // Stores num_elements vectors of motion data. (x,y) position encoded via
  // row_indices and col_starts, as compressed sparse column matrix storage
  // format:
  // (https://en.wikipedia.org/wiki/Sparse_matrix#Compressed_sparse_column_.28CSC_or_CCS.29),
  // Vector data is stored as (dx, dy) position. Optionally we store the fitting
  // error and track id for each feature.
  message MotionData {
    optional int32 num_elements = 1;

    // #num_elements pairs (flow_x, flow_y) densely packed.
    repeated float vector_data = 2 [packed = true];

    // Stores corresponding track index for each feature. Features belonging
    // to the same track over time are assigned the same id.
    // NOTE: Due to size, tracking ids are never stored as compressed binary
    // tracking data.
    repeated int32 track_id = 3 [packed = true];

    // #num_elements row indices.
    repeated int32 row_indices = 4 [packed = true];

    // Start index in above array for each column (#domain_width + 1 entries).
    repeated int32 col_starts = 5 [packed = true];

    // Feature descriptors for num_elements feature points.
    // BinaryFeatureDescriptor is not declared in this file; presumably it
    // comes from the imported region_flow.proto.
    repeated BinaryFeatureDescriptor feature_descriptors = 6;

    // Stores all the tracked ids that have been discarded actively. This
    // information will be used by downstream consumers to avoid misjudgement
    // on tracking continuity.
    repeated int32 actively_discarded_tracked_ids = 7;
  }

  optional MotionData motion_data = 5;

  // Total number of features in our analysis.
  optional uint32 global_feature_count = 7;

  // Average of all motion vector magnitudes (without accounting for any motion
  // model), within 10th to 90th percentile (to remove outliers).
  optional float average_motion_magnitude = 8;
}

// Aggregates uncompressed TrackingData for a run of frames, with markers for
// the first and last chunk.
message TrackingDataChunk {
  // One frame's worth of tracking data together with its index and timestamps.
  message Item {
    optional TrackingData tracking_data = 1;

    // Global frame index.
    optional int32 frame_idx = 2;

    // Corresponding timestamp.
    optional int64 timestamp_usec = 3;

    // Previous frame timestamp.
    optional int64 prev_timestamp_usec = 4;
  }

  repeated Item item = 1;

  // Set as marker for last chunk.
  optional bool last_chunk = 2 [default = false];

  // Set as marker for first chunk.
  optional bool first_chunk = 3 [default = false];
}

// TrackingData in compressed binary format. Obtainable via
// FlowPackager::EncodeTrackingData. Details of binary encode are below.
message BinaryTrackingData {
  // TrackingContainer::header = "TRAK"
  optional bytes data = 1;
}

// Detailed explanation of binary Tracking data encode (LITTLE ENDIAN encode!)
// TrackingData is stored in binary as a struct of the above fields and the
// compressed motion data in sparse column matrix storage format.
// (https://en.wikipedia.org/wiki/Sparse_matrix#Compressed_sparse_column_.28CSC_or_CCS.29)
// Specifically, TrackingData is encoded as:
// {  frame_flags      : 32 bit int    (from member)
//    domain_width     : 32 bit int    (from member)
//    domain_height    : 32 bit int    (from member)
//    frame_aspect     : 32 bit float  (from member)
//
//    background_model : 6 * 32 bit float  (dx, dy, a, b, c, d of AffineModel)
//    scale            : 32 bit float  (scale vectors are multiplied with)
//    num_vectors      : 32 bit int    (from member num_elements)
//
//    col_start_delta  : (domain_width + 1) * 8 bit uint  (col starts delta
//                                                          encoded)
//    row_idx_size     : 32 bit int    (size of row_idx array <= num_vectors)
//    row_idx          : row_idx_size * 8 bit uint
//    vector_size      : 32 bit int    (size of vector_data)
//    vector_data      : vector_size * [8 bit | 16 bit] int
//                       (depending on FLAG_HIGH_FIDELITY_VECTORS)
// }
//
// >> Baseline encode  <<
// Scale is determined such that maximum vector value (maximum across x and y)
// is mapped to highest 8 bit or 16 bit SIGNED int
// (i.e. 7 or 15 bit resolution respectively).
// Vector values are multiplied by this scale (storing float in int with
// truncation) and (dx, dy) is packed as [dy | dx] into a 16 bit or 32 bit word.
// Unpacking requires therefore dividing the vector values by scale.
//
// Column starts are delta compressed, that is, col_start_delta[i] stores
// col_starts(i) - col_starts(i - 1) from MotionData.
//
// Row indices are directly stored as 8 bit uints, that is row_idx_size ==
// num_vectors in this case.
//
//
// >> High profile encode <<
// Scale is determined as above but for maximum vector deltas (maximum across x
// and y of magnitude in difference between two adjacent vectors). Vector value
// deltas are multiplied by this scale before encoding.
//
// Encoding is more complex compared to baseline. Instead of vector value, delta
// vector values (difference in dx = ddx, difference in dy = ddy)
// are multiplied by scale and stored packed as [ddy | ddx] into a 16 bit or
// 32 bit word. Compression algorithm accounts for error accumulation, so
// unpacking should first add deltas in integer domain (for x and y separately)
// and then divide by scale to yield (an approximation) of the
// original vector value.
// Most importantly, not every vector value is stored, but only if the delta is
// above the FlowPackagerOptions::high_profile_reuse_threshold, in which case we
// advance to the next vector data. Otherwise the previous vector is used.
//
// The information whether to advance is stored for each vector in the
// highest bit of the row index (FlowPackagerOptions::ADVANCE_FLAG). Row
// indices are not stored as in the baseline profile directly, but as deltas
// (reset at the beginning of every column). As deltas are small it is often
// possible to store two deltas (if both are < 8) in a single byte. This is
// indicated by the second highest flag in the row index
// (FlowPackagerOptions::DOUBLE_INDEX_ENCODE). If set, row index stores
// [row_delta_1 | row_delta_2] in the lower 6 bit. Note, that the advance flag
// applies uniformly to both deltas in this case.
// Sidenote (edge case): Due to the use of the top 2 bits as flags,
// at times we cannot store the full row delta in the lower 6 bits.
// In this case the vector is duplicated (using the ADVANCE_FLAG)
// until the delta sum of duplicated vectors reaches the original delta.
// Consequently, the compressed vector field in high profile may contain a few
// vectors more than the original.
//
// Column starts are delta compressed as in baseline, but account for double
// index encodes. Therefore each column delta is reduced by the number of double
// index encodes occurring for this column.
// This has to be replicated on the decoding side, each delta needs to be
// increased by the number of double index encodes encountered during encoding.

// Stores offsets for random seek and time offsets for each frame of
// TrackingData. Stream offsets are specified relative w.r.t. end of metadata
// blob.
// Offsets specify start of the corresponding binary encoded TrackingContainer
// (for TrackingContainerFormat) or BinaryTrackingData proto (for
// TrackingContainerProto).
//
// TrackingContainer::header = "META"
// NOTE(review): field number 1 is not used by this message; check history
// before assigning it.
message MetaData {
  optional fixed32 num_frames = 2;

  message TrackOffset {
    // Time offset of the metadata in msec.
    optional fixed32 msec = 1;

    // Offset of TrackingContainer or respectively BinaryTrackingData in
    // stream. Specified w.r.t. end of the Metadata.
    optional fixed32 stream_offset = 2;
  }

  repeated TrackOffset track_offsets = 3;
}

// TrackingContainer is self-describing container format to store arbitrary
// chunks of binary data. Each container is typed via its 4 character header,
// versioned via an int, and followed by the size of the binary data and the
// actual data. Designed for clients without availability of protobuffer
// support.
// Note: This message is mainly used for documentation purposes and uses custom
// encoding as specified by FlowPackager::TrackingContainerFormatToBinary.
// Default binary size of a TrackingContainer (DO NOT CHANGE!):
// header:    4 byte +
// version:   4 byte +
// size:      4 byte +
// data       #size
// SUM:       12 + #size.
message TrackingContainer {
  // 4 character header.
  optional string header = 1;

  // Version information.
  optional fixed32 version = 2 [default = 1];

  // Size of binary data held by container.
  optional fixed32 size = 3;

  // Binary data encoded.
  optional bytes data = 4;

  // DO NOT alter layout of TrackingContainer.
  // Use version to extend or alter encoded binary data.
}

// Container format for clients without proto support (written via
// FlowPackager::TrackingContainerFormatToBinary and read via
// FlowPackager::TrackingContainerFormatFromBinary).
// Proto here is intermediate format for documentation and internal use.
// Stores multiple TrackingContainers of different types.
// Meta data is stored first, to facilitate random seek (via stream offset
// positions) to arbitrary binary TrackingData. Termination container signals
// end of stream.
message TrackingContainerFormat {
  // Wraps binary meta data, via custom encode.
  optional TrackingContainer meta_data = 1;

  // Wraps BinaryTrackingData.
  repeated TrackingContainer track_data = 2;

  // Add new TrackingContainers above before end of stream indicator.

  // Zero sized termination container with TrackingContainer::header = "TERM".
  optional TrackingContainer term_data = 3;
}

// Simplified proto format of above TrackingContainerFormat. Instead of using
// self-describing TrackingContainer's, we simply use the proto wire format for
// encoding and decoding (proto format is typed and versioned via ids).
message TrackingContainerProto {
  optional MetaData meta_data = 1;
  repeated BinaryTrackingData track_data = 2;
}

// Options controlling compression and encoding.
message FlowPackagerOptions {
  // Tracking data is resolution independent specified w.r.t.
  // specified domain. Only values <= 256 are supported if binary tracking data
  // is requested to be supported (see below).
  optional int32 domain_width = 1 [default = 256];
  optional int32 domain_height = 2 [default = 192];

  // Needs to be set for calls to FlowPackager::EncodeTrackingData. If encoding
  // is not required, can be set to false in which case a higher domain_width
  // can be used.
  optional bool binary_tracking_data_support = 6 [default = true];

  // NOTE(review): presumably selects the high profile encode over the
  // baseline encode for binary tracking data; confirm against FlowPackager.
  optional bool use_high_profile = 3 [default = false];

  // If set uses 16 bit encode for vector data, in BinaryTrackingData,
  // otherwise only 8 bits are used.
  optional bool high_fidelity_16bit_encode = 4 [default = true];

  // In high profile encode, re-use previously encoded vector when absolute
  // difference to current vector is below threshold.
  optional float high_profile_reuse_threshold = 5 [default = 0.5];

  // High profile encoding flags.
  enum HighProfileEncoding {
    ADVANCE_FLAG = 0x80;
    DOUBLE_INDEX_ENCODE = 0x40;
    INDEX_MASK = 0x3F;
  }
}