// Copyright 2019 The MediaPipe Authors. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "mediapipe/util/tracking/flow_packager.h" #include #include #include #include #include "absl/strings/str_cat.h" #include "absl/strings/string_view.h" #include "mediapipe/framework/port/logging.h" #include "mediapipe/framework/port/vector.h" #include "mediapipe/util/tracking/camera_motion.h" #include "mediapipe/util/tracking/camera_motion.pb.h" #include "mediapipe/util/tracking/motion_estimation.h" #include "mediapipe/util/tracking/motion_models.h" #include "mediapipe/util/tracking/motion_models.pb.h" #include "mediapipe/util/tracking/region_flow.pb.h" namespace mediapipe { FlowPackager::FlowPackager(const FlowPackagerOptions& options) : options_(options) { if (options_.binary_tracking_data_support()) { CHECK_LE(options.domain_width(), 256); CHECK_LE(options.domain_height(), 256); } } namespace { // Performs rounding of float vector position to int. class FeatureIntegerPosition { public: // Scales a feature's location in x and y by scale_x and scale_y respectively. // Limits feature position to the integer domain // [0, width - 1] x [0, height - 1] FeatureIntegerPosition(float scale_x, float scale_y, int width, int height) : scale_x_(scale_x), scale_y_(scale_y), width_(width), height_(height) {} Vector2_i ToIntPosition(const RegionFlowFeature& feature) const { return Vector2_i( std::max(0, std::min(width_ - 1, feature.x() * scale_x_ + 0.5f)), std::max(0, std::min(height_ - 1, feature.y() * scale_y_ + 0.5f))); } private: float scale_x_; float scale_y_; int width_; int height_; }; // Lexicographic compare (first in x, then in y) under scaled integer rounding // as specified by FeatureIntegerPosition. class IntegerColumnComparator { public: IntegerColumnComparator(float scale_x, float scale_y, int width, int height) : integer_pos_(scale_x, scale_y, width, height) {} bool operator()(const RegionFlowFeature& lhs, const RegionFlowFeature& rhs) const { const Vector2_i vec_lhs = integer_pos_.ToIntPosition(lhs); const Vector2_i vec_rhs = integer_pos_.ToIntPosition(rhs); return (vec_lhs.x() < vec_rhs.x()) || (vec_lhs.x() == vec_rhs.x() && vec_lhs.y() < vec_rhs.y()); } private: const FeatureIntegerPosition integer_pos_; }; template inline std::string EncodeToString(const T& value) { std::string s(sizeof(T), 0); memcpy(&s[0], &value, sizeof(T)); return s; } template inline std::string EncodeVectorToString(const std::vector& vec) { std::string s(vec.size() * sizeof(T), 0); typename std::vector::const_iterator iter; char* ptr; for (iter = vec.begin(), ptr = &s[0]; iter != vec.end(); ++iter, ptr += sizeof(T)) { memcpy(ptr, &(*iter), sizeof(T)); } return s; } template inline bool DecodeFromStringView(absl::string_view str, T* result) { CHECK(result != nullptr); if (sizeof(*result) != str.size()) { return false; } memcpy(result, str.data(), sizeof(T)); return true; } template inline bool DecodeVectorFromStringView(absl::string_view str, std::vector* result) { CHECK(result != nullptr); if (str.size() % sizeof(T) != 0) return false; result->clear(); result->reserve(str.size() / sizeof(T)); T value; const char* begin = str.data(); const char* end = str.data() + str.size(); for (const char* ptr = begin; ptr != end; ptr += sizeof(T)) { memcpy(&value, ptr, sizeof(T)); result->push_back(value); } return true; } } // namespace. void FlowPackager::PackFlow(const RegionFlowFeatureList& feature_list, const CameraMotion* camera_motion, TrackingData* tracking_data) const { CHECK(tracking_data); CHECK_GT(feature_list.frame_width(), 0); CHECK_GT(feature_list.frame_height(), 0); // Scale flow to output domain. const float dim_x_scale = options_.domain_width() * (1.0f / feature_list.frame_width()); const float dim_y_scale = options_.domain_height() * (1.0f / feature_list.frame_height()); const bool long_tracks = feature_list.long_tracks(); // Sort features lexicographically. RegionFlowFeatureList sorted_feature_list(feature_list); SortRegionFlowFeatureList(dim_x_scale, dim_y_scale, &sorted_feature_list); tracking_data->set_domain_width(options_.domain_width()); tracking_data->set_domain_height(options_.domain_height()); tracking_data->set_frame_aspect(feature_list.frame_width() * 1.0f / feature_list.frame_height()); tracking_data->set_global_feature_count(feature_list.feature_size()); int flags = 0; if (camera_motion == nullptr || camera_motion->type() > CameraMotion::UNSTABLE_SIM) { flags |= TrackingData::FLAG_BACKGROUND_UNSTABLE; } else { Homography transform; CameraMotionToHomography(*camera_motion, &transform); Homography normalization = HomographyAdapter::Embed( AffineAdapter::FromArgs(0, 0, dim_x_scale, 0, 0, dim_y_scale)); Homography inv_normalization = HomographyAdapter::Embed(AffineAdapter::FromArgs( 0, 0, 1.0f / dim_x_scale, 0, 0, 1.0f / dim_y_scale)); *tracking_data->mutable_background_model() = ModelCompose3(normalization, transform, inv_normalization); } if (camera_motion != nullptr) { tracking_data->set_average_motion_magnitude( camera_motion->average_magnitude()); } if (feature_list.is_duplicated()) { flags |= TrackingData::FLAG_DUPLICATED; } tracking_data->set_frame_flags(flags); const int num_vectors = sorted_feature_list.feature_size(); TrackingData::MotionData* data = tracking_data->mutable_motion_data(); data->set_num_elements(num_vectors); // Initialize col starts with "unseen" marker. std::vector col_start(options_.domain_width() + 1, -1); int last_col = -1; int last_row = -1; FeatureIntegerPosition integer_pos(dim_x_scale, dim_y_scale, options_.domain_width(), options_.domain_height()); // Store feature and corresponding motion (minus camera motion) in // compressed sparse column format: // https://en.wikipedia.org/wiki/Sparse_matrix#Compressed_sparse_column_.28CSC_or_CCS.29 for (const auto& feature : sorted_feature_list.feature()) { float flow_x = feature.dx() * dim_x_scale; float flow_y = feature.dy() * dim_y_scale; Vector2_i loc = integer_pos.ToIntPosition(feature); // Convert back to float for accurate background model computation. Vector2_f loc_f = Vector2_f::Cast(loc); if (camera_motion) { Vector2_f residual = HomographyAdapter::TransformPoint( tracking_data->background_model(), loc_f) - loc_f; flow_x -= residual.x(); flow_y -= residual.y(); } data->add_vector_data(flow_x); data->add_vector_data(flow_y); data->add_row_indices(loc.y()); if (feature.has_binary_feature_descriptor()) { data->add_feature_descriptors()->set_data( feature.binary_feature_descriptor().data()); } if (long_tracks) { data->add_track_id(feature.track_id()); } const int curr_col = loc.x(); if (curr_col != last_col) { CHECK_LT(last_col, curr_col); CHECK_EQ(-1, col_start[curr_col]); col_start[curr_col] = data->row_indices_size() - 1; last_col = curr_col; } else { CHECK_LE(last_row, loc.y()); } last_row = loc.y(); } col_start[0] = 0; col_start[options_.domain_width()] = num_vectors; // Fill unset values with previously set value. Propagate end value. for (int i = options_.domain_width() - 1; i > 0; --i) { if (col_start[i] < 0) { DCHECK_GE(col_start[i + 1], 0); col_start[i] = col_start[i + 1]; } } for (const auto& col_idx : col_start) { data->add_col_starts(col_idx); } // Check monotonicity of the row indices. for (int c = 0; c < options_.domain_width(); ++c) { const int r_start = data->col_starts(c); const int r_end = data->col_starts(c + 1); for (int r = r_start; r < r_end - 1; ++r) { CHECK_LE(data->row_indices(r), data->row_indices(r + 1)); } } CHECK_EQ(data->vector_data_size(), 2 * data->row_indices_size()); *data->mutable_actively_discarded_tracked_ids() = feature_list.actively_discarded_tracked_ids(); } void FlowPackager::EncodeTrackingData(const TrackingData& tracking_data, BinaryTrackingData* binary_data) const { CHECK(options_.binary_tracking_data_support()); CHECK(binary_data != nullptr); int32 frame_flags = 0; const bool high_profile = options_.use_high_profile(); if (high_profile) { frame_flags |= TrackingData::FLAG_PROFILE_HIGH; } else { frame_flags |= TrackingData::FLAG_PROFILE_BASELINE; // No op. } if (options_.high_fidelity_16bit_encode()) { frame_flags |= TrackingData::FLAG_HIGH_FIDELITY_VECTORS; } // Copy background flag. frame_flags |= tracking_data.frame_flags() & TrackingData::FLAG_BACKGROUND_UNSTABLE; const TrackingData::MotionData& motion_data = tracking_data.motion_data(); int32 num_vectors = motion_data.num_elements(); // Compute maximum vector or delta vector value. float max_vector_value = 0; if (high_profile) { for (int k = 2; k < 2 * num_vectors; ++k) { max_vector_value = std::max( max_vector_value, fabs(motion_data.vector_data(k) - motion_data.vector_data(k - 2)) * 1.02f); // Expand by 2% to account for // rounding issues. } } else { for (const float vector_value : motion_data.vector_data()) { max_vector_value = std::max(max_vector_value, fabs(vector_value)); } } const int32 domain_width = tracking_data.domain_width(); const int32 domain_height = tracking_data.domain_height(); CHECK_LT(domain_height, 256) << "Only heights below 256 are supported."; const float frame_aspect = tracking_data.frame_aspect(); // Limit vector value from above (to 20% frame diameter) and below (small // eps). const float max_vector_threshold = hypot(domain_width, domain_height) * 0.2f; // Warn if too much truncation. if (max_vector_value > max_vector_threshold * 1.5f) { LOG(WARNING) << "A lot of truncation will occur during encoding. " << "Vector magnitudes are larger than 20% of the " << "frame diameter."; } max_vector_value = std::min(max_vector_threshold, std::max(1e-4f, max_vector_value)); // Compute scales for 16bit and 8bit float -> int conversion. // Use highest bit for sign. const int kByteMax16 = (1 << 15) - 1; const int kByteMax8 = (1 << 7) - 1; // Scale such that highest vector value is mapped to kByteMax int scale_16 = std::ceil(kByteMax16 / max_vector_value); int scale_8 = std::ceil(kByteMax8 / max_vector_value); const int32 scale = options_.high_fidelity_16bit_encode() ? scale_16 : scale_8; const float inv_scale = 1.0f / scale; const int kByteMax = options_.high_fidelity_16bit_encode() ? kByteMax16 : kByteMax8; // Compressed flow to be encoded in binary format. std::vector flow_compressed_16; std::vector flow_compressed_8; flow_compressed_16.reserve(num_vectors); flow_compressed_8.reserve(num_vectors); std::vector row_idx; row_idx.reserve(num_vectors); float average_error = 0; std::vector col_starts(motion_data.col_starts().begin(), motion_data.col_starts().end()); // Separate both implementations for easier readability. // For details please refer to description in proto. // Low profile: // * Encode vectors by scaling to integer format. // * Keep sparse matrix format as is // High profile: // * Encode deltas between vectors scaling them to integers // * Re-use encoded vectors if delta is small, use ADVANCE flag in row // index. // * Delta encode row indices to reduce magnitude. // * If two row deltas are small (< 8), encode in one byte if (!high_profile) { // Traverse columns. for (int c = 0; c < col_starts.size() - 1; ++c) { const int r_start = col_starts[c]; const int r_end = col_starts[c + 1]; for (int r = r_start; r < r_end; ++r) { const float flow_x_32f = motion_data.vector_data(2 * r); const float flow_y_32f = motion_data.vector_data(2 * r + 1); const int flow_x = std::max(-kByteMax, std::min(kByteMax, flow_x_32f * scale)); const int flow_y = std::max(-kByteMax, std::min(kByteMax, flow_y_32f * scale)); average_error += 0.5f * (fabs(flow_x * inv_scale - flow_x_32f) + fabs(flow_y * inv_scale - flow_y_32f)); if (options_.high_fidelity_16bit_encode()) { flow_compressed_16.push_back(flow_x); flow_compressed_16.push_back(flow_y); } else { flow_compressed_8.push_back(flow_x); flow_compressed_8.push_back(flow_y); } DCHECK_LT(motion_data.row_indices(r), 256); row_idx.push_back(motion_data.row_indices(r)); } } } else { // Compress flow. int prev_flow_x = 0; int prev_flow_y = 0; const float reuse_threshold = options_.high_profile_reuse_threshold(); int compressible = 0; std::vector compressions_per_column(domain_width, 0); const int kAdvanceFlag = FlowPackagerOptions::ADVANCE_FLAG; const int kDoubleIndexEncode = FlowPackagerOptions::DOUBLE_INDEX_ENCODE; const int kIndexMask = FlowPackagerOptions::INDEX_MASK; // Traverse columns. for (int c = 0; c < motion_data.col_starts().size() - 1; ++c) { const int r_start = col_starts[c]; const int r_end = col_starts[c + 1]; for (int r = r_start; r < r_end; ++r) { int flow_x = 0; int flow_y = 0; bool advance = true; const float flow_x_32f = motion_data.vector_data(2 * r); const float flow_y_32f = motion_data.vector_data(2 * r + 1); // Delta coding of vectors. const float diff_x = flow_x_32f - prev_flow_x * inv_scale; const float diff_y = flow_y_32f - prev_flow_y * inv_scale; // Determine if previous flow can be re-used. if (fabs(diff_x) < reuse_threshold && fabs(diff_y) < reuse_threshold) { advance = false; } else { flow_x = std::max(-kByteMax, std::min(kByteMax, diff_x * scale)); flow_y = std::max(-kByteMax, std::min(kByteMax, diff_y * scale)); prev_flow_x += flow_x; prev_flow_y += flow_y; } average_error += 0.5f * (fabs(prev_flow_x * inv_scale - flow_x_32f) + fabs(prev_flow_y * inv_scale - flow_y_32f)); // Combine into one 32 or 16 bit value (clear sign bits for the // right part before combining). if (advance) { if (options_.high_fidelity_16bit_encode()) { flow_compressed_16.push_back(flow_x); flow_compressed_16.push_back(flow_y); } else { flow_compressed_8.push_back(flow_x); flow_compressed_8.push_back(flow_y); } } // Delta code row indices in high profile mode and use two top bits // for status: // 10: single row encode, use next vector data. // (ADVANCE_FLAG) // // 11: double row encode: (3 bit + 3 bit = maximum of 7 + 7 row delta), // use next vector data for each. // (ADVANCE_FLAG | DOUBLE_INDEX_ENCODE) // // 00: single row encode + no advance (re-use previous vector data). // (no flags set) // // 01: double row encode + no advance (re-use previous vector data for // each). // (DOUBLE_INDEX_ENCODE) // Delta compress. int delta_row = motion_data.row_indices(r) - (r == r_start ? 0 : motion_data.row_indices(r - 1)); CHECK_GE(delta_row, 0); bool combined = false; if (r > r_start) { int prev_row_idx = row_idx.back(); if (!(prev_row_idx & kDoubleIndexEncode) && // Single encode. (prev_row_idx & kAdvanceFlag) == advance) { // Same advance flag. // Both compressible (each index fits in 3 bit). if (delta_row < 8 && (prev_row_idx & kIndexMask) < 8) { // Encode two deltas into 6 bit. prev_row_idx = ((prev_row_idx & 0x07) << 3) | delta_row | kDoubleIndexEncode | (advance ? kAdvanceFlag : 0); row_idx.back() = prev_row_idx; // Record as one compression for this column. ++compressions_per_column[c]; ++compressible; combined = true; } } } if (!combined) { while (delta_row > kIndexMask) { // Special case of large displacement. Duplicate vector until sum of // deltas reaches target delta). row_idx.push_back(kIndexMask | (advance ? kAdvanceFlag : 0)); delta_row -= kIndexMask; advance = false; // Store same vector again, re-use previously // encoded vector data. // Record as one addition for the column. --compressions_per_column[c]; ++num_vectors; } row_idx.push_back(delta_row | (advance ? kAdvanceFlag : 0)); } } } // Count number of advance flags encoded. int encoded = 0; for (int idx : row_idx) { if (idx & kAdvanceFlag) { encoded += (idx & kDoubleIndexEncode) ? 2 : 1; } } if (options_.high_fidelity_16bit_encode()) { CHECK_EQ(2 * encoded, flow_compressed_16.size()); } else { CHECK_EQ(2 * encoded, flow_compressed_8.size()); } // Adjust column start by compressions. int curr_adjust = 0; for (int k = 0; k < domain_width; ++k) { curr_adjust -= compressions_per_column[k]; col_starts[k + 1] += curr_adjust; CHECK_LE(col_starts[k], col_starts[k + 1]); } CHECK_EQ(row_idx.size(), col_starts.back()); CHECK_EQ(num_vectors, row_idx.size() + compressible); } // Delta compress col_starts. std::vector col_start_delta(domain_width + 1, 0); col_start_delta[0] = col_starts[0]; for (int k = 1; k < domain_width + 1; ++k) { const int delta = col_starts[k] - col_starts[k - 1]; CHECK_LT(delta, 256) << "Only up to 255 items per column supported."; col_start_delta[k] = delta; } VLOG(1) << "error: " << average_error / (num_vectors + 1) << " additions: " << num_vectors - motion_data.num_elements(); const Homography& background_model = tracking_data.background_model(); const float scale_x = 1.0f / tracking_data.domain_width(); const float scale_y = 1.0f / tracking_data.domain_height(); Homography homog_scale = HomographyAdapter::Embed( AffineAdapter::FromArgs(0, 0, scale_x, 0, 0, scale_y)); Homography inv_homog_scale = HomographyAdapter::Embed( AffineAdapter::FromArgs(0, 0, 1.0f / scale_x, 0, 0, 1.0f / scale_y)); // Might be just the identity if not set. const Homography background_model_scaled = ModelCompose3(homog_scale, background_model, inv_homog_scale); std::string background_model_string = absl::StrCat(EncodeToString(background_model.h_00()), EncodeToString(background_model.h_01()), EncodeToString(background_model.h_02()), EncodeToString(background_model.h_10()), EncodeToString(background_model.h_11()), EncodeToString(background_model.h_12()), EncodeToString(background_model.h_20()), EncodeToString(background_model.h_21())); std::string* data = binary_data->mutable_data(); data->clear(); int32 vector_size = options_.high_fidelity_16bit_encode() ? flow_compressed_16.size() : flow_compressed_8.size(); int32 row_idx_size = row_idx.size(); absl::StrAppend(data, EncodeToString(frame_flags), EncodeToString(domain_width), EncodeToString(domain_height), EncodeToString(frame_aspect), background_model_string, EncodeToString(scale), EncodeToString(num_vectors), EncodeVectorToString(col_start_delta), EncodeToString(row_idx_size), EncodeVectorToString(row_idx), EncodeToString(vector_size), (options_.high_fidelity_16bit_encode() ? EncodeVectorToString(flow_compressed_16) : EncodeVectorToString(flow_compressed_8))); VLOG(1) << "Binary data size: " << data->size() << " for " << num_vectors << " (" << vector_size << ")"; } std::string PopSubstring(int len, absl::string_view* piece) { std::string result = std::string(piece->substr(0, len)); piece->remove_prefix(len); return result; } void FlowPackager::DecodeTrackingData(const BinaryTrackingData& container_data, TrackingData* tracking_data) const { CHECK(tracking_data != nullptr); absl::string_view data(container_data.data()); int32 frame_flags = 0; int32 domain_width = 0; int32 domain_height = 0; std::vector background_model; int32 scale = 0; int32 num_vectors = 0; float frame_aspect = 0.0f; DecodeFromStringView(PopSubstring(4, &data), &frame_flags); DecodeFromStringView(PopSubstring(4, &data), &domain_width); DecodeFromStringView(PopSubstring(4, &data), &domain_height); DecodeFromStringView(PopSubstring(4, &data), &frame_aspect); CHECK_LE(domain_width, 256); CHECK_LE(domain_height, 256); DecodeVectorFromStringView( PopSubstring(4 * HomographyAdapter::NumParameters(), &data), &background_model); DecodeFromStringView(PopSubstring(4, &data), &scale); DecodeFromStringView(PopSubstring(4, &data), &num_vectors); tracking_data->set_frame_flags(frame_flags); tracking_data->set_domain_width(domain_width); tracking_data->set_domain_height(domain_height); tracking_data->set_frame_aspect(frame_aspect); *tracking_data->mutable_background_model() = HomographyAdapter::FromFloatPointer(&background_model[0], false); TrackingData::MotionData* motion_data = tracking_data->mutable_motion_data(); motion_data->set_num_elements(num_vectors); const bool high_profile = frame_flags & TrackingData::FLAG_PROFILE_HIGH; const bool high_fidelity = frame_flags & TrackingData::FLAG_HIGH_FIDELITY_VECTORS; const float flow_denom = 1.0f / scale; std::vector col_starts_delta; DecodeVectorFromStringView(PopSubstring(domain_width + 1, &data), &col_starts_delta); // Delta decompress. std::vector col_starts; col_starts.reserve(domain_width + 1); int column = 0; for (auto col : col_starts_delta) { column += col; col_starts.push_back(column); } std::vector row_idx; int32 row_idx_size; DecodeFromStringView(PopSubstring(4, &data), &row_idx_size); // Should not have more row indices than vectors. (One for each in baseline // profile, less in high profile). CHECK_LE(row_idx_size, num_vectors); DecodeVectorFromStringView(PopSubstring(row_idx_size, &data), &row_idx); // Records for each vector whether to advance pointer in the vector data array // or re-use previously read data. std::vector advance(num_vectors, true); if (high_profile) { // Unpack row indices, populate advance. const int kAdvanceFlag = FlowPackagerOptions::ADVANCE_FLAG; const int kDoubleIndexEncode = FlowPackagerOptions::DOUBLE_INDEX_ENCODE; const int kIndexMask = FlowPackagerOptions::INDEX_MASK; std::vector column_expansions(domain_width, 0); std::vector row_idx_unpacked; row_idx_unpacked.reserve(num_vectors); advance.clear(); for (int c = 0; c < col_starts.size() - 1; ++c) { const int r_start = col_starts[c]; const int r_end = col_starts[c + 1]; uint8 prev_row_idx = 0; for (int r = r_start; r < r_end; ++r) { // Use top bit as indicator to advance. advance.push_back(row_idx[r] & kAdvanceFlag); // Double encode? if (row_idx[r] & kDoubleIndexEncode) { // Indices are encoded as each 3 bit offset within kIndexMask. prev_row_idx += (row_idx[r] >> 3) & 0x7; row_idx_unpacked.push_back(prev_row_idx); prev_row_idx += row_idx[r] & 0x7; row_idx_unpacked.push_back(prev_row_idx); // Duplicate advance setting. advance.push_back(advance.back()); ++column_expansions[c]; } else { // Single encode. prev_row_idx += row_idx[r] & kIndexMask; // Clear status. row_idx_unpacked.push_back(prev_row_idx); } } } row_idx.swap(row_idx_unpacked); CHECK_EQ(num_vectors, row_idx.size()); // Adjust column start by expansions. int curr_adjust = 0; for (int k = 0; k < domain_width; ++k) { curr_adjust += column_expansions[k]; col_starts[k + 1] += curr_adjust; } } CHECK_EQ(num_vectors, col_starts.back()); int vector_data_size; DecodeFromStringView(PopSubstring(4, &data), &vector_data_size); int prev_flow_x = 0; int prev_flow_y = 0; if (high_fidelity) { std::vector vector_data; DecodeVectorFromStringView( PopSubstring(sizeof(vector_data[0]) * vector_data_size, &data), &vector_data); int counter = 0; for (int k = 0; k < num_vectors; ++k) { if (advance[k]) { // Read new vector data. int flow_x = vector_data[counter++]; int flow_y = vector_data[counter++]; if (high_profile) { // Delta decode in high profile. flow_x += prev_flow_x; flow_y += prev_flow_y; prev_flow_x = flow_x; prev_flow_y = flow_y; } motion_data->add_vector_data(flow_x * flow_denom); motion_data->add_vector_data(flow_y * flow_denom); } else { // Re-use previous vector data. motion_data->add_vector_data(prev_flow_x * flow_denom); motion_data->add_vector_data(prev_flow_y * flow_denom); } } CHECK_EQ(vector_data_size, counter); } else { std::vector vector_data; DecodeVectorFromStringView( PopSubstring(sizeof(vector_data[0]) * vector_data_size, &data), &vector_data); int counter = 0; for (int k = 0; k < num_vectors; ++k) { if (advance[k]) { // Read new vector data. int flow_x = vector_data[counter++]; int flow_y = vector_data[counter++]; if (high_profile) { // Delta decode in high profile. flow_x += prev_flow_x; flow_y += prev_flow_y; prev_flow_x = flow_x; prev_flow_y = flow_y; } motion_data->add_vector_data(flow_x * flow_denom); motion_data->add_vector_data(flow_y * flow_denom); } else { // Re-use previous vector data. motion_data->add_vector_data(prev_flow_x * flow_denom); motion_data->add_vector_data(prev_flow_y * flow_denom); } } CHECK_EQ(vector_data_size, counter); } for (auto idx : row_idx) { motion_data->add_row_indices(idx); } for (auto column : col_starts) { motion_data->add_col_starts(column); } } void FlowPackager::BinaryTrackingDataToContainer( const BinaryTrackingData& binary_data, TrackingContainer* container) const { CHECK(container != nullptr); container->Clear(); container->set_header("TRAK"); container->set_version(1); container->set_size(binary_data.data().size()); *container->mutable_data() = binary_data.data(); } void FlowPackager::BinaryTrackingDataFromContainer( const TrackingContainer& container, BinaryTrackingData* binary_data) const { CHECK_EQ("TRAK", container.header()); CHECK_EQ(1, container.version()) << "Unsupported version."; *binary_data->mutable_data() = container.data(); } void FlowPackager::DecodeMetaData(const TrackingContainer& container_data, MetaData* meta_data) const { CHECK(meta_data != nullptr); CHECK_EQ("META", container_data.header()); CHECK_EQ(1, container_data.version()) << "Unsupported version."; absl::string_view data(container_data.data()); int32 num_frames; DecodeFromStringView(PopSubstring(4, &data), &num_frames); meta_data->set_num_frames(num_frames); for (int k = 0; k < num_frames; ++k) { int32 msec; int32 stream_offset; DecodeFromStringView(PopSubstring(4, &data), &msec); DecodeFromStringView(PopSubstring(4, &data), &stream_offset); MetaData::TrackOffset* track_offset = meta_data->add_track_offsets(); track_offset->set_msec(msec); track_offset->set_stream_offset(stream_offset); } } void FlowPackager::FinalizeTrackingContainerFormat( std::vector* timestamps, TrackingContainerFormat* container_format) { CHECK(container_format != nullptr); // Compute binary sizes of track_data. const int num_frames = container_format->track_data_size(); std::vector msecs(num_frames, 0); if (timestamps) { CHECK_EQ(num_frames, timestamps->size()); msecs = *timestamps; } std::vector sizes(num_frames, 0); for (int f = 0; f < num_frames; ++f) { // Default size of container: 12 bytes + binary data size (see comment for // TrackingContainer in flow_packager.proto). sizes[f] = container_format->track_data(f).data().size() + 12; } // Store relative offsets w.r.t. end of MetaData. MetaData meta_data; InitializeMetaData(num_frames, msecs, sizes, &meta_data); // Serialize metadata to binary. TrackingContainer* meta = container_format->mutable_meta_data(); meta->Clear(); meta->set_header("META"); std::string* binary_metadata = meta->mutable_data(); absl::StrAppend(binary_metadata, EncodeToString(meta_data.num_frames())); for (auto& track_offset : *meta_data.mutable_track_offsets()) { absl::StrAppend(binary_metadata, EncodeToString(track_offset.msec()), EncodeToString(track_offset.stream_offset())); } meta->set_size(binary_metadata->size()); // Add term header. TrackingContainer* term = container_format->mutable_term_data(); term->set_header("TERM"); term->set_size(0); } void FlowPackager::FinalizeTrackingContainerProto( std::vector* timestamps, TrackingContainerProto* proto) { CHECK(proto != nullptr); // Compute binary sizes of track_data. const int num_frames = proto->track_data_size(); std::vector msecs(num_frames, 0); if (timestamps) { CHECK_EQ(num_frames, timestamps->size()); msecs = *timestamps; } std::vector sizes(num_frames, 0); TrackingContainerProto temp_proto; BinaryTrackingData* temp_track_data = temp_proto.add_track_data(); for (int f = 0; f < num_frames; ++f) { // Swap current track data in and out of temp_track_data to determine total // encoding size with proto preamble. proto->mutable_track_data(f)->Swap(temp_track_data); sizes[f] = temp_proto.ByteSize(); proto->mutable_track_data(f)->Swap(temp_track_data); } proto->clear_meta_data(); InitializeMetaData(num_frames, msecs, sizes, proto->mutable_meta_data()); } void FlowPackager::InitializeMetaData(int num_frames, const std::vector& msecs, const std::vector& data_sizes, MetaData* meta_data) const { meta_data->set_num_frames(num_frames); CHECK_EQ(num_frames, msecs.size()); CHECK_EQ(num_frames, data_sizes.size()); int curr_offset = 0; for (int f = 0; f < num_frames; ++f) { MetaData::TrackOffset* track_offset = meta_data->add_track_offsets(); track_offset->set_msec(msecs[f]); track_offset->set_stream_offset(curr_offset); curr_offset += data_sizes[f]; } } void FlowPackager::AddContainerToString(const TrackingContainer& container, std::string* binary_data) { CHECK(binary_data != nullptr); std::string header_string(container.header()); CHECK_EQ(4, header_string.size()); std::vector header{header_string[0], header_string[1], header_string[2], header_string[3]}; absl::StrAppend(binary_data, EncodeVectorToString(header), EncodeToString(container.version()), EncodeToString(container.size()), container.data()); } std::string FlowPackager::SplitContainerFromString( absl::string_view* binary_data, TrackingContainer* container) { CHECK(binary_data != nullptr); CHECK(container != nullptr); CHECK_GE(binary_data->size(), 12) << "Data does not contain " << "valid container"; container->set_header(PopSubstring(4, binary_data)); int version; DecodeFromStringView(PopSubstring(4, binary_data), &version); int size; DecodeFromStringView(PopSubstring(4, binary_data), &size); container->set_version(version); container->set_size(size); if (size > 0) { container->set_data(PopSubstring(size, binary_data)); } return container->header(); } void FlowPackager::TrackingContainerFormatToBinary( const TrackingContainerFormat& container_format, std::string* binary) { CHECK(binary != nullptr); binary->clear(); AddContainerToString(container_format.meta_data(), binary); for (const auto& track_data : container_format.track_data()) { AddContainerToString(track_data, binary); } AddContainerToString(container_format.term_data(), binary); } void FlowPackager::TrackingContainerFormatFromBinary( const std::string& binary, TrackingContainerFormat* container_format) { CHECK(container_format != nullptr); container_format->Clear(); absl::string_view data(binary); CHECK_EQ("META", SplitContainerFromString( &data, container_format->mutable_meta_data())); MetaData meta_data; DecodeMetaData(container_format->meta_data(), &meta_data); for (int f = 0; f < meta_data.num_frames(); ++f) { TrackingContainer* container = container_format->add_track_data(); CHECK_EQ("TRAK", SplitContainerFromString(&data, container)); } CHECK_EQ("TERM", SplitContainerFromString( &data, container_format->mutable_term_data())); } void FlowPackager::SortRegionFlowFeatureList( float scale_x, float scale_y, RegionFlowFeatureList* feature_list) const { CHECK(feature_list != nullptr); // Sort features lexicographically. std::sort(feature_list->mutable_feature()->begin(), feature_list->mutable_feature()->end(), IntegerColumnComparator(scale_x, scale_y, options_.domain_width(), options_.domain_height())); } bool FlowPackager::CompatibleForEncodeWithoutDuplication( const TrackingData& tracking_data) const { const TrackingData::MotionData& motion_data = tracking_data.motion_data(); for (int c = 0; c < motion_data.col_starts_size() - 1; ++c) { const int r_start = motion_data.col_starts(c); const int r_end = motion_data.col_starts(c + 1); for (int r = r_start; r < r_end; ++r) { if (motion_data.row_indices(r) - (r == r_start ? 0 : motion_data.row_indices(r - 1)) >= 64) { return false; } } } return true; } } // namespace mediapipe