// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto2";

package mediapipe.autoflip;

import "mediapipe/examples/desktop/autoflip/autoflip_messages.proto";
import "mediapipe/examples/desktop/autoflip/quality/kinematic_path_solver.proto";

// All relevant information for key frames, including timestamp and detected
// features. This object should be generated by calling PackKeyFrameInfo() in
// the util namespace. It is passed in to ComputeFrameCropRegion().
message KeyFrameInfo {
  // Frame timestamp (in microseconds).
  // NOTE(review): the `_ms` suffix suggests milliseconds while this comment
  // says microseconds; confirm the unit against the code that populates this
  // field. The name is kept as-is for source compatibility.
  optional int64 timestamp_ms = 1;

  // Detected features.
  optional DetectionSet detections = 2;
}

// User-specified key frame crop options (such as target width and height).
message KeyFrameCropOptions {
  // Target crop size.
  // Note: if you are using the SceneCroppingCalculator, DO NOT set these fields
  // manually as they will be then overwritten inside the calculator.
  optional int32 target_width = 1;
  optional int32 target_height = 2;

  // Option for how region score is aggregated from individual feature scores.
  // TODO: consider merging this enum type into the signal fusing
  // calculator.
  enum ScoreAggregationType {
    // Unknown value (should not be used).
    UNKNOWN = 0;

    // Takes the score of the feature with maximum score.
    MAXIMUM = 1;

    // Takes the sum of the scores of the required regions.
    SUM_REQUIRED = 2;

    // Takes the sum of the scores of all the regions that are fully covered.
    SUM_ALL = 3;

    // Uses a constant score 1.0 for all crop regions.
    CONSTANT = 4;
  }

  // How the region score is aggregated; SUM_ALL when not set.
  optional ScoreAggregationType score_aggregation_type = 3 [default = SUM_ALL];

  // Minimum centered coverage fraction (in length, not area) for a non-required
  // region to be included in the crop region. Applies to both dimensions.
  optional float non_required_region_min_coverage_fraction = 4
      [default = 0.5];
}

// Key frame crop result containing the crop region rectangle, along with
// summary information on the cropping, such as whether all required regions
// could fit inside the target size, and what fraction of non-required regions
// are fully covered. This object is returned by ComputeFrameCropRegion() in
// the FrameCropRegionComputer class.
message KeyFrameCropResult {
  // Successfully covers all required features. If there are no required
  // regions, this field is set to true.
  optional bool are_required_regions_covered_in_target_size = 1;

  // Fraction of non-required features covered.
  optional float fraction_non_required_covered = 2;

  // Whether required crop region is empty (no detections).
  optional bool required_region_is_empty = 3;

  // Whether (full) crop region is empty (no detections).
  optional bool region_is_empty = 4;

  // Computed required crop region.
  optional Rect required_region = 5;

  // Computed (full) crop region.
  optional Rect region = 6;

  // Score of the computed crop region based on the detected features.
  optional float region_score = 7;

  // Frame timestamp (in microseconds).
  // NOTE(review): the `_ms` suffix suggests milliseconds while this comment
  // says microseconds; confirm the unit. Name kept for source compatibility.
  optional int64 timestamp_ms = 8;
}

// Compact processed scene key frame info containing timestamp, center position,
// and score. Each key frame has one SceneKeyFrameCompactInfo in
// SceneKeyFrameCropSummary.
message SceneKeyFrameCompactInfo {
  // Key frame timestamp (in microseconds).
  // NOTE(review): the `_ms` suffix suggests milliseconds while this comment
  // says microseconds; confirm the unit. Name kept for source compatibility.
  optional int64 timestamp_ms = 1;

  // Key frame crop region center in the horizontal/vertical directions (in
  // pixels).
  optional float center_x = 2;
  optional float center_y = 3;

  // Key frame crop region score.
  optional float score = 4;
}

// Summary information for the key frame crop results in a scene. Computed by
// AnalyzeSceneKeyFrameCropResults() in the SceneCameraMotionAnalyzer class.
// Used to decide camera motion type and populate salient point frames.
message SceneKeyFrameCropSummary {
  // Scene frame size.
  optional int32 scene_frame_width = 1;
  optional int32 scene_frame_height = 2;

  // Number of key frames in the scene.
  optional int32 num_key_frames = 3;

  // Scene key frame compact infos.
  repeated SceneKeyFrameCompactInfo key_frame_compact_infos = 4;

  // The minimum/maximum values of key frames' crop centers in the horizontal/
  // vertical directions.
  optional float key_frame_center_min_x = 5;
  optional float key_frame_center_max_x = 6;
  optional float key_frame_center_min_y = 7;
  optional float key_frame_center_max_y = 8;

  // The union of all the key frame required crop regions. When camera is steady
  // the crop window is set to cover this union.
  optional Rect key_frame_required_crop_region_union = 9;

  // The minimum/maximum scores of key frames' crop regions.
  optional float key_frame_min_score = 10;
  optional float key_frame_max_score = 11;

  // Size of the scene's crop window, calculated as the maximum of the target
  // size and the largest size of the key frames' crop regions in the scene.
  optional int32 crop_window_width = 12;
  optional int32 crop_window_height = 13;

  // Indicator for whether the scene has any frame with any salient region.
  optional bool has_salient_region = 14;

  // Indicator for whether the scene has any frame with any required salient
  // region.
  optional bool has_required_salient_region = 15;

  // Percentage of key frames that are successfully cropped (i.e. covers all
  // required regions inside the target size).
  optional float frame_success_rate = 16;

  // Amount of motion in the horizontal/vertical direction (i.e. the horizontal/
  // vertical range of the key frame crop centers' position as a fraction of
  // frame width/height).
  optional float horizontal_motion_amount = 17;
  optional float vertical_motion_amount = 18;
}

// Scene camera motion determined by the SceneCameraMotionAnalyzer class.
message SceneCameraMotion {
  // Camera focuses on a fixed center throughout the scene.
  message SteadyMotion {
    // Steady look-at center in horizontal/vertical directions (in pixels).
    optional float steady_look_at_center_x = 1;
    optional float steady_look_at_center_y = 2;
  }

  // Camera tracks key frame salient region centers.
  message TrackingMotion {
    // Fields to be added if necessary.
  }

  // Camera sweeps from one point to another.
  message SweepingMotion {
    // Starting and ending center positions for camera sweeping in pixels.
    optional float sweep_start_center_x = 1;
    optional float sweep_start_center_y = 2;
    optional float sweep_end_center_x = 3;
    optional float sweep_end_center_y = 4;
  }

  // The motion type chosen for the scene; at most one member is set.
  oneof motion_type {
    SteadyMotion steady_motion = 1;
    TrackingMotion tracking_motion = 2;
    SweepingMotion sweeping_motion = 3;
    // Other types that we might support later.
  }
}

// User-specified options for analyzing scene camera motion from a collection of
// key frame crop regions.
message SceneCameraMotionAnalyzerOptions {
  // Field number 9 is reserved and must not be reused.
  reserved 9;

  // If there is small motion within the scene keep the camera steady at the
  // center.
  optional float motion_stabilization_threshold_percent = 1 [default = .30];

  // Snap to center if there is small motion and already focused close to the
  // center.
  optional float snap_center_max_distance_percent = 2 [default = .08];

  // Maximum weight for a constraint. Scales scores accordingly so that the
  // maximum score is equal to this weight.
  optional float maximum_salient_point_weight = 3 [default = 100.0];

  // Normalized bound for SalientPoint's in the frame from the border. This is
  // uniformly applied to the left, right, top, and bottom. It should be
  // strictly less than 0.5. A narrower bound (closer to 0.5) gives better
  // constraint enforcement.
  optional float salient_point_bound = 4 [default = 0.48];

  // Indicator for whether sweeping is allowed. Note that if a scene can be
  // seamlessly padded with solid background color, sweeping will be disabled
  // regardless of the value of this flag.
  optional bool allow_sweeping = 5 [default = true];

  // Minimal scene time span in seconds to allow camera sweeping.
  optional float minimum_scene_span_sec_for_sweeping = 6 [default = 1.0];

  // If success rate in a scene is less than this, then use camera sweeping.
  optional float minimum_success_rate_for_sweeping = 7 [default = 0.4];

  // If true, sweep entire frame. Otherwise, sweep the crop window.
  optional bool sweep_entire_frame = 8 [default = true];

  // When no salient region is received, the default behavior is to return the
  // camera to a center-focused location. When this flag is set to a value >0,
  // the camera will remain at its last position for this amount of time before
  // recentering (if the last scene camera motion type was steady).
  optional int64 duration_before_centering_us = 10;
}

// Video cropping summary information for debugging/statistics.
message VideoCroppingSummary {
  // Cropping summary for a single scene.
  message SceneCroppingSummary {
    // Scene span in seconds.
    optional float start_sec = 1;
    optional float end_sec = 2;

    // Indicator for whether this scene was cut at a real physical scene
    // boundary (as opposed to force flush).
    optional bool is_end_of_scene = 3;

    // Scene camera motion.
    optional SceneCameraMotion camera_motion = 4;

    // Indicator for whether the scene is padded.
    optional bool is_padded = 5;
  }

  // Cropping summaries for all the scenes in the video.
  repeated SceneCroppingSummary scene_summaries = 1;
}

// Selects the camera path model and carries its options. At most one member
// of `camera_model_oneof` is set.
message CameraMotionOptions {
  // Options for the polynomial-regression path solver.
  message PolynomialRegressionPathSolver {
    // Number of frames from prior buffer to be used to smooth out camera
    // trajectory when it was a forced flush.
    optional int32 prior_frame_buffer_size = 1 [default = 30];
  }

  oneof camera_model_oneof {
    // Fits a poly line to keypoints to find a smooth camera path.
    PolynomialRegressionPathSolver polynomial_path_solver = 1;

    // Maintains a kinematic state of the camera, updated with keypoints, to
    // find a smooth camera path. Currently optimized for real-time operation.
    KinematicOptions kinematic_options = 2;
  }
}