mediapipe/mediapipe2/examples/desktop/autoflip/quality/cropping.proto
2021-06-10 23:01:19 +00:00

242 lines
10 KiB
Protocol Buffer

// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe.autoflip;
import "mediapipe/examples/desktop/autoflip/autoflip_messages.proto";
import "mediapipe/examples/desktop/autoflip/quality/kinematic_path_solver.proto";
// All relevant information for key frames, including timestamp and detected
// features. This object should be generated by calling PackKeyFrameInfo() in
// the util namespace. It is passed in to ComputeFrameCropRegion().
message KeyFrameInfo {
  // Timestamp of the frame, documented as being in microseconds.
  // NOTE(review): the `_ms` suffix reads as milliseconds, which disagrees
  // with the documented unit -- confirm against the producers of this message
  // before relying on either interpretation.
  optional int64 timestamp_ms = 1;

  // Set of features detected in the frame.
  optional DetectionSet detections = 2;
}
// User-specified key frame crop options (such as target width and height).
message KeyFrameCropOptions {
  // Strategy for combining individual feature scores into one region score.
  // TODO: consider merging this enum type into the signal fusing
  // calculator.
  enum ScoreAggregationType {
    // Placeholder for an unset value; should not be used.
    UNKNOWN = 0;
    // Region score is the score of the highest-scoring feature.
    MAXIMUM = 1;
    // Region score is the sum of the scores of the required regions.
    SUM_REQUIRED = 2;
    // Region score is the sum of the scores of every fully covered region.
    SUM_ALL = 3;
    // Every crop region receives the constant score 1.0.
    CONSTANT = 4;
  }

  // Target crop width and height.
  // Note: when using the SceneCroppingCalculator, DO NOT set these two fields
  // by hand; the calculator overwrites them internally.
  optional int32 target_width = 1;
  optional int32 target_height = 2;

  // Aggregation strategy for the region score.
  optional ScoreAggregationType score_aggregation_type = 3 [default = SUM_ALL];

  // Minimum centered coverage fraction, measured in length rather than area,
  // that a non-required region must reach to be included in the crop region.
  // The same threshold applies to both dimensions.
  optional float non_required_region_min_coverage_fraction = 4 [default = 0.5];
}
// Key frame crop result containing the crop region rectangle, along with
// summary information on the cropping, such as whether all required regions
// could fit inside the target size, and what fraction of non-required regions
// are fully covered. This object is returned by ComputeFrameCropRegion() in
// the FrameCropRegionComputer class.
message KeyFrameCropResult {
  // True when all required features are covered within the target size. Also
  // set to true when there are no required regions at all.
  optional bool are_required_regions_covered_in_target_size = 1;

  // Fraction of the non-required features that are covered.
  optional float fraction_non_required_covered = 2;

  // True when the required crop region is empty (no detections).
  optional bool required_region_is_empty = 3;

  // True when the full crop region is empty (no detections).
  optional bool region_is_empty = 4;

  // The computed required crop region.
  optional Rect required_region = 5;

  // The computed full crop region.
  optional Rect region = 6;

  // Score assigned to the computed crop region from the detected features.
  optional float region_score = 7;

  // Timestamp of the frame, documented as being in microseconds.
  // NOTE(review): the `_ms` suffix reads as milliseconds, which disagrees
  // with the documented unit -- confirm before relying on either.
  optional int64 timestamp_ms = 8;
}
// Compact processed scene key frame info containing timestamp, center position,
// and score. Each key frame has one SceneKeyFrameCompactInfo in
// SceneKeyFrameCropSummary.
message SceneKeyFrameCompactInfo {
  // Timestamp of the key frame, documented as being in microseconds.
  // NOTE(review): the `_ms` suffix reads as milliseconds, which disagrees
  // with the documented unit -- confirm before relying on either.
  optional int64 timestamp_ms = 1;

  // Horizontal and vertical position (in pixels) of the center of the key
  // frame's crop region.
  optional float center_x = 2;
  optional float center_y = 3;

  // Score of the key frame's crop region.
  optional float score = 4;
}
// Summary information for the key frame crop results in a scene. Computed by
// AnalyzeSceneKeyFrameCropResults() in the SceneCameraMotionAnalyzer class.
// Used to decide camera motion type and populate salient point frames.
message SceneKeyFrameCropSummary {
  // Width and height of the scene's frames.
  optional int32 scene_frame_width = 1;
  optional int32 scene_frame_height = 2;

  // How many key frames the scene contains.
  optional int32 num_key_frames = 3;

  // Compact info entries for the scene's key frames.
  repeated SceneKeyFrameCompactInfo key_frame_compact_infos = 4;

  // Smallest and largest key frame crop center coordinates along the
  // horizontal (x) and vertical (y) directions.
  optional float key_frame_center_min_x = 5;
  optional float key_frame_center_max_x = 6;
  optional float key_frame_center_min_y = 7;
  optional float key_frame_center_max_y = 8;

  // Union of the required crop regions of all key frames. When the camera is
  // steady, the crop window is set to cover this union.
  optional Rect key_frame_required_crop_region_union = 9;

  // Lowest and highest crop region scores among the key frames.
  optional float key_frame_min_score = 10;
  optional float key_frame_max_score = 11;

  // Dimensions of the scene's crop window: the maximum of the target size and
  // the largest key frame crop region size found in the scene.
  optional int32 crop_window_width = 12;
  optional int32 crop_window_height = 13;

  // Whether any frame in the scene has any salient region.
  optional bool has_salient_region = 14;

  // Whether any frame in the scene has any required salient region.
  optional bool has_required_salient_region = 15;

  // Percentage of key frames cropped successfully, i.e. whose required
  // regions are all covered inside the target size.
  optional float frame_success_rate = 16;

  // Amount of horizontal/vertical motion, i.e. the horizontal/vertical range
  // spanned by the key frame crop centers, expressed as a fraction of the
  // frame width/height.
  optional float horizontal_motion_amount = 17;
  optional float vertical_motion_amount = 18;
}
// Scene camera motion determined by the SceneCameraMotionAnalyzer class.
message SceneCameraMotion {
  // The camera stays focused on one fixed center for the whole scene.
  message SteadyMotion {
    // Horizontal and vertical coordinates (in pixels) of the steady look-at
    // center.
    optional float steady_look_at_center_x = 1;
    optional float steady_look_at_center_y = 2;
  }

  // The camera follows the centers of the key frame salient regions.
  message TrackingMotion {
    // Intentionally empty; fields to be added if necessary.
  }

  // The camera sweeps from one point to another.
  message SweepingMotion {
    // Center position (in pixels) at which the sweep starts.
    optional float sweep_start_center_x = 1;
    optional float sweep_start_center_y = 2;

    // Center position (in pixels) at which the sweep ends.
    optional float sweep_end_center_x = 3;
    optional float sweep_end_center_y = 4;
  }

  // The motion type for the scene; at most one member is set.
  oneof motion_type {
    SteadyMotion steady_motion = 1;
    TrackingMotion tracking_motion = 2;
    SweepingMotion sweeping_motion = 3;
    // Other types that might be supported later go here.
  }
}
// User-specified options for analyzing scene camera motion from a collection of
// key frame crop regions.
message SceneCameraMotionAnalyzerOptions {
  // Field number 9 is reserved and must not be reused.
  reserved 9;

  // When the motion within the scene is small, keep the camera steady at the
  // center.
  optional float motion_stabilization_threshold_percent = 1 [default = 0.30];

  // Snap to the center when the motion is small and the camera is already
  // focused close to the center.
  optional float snap_center_max_distance_percent = 2 [default = 0.08];

  // Upper limit on the weight of a constraint. Scores are scaled so that the
  // maximum score equals this weight.
  optional float maximum_salient_point_weight = 3 [default = 100.0];

  // Normalized bound, measured from the frame border, for SalientPoints. It
  // is applied uniformly to the left, right, top, and bottom, and should be
  // strictly less than 0.5. A narrower bound (closer to 0.5) gives better
  // constraint enforcement.
  optional float salient_point_bound = 4 [default = 0.48];

  // Whether sweeping is allowed. Note that sweeping is disabled regardless of
  // this flag if the scene can be seamlessly padded with a solid background
  // color.
  optional bool allow_sweeping = 5 [default = true];

  // Minimum scene time span, in seconds, for camera sweeping to be allowed.
  optional float minimum_scene_span_sec_for_sweeping = 6 [default = 1.0];

  // Camera sweeping is used when the scene's success rate is below this value.
  optional float minimum_success_rate_for_sweeping = 7 [default = 0.4];

  // If true, sweep across the entire frame; otherwise sweep the crop window.
  optional bool sweep_entire_frame = 8 [default = true];

  // By default, when no salient region is received the camera returns to the
  // center-focused location. When this is set to a value >0, the camera
  // instead stays at its last position for this amount of time before
  // recentering (if the last scene camera motion type was steady).
  optional int64 duration_before_centering_us = 10;
}
// Video cropping summary information for debugging/statistics.
message VideoCroppingSummary {
  // Cropping summary for a single scene of the video.
  message SceneCroppingSummary {
    // Start and end of the scene span, in seconds.
    optional float start_sec = 1;
    optional float end_sec = 2;

    // Whether the scene was cut at a real physical scene boundary, as opposed
    // to a force flush.
    optional bool is_end_of_scene = 3;

    // Camera motion of the scene.
    optional SceneCameraMotion camera_motion = 4;

    // Whether the scene is padded.
    optional bool is_padded = 5;
  }

  // One cropping summary per scene, covering all scenes in the video.
  repeated SceneCroppingSummary scene_summaries = 1;
}
// Options selecting and configuring the camera model; at most one of the
// solvers in `camera_model_oneof` is set.
message CameraMotionOptions {
  // Settings for the polynomial regression path solver.
  message PolynomialRegressionPathSolver {
    // How many frames from the prior buffer are used to smooth out the camera
    // trajectory after a forced flush.
    optional int32 prior_frame_buffer_size = 1 [default = 30];
  }

  oneof camera_model_oneof {
    // Finds a smooth camera path by fitting a poly line to keypoints.
    PolynomialRegressionPathSolver polynomial_path_solver = 1;

    // Finds a smooth camera path by maintaining a kinematic state of the
    // camera that is updated with keypoints. Currently optimized for
    // real-time operation.
    KinematicOptions kinematic_options = 2;
  }
}