242 lines
10 KiB
Protocol Buffer
242 lines
10 KiB
Protocol Buffer
// Copyright 2019 The MediaPipe Authors.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
syntax = "proto2";
|
|
|
|
package mediapipe.autoflip;
|
|
|
|
import "mediapipe/examples/desktop/autoflip/autoflip_messages.proto";
|
|
import "mediapipe/examples/desktop/autoflip/quality/kinematic_path_solver.proto";
|
|
|
|
// All relevant information for a key frame, including its timestamp and
// detected features. This object should be generated by calling
// PackKeyFrameInfo() in the util namespace. It is passed in to
// ComputeFrameCropRegion().
message KeyFrameInfo {
  // Frame timestamp (in microseconds).
  // NOTE(review): the `_ms` suffix suggests milliseconds while this comment
  // says microseconds; confirm the unit against the code that sets this
  // field. The field name is kept as-is for compatibility.
  optional int64 timestamp_ms = 1;
  // Detected features.
  optional DetectionSet detections = 2;
}
|
|
|
|
// User-specified key frame crop options (such as target width and height).
message KeyFrameCropOptions {
  // Target crop size.
  // Note: if you are using the SceneCroppingCalculator, DO NOT set these
  // fields manually, as they will be overwritten inside the calculator.
  optional int32 target_width = 1;
  optional int32 target_height = 2;

  // Option for how the region score is aggregated from individual feature
  // scores.
  // TODO: consider merging this enum type into the signal fusing calculator.
  enum ScoreAggregationType {
    // Unknown value (should not be used).
    UNKNOWN = 0;
    // Takes the score of the feature with maximum score.
    MAXIMUM = 1;
    // Takes the sum of the scores of the required regions.
    SUM_REQUIRED = 2;
    // Takes the sum of the scores of all the regions that are fully covered.
    SUM_ALL = 3;
    // Uses a constant score 1.0 for all crop regions.
    CONSTANT = 4;
  }
  // Aggregation mode to apply; SUM_ALL is used when the field is not set.
  optional ScoreAggregationType score_aggregation_type = 3 [default = SUM_ALL];

  // Minimum centered coverage fraction (in length, not area) for a
  // non-required region to be included in the crop region. Applies to both
  // dimensions.
  optional float non_required_region_min_coverage_fraction = 4 [default = 0.5];
}
|
|
|
|
// Key frame crop result containing the crop region rectangle, along with
// summary information on the cropping, such as whether all required regions
// could fit inside the target size, and what fraction of non-required regions
// are fully covered. This object is returned by ComputeFrameCropRegion() in
// the FrameCropRegionComputer class.
message KeyFrameCropResult {
  // Successfully covers all required features. If there are no required
  // regions, this field is set to true.
  optional bool are_required_regions_covered_in_target_size = 1;
  // Fraction of non-required features covered.
  optional float fraction_non_required_covered = 2;

  // Whether required crop region is empty (no detections).
  optional bool required_region_is_empty = 3;
  // Whether (full) crop region is empty (no detections).
  optional bool region_is_empty = 4;

  // Computed required crop region.
  optional Rect required_region = 5;
  // Computed (full) crop region.
  optional Rect region = 6;

  // Score of the computed crop region based on the detected features.
  optional float region_score = 7;

  // Frame timestamp (in microseconds).
  // NOTE(review): the `_ms` suffix suggests milliseconds while this comment
  // says microseconds; confirm the unit against the code that sets this
  // field. The field name is kept as-is for compatibility.
  optional int64 timestamp_ms = 8;
}
|
|
|
|
// Compact processed scene key frame info containing timestamp, center
// position, and score. Each key frame has one SceneKeyFrameCompactInfo in
// SceneKeyFrameCropSummary.
message SceneKeyFrameCompactInfo {
  // Key frame timestamp (in microseconds).
  // NOTE(review): the `_ms` suffix suggests milliseconds while this comment
  // says microseconds; confirm the unit against the code that sets this
  // field. The field name is kept as-is for compatibility.
  optional int64 timestamp_ms = 1;

  // Key frame crop region center in the horizontal/vertical directions (in
  // pixels).
  optional float center_x = 2;
  optional float center_y = 3;

  // Key frame crop region score.
  optional float score = 4;
}
|
|
|
|
// Summary information for the key frame crop results in a scene. Computed by
// AnalyzeSceneKeyFrameCropResults() in the SceneCameraMotionAnalyzer class.
// Used to decide camera motion type and populate salient point frames.
message SceneKeyFrameCropSummary {
  // Scene frame size.
  optional int32 scene_frame_width = 1;
  optional int32 scene_frame_height = 2;

  // Number of key frames in the scene.
  optional int32 num_key_frames = 3;
  // Scene key frame compact infos.
  repeated SceneKeyFrameCompactInfo key_frame_compact_infos = 4;

  // The minimum/maximum values of key frames' crop centers in the horizontal/
  // vertical directions.
  optional float key_frame_center_min_x = 5;
  optional float key_frame_center_max_x = 6;
  optional float key_frame_center_min_y = 7;
  optional float key_frame_center_max_y = 8;

  // The union of all the key frame required crop regions. When the camera is
  // steady, the crop window is set to cover this union.
  optional Rect key_frame_required_crop_region_union = 9;

  // The minimum/maximum scores of key frames' crop regions.
  optional float key_frame_min_score = 10;
  optional float key_frame_max_score = 11;

  // Size of the scene's crop window, calculated as the maximum of the target
  // size and the largest size of the key frames' crop regions in the scene.
  optional int32 crop_window_width = 12;
  optional int32 crop_window_height = 13;

  // Indicator for whether the scene has any frame with any salient region.
  optional bool has_salient_region = 14;
  // Indicator for whether the scene has any frame with any required salient
  // region.
  optional bool has_required_salient_region = 15;

  // Percentage of key frames that are successfully cropped (i.e. covers all
  // required regions inside the target size).
  // NOTE(review): whether this is stored on a 0-100 scale or as a 0-1
  // fraction is not visible in this file; confirm against the analyzer.
  optional float frame_success_rate = 16;

  // Amount of motion in the horizontal/vertical direction (i.e. the
  // horizontal/vertical range of the key frame crop centers' position as a
  // fraction of frame width/height).
  optional float horizontal_motion_amount = 17;
  optional float vertical_motion_amount = 18;
}
|
|
|
|
// Scene camera motion determined by the SceneCameraMotionAnalyzer class.
// Exactly one of the motion kinds below is carried in `motion_type` when set.
message SceneCameraMotion {
  // Camera focuses on a fixed center throughout the scene.
  message SteadyMotion {
    // Steady look-at center in horizontal/vertical directions (in pixels).
    optional float steady_look_at_center_x = 1;
    optional float steady_look_at_center_y = 2;
  }

  // Camera tracks key frame salient region centers.
  message TrackingMotion {
    // Fields to be added if necessary.
  }

  // Camera sweeps from one point to another.
  message SweepingMotion {
    // Starting and ending center positions for camera sweeping in pixels.
    optional float sweep_start_center_x = 1;
    optional float sweep_start_center_y = 2;
    optional float sweep_end_center_x = 3;
    optional float sweep_end_center_y = 4;
  }

  // The selected camera motion; setting one member clears the others.
  oneof motion_type {
    SteadyMotion steady_motion = 1;
    TrackingMotion tracking_motion = 2;
    SweepingMotion sweeping_motion = 3;
    // Other types that we might support later.
  }
}
|
|
|
|
// User-specified options for analyzing scene camera motion from a collection
// of key frame crop regions.
message SceneCameraMotionAnalyzerOptions {
  // Field number 9 is reserved and must not be reused.
  reserved 9;

  // If there is small motion within the scene, keep the camera steady at the
  // center.
  // NOTE(review): despite the `_percent` suffix, the default of 0.30 suggests
  // a 0-1 fraction rather than a 0-100 value; confirm against the analyzer.
  optional float motion_stabilization_threshold_percent = 1 [default = 0.30];

  // Snap to center if there is small motion and the camera is already focused
  // close to the center.
  // NOTE(review): despite the `_percent` suffix, the default of 0.08 suggests
  // a 0-1 fraction rather than a 0-100 value; confirm against the analyzer.
  optional float snap_center_max_distance_percent = 2 [default = 0.08];

  // Maximum weight for a constraint. Scales scores accordingly so that the
  // maximum score is equal to this weight.
  optional float maximum_salient_point_weight = 3 [default = 100.0];

  // Normalized bound for SalientPoint's in the frame from the border. This is
  // uniformly applied to the left, right, top, and bottom. It should be
  // strictly less than 0.5. A narrower bound (closer to 0.5) gives better
  // constraint enforcement.
  optional float salient_point_bound = 4 [default = 0.48];

  // Indicator for whether sweeping is allowed. Note that if a scene can be
  // seamlessly padded with solid background color, sweeping will be disabled
  // regardless of the value of this flag.
  optional bool allow_sweeping = 5 [default = true];

  // Minimal scene time span in seconds to allow camera sweeping.
  optional float minimum_scene_span_sec_for_sweeping = 6 [default = 1.0];

  // If success rate in a scene is less than this, then use camera sweeping.
  optional float minimum_success_rate_for_sweeping = 7 [default = 0.4];

  // If true, sweep entire frame. Otherwise, sweep the crop window.
  optional bool sweep_entire_frame = 8 [default = true];

  // When no salient region is received, the default behavior is to return the
  // camera to the center-focused location. When this flag is set to a value
  // >0, the camera will remain at its last position for this amount of time
  // before recentering (if the last scene camera motion type was steady).
  optional int64 duration_before_centering_us = 10;
}
|
|
|
|
// Video cropping summary information for debugging/statistics.
message VideoCroppingSummary {
  // Cropping summary for a single scene of the video.
  message SceneCroppingSummary {
    // Scene span in seconds.
    optional float start_sec = 1;
    optional float end_sec = 2;

    // Indicator for whether this scene was cut at a real physical scene
    // boundary (as opposed to force flush).
    optional bool is_end_of_scene = 3;

    // Scene camera motion.
    optional SceneCameraMotion camera_motion = 4;

    // Indicator for whether the scene is padded.
    optional bool is_padded = 5;
  }

  // Cropping summaries for all the scenes in the video.
  repeated SceneCroppingSummary scene_summaries = 1;
}
|
|
|
|
// Selects and configures the model used to find a smooth camera path. The
// choice is carried in `camera_model_oneof`, so setting one member clears the
// other.
message CameraMotionOptions {
  // Options for the polynomial-regression based path solver.
  message PolynomialRegressionPathSolver {
    // Number of frames from prior buffer to be used to smooth out camera
    // trajectory when it was a forced flush.
    optional int32 prior_frame_buffer_size = 1 [default = 30];
  }

  oneof camera_model_oneof {
    // Fits a poly line to keypoints to find a smooth camera path.
    PolynomialRegressionPathSolver polynomial_path_solver = 1;
    // Maintains a kinematic state of the camera, updated with keypoints, to
    // find a smooth camera path. Currently optimized for real-time operation.
    KinematicOptions kinematic_options = 2;
  }
}
|