// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Proto messages used for the AutoFlip Pipeline.
syntax = "proto2";

package mediapipe.autoflip;

import "mediapipe/framework/calculator_options.proto";

option java_multiple_files = true;

// Borders detected on the frame as well as non-border color (if present).
// Next tag: 4
message StaticFeatures {
  // A list of the static parts for a frame.
  repeated Border border = 1;

  // The background color (only set if solid color).
  optional Color solid_background = 2;

  // Area of the image that is not a border.
  optional Rect non_static_area = 3;
}

// A static border area within the video.
// Next tag: 3
message Border {
  // Original location within the input frame.
  optional Rect border_position = 1;

  // Position for static area.
  // This enum declares no zero value. Under proto2 rules an unset enum field
  // reads back as the first value listed here (TOP), so check field presence
  // when "unset" must be told apart from TOP.
  // Next tag: 3
  enum RelativePosition {
    TOP = 1;
    BOTTOM = 2;
  }

  // Top or bottom position.
  optional RelativePosition relative_position = 2;
}

// Rectangle (opencv format), integer valued.
// NOTE(review): x/y are presumably the top-left corner as in cv::Rect --
// confirm against the code that fills this message.
// Next tag: 5
message Rect {
  optional int32 x = 1;
  optional int32 y = 2;
  optional int32 width = 3;
  optional int32 height = 4;
}

// Color (RGB 8bit). One int32 field per channel.
// Next tag: 4
message Color {
  optional int32 r = 1;
  optional int32 g = 2;
  optional int32 b = 3;
}

// Rectangle (opencv format). Same fields as Rect, stored as floats.
// Next tag: 5
message RectF {
  optional float x = 1;
  optional float y = 2;
  optional float width = 3;
  optional float height = 4;
}

// An image region of interest (eg a detected face or object), accompanied by an
// importance score.
// Next tag: 10
message SalientRegion {
  // Tag 3 is reserved (presumably a removed field) and must not be reused.
  reserved 3;

  // The bounding box for this region in the image.
  optional Rect location = 1;

  // The bounding box for this region in the image normalized.
  // NOTE(review): presumably expressed as fractions of the frame size --
  // confirm against the producers of this field.
  optional RectF location_normalized = 8;

  // A score indicating the importance of this region.
  optional float score = 2;

  // A tracking id used to identify this region across video frames. Not always
  // set.
  optional int64 tracking_id = 4;

  // If true, this region is required to be present in the final video (eg it
  // contains text that cannot be cropped).
  optional bool is_required = 5 [default = false];

  // Type of signal carried in this message.
  optional SignalType signal_type = 6;

  // If true, object cannot move in the output window (e.g. text would look
  // strange moving around).
  // TODO: this feature is not implemented, remove proto message.
  optional bool requires_static_location = 7 [default = false];

  // When used with ContentZoomingCalculator, this flag can be set in the
  // SignalFusingCalculator indicating that areas outside of these detections
  // can be cropped from the frame. When no salient regions have this flag set
  // true, no zooming is performed. When one or more salient regions have this
  // flag set true, the max zoom value will be used that keeps all
  // "only_required" detections within view. The ContentZoomingCalculator
  // currently supports zooming by finding the size of non-salient top/bottom
  // borders regions and provides this information to the
  // SceneCroppingCalculator for reframing.
  optional bool only_required = 9 [default = false];
}

// Stores the message type, including standard types (face, object) and custom
// types defined by a string id.
// Next tag: 3
message SignalType {
  // Built-in signal categories. UNSET (0) is the first value and therefore
  // the value read when no standard type has been set.
  enum StandardType {
    UNSET = 0;
    // Full face bounding box detected.
    FACE_FULL = 1;
    // Face landmarks for eyes, nose, chin only.
    FACE_CORE_LANDMARKS = 2;
    // All face landmarks (eyes, ears, nose, chin).
    FACE_ALL_LANDMARKS = 3;
    // A specific face landmark.
    FACE_LANDMARK = 4;
    HUMAN = 5;
    CAR = 6;
    PET = 7;
    OBJECT = 8;
    MOTION = 9;
    TEXT = 10;
    LOGO = 11;
    USER_HINT = 12;
  }

  // Exactly one of the two members can be set at a time; setting one clears
  // the other.
  oneof Signal {
    // One of the predefined categories above.
    StandardType standard = 1;
    // Custom type identified by a string id.
    string custom = 2;
  }
}

// Features extracted from an image.
// Next tag: 3
message DetectionSet {
  // Mask image showing pixel-wise values at a given location.
  // NOTE(review): the image encoding is not specified here, and `string` is
  // an unusual carrier for encoded image data -- confirm with the producer.
  optional string encoded_mask = 1;

  // List of rectangle detections.
  repeated SalientRegion detections = 2;
}

// General settings needed for multiple calculators.
// Next tag: 3
message ConversionOptions {
  // Registers this message as an extension of mediapipe.CalculatorOptions.
  extend mediapipe.CalculatorOptions {
    optional ConversionOptions ext = 284806832;
  }

  // Target output width of the conversion.
  optional int32 target_width = 1;

  // Target output height of the conversion.
  optional int32 target_height = 2;
}

// Self-contained message that provides all needed information to render
// autoflip with an external renderer. One of these messages is required for
// each frame of the video.
// Next tag: 8
message ExternalRenderFrame {
  // Rectangle using opencv standard.
  // Nested type: inside ExternalRenderFrame the name Rect resolves to this
  // float-valued message rather than the package-level int32 Rect.
  message Rect {
    optional float x = 1;
    optional float y = 2;
    optional float width = 3;
    optional float height = 4;
  }

  // RGB color [0...255]
  // Nested type: inside ExternalRenderFrame the name Color resolves to this
  // message rather than the package-level Color.
  message Color {
    optional int32 r = 1;
    optional int32 g = 2;
    optional int32 b = 3;
  }

  // Rect that must be cropped out of the input frame. It is in the
  // original dimensions of the input video. The first step to render this
  // frame is to crop this rect from the input frame.
  optional Rect crop_from_location = 1;

  // Rect that must be cropped out of the input frame. It is defined in the
  // ratio of the frame of the input video. The first step to render this frame
  // is to crop this rect from the input frame.
  optional Rect normalized_crop_from_location = 7;

  // The placement location where the above rect is placed on the output frame.
  // This will always have the same aspect ratio as the above rect but scaling
  // may be required.
  optional Rect render_to_location = 2;

  // If render_to_location is smaller than the output dimensions of the frame,
  // fill the rest of the frame with this color.
  optional Color padding_color = 3;

  // Timestamp in microseconds of this frame.
  optional uint64 timestamp_us = 4;

  // Target width of the cropped video in pixels. |render_to_location| is
  // relative to this dimension.
  optional int32 target_width = 5;

  // Target height of the cropped video in pixels. |render_to_location| is
  // relative to this dimension.
  optional int32 target_height = 6;
}