// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Proto messages used for the AutoFlip Pipeline.

syntax = "proto2";

package mediapipe.autoflip;

import "mediapipe/framework/calculator_options.proto";

option java_multiple_files = true;

// Borders detected on the frame as well as non-border color (if present).
// Next tag: 4
message StaticFeatures {
  // A list of the static parts for a frame.
  repeated Border border = 1;
  // The background color (only set if solid color).
  optional Color solid_background = 2;
  // Area of the image that is not a border.
  optional Rect non_static_area = 3;
}
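
// Illustrative StaticFeatures example (textproto). Values are hypothetical and
// assume a 1920x1080 frame letterboxed with 140-pixel black bars at the top
// and bottom; the Border, Rect, and Color messages used here are defined
// below.
//
//   border {
//     border_position { x: 0 y: 0 width: 1920 height: 140 }
//     relative_position: TOP
//   }
//   border {
//     border_position { x: 0 y: 940 width: 1920 height: 140 }
//     relative_position: BOTTOM
//   }
//   solid_background { r: 0 g: 0 b: 0 }
//   non_static_area { x: 0 y: 140 width: 1920 height: 800 }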

// A static border area within the video.
// Next tag: 3
message Border {
  // Original location within the input frame.
  optional Rect border_position = 1;
  // Position for static area.
  // Next tag: 3
  enum RelativePosition {
    TOP = 1;
    BOTTOM = 2;
  }
  // Top or bottom position.
  optional RelativePosition relative_position = 2;
}

// Rectangle (opencv format).
// Next tag: 5
message Rect {
  optional int32 x = 1;
  optional int32 y = 2;
  optional int32 width = 3;
  optional int32 height = 4;
}

// Color (RGB 8bit).
// Next tag: 4
message Color {
  optional int32 r = 1;
  optional int32 g = 2;
  optional int32 b = 3;
}

// Rectangle (opencv format), with float fields.
// Next tag: 5
message RectF {
  optional float x = 1;
  optional float y = 2;
  optional float width = 3;
  optional float height = 4;
}

// An image region of interest (e.g. a detected face or object), accompanied by
// an importance score.
// Next tag: 10
message SalientRegion {
  reserved 3;
  // The bounding box for this region in the image.
  optional Rect location = 1;
  // The bounding box for this region in the image, normalized.
  optional RectF location_normalized = 8;
  // A score indicating the importance of this region.
  optional float score = 2;
  // A tracking id used to identify this region across video frames. Not always
  // set.
  optional int64 tracking_id = 4;
  // If true, this region is required to be present in the final video (e.g. it
  // contains text that cannot be cropped).
  optional bool is_required = 5 [default = false];
  // Type of signal carried in this message.
  optional SignalType signal_type = 6;
  // If true, the object cannot move in the output window (e.g. text would look
  // strange moving around).
  // TODO: this feature is not implemented, remove proto message.
  optional bool requires_static_location = 7 [default = false];
  // When used with the ContentZoomingCalculator, this flag can be set in the
  // SignalFusingCalculator to indicate that areas outside of these detections
  // can be cropped from the frame. When no salient regions have this flag set
  // to true, no zooming is performed. When one or more salient regions have
  // this flag set to true, the maximum zoom value that keeps all
  // "only_required" detections within view is used. The
  // ContentZoomingCalculator currently supports zooming by finding the size
  // of the non-salient top/bottom border regions and provides this
  // information to the SceneCroppingCalculator for reframing.
  optional bool only_required = 9 [default = false];
}
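
// Illustrative SalientRegion example (textproto). Values are hypothetical: a
// detected face whose surroundings may be cropped away when zooming, because
// only_required is set.
//
//   location_normalized { x: 0.4 y: 0.3 width: 0.2 height: 0.25 }
//   score: 0.9
//   signal_type { standard: FACE_FULL }
//   only_required: true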

// Stores the message type, including standard types (face, object) and custom
// types defined by a string id.
// Next tag: 3
message SignalType {
  enum StandardType {
    UNSET = 0;
    // Full face bounding box detected.
    FACE_FULL = 1;
    // Face landmarks for eyes, nose, chin only.
    FACE_CORE_LANDMARKS = 2;
    // All face landmarks (eyes, ears, nose, chin).
    FACE_ALL_LANDMARKS = 3;
    // A specific face landmark.
    FACE_LANDMARK = 4;
    HUMAN = 5;
    CAR = 6;
    PET = 7;
    OBJECT = 8;
    MOTION = 9;
    TEXT = 10;
    LOGO = 11;
    USER_HINT = 12;
  }
  oneof Signal {
    StandardType standard = 1;
    string custom = 2;
  }
}
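
// Illustrative SignalType examples (textproto). A signal is either one of the
// standard types above or a custom string id; "scoreboard" is a hypothetical
// id, not a predefined value.
//
//   signal_type { standard: TEXT }
//   signal_type { custom: "scoreboard" }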

// Features extracted from an image.
// Next tag: 3
message DetectionSet {
  // Mask image showing pixel-wise values at a given location.
  optional string encoded_mask = 1;
  // List of rectangle detections.
  repeated SalientRegion detections = 2;
}

// General settings needed for multiple calculators.
message ConversionOptions {
  extend mediapipe.CalculatorOptions {
    optional ConversionOptions ext = 284806832;
  }
  // Target output width of the conversion.
  optional int32 target_width = 1;
  // Target output height of the conversion.
  optional int32 target_height = 2;
}
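
// Sketch of how these options could be set on a graph node (textproto). The
// calculator name below is a placeholder, not a statement of which calculator
// consumes this extension.
//
//   node {
//     calculator: "SomeAutoFlipConversionCalculator"  # hypothetical name
//     options {
//       [mediapipe.autoflip.ConversionOptions.ext] {
//         target_width: 720
//         target_height: 1280
//       }
//     }
//   }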

// Self-contained message that provides all needed information to render
// autoflip with an external renderer. One of these messages is required for
// each frame of the video.
message ExternalRenderFrame {
  // Rectangle using the opencv convention.
  message Rect {
    optional float x = 1;
    optional float y = 2;
    optional float width = 3;
    optional float height = 4;
  }
  // RGB color [0...255].
  message Color {
    optional int32 r = 1;
    optional int32 g = 2;
    optional int32 b = 3;
  }
  // Rect that must be cropped out of the input frame. It is given in the
  // original pixel dimensions of the input video. The first step to render
  // this frame is to crop this rect from the input frame.
  optional Rect crop_from_location = 1;
  // Rect that must be cropped out of the input frame, normalized to the
  // dimensions of the input video. The first step to render this frame is to
  // crop this rect from the input frame.
  optional Rect normalized_crop_from_location = 7;
  // The placement location where the above rect is placed on the output frame.
  // This will always have the same aspect ratio as the above rect, but scaling
  // may be required.
  optional Rect render_to_location = 2;
  // If render_to_location is smaller than the output dimensions of the frame,
  // fill the rest of the frame with this color.
  optional Color padding_color = 3;
  // Timestamp in microseconds of this frame.
  optional uint64 timestamp_us = 4;
  // Target width of the cropped video in pixels. |render_to_location| is
  // relative to this dimension.
  optional int32 target_width = 5;
  // Target height of the cropped video in pixels. |render_to_location| is
  // relative to this dimension.
  optional int32 target_height = 6;
}
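
// Illustrative ExternalRenderFrame example (textproto) for one output frame.
// Values are hypothetical: the full 1920x1080 landscape input is scaled into a
// 720x1280 portrait output and padded with black above and below the scaled
// image (720x405 keeps the 16:9 aspect ratio of the crop).
//
//   crop_from_location { x: 0 y: 0 width: 1920 height: 1080 }
//   normalized_crop_from_location { x: 0 y: 0 width: 1.0 height: 1.0 }
//   render_to_location { x: 0 y: 437.5 width: 720 height: 405 }
//   padding_color { r: 0 g: 0 b: 0 }
//   timestamp_us: 33333
//   target_width: 720
//   target_height: 1280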