// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto2";

package mediapipe;

import "mediapipe/util/tracking/box_tracker.proto";
import "mediapipe/util/tracking/region_flow.proto";

option java_package = "com.google.mediapipe.tracking";
option java_outer_classname = "BoxDetectorProto";

// Configuration options for BoxDetector.
message BoxDetectorOptions {
  // Available types of detector's index and search structure.
  enum IndexType {
    // No index type specified.
    INDEX_UNSPECIFIED = 0;
    // BFMatcher from OpenCV.
    OPENCV_BF = 1;
  }

  // Index and search structure used by the detector.
  optional IndexType index_type = 1 [default = OPENCV_BF];

  // Decides whether the detector is forced to run every N frames.
  // 0 means detection will never be called.
  // 1 means detect every frame, 2 means detect every other frame, etc.
  // Currently only applied to image query mode.
  optional int32 detect_every_n_frame = 2 [default = 0];

  // Enables box detection when tracked boxes are out of the field of view
  // (FOV). Detection stops once the detector successfully re-acquires the box.
  optional bool detect_out_of_fov = 4 [default = false];

  // Options only for detection from image queries.
  message ImageQuerySettings {
    // Resize the input image's longer edge to this size. Resizing is skipped
    // if the input size is already smaller than this size.
    optional int32 pyramid_bottom_size = 1 [default = 640];

    // Scale factor between adjacent pyramid levels.
    optional float pyramid_scale_factor = 2 [default = 1.2];

    // Maximum number of pyramid levels.
    optional int32 max_pyramid_levels = 3 [default = 4];

    // Maximum number of features the detector uses.
    optional int32 max_features = 4 [default = 500];
  }

  // Options for the detection function with image query.
  optional ImageQuerySettings image_query_settings = 3;

  // Dimensions (number of elements) for the feature descriptor.
  optional int32 descriptor_dims = 5 [default = 40];

  // Minimum number of correspondences required to go through RANSAC.
  optional int32 min_num_correspondence = 6 [default = 5];

  // Reprojection threshold for RANSAC to find inliers.
  optional float ransac_reprojection_threshold = 7 [default = 0.005];

  // Max distance to match 2 NIMBY features.
  optional float max_match_distance = 8 [default = 0.9];

  // Max perspective change factor.
  optional float max_perspective_factor = 9 [default = 0.1];
}

// Proto to hold BoxDetector's internal search index.
message BoxDetectorIndex {
  // Message to hold keypoints and descriptors for each box.
  message BoxEntry {
    // Message to hold keypoints and descriptors for each appearance. One box
    // could have multiple appearances to account for shape and perspective
    // change, etc.
    message FrameEntry {
      // The box this appearance belongs to.
      optional TimedBoxProto box = 1;

      // Keypoints for this appearance, stored as a flat list of floats.
      // NOTE(review): the per-keypoint layout (e.g. interleaved coordinates)
      // is not visible in this file -- confirm against the detector code.
      repeated float keypoints = 2;

      // Feature descriptors for this appearance.
      repeated BinaryFeatureDescriptor descriptors = 3;
    }

    // Recorded appearances of this box.
    repeated FrameEntry frame_entry = 1;
  }

  // One entry per box held in the index.
  repeated BoxEntry box_entry = 1;
}