// Copyright 2019 The MediaPipe Authors. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #ifndef MEDIAPIPE_UTIL_TRACKING_BOX_DETECTOR_H_ #define MEDIAPIPE_UTIL_TRACKING_BOX_DETECTOR_H_ #include "absl/container/flat_hash_map.h" #include "absl/synchronization/mutex.h" #include "mediapipe/framework/port/opencv_core_inc.h" #include "mediapipe/framework/port/opencv_features2d_inc.h" #include "mediapipe/util/tracking/box_detector.pb.h" #include "mediapipe/util/tracking/box_tracker.pb.h" #include "mediapipe/util/tracking/flow_packager.pb.h" #include "mediapipe/util/tracking/tracking.h" namespace mediapipe { // Feature correspondences between target index and a specific frame. // The size of `points_frame` and `points_index` should be identical and the // corresponding elements are a pair of feature correspondence. struct FeatureCorrespondence { // Matched feature locations from an image frame. std::vector points_frame; // Matched feature locations from the index structure. The location is where // it get detected from a previous frame. std::vector points_index; }; // General interface for multiple box detector implementations class BoxDetectorInterface { public: // Creates box detector based on index type defined in `options`. static std::unique_ptr Create( const BoxDetectorOptions &options); // Locate quad from feature correspondences using perspective model. // Feature locations need to be normalized with 1.0 / max(width, height). // `box_proto` contains quad corners position and aspect ratio. // `frame_aspect` is the aspect ratio for the camera image frame. // Note that to perform pnp tracking, both box aspect ratio and frame aspect // ratio need to be positive. Otherwise fallback to homography tracking. TimedBoxProtoList FindQuadFromFeatureCorrespondence( const FeatureCorrespondence &matches, const TimedBoxProto &box_proto, float frame_aspect = -1.0f); virtual ~BoxDetectorInterface() = default; // Detects pre-set boxes from input frame and adds features from new boxes // into detector's index structure. Features and descriptors should be // pre-computed and passed within `tracking_data`. `tracked_boxes` contains // box tracking results from box_tracker. // If all the boxes in the index are currently being tracked (box.id() found // in `tracked_boxes`), the detection will be skipped and `detected_boxes` // will remain empty. // If the box's ID has never been recorded in the index before, The ID and all // the features within the box will be merged into the index. // `timestamp_msec` should correspond to `tracking_data`. void DetectAndAddBox(const TrackingData &tracking_data, const TimedBoxProtoList &tracked_boxes, int64 timestamp_msec, TimedBoxProtoList *detected_boxes); // Detects pre-set boxes from input frame and adds features from new boxes // into detector's index structure. Features and descriptors are extracted // from `image` in real time. // Other parameters work the same way as the previous function. // `timestamp_msec` should correspond to `image`. void DetectAndAddBox(const cv::Mat &image, const TimedBoxProtoList &tracked_boxes, int64 timestamp_msec, TimedBoxProtoList *detected_boxes); // Stops detection of box with `box_id`. void CancelBoxDetection(int box_id); // Get the current detector's search index. BoxDetectorIndex ObtainBoxDetectorIndex() const; // Add detector's search index with pre-defined index. void AddBoxDetectorIndex(const BoxDetectorIndex &index); // Internal call for public DetectAndAddBox functions. `features` and // `descriptors` can be either extracted from live frames or tracked from // previous frames. `scale_x` and `scale_y` provides actual image aspect ratio // so that boxes from `tracked_boxes` can be denormalized and boxes in // `detected_boxes` normalized. `timestamp_msec` should correspond to the // timestamp of `features` and `descriptors`. void DetectAndAddBoxFromFeatures(const std::vector &features, const cv::Mat &descriptors, const TimedBoxProtoList &tracked_boxes, int64 timestamp_msec, float scale_x, float scale_y, TimedBoxProtoList *detected_boxes); protected: explicit BoxDetectorInterface(const BoxDetectorOptions &options); // `transform_features_for_pnp` controls wheather we transform features // coordinates into a rectangular target space for pnp detection mode. void AddBoxFeaturesToIndex(const std::vector &features, const cv::Mat &descriptors, const TimedBoxProto &box, bool transform_features_for_pnp = false); // Check if add / detect action will be called based on input `tracked_boxes`. bool CheckDetectAndAddBox(const TimedBoxProtoList &tracked_boxes); // Returns feature indices that are within the given box. If the box size // isn't big enough to cover sufficient features to reacquire the box, this // function will try to iteratively enlarge the box size by roughly 5 // percent of the shorter edge of the image to include more features, but // maximimum twice. Note that detected_boxes will still be reported with // original size. External users are then freed from specificially finetuning // a box size for reacquisition. They should choose suitable box size for // tracking based on their use cases. std::vector GetFeatureIndexWithinBox( const std::vector &features, const TimedBoxProto &box); // Specifies which box to detect with `box_idx`. This enalbles separately // managing the detection behavior for each box in the index. Tracked boxes // will be skipped and lost and out-of-view boxes will be detected. TimedBoxProtoList DetectBox(const std::vector &features, const cv::Mat &descriptors, int box_idx); // Only matches those features from the specific box with `box_idx`. virtual std::vector MatchFeatureDescriptors( const std::vector &features, const cv::Mat &descriptors, int box_idx) = 0; // Specifies which box the correspondences come from with `box_id`, so that we // can figure out the transformation accordingly. TimedBoxProtoList FindBoxesFromFeatureCorrespondence( const std::vector &matches, int box_idx); int cnt_detect_called_ = 0; float image_scale_; float image_aspect_; absl::flat_hash_map box_id_to_idx_; std::vector box_idx_to_id_; std::vector> frame_box_; std::vector> feature_to_frame_; std::vector> feature_keypoints_; std::vector feature_descriptors_; std::vector has_been_out_of_fov_; mutable absl::Mutex access_to_index_; cv::Ptr orb_extractor_; BoxDetectorOptions options_; }; } // namespace mediapipe #endif // MEDIAPIPE_UTIL_TRACKING_BOX_DETECTOR_H_