145 lines
5.4 KiB
C++
145 lines
5.4 KiB
C++
// Copyright 2019 The MediaPipe Authors.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
#include "mediapipe/util/tracking/tracked_detection.h"
|
|
|
|
#include "mediapipe/framework/formats/rect.pb.h"
|
|
|
|
namespace mediapipe {
|
|
|
|
namespace {
|
|
|
|
// Struct for carrying boundary information.
|
|
struct NormalizedRectBounds {
|
|
float left, right, top, bottom;
|
|
};
|
|
|
|
// Computes the area of a NormalizedRect.
|
|
float BoxArea(const NormalizedRect& box) { return box.width() * box.height(); }
|
|
|
|
// Computes the bounds of a NormalizedRect.
|
|
void GetNormalizedRectBounds(const NormalizedRect& box,
|
|
NormalizedRectBounds* bounds) {
|
|
bounds->left = box.x_center() - box.width() / 2.f;
|
|
bounds->right = box.x_center() + box.width() / 2.f;
|
|
bounds->top = box.y_center() - box.height() / 2.f;
|
|
bounds->bottom = box.y_center() + box.height() / 2.f;
|
|
}
|
|
|
|
// Computes the overlapping area of two boxes, ignoring rotation.
|
|
float OverlapArea(const NormalizedRect& box1, const NormalizedRect& box2) {
|
|
NormalizedRectBounds bounds1, bounds2;
|
|
GetNormalizedRectBounds(box1, &bounds1);
|
|
GetNormalizedRectBounds(box2, &bounds2);
|
|
const float x_overlap =
|
|
std::max(0.f, std::min(bounds1.right, bounds2.right) -
|
|
std::max(bounds1.left, bounds2.left));
|
|
const float y_overlap =
|
|
std::max(0.f, std::min(bounds1.bottom, bounds2.bottom) -
|
|
std::max(bounds1.top, bounds2.top));
|
|
return x_overlap * y_overlap;
|
|
}
|
|
|
|
std::array<Vector2_f, 4> ComputeCorners(const NormalizedRect& normalized_box,
|
|
const Vector2_f& center,
|
|
int image_width, int image_height) {
|
|
NormalizedRectBounds bounds;
|
|
GetNormalizedRectBounds(normalized_box, &bounds);
|
|
// Rotate 4 corner w.r.t. center.
|
|
std::array<Vector2_f, 4> corners{{
|
|
Vector2_f(bounds.left * image_width, bounds.top * image_height),
|
|
Vector2_f(bounds.left * image_width, bounds.bottom * image_height),
|
|
Vector2_f(bounds.right * image_width, bounds.bottom * image_height),
|
|
Vector2_f(bounds.right * image_width, bounds.top * image_height),
|
|
}};
|
|
if (std::abs(normalized_box.rotation()) <= 1e-5) {
|
|
return corners;
|
|
}
|
|
|
|
const float cos_a = std::cos(normalized_box.rotation());
|
|
const float sin_a = std::sin(normalized_box.rotation());
|
|
for (int k = 0; k < 4; ++k) {
|
|
// Scale and rotate w.r.t. center.
|
|
const Vector2_f rad = corners[k] - center;
|
|
const Vector2_f rot_rad(cos_a * rad.x() - sin_a * rad.y(),
|
|
sin_a * rad.x() + cos_a * rad.y());
|
|
corners[k] = center + rot_rad;
|
|
}
|
|
return corners;
|
|
}
|
|
|
|
} // namespace
|
|
|
|
void TrackedDetection::AddLabel(const std::string& label, float score) {
|
|
auto label_ptr = label_to_score_map_.find(label);
|
|
if (label_ptr == label_to_score_map_.end()) {
|
|
label_to_score_map_[label] = score;
|
|
} else {
|
|
label_ptr->second = label_ptr->second > score ? label_ptr->second : score;
|
|
}
|
|
}
|
|
|
|
bool TrackedDetection::IsSameAs(const TrackedDetection& other,
|
|
float max_area_ratio,
|
|
float min_overlap_ratio) const {
|
|
const auto box0 = bounding_box_;
|
|
const auto box1 = other.bounding_box_;
|
|
const double box0_area = BoxArea(box0);
|
|
const double box1_area = BoxArea(box1);
|
|
const double overlap_area = OverlapArea(box0, box1);
|
|
|
|
// For cases where a small object is in front of a big object.
|
|
// TODO: This is hard threshold. Make the threshold smaller
|
|
// (e.g. 2.0) will cause issues when two detections of the same object is
|
|
// vertial to each other. For example, if we first get a detection (e.g. a
|
|
// long water bottle) vertically and then change the camera to horizontal
|
|
// quickly, then it will get another detection which will have a diamond shape
|
|
// that is much larger than the previous rectangle one.
|
|
if (box0_area / box1_area > max_area_ratio ||
|
|
box1_area / box0_area > max_area_ratio)
|
|
return false;
|
|
|
|
if (overlap_area / box0_area > min_overlap_ratio ||
|
|
overlap_area / box1_area > min_overlap_ratio) {
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
void TrackedDetection::MergeLabelScore(const TrackedDetection& other) {
|
|
for (const auto& label_score : other.label_to_score_map()) {
|
|
const auto label_score_ptr = label_to_score_map_.find(label_score.first);
|
|
if (label_score_ptr == label_to_score_map_.end()) {
|
|
AddLabel(label_score.first, label_score.second);
|
|
} else {
|
|
// TODO: Consider other strategy of merging scores, e.g. mean.
|
|
label_score_ptr->second =
|
|
std::max(label_score_ptr->second, label_score.second);
|
|
}
|
|
}
|
|
}
|
|
|
|
std::array<Vector2_f, 4> TrackedDetection::GetCorners(
|
|
float image_width, float image_height) const {
|
|
NormalizedRectBounds bounds;
|
|
GetNormalizedRectBounds(bounding_box_, &bounds);
|
|
Vector2_f center((bounds.right + bounds.left) / 2.0f * image_width,
|
|
(bounds.bottom + bounds.top) / 2.0f * image_height);
|
|
|
|
return ComputeCorners(bounding_box_, center, image_width, image_height);
|
|
}
|
|
|
|
} // namespace mediapipe
|