7c2d654d67
Chrome can't use Absl's CHECK because of collisions with its own version. PiperOrigin-RevId: 561740965
530 lines
20 KiB
C++
530 lines
20 KiB
C++
// Copyright 2019 The MediaPipe Authors.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
#include "mediapipe/framework/formats/location.h"
|
|
|
|
#include <algorithm>
|
|
#include <cmath>
|
|
#include <memory>
|
|
|
|
#include "absl/log/absl_check.h"
|
|
#include "absl/log/absl_log.h"
|
|
#include "absl/memory/memory.h"
|
|
#include "absl/strings/substitute.h"
|
|
#include "mediapipe/framework/formats/annotation/locus.pb.h"
|
|
#include "mediapipe/framework/formats/annotation/rasterization.pb.h"
|
|
#include "mediapipe/framework/port/canonical_errors.h"
|
|
#include "mediapipe/framework/port/integral_types.h"
|
|
#include "mediapipe/framework/port/point2.h"
|
|
#include "mediapipe/framework/port/ret_check.h"
|
|
#include "mediapipe/framework/port/status.h"
|
|
#include "mediapipe/framework/port/statusor.h"
|
|
#include "mediapipe/framework/tool/status_util.h"
|
|
#include "mediapipe/framework/type_map.h"
|
|
|
|
namespace mediapipe {
|
|
|
|
namespace {
|
|
// Extracts from the BinaryMask, stored as mediapipe.Rasterization in
|
|
// the location_data, the tightest bounding box, that contains all pixels
|
|
// encoded in the rasterizations.
|
|
Rectangle_i MaskToRectangle(const LocationData& location_data) {
|
|
ABSL_CHECK(location_data.mask().has_rasterization());
|
|
const auto& rasterization = location_data.mask().rasterization();
|
|
if (rasterization.interval_size() == 0) {
|
|
return Rectangle_i(0, 0, 0, 0);
|
|
}
|
|
int xmin = std::numeric_limits<int>::max();
|
|
int xmax = std::numeric_limits<int>::min();
|
|
int ymin = std::numeric_limits<int>::max();
|
|
int ymax = std::numeric_limits<int>::min();
|
|
for (const auto& interval : rasterization.interval()) {
|
|
xmin = std::min(xmin, interval.left_x());
|
|
xmax = std::max(xmax, interval.right_x());
|
|
ymin = std::min(ymin, interval.y());
|
|
ymax = std::max(ymax, interval.y());
|
|
}
|
|
return Rectangle_i(xmin, ymin, xmax - xmin + 1, ymax - ymin + 1);
|
|
}
|
|
|
|
} // namespace
|
|
|
|
// Creates a Location around default-constructed location data.
Location::Location() = default;
|
|
|
|
Location::Location(const LocationData& location_data)
|
|
: location_data_(location_data) {
|
|
ABSL_CHECK(IsValidLocationData(location_data_));
|
|
}
|
|
|
|
// Builds a Location whose format is LocationData::GLOBAL; no further fields
// are populated.
Location Location::CreateGlobalLocation() {
  LocationData global_data;
  global_data.set_format(LocationData::GLOBAL);
  return Location(global_data);
}
|
|
|
|
// Builds a Location in BOUNDING_BOX format from the given corner (`xmin`,
// `ymin`) and extent (`width`, `height`). The values are stored as given.
Location Location::CreateBBoxLocation(int xmin, int ymin, int width,
                                      int height) {
  LocationData data;
  data.set_format(LocationData::BOUNDING_BOX);

  auto& box = *data.mutable_bounding_box();
  box.set_xmin(xmin);
  box.set_ymin(ymin);
  box.set_width(width);
  box.set_height(height);

  return Location(data);
}
|
|
|
|
// Builds a BOUNDING_BOX Location from `rect` by forwarding its minimum corner
// and its width/height to the four-integer overload.
Location Location::CreateBBoxLocation(const Rectangle_i& rect) {
  const auto left = rect.xmin();
  const auto top = rect.ymin();
  const auto width = rect.Width();
  const auto height = rect.Height();
  return CreateBBoxLocation(left, top, width, height);
}
|
|
|
|
// Builds a BOUNDING_BOX Location from a ::mediapipe::BoundingBox proto.
// The width is right_x - left_x and the height is lower_y - upper_y.
Location Location::CreateBBoxLocation(const ::mediapipe::BoundingBox& bbox) {
  const auto width = bbox.right_x() - bbox.left_x();
  const auto height = bbox.lower_y() - bbox.upper_y();
  return CreateBBoxLocation(bbox.left_x(), bbox.upper_y(), width, height);
}
|
|
|
|
// Builds a Location in RELATIVE_BOUNDING_BOX format from the given relative
// corner and extent. The values are stored as given, without range checks.
Location Location::CreateRelativeBBoxLocation(float relative_xmin,
                                              float relative_ymin,
                                              float relative_width,
                                              float relative_height) {
  LocationData data;
  data.set_format(LocationData::RELATIVE_BOUNDING_BOX);

  auto& relative_box = *data.mutable_relative_bounding_box();
  relative_box.set_xmin(relative_xmin);
  relative_box.set_ymin(relative_ymin);
  relative_box.set_width(relative_width);
  relative_box.set_height(relative_height);

  return Location(data);
}
|
|
|
|
// Builds a RELATIVE_BOUNDING_BOX Location from `rect` by forwarding its
// minimum corner and its width/height to the four-float overload.
Location Location::CreateRelativeBBoxLocation(const Rectangle_f& rect) {
  const auto left = rect.xmin();
  const auto top = rect.ymin();
  const auto width = rect.Width();
  const auto height = rect.Height();
  return CreateRelativeBBoxLocation(left, top, width, height);
}
|
|
|
|
// Returns the format recorded in the wrapped location data.
LocationData::Format Location::GetFormat() const {
  const LocationData::Format format = location_data_.format();
  return format;
}
|
|
|
|
// static
|
|
bool Location::IsValidLocationData(const LocationData& location_data) {
|
|
switch (location_data.format()) {
|
|
case LocationData::GLOBAL: {
|
|
// Nothing to check for global location data.
|
|
return true;
|
|
}
|
|
case LocationData::BOUNDING_BOX: {
|
|
return location_data.has_bounding_box() &&
|
|
location_data.bounding_box().has_xmin() &&
|
|
location_data.bounding_box().has_ymin() &&
|
|
location_data.bounding_box().has_width() &&
|
|
location_data.bounding_box().has_height();
|
|
}
|
|
case LocationData::RELATIVE_BOUNDING_BOX: {
|
|
return location_data.has_relative_bounding_box() &&
|
|
location_data.relative_bounding_box().has_xmin() &&
|
|
location_data.relative_bounding_box().has_ymin() &&
|
|
location_data.relative_bounding_box().has_width() &&
|
|
location_data.relative_bounding_box().has_height();
|
|
}
|
|
case LocationData::MASK: {
|
|
return location_data.has_mask() && location_data.mask().has_width() &&
|
|
location_data.mask().has_height() &&
|
|
location_data.mask().has_rasterization();
|
|
}
|
|
default: {
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
|
|
template <>
|
|
Rectangle_i Location::GetBBox<Rectangle_i>() const {
|
|
ABSL_CHECK_EQ(LocationData::BOUNDING_BOX, location_data_.format());
|
|
const auto& box = location_data_.bounding_box();
|
|
return Rectangle_i(box.xmin(), box.ymin(), box.width(), box.height());
|
|
}
|
|
|
|
// Multiplies the location's coordinates by `scale`, in place, and returns
// *this.
//
//   GLOBAL                - unchanged.
//   BOUNDING_BOX          - xmin, ymin, width and height are multiplied by
//                           `scale`; the float product is converted
//                           implicitly when handed to the box setters.
//   RELATIVE_BOUNDING_BOX - xmin, ymin, width and height are multiplied by
//                           `scale`, as is every relative keypoint.
//   MASK                  - not supported (fatal).
//
// CHECK-fails if the location data has a mask or if `scale` is not strictly
// positive.
Location& Location::Scale(const float scale) {
  ABSL_CHECK(!location_data_.has_mask())
      << "Location mask scaling is not implemented.";
  ABSL_CHECK_GT(scale, 0.0f);

  // Shared by both box kinds; each field is scaled exactly once.
  const auto scale_box = [scale](auto* box) {
    box->set_xmin(scale * box->xmin());
    box->set_ymin(scale * box->ymin());
    box->set_width(scale * box->width());
    box->set_height(scale * box->height());
  };

  switch (location_data_.format()) {
    case LocationData::GLOBAL:
      // Do nothing.
      break;

    case LocationData::BOUNDING_BOX:
      scale_box(location_data_.mutable_bounding_box());
      break;

    case LocationData::RELATIVE_BOUNDING_BOX: {
      scale_box(location_data_.mutable_relative_bounding_box());
      for (auto& point : *location_data_.mutable_relative_keypoints()) {
        point.set_x(scale * point.x());
        point.set_y(scale * point.y());
      }
      break;
    }

    case LocationData::MASK:
      ABSL_LOG(FATAL)
          << "Scaling for location data of type MASK is not supported.";
      break;
  }
  return *this;
}
|
|
|
|
// Turns the bounding box into a square, in place, by growing its shorter side
// to the length of the longer one while keeping the box centered along the
// grown axis. Returns *this.
//
//   GLOBAL                - unchanged.
//   BOUNDING_BOX          - works directly on the integer box; the centering
//                           uses integer division. `image_width` and
//                           `image_height` are not used.
//   RELATIVE_BOUNDING_BOX - the comparison is done in absolute units
//                           (relative value times `image_width` /
//                           `image_height`) and the result is converted back
//                           to relative coordinates.
//   MASK                  - not supported (fatal).
Location& Location::Square(int image_width, int image_height) {
  switch (location_data_.format()) {
    case LocationData::GLOBAL:
      // Do nothing.
      break;

    case LocationData::BOUNDING_BOX: {
      auto& bbox = *location_data_.mutable_bounding_box();
      const int side = std::max(bbox.width(), bbox.height());
      if (side > bbox.width()) {
        // Taller than wide: widen around the horizontal center.
        bbox.set_xmin(bbox.xmin() + bbox.width() / 2 - side / 2);
        bbox.set_width(side);
      } else if (side > bbox.height()) {
        // Wider than tall: heighten around the vertical center.
        bbox.set_ymin(bbox.ymin() + bbox.height() / 2 - side / 2);
        bbox.set_height(side);
      }
      break;
    }

    case LocationData::RELATIVE_BOUNDING_BOX: {
      auto& rbox = *location_data_.mutable_relative_bounding_box();
      const float abs_left = rbox.xmin() * image_width;
      const float abs_top = rbox.ymin() * image_height;
      const float abs_width = rbox.width() * image_width;
      const float abs_height = rbox.height() * image_height;
      const float side = std::max(abs_width, abs_height);
      if (side > abs_width) {
        rbox.set_xmin((abs_left + abs_width / 2 - side / 2) / image_width);
        rbox.set_width(side / image_width);
      } else if (side > abs_height) {
        rbox.set_ymin((abs_top + abs_height / 2 - side / 2) / image_height);
        rbox.set_height(side / image_height);
      }
      break;
    }

    case LocationData::MASK:
      ABSL_LOG(FATAL)
          << "Squaring for location data of type MASK is not supported.";
      break;
  }
  return *this;
}
|
|
|
|
namespace {
|
|
|
|
// Finds an optimal shift t such that I = [min_value + t, max_value + t) will be
|
|
// included in the interval J = [0, range) if possible. If the above is not
|
|
// possible, then interval I will be centered at the center of interval J.
|
|
// This function is inteded to shift boundaries of intervals such that they
|
|
// best fit within an image.
|
|
float BestShift(float min_value, float max_value, float range) {
|
|
ABSL_CHECK_LE(min_value, max_value);
|
|
const float value_range = max_value - min_value;
|
|
if (value_range > range) {
|
|
return 0.5f * (range - min_value - max_value);
|
|
} else {
|
|
if (min_value < 0.0f) {
|
|
return -min_value;
|
|
} else if (max_value > range) {
|
|
return range - max_value;
|
|
}
|
|
}
|
|
return 0.0f;
|
|
}
|
|
|
|
} // namespace
|
|
|
|
// Translates the location, in place, so that it fits as well as possible into
// an image of size `image_width` x `image_height` (see BestShift() for the
// exact policy along each axis). Returns *this.
//
//   GLOBAL                - unchanged.
//   BOUNDING_BOX          - xmin/ymin are shifted and rounded to the nearest
//                           integer; width and height are untouched.
//   RELATIVE_BOUNDING_BOX - xmin/ymin are shifted to fit into [0, 1); the
//                           image dimensions are not used.
//   MASK                  - every rasterization interval is translated by the
//                           same integer offset, derived from the mask's
//                           bounding box. CHECK-fails if the mask dimensions
//                           differ from the given image dimensions.
Location& Location::ShiftToFitBestIntoImage(int image_width, int image_height) {
  switch (location_data_.format()) {
    case LocationData::GLOBAL: {
      // Do nothing.
      break;
    }
    case LocationData::BOUNDING_BOX: {
      auto* box = location_data_.mutable_bounding_box();
      box->set_xmin(static_cast<int>(std::round(
          box->xmin() +
          BestShift(box->xmin(), box->xmin() + box->width(), image_width))));
      box->set_ymin(static_cast<int>(std::round(
          box->ymin() +
          BestShift(box->ymin(), box->ymin() + box->height(), image_height))));
      break;
    }
    case LocationData::RELATIVE_BOUNDING_BOX: {
      auto* box = location_data_.mutable_relative_bounding_box();
      box->set_xmin(box->xmin() +
                    BestShift(box->xmin(), box->xmin() + box->width(), 1.0f));
      box->set_ymin(box->ymin() +
                    BestShift(box->ymin(), box->ymin() + box->height(), 1.0f));
      break;
    }
    case LocationData::MASK: {
      const auto mask_bounding_box = MaskToRectangle(location_data_);
      // Round each shift to a whole number of pixels once, up front. Adding a
      // fractional shift to every integer coordinate and truncating the sum
      // would move coordinates on either side of zero by different amounts
      // and thereby distort the mask.
      const int x_shift = static_cast<int>(std::round(BestShift(
          mask_bounding_box.xmin(), mask_bounding_box.xmax(), image_width)));
      // The vertical shift must be derived from the vertical extent of the
      // mask (ymin/ymax), not from its horizontal extent.
      const int y_shift = static_cast<int>(std::round(BestShift(
          mask_bounding_box.ymin(), mask_bounding_box.ymax(), image_height)));
      auto* mask = location_data_.mutable_mask();
      ABSL_CHECK_EQ(image_width, mask->width());
      ABSL_CHECK_EQ(image_height, mask->height());
      for (auto& interval :
           *mask->mutable_rasterization()->mutable_interval()) {
        interval.set_y(interval.y() + y_shift);
        interval.set_left_x(interval.left_x() + x_shift);
        interval.set_right_x(interval.right_x() + x_shift);
      }
      break;
    }
  }
  return *this;
}
|
|
|
|
// Crops the location, in place, to `crop_box` (absolute coordinates) and
// re-expresses it relative to the crop box origin. Returns *this.
//
//   GLOBAL                - unchanged.
//   BOUNDING_BOX          - replaced by its intersection with `crop_box`,
//                           with xmin/ymin measured from the crop origin. No
//                           check is made that the two boxes overlap.
//   RELATIVE_BOUNDING_BOX - not supported (fatal); use the Rectangle_f
//                           overload.
//   MASK                  - replaced by a mask of the crop box's size that
//                           keeps only the intervals overlapping `crop_box`,
//                           clamped and translated to the crop origin.
Location& Location::Crop(const Rectangle_i& crop_box) {
  switch (location_data_.format()) {
    case LocationData::GLOBAL:
      // Do nothing.
      break;

    case LocationData::BOUNDING_BOX: {
      auto& bbox = *location_data_.mutable_bounding_box();
      const int left = std::max(bbox.xmin(), crop_box.xmin());
      const int top = std::max(bbox.ymin(), crop_box.ymin());
      const int right = std::min(bbox.width() + bbox.xmin(), crop_box.xmax());
      const int bottom =
          std::min(bbox.height() + bbox.ymin(), crop_box.ymax());
      bbox.set_xmin(left - crop_box.xmin());
      bbox.set_ymin(top - crop_box.ymin());
      bbox.set_width(right - left);
      bbox.set_height(bottom - top);
      break;
    }

    case LocationData::RELATIVE_BOUNDING_BOX:
      ABSL_LOG(FATAL)
          << "Can't crop a relative bounding box using absolute coordinates. "
             "Use the 'Rectangle_f version of Crop() instead";
      break;  // Not reached: the fatal log above terminates the program.

    case LocationData::MASK: {
      LocationData::BinaryMask cropped;
      cropped.set_width(crop_box.Width());
      cropped.set_height(crop_box.Height());
      auto* cropped_raster = cropped.mutable_rasterization();

      // NOTE(review): MaskToRectangle() in this file counts right_x as a
      // covered pixel (it adds 1 when computing the width), whereas the
      // overlap test and the clamp to crop_box.Width() below behave as if
      // right_x were exclusive. Confirm which convention is intended.
      for (const auto& source :
           location_data_.mask().rasterization().interval()) {
        const bool row_in_crop =
            source.y() >= crop_box.ymin() && source.y() < crop_box.ymax();
        const bool columns_overlap = source.left_x() < crop_box.xmax() &&
                                     source.right_x() > crop_box.xmin();
        if (!row_in_crop || !columns_overlap) {
          continue;
        }
        auto* target = cropped_raster->add_interval();
        target->set_y(source.y() - crop_box.ymin());
        target->set_left_x(std::max(source.left_x() - crop_box.xmin(), 0));
        target->set_right_x(
            std::min(source.right_x() - crop_box.xmin(), crop_box.Width()));
      }
      location_data_.mutable_mask()->Swap(&cropped);
      break;
    }
  }
  return *this;
}
|
|
|
|
// Crops the location, in place, to `crop_box` (relative coordinates).
// Returns *this.
//
//   GLOBAL                - unchanged.
//   BOUNDING_BOX          - not supported (fatal); use the Rectangle_i
//                           overload.
//   RELATIVE_BOUNDING_BOX - replaced by its intersection with `crop_box`.
//                           The result stays in the original coordinate
//                           frame; it is not re-expressed relative to the
//                           crop box. No check is made that the boxes
//                           overlap.
//   MASK                  - not supported (fatal); use the Rectangle_i
//                           overload.
Location& Location::Crop(const Rectangle_f& crop_box) {
  switch (location_data_.format()) {
    case LocationData::GLOBAL:
      // Do nothing.
      break;

    case LocationData::BOUNDING_BOX:
      ABSL_LOG(FATAL)
          << "Can't crop an absolute bounding box using relative coordinates. "
             "Use the 'Rectangle_i version of Crop() instead";
      break;  // Not reached: the fatal log above terminates the program.

    case LocationData::RELATIVE_BOUNDING_BOX: {
      auto& rbox = *location_data_.mutable_relative_bounding_box();
      // Capture the far edges before the near corner is overwritten.
      const float old_right = rbox.xmin() + rbox.width();
      const float old_bottom = rbox.ymin() + rbox.height();

      rbox.set_xmin(std::max(crop_box.xmin(), rbox.xmin()));
      rbox.set_ymin(std::max(crop_box.ymin(), rbox.ymin()));

      const float clipped_right = std::min(crop_box.xmax(), old_right);
      const float clipped_bottom = std::min(crop_box.ymax(), old_bottom);
      rbox.set_width(clipped_right - rbox.xmin());
      rbox.set_height(clipped_bottom - rbox.ymin());
      break;
    }

    case LocationData::MASK:
      ABSL_LOG(FATAL)
          << "Can't crop a mask using relative coordinates. Use the "
             "'Rectangle_i' version of Crop() instead";
      break;  // Not reached: the fatal log above terminates the program.
  }
  return *this;
}
|
|
|
|
template <>
|
|
Rectangle_i Location::ConvertToBBox<Rectangle_i>(int image_width,
|
|
int image_height) const {
|
|
switch (location_data_.format()) {
|
|
case LocationData::GLOBAL: {
|
|
return Rectangle_i(0, 0, image_width, image_height);
|
|
}
|
|
case LocationData::BOUNDING_BOX: {
|
|
const auto& box = location_data_.bounding_box();
|
|
return Rectangle_i(box.xmin(), box.ymin(), box.width(), box.height());
|
|
}
|
|
case LocationData::RELATIVE_BOUNDING_BOX: {
|
|
const auto& box = location_data_.relative_bounding_box();
|
|
// Taking the floor rather than rounding for the width and height ensures
|
|
// that if the original relative bounding box was within the image bounds,
|
|
// the absolute bounding box that it is converted to will also be
|
|
// within the image bounds.
|
|
return Rectangle_i(
|
|
static_cast<int>(std::round(image_width * box.xmin())),
|
|
static_cast<int>(std::round(image_height * box.ymin())),
|
|
static_cast<int>(image_width * box.width()),
|
|
static_cast<int>(image_height * box.height()));
|
|
}
|
|
case LocationData::MASK: {
|
|
return MaskToRectangle(location_data_);
|
|
}
|
|
default: {
|
|
// Note that this shouldn't happen, but a default clause is required by
|
|
// the Android compiler.
|
|
return Rectangle_i();
|
|
}
|
|
}
|
|
}
|
|
|
|
// Returns the stored relative bounding box as a Rectangle_f built from
// (xmin, ymin, width, height).
// CHECK-fails unless the location format is RELATIVE_BOUNDING_BOX.
Rectangle_f Location::GetRelativeBBox() const {
  ABSL_CHECK_EQ(LocationData::RELATIVE_BOUNDING_BOX, location_data_.format());
  const auto& stored = location_data_.relative_bounding_box();
  const auto left = stored.xmin();
  const auto top = stored.ymin();
  const auto width = stored.width();
  const auto height = stored.height();
  return Rectangle_f(left, top, width, height);
}
|
|
|
|
// Converts the location to a relative bounding box with respect to an image
// of size `image_width` x `image_height`.
//
//   GLOBAL                - the whole image: (0, 0, 1, 1).
//   BOUNDING_BOX          - the stored box with x values divided by
//                           `image_width` and y values by `image_height`.
//   RELATIVE_BOUNDING_BOX - the stored box; the image size is not used.
//   MASK                  - the bounding box of the mask, divided by the
//                           image size in the same way.
//
// The image dimensions are used as divisors without a zero check.
Rectangle_f Location::ConvertToRelativeBBox(int image_width,
                                            int image_height) const {
  switch (location_data_.format()) {
    case LocationData::GLOBAL:
      return Rectangle_f(0.0f, 0.0f, 1.0f, 1.0f);

    case LocationData::BOUNDING_BOX: {
      const auto& stored = location_data_.bounding_box();
      const float left = static_cast<float>(stored.xmin()) / image_width;
      const float top = static_cast<float>(stored.ymin()) / image_height;
      const float width = static_cast<float>(stored.width()) / image_width;
      const float height = static_cast<float>(stored.height()) / image_height;
      return Rectangle_f(left, top, width, height);
    }

    case LocationData::RELATIVE_BOUNDING_BOX: {
      const auto& stored = location_data_.relative_bounding_box();
      return Rectangle_f(stored.xmin(), stored.ymin(), stored.width(),
                         stored.height());
    }

    case LocationData::MASK: {
      const auto mask_box = MaskToRectangle(location_data_);
      const float left = static_cast<float>(mask_box.xmin()) / image_width;
      const float top = static_cast<float>(mask_box.ymin()) / image_height;
      const float width = static_cast<float>(mask_box.Width()) / image_width;
      const float height =
          static_cast<float>(mask_box.Height()) / image_height;
      return Rectangle_f(left, top, width, height);
    }

    default:
      // Note that this shouldn't happen, but a default clause is required by
      // the Android compiler.
      return Rectangle_f();
  }
}
|
|
|
|
template <>
|
|
::mediapipe::BoundingBox Location::GetBBox<::mediapipe::BoundingBox>() const {
|
|
ABSL_CHECK_EQ(LocationData::BOUNDING_BOX, location_data_.format());
|
|
const auto& box = location_data_.bounding_box();
|
|
::mediapipe::BoundingBox bounding_box;
|
|
bounding_box.set_left_x(box.xmin());
|
|
bounding_box.set_upper_y(box.ymin());
|
|
bounding_box.set_right_x(box.width() + box.xmin());
|
|
bounding_box.set_lower_y(box.height() + box.ymin());
|
|
return bounding_box;
|
|
}
|
|
|
|
template <>
|
|
::mediapipe::BoundingBox Location::ConvertToBBox<::mediapipe::BoundingBox>(
|
|
int image_width, int image_height) const {
|
|
const auto& rect = ConvertToBBox<Rectangle_i>(image_width, image_height);
|
|
::mediapipe::BoundingBox bounding_box;
|
|
bounding_box.set_left_x(rect.xmin());
|
|
bounding_box.set_upper_y(rect.ymin());
|
|
bounding_box.set_right_x(rect.xmax());
|
|
bounding_box.set_lower_y(rect.ymax());
|
|
return bounding_box;
|
|
}
|
|
|
|
std::vector<Point2_f> Location::GetRelativeKeypoints() const {
|
|
ABSL_CHECK_EQ(LocationData::RELATIVE_BOUNDING_BOX, location_data_.format());
|
|
std::vector<Point2_f> keypoints;
|
|
for (const auto& keypoint : location_data_.relative_keypoints()) {
|
|
keypoints.emplace_back(Point2_f(keypoint.x(), keypoint.y()));
|
|
}
|
|
return keypoints;
|
|
}
|
|
|
|
std::vector<Point2_i> Location::ConvertToKeypoints(int image_width,
|
|
int image_height) const {
|
|
std::vector<Point2_i> keypoints;
|
|
for (const auto& keypoint : location_data_.relative_keypoints()) {
|
|
keypoints.emplace_back(
|
|
Point2_i(static_cast<int>(std::round(image_width * keypoint.x())),
|
|
static_cast<int>(std::round(image_height * keypoint.y()))));
|
|
}
|
|
return keypoints;
|
|
}
|
|
|
|
void Location::SetRelativeKeypoints(const std::vector<Point2_f>& keypoints) {
|
|
location_data_.clear_relative_keypoints();
|
|
for (const auto& keypoint : keypoints) {
|
|
auto* relative_keypoint = location_data_.add_relative_keypoints();
|
|
relative_keypoint->set_x(keypoint.x());
|
|
relative_keypoint->set_y(keypoint.y());
|
|
}
|
|
}
|
|
|
|
// Overwrites the wrapped location data with a copy of `proto`. Unlike the
// constructor taking a LocationData, no validity check is performed here.
void Location::SetFromProto(const LocationData& proto) {
  location_data_.CopyFrom(proto);
}
|
|
|
|
// Copies the wrapped location data into `*proto`, replacing its previous
// contents. `proto` is dereferenced without a null check.
void Location::ConvertToProto(LocationData* proto) const {
  proto->CopyFrom(location_data_);
}
|
|
|
|
// Returns a copy of the wrapped location data.
LocationData Location::ConvertToProto() const {
  return location_data_;
}
|
|
|
|
} // namespace mediapipe
|