208 lines
9.3 KiB
C++
208 lines
9.3 KiB
C++
// Copyright 2019 The MediaPipe Authors.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
//
|
|
// A container for location data, representing location information in an image.
|
|
// This wrapper provides two functionalities:
|
|
// 1. Factory methods for creation of Location objects and thus LocationData
|
|
// protocol buffers. These methods guarantee a valid location data and are
|
|
// the prefer way of creating such.
|
|
// 2. Accessors which allow for extracting location information in various
|
|
// formats. If necessary, the location information is converted.
|
|
|
|
#ifndef MEDIAPIPE_FRAMEWORK_FORMATS_LOCATION_H_
|
|
#define MEDIAPIPE_FRAMEWORK_FORMATS_LOCATION_H_
|
|
|
|
#include <memory>
|
|
|
|
#include "mediapipe/framework/formats/location_data.pb.h"
|
|
#include "mediapipe/framework/port.h"
|
|
#include "mediapipe/framework/port/point2.h"
|
|
#include "mediapipe/framework/port/rectangle.h"
|
|
|
|
// clang-format off
|
|
#if !defined(LOCATION_OPENCV)
|
|
# define LOCATION_OPENCV 1
|
|
#endif
|
|
|
|
#if LOCATION_OPENCV
|
|
#include "mediapipe/framework/port/opencv_core_inc.h"
|
|
#endif
|
|
// clang-format on
|
|
|
|
namespace mediapipe {
|
|
class BoundingBox;
|
|
} // namespace mediapipe
|
|
|
|
namespace mediapipe {
|
|
|
|
class Location {
|
|
public:
|
|
// CREATION METHODS.
|
|
Location();
|
|
// Constructs a location wrapping the specified location data. Checks the
|
|
// validity of the input and crashes upon failure.
|
|
explicit Location(const LocationData& location_data);
|
|
// Creates a location of type GLOBAL, i.e. a location representing the full
|
|
// image.
|
|
static Location CreateGlobalLocation();
|
|
// Creates a location of type BOUNDING_BOX, i.e. it is based on a bounding box
|
|
// defined by its upper left corner (xmin, ymin) and its width and height.
|
|
static Location CreateBBoxLocation(int xmin, int ymin, int width, int height);
|
|
// Creates a location of type BOUNDING_BOX from bounding boxes in various
|
|
// formats.
|
|
static Location CreateBBoxLocation(const Rectangle_i& rect);
|
|
static Location CreateBBoxLocation(const ::mediapipe::BoundingBox& bbox);
|
|
#if LOCATION_OPENCV
|
|
static Location CreateBBoxLocation(const cv::Rect& rect);
|
|
#endif
|
|
// Creates a location of type RELATIVE_BOUNDING_BOX, i.e. it is based on a
|
|
// bounding box defined by its upper left corner (xmin, ymin) and its width
|
|
// and height, all relative to the image dimensions.
|
|
static Location CreateRelativeBBoxLocation(float relative_xmin,
|
|
float relative_ymin,
|
|
float relative_width,
|
|
float relative_height);
|
|
// Creates a location of type RELATIVE_BOUNDING_BOX from bounding boxes in
|
|
// various formats.
|
|
static Location CreateRelativeBBoxLocation(const Rectangle_f& relative_rect);
|
|
#if LOCATION_OPENCV
|
|
// Creates a location of type MASK from a single-channel uint8 or float
|
|
// cv::Mat_ (type is CV_8UC1 or CV_32FC1). Check fails if the mat is not
|
|
// single channel . All pixel with positive values are considered foreground,
|
|
// the rest background.
|
|
template <typename T>
|
|
static Location CreateCvMaskLocation(const cv::Mat_<T>& mask);
|
|
#endif
|
|
|
|
// Returns the location type describing the type of data it contains. This
|
|
// type is set at creation time based on the one of the above factory methods.
|
|
LocationData::Format GetFormat() const;
|
|
// Checks the validity of the specified location_data, i.e. whether all the
|
|
// necessary fields for the location data type are set.
|
|
static bool IsValidLocationData(const LocationData& location_data);
|
|
|
|
// MODIFIERS
|
|
// Scales the current bounding box (x,y,width,height) by the "scale" amount
|
|
// (1.0f means no scale, <1.0f means scale down, and >1.0f means scale up).
|
|
// Returns *this.
|
|
//
|
|
// NOTE: it does not handle masks.
|
|
Location& Scale(float scale);
|
|
|
|
#if LOCATION_OPENCV
|
|
// Enlarges the location by the given factor. This operation keeps the center
|
|
// of the location fixed, while enlarging its dimensions by the given factor.
|
|
// Note that the location may partially lie outside the image after this
|
|
// operation. OpenCV required for mask enlargement. Returns *this.
|
|
Location& Enlarge(float factor);
|
|
#endif
|
|
|
|
// Resizes the location such that it is the tighest square location containing
|
|
// centered the original location. It supports locations of type GLOBAL,
|
|
// BOUNDING_BOX and RELATIVE_BOUNDING_BOX, otherwise it CHECK-fails. The user
|
|
// must specify the image dimensions. Returns *this.
|
|
Location& Square(int image_width, int image_height);
|
|
|
|
// If the location is larger than the image, then in some cases it is
|
|
// beneficial to translate the location such that the image is centered
|
|
// within the location. The following method achieves this translation.
|
|
// Returns *this.
|
|
Location& ShiftToFitBestIntoImage(int image_width, int image_height);
|
|
|
|
// Replaces the location with the intersection of the current location the
|
|
// specified crop rectangle. Locations of type GLOBAL remain
|
|
// unmodified. The image size is used for locations of type
|
|
// BOUNDARY_BOX and MASK. Using this override for locations of type
|
|
// RELATIVE_BOUNDING_BOX is an error. Use the Rectangle_f override of this
|
|
// function instead.
|
|
// This operation is useful when one needs to make sure that the location is
|
|
// fully contained within the specified image. Returns *this.
|
|
Location& Crop(const Rectangle_i& crop_rectangle);
|
|
|
|
// Replaces the location with the intersection of the current location and the
|
|
// specified crop rectangle. Locations of type GLOBAL remain
|
|
// unmodified. This override is only for locations of type
|
|
// RELATIVE_BOUNDING_BOX. Use the Rectangle_i override of this
|
|
// function for other types of locations.
|
|
// This operation is useful when one needs to make sure that the location is
|
|
// fully contained within the specified image. Returns *this.
|
|
Location& Crop(const Rectangle_f& crop_rectangle);
|
|
|
|
// ACCESSORS.
|
|
// Non-type converting accessor: returns the requested data only if the output
|
|
// format is consistent with the location data type. E.g. if one requests a
|
|
// rectangle, then the wrapped location data should be of type BOUNDING_BOX.
|
|
// Accessor for location data type BOUNDING_BOX with two possible return types
|
|
// Rectangle_i and mediapipe.::mediapipe::BoundingBox.
|
|
template <typename T>
|
|
T GetBBox() const;
|
|
// Accessor for location data type RELATIVE_BOUNDING_BOX.
|
|
Rectangle_f GetRelativeBBox() const;
|
|
#if LOCATION_OPENCV
|
|
// Same as GetMask() with the difference that the return value is a cv::Mat of
|
|
// type CV_8UC1. It contains value 0 for background pixels and value 255 for
|
|
// foreground ones.
|
|
std::unique_ptr<cv::Mat> GetCvMask() const;
|
|
#endif
|
|
// Accessor for relative_keypoints in location data. Relative keypoints are
|
|
// specified with x and y coordinates, where both x and y are relative to the
|
|
// image width and height, respectively, and are in the range [0, 1]. Fails if
|
|
// location data is not of type RELATIVE_BOUNDING_BOX.
|
|
std::vector<Point2_f> GetRelativeKeypoints() const;
|
|
|
|
// Type converting accessor: returns the requested data in the specified
|
|
// output type. If the location data is in a format not directly convertible
|
|
// to the specified return type the following conversion principles are used:
|
|
// - Rectangle -> Mask: the rectangle is converted to a mask with all
|
|
// pixels inside the rectangle being foreground pixels.
|
|
// - Mask -> Rectangle: the tightest enclosing rectangle to the mask is the
|
|
// rectangle representation of a mask.
|
|
// - Global -> Rectangle, Mask: all the pixels in the image are considered
|
|
// foreground. Thus, the equivalent rectangle and mask are a rectangle
|
|
// covering the full image.
|
|
// The supported output types are the same as for GetBBox() above: Rectangle_i
|
|
// and mediapipe.::mediapipe::BoundingBox.
|
|
template <typename T>
|
|
T ConvertToBBox(int image_width, int image_height) const;
|
|
Rectangle_f ConvertToRelativeBBox(int image_width, int image_height) const;
|
|
#if LOCATION_OPENCV
|
|
std::unique_ptr<cv::Mat> ConvertToCvMask(int image_width,
|
|
int image_height) const;
|
|
#endif
|
|
// Returns keypoints in absolute pixel coordinates.
|
|
std::vector<Point2_i> ConvertToKeypoints(int image_width,
|
|
int image_height) const;
|
|
|
|
// Sets the keypoints.
|
|
void SetRelativeKeypoints(const std::vector<Point2_f>& keypoints);
|
|
|
|
// Serializes and deserializes the location object.
|
|
void ConvertToProto(LocationData* proto) const;
|
|
LocationData ConvertToProto() const;
|
|
void SetFromProto(const LocationData& proto);
|
|
|
|
#if !defined(MEDIAPIPE_MOBILE) && !defined(MEDIAPIPE_LITE)
|
|
// Deep equality comparison.
|
|
bool operator==(const Location& other) const;
|
|
bool operator!=(const Location& other) const;
|
|
#endif // !defined(MEDIAPIPE_MOBILE) && !defined(MEDIAPIPE_LITE)
|
|
|
|
private:
|
|
// The wrapped location data.
|
|
LocationData location_data_;
|
|
};
|
|
} // namespace mediapipe
|
|
|
|
#endif // MEDIAPIPE_FRAMEWORK_FORMATS_LOCATION_H_
|