bdfdaef305
GitOrigin-RevId: b2062656e5b3d33264e28ed0cbca31c4b93fe1bf
258 lines
11 KiB
C++
258 lines
11 KiB
C++
// Copyright 2019 The MediaPipe Authors.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
//
|
|
// Get a cv::Mat view of the ImageFrame (this is efficient):
|
|
// ::mediapipe::formats::MatView(&frame);
|
|
//
|
|
// Copying data from raw data (stored contiguously):
|
|
// frame.CopyPixelData(format, width, height, raw_data_ptr,
|
|
// ImageFrame::kDefaultAlignmentBoundary);
|
|
//
|
|
// Convert an RGB ImageFrame (rgb_frame) to Grayscale:
|
|
// ImageFrame gray_frame(ImageFormat::GRAY8, rgb_frame.Width(),
|
|
// rgb_frame.Height());
|
|
// cv::Mat rgb_frame_mat = ::mediapipe::formats::MatView(&rgb_frame);
|
|
// cv::Mat gray_frame_mat = ::mediapipe::formats::MatView(&gray_frame);
|
|
// cv::cvtColor(rgb_frame_mat, gray_frame_mat, CV_RGB2GRAY);
|
|
//
|
|
// Resize an ImageFrame:
|
|
// ImageFrame small_image(ImageFormat::GRAY8, 10, 10);
|
|
// cv::Mat destination = ::mediapipe::formats::MatView(&small_image);
|
|
// cv::resize(::mediapipe::formats::MatView(&large_image), destination,
|
|
// destination.size(), 0, 0, cv::INTER_LINEAR);
|
|
|
|
#ifndef MEDIAPIPE_FRAMEWORK_FORMATS_IMAGE_FRAME_H_
|
|
#define MEDIAPIPE_FRAMEWORK_FORMATS_IMAGE_FRAME_H_
|
|
|
|
#include <functional>
|
|
#include <memory>
|
|
#include <string>
|
|
|
|
#include "mediapipe/framework/formats/image_format.pb.h"
|
|
#include "mediapipe/framework/port.h"
|
|
#include "mediapipe/framework/port/integral_types.h"
|
|
#include "mediapipe/framework/tool/type_util.h"
|
|
|
|
// IMAGE_FRAME_RAW_IMAGE expands to MEDIAPIPE_HAS_RTTI, so it carries whatever
// value that macro has at the point of use.
// NOTE(review): presumably this gates an RTTI-dependent raw-image feature;
// the macro is not referenced in the visible part of this header, so confirm
// against its use sites.
#define IMAGE_FRAME_RAW_IMAGE MEDIAPIPE_HAS_RTTI
|
|
|
|
namespace mediapipe {
|
|
|
|
// A container for storing an image or a video frame, in one of several
|
|
// formats. Pixels are encoded row-major in an interleaved fashion.
|
|
//
|
|
// Formats supported by ImageFrame are listed in the ImageFormat proto.
|
|
// It is the intention of ImageFormat to specify both the data format
|
|
// and the colorspace used. For example GRAY8 and GRAY16 both use the
|
|
// same colorspace but have different formats. Although it would be
|
|
// possible to keep HSV, linearRGB, or BGR values inside an ImageFrame
|
|
// (with format SRGB) this is an abuse of the class. If you need a new
|
|
// format, please add one to ImageFormat::Format.
|
|
//
|
|
// Do not assume that the pixel data is stored contiguously. It may be
|
|
// stored with row padding for alignment purposes.
|
|
class ImageFrame {
|
|
public:
|
|
typedef std::function<void(uint8*)> Deleter;
|
|
|
|
// This class offers a few standard delete functions and retains
|
|
// compatibility with the previous API.
|
|
class PixelDataDeleter {
|
|
public:
|
|
static const Deleter kArrayDelete;
|
|
static const Deleter kFree;
|
|
static const Deleter kAlignedFree;
|
|
static const Deleter kNone;
|
|
};
|
|
|
|
// Use a default alignment boundary of 16 because Intel SSE2 instructions may
|
|
// incur performance penalty when accessing data not aligned on a 16-byte
|
|
// boundary. FFmpeg requires at least this level of alignment.
|
|
static const uint32 kDefaultAlignmentBoundary = 16;
|
|
|
|
// If the pixel data of an ImageFrame will be passed to an OpenGL function
|
|
// such as glTexImage2D() or glReadPixels(), use a four-byte alignment
|
|
// boundary because that is the initial value of the OpenGL GL_PACK_ALIGNMENT
|
|
// and GL_UNPACK_ALIGNMENT parameters.
|
|
static const uint32 kGlDefaultAlignmentBoundary = 4;
|
|
|
|
// Returns number of channels for an ImageFormat.
|
|
static int NumberOfChannelsForFormat(ImageFormat::Format format);
|
|
// Returns the channel size for an ImageFormat.
|
|
static int ChannelSizeForFormat(ImageFormat::Format format);
|
|
// Returns depth of each channel in bytes for an ImageFormat.
|
|
static int ByteDepthForFormat(ImageFormat::Format format);
|
|
|
|
ImageFrame(const ImageFrame&) = delete;
|
|
ImageFrame& operator=(const ImageFrame&) = delete;
|
|
// Creates an empty ImageFrame. It will need to be initialized by some other
|
|
// means.
|
|
ImageFrame();
|
|
|
|
// Allocate a frame of the appropriate size. Does not zero it out.
|
|
// Each row will be aligned to alignment_boundary. alignment_boundary
|
|
// must be a power of 2 (the number 1 is valid, and means the data will
|
|
// be stored contiguously).
|
|
ImageFrame(ImageFormat::Format format, int width, int height,
|
|
uint32 alignment_boundary);
|
|
// Same as above, but use kDefaultAlignmentBoundary for alignment_boundary.
|
|
ImageFrame(ImageFormat::Format format, int width, int height);
|
|
|
|
// Acquires ownership of pixel_data. Sets the deletion method
|
|
// to use on pixel_data with deletion_method (which defaults
|
|
// to using delete[]). pixel_data must have been allocated of
|
|
// size at least width_step*height and width_step must be at least
|
|
// width*num_channels*depth. Both width_step and depth are in units
|
|
// of bytes.
|
|
ImageFrame(ImageFormat::Format format, int width, int height, int width_step,
|
|
uint8* pixel_data,
|
|
Deleter deleter = std::default_delete<uint8[]>());
|
|
|
|
ImageFrame(ImageFrame&& move_from);
|
|
ImageFrame& operator=(ImageFrame&& move_from);
|
|
|
|
// Returns true if the ImageFrame is unallocated.
|
|
bool IsEmpty() const { return pixel_data_ == nullptr; }
|
|
|
|
// Set the entire frame allocation to zero, including alignment
|
|
// padding areas.
|
|
void SetToZero();
|
|
// Set the padding bytes at the end of each row (that are used for
|
|
// alignment) to deterministic values. This function should be called
|
|
// to get deterministic behavior from functions that read the padding
|
|
// areas (generally as part of highly optimized operations such as
|
|
// those in ffmpeg).
|
|
void SetAlignmentPaddingAreas();
|
|
|
|
// Returns true if the data is stored contiguously (without any
|
|
// alignment padding areas).
|
|
bool IsContiguous() const;
|
|
|
|
// Returns true if each row of the data is aligned to
|
|
// alignment_boundary. If IsAligned(16) is true then so are
|
|
// IsAligned(8), IsAligned(4), IsAligned(2), and IsAligned(1).
|
|
// alignment_boundary must be 1 or a power of 2.
|
|
bool IsAligned(uint32 alignment_boundary) const;
|
|
|
|
// Returns the image / video format.
|
|
ImageFormat::Format Format() const { return format_; }
|
|
// Returns the width of the image in pixels.
|
|
int Width() const { return width_; }
|
|
// Returns the height of the image in pixels.
|
|
int Height() const { return height_; }
|
|
// Returns the channel size.
|
|
int ChannelSize() const;
|
|
// Returns the number of channels.
|
|
int NumberOfChannels() const;
|
|
// Returns the depth of each image channel in bytes.
|
|
int ByteDepth() const;
|
|
|
|
// Returns the byte offset between a pixel value and the same pixel
|
|
// and channel in the next row. Notice, that for alignment reasons,
|
|
// there may be unused padding bytes at the end of each row
|
|
// (WidthStep() - Width()*NumberOfChannels*ByteDepth() will give the
|
|
// number of unused bytes).
|
|
int WidthStep() const { return width_step_; }
|
|
|
|
// Reset the current image frame and copy the data from image_frame into
|
|
// this image frame. The alignment_boundary must be given (and won't
|
|
// necessarily match the alignment_boundary of the input image_frame).
|
|
void CopyFrom(const ImageFrame& image_frame, uint32 alignment_boundary);
|
|
|
|
// Get a mutable pointer to the underlying image data. The ImageFrame
|
|
// retains ownership.
|
|
uint8* MutablePixelData() { return pixel_data_.get(); }
|
|
// Get a const pointer to the underlying image data.
|
|
const uint8* PixelData() const { return pixel_data_.get(); }
|
|
|
|
// Returns the total size of the pixel data.
|
|
int PixelDataSize() const { return Height() * WidthStep(); }
|
|
// Returns the total size the pixel data would take if it was stored
|
|
// contiguously (which may not be the case).
|
|
int PixelDataSizeStoredContiguously() const {
|
|
return Width() * Height() * ByteDepth() * NumberOfChannels();
|
|
}
|
|
|
|
// Initializes ImageFrame from pixel data without copying.
|
|
// ImageFrame takes ownership of pixel_data. See the Constructor
|
|
// with the same arguments for details.
|
|
void AdoptPixelData(ImageFormat::Format format, int width, int height,
|
|
int width_step, uint8* pixel_data,
|
|
Deleter deleter = std::default_delete<uint8[]>());
|
|
|
|
// Resets the ImageFrame and makes it a copy of the provided pixel
|
|
// data, which is assumed to be stored contiguously. The ImageFrame
|
|
// will use the given alignment_boundary.
|
|
void CopyPixelData(ImageFormat::Format format, int width, int height,
|
|
const uint8* pixel_data, uint32 alignment_boundary);
|
|
|
|
// Resets the ImageFrame and makes it a copy of the provided pixel
|
|
// data, with given width_step. The ImageFrame
|
|
// will use the given alignment_boundary.
|
|
void CopyPixelData(ImageFormat::Format format, int width, int height,
|
|
int width_step, const uint8* pixel_data,
|
|
uint32 alignment_boundary);
|
|
|
|
// Allocates a frame of the specified format, width, height, and alignment,
|
|
// without clearing any current pixel data. See the constructor with the same
|
|
// argument list.
|
|
void Reset(ImageFormat::Format format, int width, int height,
|
|
uint32 alignment_boundary);
|
|
|
|
// Relinquishes ownership of the pixel data. Notice that the unique_ptr
|
|
// uses a non-standard deleter.
|
|
std::unique_ptr<uint8[], Deleter> Release();
|
|
|
|
// Copy the 8-bit ImageFrame into a contiguous, pre-allocated buffer. Note
|
|
// that ImageFrame does not necessarily store its data contiguously (i.e. do
|
|
// not use copy_n to move image data).
|
|
void CopyToBuffer(uint8* buffer, int buffer_size) const;
|
|
|
|
// A version of CopyToBuffer for 16-bit pixel data. Note that buffer_size
|
|
// stores the number of 16-bit elements in the buffer, not the number of
|
|
// bytes.
|
|
void CopyToBuffer(uint16* buffer, int buffer_size) const;
|
|
|
|
// A version of CopyToBuffer for float pixel data. Note that buffer_size
|
|
// stores the number of float elements in the buffer, not the number of
|
|
// bytes.
|
|
void CopyToBuffer(float* buffer, int buffer_size) const;
|
|
|
|
// Returns an error message which prints out the format encountered.
|
|
static std::string InvalidFormatString(ImageFormat::Format format);
|
|
|
|
private:
|
|
// Returns true if alignment_number is 1 or a power of 2.
|
|
static bool IsValidAlignmentNumber(uint32 alignment_boundary);
|
|
|
|
// The internal implementation of copying data from the provided pixel data.
|
|
// If width_step is 0, then calculates width_step assuming no padding.
|
|
void InternalCopyFrom(int width, int height, int width_step, int channel_size,
|
|
const uint8* pixel_data);
|
|
|
|
// The internal implementation of copying data to the provided buffer.
|
|
// If width_step is 0, then calculates width_step assuming no padding.
|
|
void InternalCopyToBuffer(int width_step, char* buffer) const;
|
|
|
|
ImageFormat::Format format_;
|
|
int width_;
|
|
int height_;
|
|
int width_step_;
|
|
|
|
std::unique_ptr<uint8[], Deleter> pixel_data_;
|
|
};
|
|
|
|
} // namespace mediapipe
|
|
|
|
#endif // MEDIAPIPE_FRAMEWORK_FORMATS_IMAGE_FRAME_H_
|