257 lines
10 KiB
C++
257 lines
10 KiB
C++
// Copyright 2019 The MediaPipe Authors.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
#ifndef MEDIAPIPE_FRAMEWORK_FORMATS_YUV_IMAGE_H_
|
|
#define MEDIAPIPE_FRAMEWORK_FORMATS_YUV_IMAGE_H_
|
|
|
|
#include <functional>
|
|
#include <memory>
|
|
|
|
#include "libyuv/video_common.h"
|
|
#include "mediapipe/framework/port/integral_types.h"
|
|
|
|
namespace mediapipe {
|
|
|
|
// Generic data structure for representing various 8-bit YUV image formats with
|
|
// pixel format specification in FourCC. The class is also capable of
|
|
// representing higher bit depth YUV image formats (10-bit, 12-bit, or 16-bit)
|
|
// where each format uses the lower bits of a uint16. For these high bit depth
|
|
// configurations, only the fully planar representation (i.e., u/v are not
|
|
// interleaved) with chroma subsampling of 420 is supported. Although there are
|
|
// high bit depth fourcc codes, none of them are defined or supported by libyuv,
|
|
// and there does not appear to be a standard code for the fully planar 10-bit
|
|
// format we use (this format is efficient for in memory manipulation but not
|
|
// necessarily for transport). Therefore, when bit_depth > 8, the only allowable
|
|
// chroma subsampling is 420 and the corresponding fourc_cc will be FOURCC_ANY.
|
|
//
|
|
// This class is primarily designed as a wrapper around 8-bit YUV image formats
|
|
// used by Android (NV21, YV12) and FFmpeg (I420 a.k.a. YCbCr420P).
|
|
//
|
|
// Note that YUV and YCbCr, although often used interchangeably, are different.
|
|
// The YUV color space was developed for analog systems and is not defined
|
|
// precisely in the technical and scientific literature; instead, it refers to a
|
|
// whole family of luminance/chrominance color spaces. On the other hand, the
|
|
// YCbCr color space is defined in the ITU-R BT.601-5 and ITU-R BT.709-5
|
|
// standards of ITU (International Telecommunication Union) for digital systems.
|
|
// Thus, YCbCr420P is referring to a specific digital color space and a specific
|
|
// storage format.
|
|
//
|
|
// Class takes ownership of the pixel data buffers provided as input to the
|
|
// constructor or Initialize().
|
|
//
|
|
// A typical FFmpeg usage would be:
|
|
//
|
|
// AVFrame frame;
|
|
// avcodec_decode_video2(&codec_context, &frame, &got_frame, &av_packet);
|
|
// const size_t y_size = frame.linesize[0] * height;
|
|
// const size_t u_size = frame.linesize[1] * ((height + 1) / 2);
|
|
// const size_t v_size = frame.linesize[2] * ((height + 1) / 2);
|
|
// auto y = absl::make_unique<uint8[]> y(y_size);
|
|
// auto u = absl::make_unique<uint8[]> u(u_size);
|
|
// auto v = absl::make_unique<uint8[]> v(v_size);
|
|
// libyuv::I420Copy(frame.data[0], frame.linesize[0],
|
|
// frame.data[1], frame.linesize[1],
|
|
// frame.data[2], frame.linesize[2],
|
|
// y.get(), frame.linesize[0],
|
|
// u.get(), frame.linesize[1],
|
|
// v.get(), frame.linesize[2],
|
|
// width, height);
|
|
// Outputs().Tag("VIDEO")->Add(new YUVImage(libyuv::FOURCC_I420,
|
|
// std::move(y), frame.linesize[0],
|
|
// std::move(u), frame.linesize[1],
|
|
// std::move(v), frame.linesize[2],
|
|
// width, height),
|
|
// timestamp);
|
|
//
|
|
// Note that for formats with subsampled U and V channels, like I420, the
|
|
// dimensions of the U and V channels are half the dimensions of the Y channel,
|
|
// rounded up. Rounding up can be accomplished by adding one to the Y dimensions
|
|
// before dividing by 2.
|
|
//
|
|
// Please do not add new constructors unless it is unavoidable; the default
|
|
// constructor followed by Initialize() should cover most of the use cases.
|
|
class YUVImage {
|
|
public:
|
|
// The matrix coefficients used (e.g., defines the conversion matrix from
|
|
// Ycbcr
|
|
// to RGB).
|
|
enum ColorMatrixCoefficients {
|
|
COLOR_MATRIX_COEFFICIENTS_RGB = 0,
|
|
// Also ITU-R BT1361 / IEC 61966-2-4 xvYCC709 / SMPTE RP177 Annex B.
|
|
COLOR_MATRIX_COEFFICIENTS_BT709 = 1,
|
|
COLOR_MATRIX_COEFFICIENTS_UNSPECIFIED = 2,
|
|
COLOR_MATRIX_COEFFICIENTS_FCC = 4,
|
|
// Also ITU-R BT601-6 625 / ITU-R BT1358 625 / ITU-R BT1700 625 PAL &
|
|
/// SECAM / IEC 61966-2-4 xvYCC601.
|
|
COLOR_MATRIX_COEFFICIENTS_BT470BG = 5,
|
|
// Also ITU-R BT601-6 525 / ITU-R BT1358 525 / ITU-R BT1700 NTSC /
|
|
/// functionally identical to above.
|
|
COLOR_MATRIX_COEFFICIENTS_SMPTE170M = 6,
|
|
COLOR_MATRIX_COEFFICIENTS_SMPTE240M = 7,
|
|
// Used by Dirac / VC-2 and H.264 FRext, see ITU-T SG16.
|
|
COLOR_MATRIX_COEFFICIENTS_YCOCG = 8,
|
|
// ITU-R BT2020 non-constant luminance system.
|
|
COLOR_MATRIX_COEFFICIENTS_BT2020_NCL = 9,
|
|
// ITU-R BT2020 constant luminance system.
|
|
COLOR_MATRIX_COEFFICIENTS_BT2020_CL = 10,
|
|
// SMPTE 2085, Y'D'zD'x
|
|
COLOR_MATRIX_COEFFICIENTS_SMPTE2085 = 11,
|
|
// Chromaticity-derived non-constant luminance.
|
|
COLOR_MATRIX_COEFFICIENTS_CHROMA_DERIVED_NCL = 12,
|
|
// Chromaticity-derived constant luminance.
|
|
COLOR_MATRIX_COEFFICIENTS_CHROMA_DERIVED_CL = 13,
|
|
// ITU-R BT.[HDR-TV] ICtCp
|
|
COLOR_MATRIX_COEFFICIENTS_ICTCP = 14,
|
|
};
|
|
|
|
YUVImage() = default;
|
|
~YUVImage() { Clear(); }
|
|
|
|
// Convenience constructor
|
|
YUVImage(libyuv::FourCC fourcc, //
|
|
std::unique_ptr<uint8[]> data_location, //
|
|
uint8* data0, int stride0, //
|
|
uint8* data1, int stride1, //
|
|
uint8* data2, int stride2, //
|
|
int width, int height, int bit_depth = 8) {
|
|
uint8* tmp = data_location.release();
|
|
std::function<void()> deallocate = [tmp]() { delete[] tmp; };
|
|
Initialize(fourcc, //
|
|
deallocate, //
|
|
data0, stride0, //
|
|
data1, stride1, //
|
|
data2, stride2, //
|
|
width, height, bit_depth);
|
|
}
|
|
|
|
// Convenience constructor to construct the YUVImage with data stored
|
|
// in three unique_ptrs.
|
|
YUVImage(libyuv::FourCC fourcc, //
|
|
std::unique_ptr<uint8[]> data0, int stride0, //
|
|
std::unique_ptr<uint8[]> data1, int stride1, //
|
|
std::unique_ptr<uint8[]> data2, int stride2, //
|
|
int width, int height, int bit_depth = 8) {
|
|
uint8* tmp0 = data0.release();
|
|
uint8* tmp1 = data1.release();
|
|
uint8* tmp2 = data2.release();
|
|
std::function<void()> deallocate = [tmp0, tmp1, tmp2]() {
|
|
delete[] tmp0;
|
|
delete[] tmp1;
|
|
delete[] tmp2;
|
|
};
|
|
Initialize(fourcc, //
|
|
deallocate, //
|
|
tmp0, stride0, //
|
|
tmp1, stride1, //
|
|
tmp2, stride2, //
|
|
width, height, bit_depth);
|
|
}
|
|
|
|
// Clear and initialize member variables.
|
|
//
|
|
// First argument is an enum of FourCC (see http://www.fourcc.org/yuv.php)
|
|
// defined in libyuv/video_common.h
|
|
//
|
|
// A deallocation function is provided which will be called on the next
|
|
// Clear() or on destruction.
|
|
//
|
|
// The next three argument pairs are pointer to pixel data buffer for each
|
|
// plane and its image stride (http://en.wikipedia.org/wiki/Stride).
|
|
//
|
|
// The class is very generic and it is up to the user how they want
|
|
// to use this data holder class. For example, if one intends to
|
|
// use this for NV21, one can ignore data2 and stride2 by giving
|
|
// nullptr and 0, respectively, and call the right libyuv functions
|
|
// for actual processing. This class is agnostic of the data and the
|
|
// pixel format it holds.
|
|
void Initialize(libyuv::FourCC fourcc, //
|
|
std::function<void()> deallocation_function, //
|
|
uint8* data0, int stride0, //
|
|
uint8* data1, int stride1, //
|
|
uint8* data2, int stride2, //
|
|
int width, int height, int bit_depth = 8) {
|
|
Clear();
|
|
deallocation_function_ = deallocation_function;
|
|
fourcc_ = fourcc;
|
|
data_[0] = data0;
|
|
stride_[0] = stride0;
|
|
data_[1] = data1;
|
|
stride_[1] = stride1;
|
|
data_[2] = data2;
|
|
stride_[2] = stride2;
|
|
width_ = width;
|
|
height_ = height;
|
|
bit_depth_ = bit_depth;
|
|
}
|
|
|
|
void Clear() {
|
|
if (deallocation_function_) {
|
|
deallocation_function_();
|
|
deallocation_function_ = nullptr;
|
|
}
|
|
fourcc_ = libyuv::FOURCC_ANY;
|
|
data_[0] = nullptr;
|
|
data_[1] = nullptr;
|
|
data_[2] = nullptr;
|
|
stride_[0] = 0;
|
|
stride_[1] = 0;
|
|
stride_[2] = 0;
|
|
width_ = 0;
|
|
height_ = 0;
|
|
bit_depth_ = 0;
|
|
}
|
|
|
|
// Getters.
|
|
libyuv::FourCC fourcc() const { return fourcc_; }
|
|
const uint8* data(int index) const { return data_[index]; }
|
|
int stride(int index) const { return stride_[index]; }
|
|
int width() const { return width_; }
|
|
int height() const { return height_; }
|
|
int bit_depth() const { return bit_depth_; }
|
|
ColorMatrixCoefficients matrix_coefficients() const {
|
|
return matrix_coefficients_;
|
|
}
|
|
bool full_range() const { return full_range_; }
|
|
|
|
// Setters.
|
|
void set_fourcc(libyuv::FourCC fourcc) { fourcc_ = fourcc; }
|
|
uint8* mutable_data(int index) { return data_[index]; }
|
|
void set_stride(int index, int stride) { stride_[index] = stride; }
|
|
void set_width(int width) { width_ = width; }
|
|
void set_height(int height) { height_ = height; }
|
|
void set_matrix_coefficients(ColorMatrixCoefficients coeffs) {
|
|
matrix_coefficients_ = coeffs;
|
|
}
|
|
void set_full_range(bool full_range) { full_range_ = full_range; }
|
|
|
|
private:
|
|
static constexpr int kMaxNumPlanes = 3;
|
|
|
|
std::function<void()> deallocation_function_;
|
|
|
|
libyuv::FourCC fourcc_ = libyuv::FOURCC_ANY;
|
|
uint8* data_[kMaxNumPlanes];
|
|
int stride_[kMaxNumPlanes];
|
|
int width_ = 0;
|
|
int height_ = 0;
|
|
int bit_depth_ = 0;
|
|
ColorMatrixCoefficients matrix_coefficients_ =
|
|
ColorMatrixCoefficients::COLOR_MATRIX_COEFFICIENTS_UNSPECIFIED;
|
|
bool full_range_ = false;
|
|
};
|
|
|
|
} // namespace mediapipe
|
|
|
|
#endif // MEDIAPIPE_FRAMEWORK_FORMATS_YUV_IMAGE_H_
|