// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef MEDIAPIPE_FRAMEWORK_FORMATS_YUV_IMAGE_H_
#define MEDIAPIPE_FRAMEWORK_FORMATS_YUV_IMAGE_H_

#include <functional>
#include <memory>
#include <utility>

#include "libyuv/video_common.h"
#include "mediapipe/framework/port/integral_types.h"

namespace mediapipe {

// Generic data structure for representing various 8-bit YUV image formats with
// pixel format specification in FourCC. The class is also capable of
// representing higher bit depth YUV image formats (10-bit, 12-bit, or 16-bit)
// where each format uses the lower bits of a uint16. For these high bit depth
// configurations, only the fully planar representation (i.e., u/v are not
// interleaved) with chroma subsampling of 420 is supported. Although there are
// high bit depth fourcc codes, none of them are defined or supported by libyuv,
// and there does not appear to be a standard code for the fully planar 10-bit
// format we use (this format is efficient for in memory manipulation but not
// necessarily for transport). Therefore, when bit_depth > 8, the only allowable
// chroma subsampling is 420 and the corresponding fourc_cc will be FOURCC_ANY.
//
// This class is primarily designed as a wrapper around 8-bit YUV image formats
// used by Android (NV21, YV12) and FFmpeg (I420 a.k.a. YCbCr420P).
//
// Note that YUV and YCbCr, although often used interchangeably, are different.
// The YUV color space was developed for analog systems and is not defined
// precisely in the technical and scientific literature; instead, it refers to a
// whole family of luminance/chrominance color spaces. On the other hand, the
// YCbCr color space is defined in the ITU-R BT.601-5 and ITU-R BT.709-5
// standards of ITU (International Telecommunication Union) for digital systems.
// Thus, YCbCr420P is referring to a specific digital color space and a specific
// storage format.
//
// Class takes ownership of the pixel data buffers provided as input to the
// constructor or Initialize().
//
// A typical FFmpeg usage would be:
//
//   AVFrame frame;
//   avcodec_decode_video2(&codec_context, &frame, &got_frame, &av_packet);
//   const size_t y_size = frame.linesize[0] * height;
//   const size_t u_size = frame.linesize[1] * ((height + 1) / 2);
//   const size_t v_size = frame.linesize[2] * ((height + 1) / 2);
//   auto y = absl::make_unique<uint8[]>(y_size);
//   auto u = absl::make_unique<uint8[]>(u_size);
//   auto v = absl::make_unique<uint8[]>(v_size);
//   libyuv::I420Copy(frame.data[0], frame.linesize[0],
//                    frame.data[1], frame.linesize[1],
//                    frame.data[2], frame.linesize[2],
//                    y.get(), frame.linesize[0],
//                    u.get(), frame.linesize[1],
//                    v.get(), frame.linesize[2],
//                    width, height);
//   Outputs().Tag("VIDEO")->Add(new YUVImage(libyuv::FOURCC_I420,
//                                            std::move(y), frame.linesize[0],
//                                            std::move(u), frame.linesize[1],
//                                            std::move(v), frame.linesize[2],
//                                            width, height),
//                               timestamp);
//
// Note that for formats with subsampled U and V channels, like I420, the
// dimensions of the U and V channels are half the dimensions of the Y channel,
// rounded up. Rounding up can be accomplished by adding one to the Y dimensions
// before dividing by 2.
//
// Please do not add new constructors unless it is unavoidable; the default
// constructor followed by Initialize() should cover most of the use cases.
class YUVImage { public: // The matrix coefficients used (e.g., defines the conversion matrix from // Ycbcr // to RGB). enum ColorMatrixCoefficients { COLOR_MATRIX_COEFFICIENTS_RGB = 0, // Also ITU-R BT1361 / IEC 61966-2-4 xvYCC709 / SMPTE RP177 Annex B. COLOR_MATRIX_COEFFICIENTS_BT709 = 1, COLOR_MATRIX_COEFFICIENTS_UNSPECIFIED = 2, COLOR_MATRIX_COEFFICIENTS_FCC = 4, // Also ITU-R BT601-6 625 / ITU-R BT1358 625 / ITU-R BT1700 625 PAL & /// SECAM / IEC 61966-2-4 xvYCC601. COLOR_MATRIX_COEFFICIENTS_BT470BG = 5, // Also ITU-R BT601-6 525 / ITU-R BT1358 525 / ITU-R BT1700 NTSC / /// functionally identical to above. COLOR_MATRIX_COEFFICIENTS_SMPTE170M = 6, COLOR_MATRIX_COEFFICIENTS_SMPTE240M = 7, // Used by Dirac / VC-2 and H.264 FRext, see ITU-T SG16. COLOR_MATRIX_COEFFICIENTS_YCOCG = 8, // ITU-R BT2020 non-constant luminance system. COLOR_MATRIX_COEFFICIENTS_BT2020_NCL = 9, // ITU-R BT2020 constant luminance system. COLOR_MATRIX_COEFFICIENTS_BT2020_CL = 10, // SMPTE 2085, Y'D'zD'x COLOR_MATRIX_COEFFICIENTS_SMPTE2085 = 11, // Chromaticity-derived non-constant luminance. COLOR_MATRIX_COEFFICIENTS_CHROMA_DERIVED_NCL = 12, // Chromaticity-derived constant luminance. COLOR_MATRIX_COEFFICIENTS_CHROMA_DERIVED_CL = 13, // ITU-R BT.[HDR-TV] ICtCp COLOR_MATRIX_COEFFICIENTS_ICTCP = 14, }; YUVImage() = default; ~YUVImage() { Clear(); } // Convenience constructor YUVImage(libyuv::FourCC fourcc, // std::unique_ptr data_location, // uint8* data0, int stride0, // uint8* data1, int stride1, // uint8* data2, int stride2, // int width, int height, int bit_depth = 8) { uint8* tmp = data_location.release(); std::function deallocate = [tmp]() { delete[] tmp; }; Initialize(fourcc, // deallocate, // data0, stride0, // data1, stride1, // data2, stride2, // width, height, bit_depth); } // Convenience constructor to construct the YUVImage with data stored // in three unique_ptrs. 
YUVImage(libyuv::FourCC fourcc, // std::unique_ptr data0, int stride0, // std::unique_ptr data1, int stride1, // std::unique_ptr data2, int stride2, // int width, int height, int bit_depth = 8) { uint8* tmp0 = data0.release(); uint8* tmp1 = data1.release(); uint8* tmp2 = data2.release(); std::function deallocate = [tmp0, tmp1, tmp2]() { delete[] tmp0; delete[] tmp1; delete[] tmp2; }; Initialize(fourcc, // deallocate, // tmp0, stride0, // tmp1, stride1, // tmp2, stride2, // width, height, bit_depth); } // Clear and initialize member variables. // // First argument is an enum of FourCC (see http://www.fourcc.org/yuv.php) // defined in libyuv/video_common.h // // A deallocation function is provided which will be called on the next // Clear() or on destruction. // // The next three argument pairs are pointer to pixel data buffer for each // plane and its image stride (http://en.wikipedia.org/wiki/Stride). // // The class is very generic and it is up to the user how they want // to use this data holder class. For example, if one intends to // use this for NV21, one can ignore data2 and stride2 by giving // nullptr and 0, respectively, and call the right libyuv functions // for actual processing. This class is agnostic of the data and the // pixel format it holds. 
void Initialize(libyuv::FourCC fourcc, // std::function deallocation_function, // uint8* data0, int stride0, // uint8* data1, int stride1, // uint8* data2, int stride2, // int width, int height, int bit_depth = 8) { Clear(); deallocation_function_ = deallocation_function; fourcc_ = fourcc; data_[0] = data0; stride_[0] = stride0; data_[1] = data1; stride_[1] = stride1; data_[2] = data2; stride_[2] = stride2; width_ = width; height_ = height; bit_depth_ = bit_depth; } void Clear() { if (deallocation_function_) { deallocation_function_(); deallocation_function_ = nullptr; } fourcc_ = libyuv::FOURCC_ANY; data_[0] = nullptr; data_[1] = nullptr; data_[2] = nullptr; stride_[0] = 0; stride_[1] = 0; stride_[2] = 0; width_ = 0; height_ = 0; bit_depth_ = 0; } // Getters. libyuv::FourCC fourcc() const { return fourcc_; } const uint8* data(int index) const { return data_[index]; } int stride(int index) const { return stride_[index]; } int width() const { return width_; } int height() const { return height_; } int bit_depth() const { return bit_depth_; } ColorMatrixCoefficients matrix_coefficients() const { return matrix_coefficients_; } bool full_range() const { return full_range_; } // Setters. 
void set_fourcc(libyuv::FourCC fourcc) { fourcc_ = fourcc; } uint8* mutable_data(int index) { return data_[index]; } void set_stride(int index, int stride) { stride_[index] = stride; } void set_width(int width) { width_ = width; } void set_height(int height) { height_ = height; } void set_matrix_coefficients(ColorMatrixCoefficients coeffs) { matrix_coefficients_ = coeffs; } void set_full_range(bool full_range) { full_range_ = full_range; } private: static constexpr int kMaxNumPlanes = 3; std::function deallocation_function_; libyuv::FourCC fourcc_ = libyuv::FOURCC_ANY; uint8* data_[kMaxNumPlanes]; int stride_[kMaxNumPlanes]; int width_ = 0; int height_ = 0; int bit_depth_ = 0; ColorMatrixCoefficients matrix_coefficients_ = ColorMatrixCoefficients::COLOR_MATRIX_COEFFICIENTS_UNSPECIFIED; bool full_range_ = false; }; } // namespace mediapipe #endif // MEDIAPIPE_FRAMEWORK_FORMATS_YUV_IMAGE_H_