mediapipe/mediapipe/gpu/gl_scaler_calculator.cc
MediaPipe Team 710fb3de58 Project import generated by Copybara.
GitOrigin-RevId: 1610e588e497817fae2d9a458093ab6a370e2972
2021-08-18 17:45:46 -07:00

402 lines
14 KiB
C++

// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/image.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/framework/tool/options_util.h"
#include "mediapipe/gpu/gl_calculator_helper.h"
#include "mediapipe/gpu/gl_quad_renderer.h"
#include "mediapipe/gpu/gl_scaler_calculator.pb.h"
#include "mediapipe/gpu/gl_simple_shaders.h"
#include "mediapipe/gpu/shader_util.h"
// Payload type of the OUTPUT_DIMENSIONS packets: element 0 is the width and
// element 1 is the height.
#ifdef __ANDROID__
// Java arrays carry their length at runtime, so a packet produced on the Java
// side cannot easily be given a fixed-size array type. An array of unknown
// bound is used on Android for that reason.
using DimensionsPacketType = int[];
#else
using DimensionsPacketType = int[2];
#endif
namespace mediapipe {
// Tags of the streams and side packets that GlScalerCalculator looks up.
// Output stream carrying the normalized left/right padding.
constexpr char kLeftRightPaddingTag[] = "LEFT_RIGHT_PADDING";
// Output stream carrying the normalized top/bottom padding.
constexpr char kTopBottomPaddingTag[] = "TOP_BOTTOM_PADDING";
// Input side packet carrying GlScalerCalculatorOptions.
constexpr char kOptionsTag[] = "OPTIONS";
// Input stream carrying the requested output width and height.
constexpr char kOutputDimensionsTag[] = "OUTPUT_DIMENSIONS";
// Input stream or input side packet carrying the rotation as an int.
constexpr char kRotationTag[] = "ROTATION";
// Input and output stream carrying frames as Image packets.
constexpr char kImageTag[] = "IMAGE";
// Short name for the Image packet type used with the IMAGE streams.
using Image = mediapipe::Image;
// Scales, rotates, and horizontally or vertically flips the image.
// See GlSimpleCalculatorBase for inputs, outputs and input side packets.
// Additional input streams:
// ROTATION: the counterclockwise rotation angle in degrees. This allows
// user to specify different rotation angles for different frames. If this
// stream is provided, it will override the ROTATION input side packet.
// OUTPUT_DIMENSIONS: the output width and height in pixels.
// Additional output streams:
// TOP_BOTTOM_PADDING: If using FIT scale mode, this stream outputs the padding
// size of the input image in normalized value [0, 1] for top and bottom
// sides with equal padding. E.g. using FIT scale mode, if the input image
// size is 10x10 and the required output size is 20x40, then the top and
// bottom sides of the image will both have padding of 10 pixels. So the
// value of the output stream is 10 / 40 = 0.25.
// LEFT_RIGHT_PADDING: If using FIT scale mode, this stream outputs the padding
// size of the input image in normalized value [0, 1] for left and right sides
// with equal padding. E.g. using FIT scale mode, if the input image size is
// 10x10 and the required output size is 6x5, then the image is scaled to 5x5
// and the left and right sides of the image will both have padding of 0.5
// pixels. So the value of the output stream is 0.5 / 6, i.e. about 0.083.
// Additional input side packets:
// OPTIONS: the GlScalerCalculatorOptions to use. Will replace or merge with
// existing calculator options, depending on field merge_fields.
// OUTPUT_DIMENSIONS: the output width and height in pixels.
// ROTATION: the counterclockwise rotation angle in degrees.
// These can also be specified as options.
// To enable horizontal or vertical flip, specify them in options.
// The flipping is applied after rotation.
class GlScalerCalculator : public CalculatorBase {
 public:
  GlScalerCalculator() {}
  // Releases the quad renderers' GL resources; see the out-of-line definition.
  ~GlScalerCalculator();

  // Declares the packet types of the streams and side packets in use.
  static absl::Status GetContract(CalculatorContract* cc);

  // Reads the options and input side packets into the members below.
  absl::Status Open(CalculatorContext* cc) override;
  // Renders the current input frame into a new texture and emits it.
  absl::Status Process(CalculatorContext* cc) override;

  // NOTE(review): no definition of GlSetup or GlRender is visible in this
  // file; confirm whether these declarations are still needed.
  absl::Status GlSetup();
  absl::Status GlRender(const GlTexture& src, const GlTexture& dst);

  // Computes the size of the output texture for a source of the given size.
  // Results are written to |dst_width| and |dst_height|.
  void GetOutputDimensions(int src_width, int src_height, int* dst_width,
                           int* dst_height);
  // Computes the per-side normalized padding for the FIT scale mode. Results
  // are written to |top_bottom_padding| and |left_right_padding|.
  void GetOutputPadding(int src_width, int src_height, int dst_width,
                        int dst_height, float* top_bottom_padding,
                        float* left_right_padding);
  // Format used for every destination texture created by Process().
  GpuBufferFormat GetOutputFormat() { return GpuBufferFormat::kBGRA32; }

 private:
  // Declared first so that it outlives the renderers during destruction.
  GlCalculatorHelper helper_;
  // Requested output size; only honored when both values are positive.
  int dst_width_ = 0;
  int dst_height_ = 0;
  // Scale applied to the source size when no explicit size is in effect;
  // ignored unless positive.
  float dst_scale_ = -1.f;
  // Value-initialized so that it never holds an indeterminate value; Open()
  // assigns the configured rotation before any frame is processed.
  FrameRotation rotation_{};
  // Renderers are created lazily by Process() the first time they are needed.
  std::unique_ptr<QuadRenderer> rgb_renderer_;
  std::unique_ptr<QuadRenderer> yuv_renderer_;
#ifdef __ANDROID__
  std::unique_ptr<QuadRenderer> ext_rgb_renderer_;
#endif
  // Default to "no flip", like the values Open() falls back to, so the members
  // are initialized consistently with their siblings.
  bool vertical_flip_output_ = false;
  bool horizontal_flip_output_ = false;
  FrameScaleMode scale_mode_ = FrameScaleMode::kStretch;
};
REGISTER_CALCULATOR(GlScalerCalculator);
// static
// Declares the packet type of every stream and side packet this calculator
// may be connected to. Optional ports are only typed when they are present in
// the graph configuration.
absl::Status GlScalerCalculator::GetContract(CalculatorContract* cc) {
  auto& inputs = cc->Inputs();
  auto& outputs = cc->Outputs();
  auto& side_packets = cc->InputSidePackets();

  // Frame input: the IMAGE stream when present, otherwise VIDEO (or index 0).
  const bool image_in = inputs.HasTag(kImageTag);
  if (!image_in) {
    TagOrIndex(&inputs, "VIDEO", 0).Set<GpuBuffer>();
  } else {
    inputs.Tag(kImageTag).Set<Image>();
  }

  // Frame output: same rule as for the input.
  const bool image_out = outputs.HasTag(kImageTag);
  if (!image_out) {
    TagOrIndex(&outputs, "VIDEO", 0).Set<GpuBuffer>();
  } else {
    outputs.Tag(kImageTag).Set<Image>();
  }

  // Optional per-frame overrides.
  if (inputs.HasTag(kRotationTag)) {
    inputs.Tag(kRotationTag).Set<int>();
  }
  if (inputs.HasTag(kOutputDimensionsTag)) {
    inputs.Tag(kOutputDimensionsTag).Set<DimensionsPacketType>();
  }

  MP_RETURN_IF_ERROR(GlCalculatorHelper::UpdateContract(cc));

  // Optional input side packets.
  if (side_packets.HasTag(kOptionsTag)) {
    side_packets.Tag(kOptionsTag).Set<GlScalerCalculatorOptions>();
  }
  if (HasTagOrIndex(&side_packets, "OUTPUT_DIMENSIONS", 1)) {
    TagOrIndex(&side_packets, "OUTPUT_DIMENSIONS", 1)
        .Set<DimensionsPacketType>();
  }
  if (side_packets.HasTag(kRotationTag)) {
    // Counterclockwise rotation.
    side_packets.Tag(kRotationTag).Set<int>();
  }

  // The padding outputs are only typed when both of them are connected.
  const bool both_paddings = outputs.HasTag(kTopBottomPaddingTag) &&
                             outputs.HasTag(kLeftRightPaddingTag);
  if (both_paddings) {
    outputs.Tag(kTopBottomPaddingTag).Set<float>();
    outputs.Tag(kLeftRightPaddingTag).Set<float>();
  }
  return absl::OkStatus();
}
// Configures the calculator from its options and input side packets.
// Values from the OUTPUT_DIMENSIONS and ROTATION side packets are applied
// after the options and therefore take precedence over them. Returns an error
// if the GL helper cannot be opened or the rotation value is rejected by
// FrameRotationFromInt.
absl::Status GlScalerCalculator::Open(CalculatorContext* cc) {
  // Every output packet carries the timestamp of the input packet that
  // triggered it, so tell the framework the offset is zero.
  cc->SetOffset(mediapipe::TimestampDiff(0));

  // The helper needs the context to reach the GL context information.
  MP_RETURN_IF_ERROR(helper_.Open(cc));

  const auto& side_packets = cc->InputSidePackets();
  const auto& options = tool::RetrieveOptions(
      cc->Options<GlScalerCalculatorOptions>(), side_packets, "OPTIONS");

  // Size and scale keep their in-class defaults unless explicitly set.
  if (options.has_output_width()) dst_width_ = options.output_width();
  if (options.has_output_height()) dst_height_ = options.output_height();
  if (options.has_output_scale()) dst_scale_ = options.output_scale();

  int rotation_ccw = options.has_rotation() ? options.rotation() : 0;

  // An unset flip option means "do not flip".
  vertical_flip_output_ =
      options.has_flip_vertical() && options.flip_vertical();
  horizontal_flip_output_ =
      options.has_flip_horizontal() && options.flip_horizontal();

  if (options.has_scale_mode()) {
    scale_mode_ =
        FrameScaleModeFromProto(options.scale_mode(), FrameScaleMode::kStretch);
  }

  // Side packets override the corresponding option values.
  if (HasTagOrIndex(side_packets, "OUTPUT_DIMENSIONS", 1)) {
    const auto& dimensions = TagOrIndex(side_packets, "OUTPUT_DIMENSIONS", 1)
                                 .Get<DimensionsPacketType>();
    dst_width_ = dimensions[0];
    dst_height_ = dimensions[1];
  }
  if (side_packets.HasTag(kRotationTag)) {
    rotation_ccw = side_packets.Tag(kRotationTag).Get<int>();
  }

  MP_RETURN_IF_ERROR(FrameRotationFromInt(&rotation_, rotation_ccw));
  return absl::OkStatus();
}
// Scales/rotates/flips the current input frame on the GPU and emits the
// result, plus the padding values when both padding outputs are connected.
//
// Nothing is emitted for a timestamp at which the OUTPUT_DIMENSIONS stream is
// connected but empty, or at which the frame stream itself is empty. A
// connected but empty ROTATION stream leaves the previously set rotation in
// effect. Returns the first error reported by renderer setup, rotation
// conversion or rendering.
absl::Status GlScalerCalculator::Process(CalculatorContext* cc) {
  if (cc->Inputs().HasTag(kOutputDimensionsTag)) {
    if (cc->Inputs().Tag(kOutputDimensionsTag).IsEmpty()) {
      // OUTPUT_DIMENSIONS input stream is specified, but value is missing.
      return absl::OkStatus();
    }
    const auto& dimensions =
        cc->Inputs().Tag(kOutputDimensionsTag).Get<DimensionsPacketType>();
    dst_width_ = dimensions[0];
    dst_height_ = dimensions[1];
  }

  // Process() can be invoked for a timestamp at which only an auxiliary
  // stream has a packet. Reading the frame from an empty stream is not valid,
  // so skip such timestamps instead.
  const bool frame_missing =
      cc->Inputs().HasTag(kImageTag)
          ? cc->Inputs().Tag(kImageTag).IsEmpty()
          : TagOrIndex(cc->Inputs(), "VIDEO", 0).IsEmpty();
  if (frame_missing) {
    return absl::OkStatus();
  }

  return helper_.RunInGlContext([this, cc]() -> absl::Status {
    const auto& input =
        cc->Inputs().HasTag(kImageTag)
            ? cc->Inputs().Tag(kImageTag).Get<Image>().GetGpuBuffer()
            : TagOrIndex(cc->Inputs(), "VIDEO", 0).Get<GpuBuffer>();
    QuadRenderer* renderer = nullptr;
    GlTexture src1;
    GlTexture src2;

    // Pick (and lazily set up) the renderer matching the input texture kind.
#ifdef __APPLE__
    if (input.format() == GpuBufferFormat::kBiPlanar420YpCbCr8VideoRange ||
        input.format() == GpuBufferFormat::kBiPlanar420YpCbCr8FullRange) {
      // Bi-planar YUV input: one texture per plane.
      if (!yuv_renderer_) {
        yuv_renderer_ = absl::make_unique<QuadRenderer>();
        MP_RETURN_IF_ERROR(yuv_renderer_->GlSetup(
            kYUV2TexToRGBFragmentShader, {"video_frame_y", "video_frame_uv"}));
      }
      renderer = yuv_renderer_.get();
      src1 = helper_.CreateSourceTexture(input, 0);
      src2 = helper_.CreateSourceTexture(input, 1);
    } else  // NOLINT(readability/braces)
#endif  // __APPLE__
    {
      src1 = helper_.CreateSourceTexture(input);
#ifdef __ANDROID__
      if (src1.target() == GL_TEXTURE_EXTERNAL_OES) {
        if (!ext_rgb_renderer_) {
          ext_rgb_renderer_ = absl::make_unique<QuadRenderer>();
          MP_RETURN_IF_ERROR(ext_rgb_renderer_->GlSetup(
              kBasicTexturedFragmentShaderOES, {"video_frame"}));
        }
        renderer = ext_rgb_renderer_.get();
      } else  // NOLINT(readability/braces)
#endif  // __ANDROID__
      {
        if (!rgb_renderer_) {
          rgb_renderer_ = absl::make_unique<QuadRenderer>();
          MP_RETURN_IF_ERROR(rgb_renderer_->GlSetup());
        }
        renderer = rgb_renderer_.get();
      }
    }
    RET_CHECK(renderer) << "Unsupported input texture type";

    // Override input side packet if ROTATION input packet is provided. The
    // stream may be connected yet carry no packet at this timestamp; in that
    // case the rotation already in effect is kept.
    if (cc->Inputs().HasTag(kRotationTag) &&
        !cc->Inputs().Tag(kRotationTag).IsEmpty()) {
      const int rotation_ccw = cc->Inputs().Tag(kRotationTag).Get<int>();
      MP_RETURN_IF_ERROR(FrameRotationFromInt(&rotation_, rotation_ccw));
    }

    int dst_width;
    int dst_height;
    GetOutputDimensions(src1.width(), src1.height(), &dst_width, &dst_height);

    // The padding values are only emitted when both outputs are connected.
    if (cc->Outputs().HasTag(kTopBottomPaddingTag) &&
        cc->Outputs().HasTag(kLeftRightPaddingTag)) {
      float top_bottom_padding;
      float left_right_padding;
      GetOutputPadding(src1.width(), src1.height(), dst_width, dst_height,
                       &top_bottom_padding, &left_right_padding);
      cc->Outputs()
          .Tag(kTopBottomPaddingTag)
          .AddPacket(
              MakePacket<float>(top_bottom_padding).At(cc->InputTimestamp()));
      cc->Outputs()
          .Tag(kLeftRightPaddingTag)
          .AddPacket(
              MakePacket<float>(left_right_padding).At(cc->InputTimestamp()));
    }

    auto dst = helper_.CreateDestinationTexture(dst_width, dst_height,
                                                GetOutputFormat());
    helper_.BindFramebuffer(dst);

    // Bind the source plane(s) to texture units 1 and 2 for the renderer.
    glActiveTexture(GL_TEXTURE1);
    glBindTexture(src1.target(), src1.name());
    if (src2.name()) {
      glActiveTexture(GL_TEXTURE2);
      glBindTexture(src2.target(), src2.name());
    }

    MP_RETURN_IF_ERROR(renderer->GlRender(
        src1.width(), src1.height(), dst.width(), dst.height(), scale_mode_,
        rotation_, horizontal_flip_output_, vertical_flip_output_,
        /*flip_texture*/ false));

    // Unbind the source plane(s) again.
    glActiveTexture(GL_TEXTURE1);
    glBindTexture(src1.target(), 0);
    if (src2.name()) {
      glActiveTexture(GL_TEXTURE2);
      glBindTexture(src2.target(), 0);
    }

    glFlush();

    // Emit the rendered frame with the type matching the connected output.
    if (cc->Outputs().HasTag(kImageTag)) {
      auto output = dst.GetFrame<Image>();
      cc->Outputs().Tag(kImageTag).Add(output.release(), cc->InputTimestamp());
    } else {
      auto output = dst.GetFrame<GpuBuffer>();
      TagOrIndex(&cc->Outputs(), "VIDEO", 0)
          .Add(output.release(), cc->InputTimestamp());
    }
    return absl::OkStatus();
  });
}
// Determines the output texture size for a source of |src_width| x
// |src_height| and stores it in |dst_width| / |dst_height|.
//
// An explicitly requested size (both dst_width_ and dst_height_ positive) is
// returned unchanged. Otherwise the source size is used, scaled by dst_scale_
// when that is positive, and with width and height exchanged for 90 and 270
// degree rotations.
void GlScalerCalculator::GetOutputDimensions(int src_width, int src_height,
                                             int* dst_width, int* dst_height) {
  const bool has_explicit_size = dst_width_ > 0 && dst_height_ > 0;
  if (has_explicit_size) {
    *dst_width = dst_width_;
    *dst_height = dst_height_;
    return;
  }

  int scaled_width = src_width;
  int scaled_height = src_height;
  if (dst_scale_ > 0) {
    scaled_width = static_cast<int>(src_width * dst_scale_);
    scaled_height = static_cast<int>(src_height * dst_scale_);
    // Round to the nearest multiple of 4 for better memory alignment.
    scaled_width = ((scaled_width + 2) >> 2) << 2;
    scaled_height = ((scaled_height + 2) >> 2) << 2;
  }

  // A quarter-turn rotation exchanges the two axes of the output.
  const bool axes_swapped =
      rotation_ == FrameRotation::k90 || rotation_ == FrameRotation::k270;
  *dst_width = axes_swapped ? scaled_height : scaled_width;
  *dst_height = axes_swapped ? scaled_width : scaled_height;
}
// Computes the padding added on each side of the image, as a fraction of the
// output size, and stores it in |top_bottom_padding| / |left_right_padding|.
//
// Both values are 0 unless the scale mode is kFit. In kFit mode at most one
// of them is non-zero, depending on whether the (rotated) source is wider or
// narrower than the destination; aspect ratios closer than 1e-5 are treated
// as equal.
void GlScalerCalculator::GetOutputPadding(int src_width, int src_height,
                                          int dst_width, int dst_height,
                                          float* top_bottom_padding,
                                          float* left_right_padding) {
  *top_bottom_padding = 0.0f;
  *left_right_padding = 0.0f;
  if (scale_mode_ != FrameScaleMode::kFit) {
    return;
  }

  // Work with the source size as it appears after rotation.
  const bool axes_swapped =
      rotation_ == FrameRotation::k90 || rotation_ == FrameRotation::k270;
  const int rotated_width = axes_swapped ? src_height : src_width;
  const int rotated_height = axes_swapped ? src_width : src_height;

  const float src_aspect = 1.0f * rotated_width / rotated_height;
  const float dst_aspect = 1.0f * dst_width / dst_height;

  if (src_aspect - dst_aspect > 1e-5) {
    // Source is relatively wider: the unused space is above and below.
    const float total_padding =
        1.0f - 1.0f * dst_width / rotated_width * rotated_height / dst_height;
    // Split the total evenly between the two sides.
    *top_bottom_padding = total_padding / 2.0f;
  } else if (dst_aspect - src_aspect > 1e-5) {
    // Source is relatively taller: the unused space is left and right.
    const float total_padding =
        1.0f - 1.0f / dst_width * rotated_width / rotated_height * dst_height;
    // Split the total evenly between the two sides.
    *left_right_padding = total_padding / 2.0f;
  }
}
// Tears down and deletes every renderer that was created, inside the GL
// context. This includes the Android-only external-texture renderer, which
// would otherwise be deleted without GlTeardown() ever being called on it.
GlScalerCalculator::~GlScalerCalculator() {
  // Ownership is handed to raw pointers so that the lambda can capture them
  // by value.
  // TODO: use move capture when we have C++14 or better.
  QuadRenderer* rgb_renderer = rgb_renderer_.release();
  QuadRenderer* yuv_renderer = yuv_renderer_.release();
  QuadRenderer* ext_rgb_renderer = nullptr;
#ifdef __ANDROID__
  ext_rgb_renderer = ext_rgb_renderer_.release();
#endif  // __ANDROID__

  // Skip the GL context round trip when no renderer was ever created.
  if (rgb_renderer || yuv_renderer || ext_rgb_renderer) {
    helper_.RunInGlContext([rgb_renderer, yuv_renderer, ext_rgb_renderer] {
      if (rgb_renderer) {
        rgb_renderer->GlTeardown();
        delete rgb_renderer;
      }
      if (yuv_renderer) {
        yuv_renderer->GlTeardown();
        delete yuv_renderer;
      }
      if (ext_rgb_renderer) {
        ext_rgb_renderer->GlTeardown();
        delete ext_rgb_renderer;
      }
    });
  }
}
} // namespace mediapipe