710fb3de58
GitOrigin-RevId: 1610e588e497817fae2d9a458093ab6a370e2972
402 lines
14 KiB
C++
402 lines
14 KiB
C++
// Copyright 2019 The MediaPipe Authors.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
#include "mediapipe/framework/calculator_framework.h"
|
|
#include "mediapipe/framework/formats/image.h"
|
|
#include "mediapipe/framework/port/ret_check.h"
|
|
#include "mediapipe/framework/port/status.h"
|
|
#include "mediapipe/framework/tool/options_util.h"
|
|
#include "mediapipe/gpu/gl_calculator_helper.h"
|
|
#include "mediapipe/gpu/gl_quad_renderer.h"
|
|
#include "mediapipe/gpu/gl_scaler_calculator.pb.h"
|
|
#include "mediapipe/gpu/gl_simple_shaders.h"
|
|
#include "mediapipe/gpu/shader_util.h"
|
|
|
|
// Packet payload type used for OUTPUT_DIMENSIONS: element 0 is the output
// width and element 1 is the output height, both in pixels.
#ifdef __ANDROID__
// The size of Java arrays is dynamic, which makes it difficult to
// generate the right packet type with a fixed size. Therefore, we
// are using unsized arrays on Android.
using DimensionsPacketType = int[];
#else
using DimensionsPacketType = int[2];
#endif
|
|
|
|
namespace mediapipe {
|
|
|
|
// Output stream tag: per-side left/right padding as a float packet.
constexpr char kLeftRightPaddingTag[] = "LEFT_RIGHT_PADDING";
|
|
// Output stream tag: per-side top/bottom padding as a float packet.
constexpr char kTopBottomPaddingTag[] = "TOP_BOTTOM_PADDING";
|
|
// Input side packet tag carrying a GlScalerCalculatorOptions message.
constexpr char kOptionsTag[] = "OPTIONS";
|
|
// Input stream tag carrying the requested output width and height.
constexpr char kOutputDimensionsTag[] = "OUTPUT_DIMENSIONS";
|
|
// Input stream / input side packet tag carrying the rotation as an int.
constexpr char kRotationTag[] = "ROTATION";
|
|
// Input / output stream tag used when frames are exchanged as Image packets.
constexpr char kImageTag[] = "IMAGE";
|
|
|
|
// Local shorthand for the Image frame type used with the IMAGE tag.
using Image = mediapipe::Image;
|
|
|
|
// Scales, rotates, horizontal or vertical flips the image.
|
|
// See GlSimpleCalculatorBase for inputs, outputs and input side packets.
|
|
// Additional input streams:
|
|
// ROTATION: the counterclockwise rotation angle in degrees. This allows
|
|
// user to specify different rotation angles for different frames. If this
|
|
// stream is provided, it will override the ROTATION input side packet.
|
|
// OUTPUT_DIMENSIONS: the output width and height in pixels.
|
|
// Additional output streams:
|
|
// TOP_BOTTOM_PADDING: If use FIT scale mode, this stream outputs the padding
|
|
// size of the input image in normalized value [0, 1] for top and bottom
|
|
// sides with equal padding. E.g. Using FIT scale mode, if the input images
|
|
// size is 10x10 and the required output size is 20x40, then the top and
|
|
// bottom side of the image will both have padding of 10 pixels. So the
|
|
// value of output stream is 10 / 40 = 0.25.
|
|
// LEFT_RIGHT_PADDING: If use FIT scale mode, this stream outputs the padding
|
|
// size of the input image in normalized value [0, 1] for left and right side.
|
|
// E.g. Using FIT scale mode, if the input images size is 10x10 and the
|
|
// required output size is 5x3, then the left and right side of the image will
|
|
// both have padding of 1 pixel. So the value of output stream is 1 / 5 =
|
|
// 0.2.
|
|
// Additional input side packets:
|
|
// OPTIONS: the GlScalerCalculatorOptions to use. Will replace or merge with
|
|
// existing calculator options, depending on field merge_fields.
|
|
// OUTPUT_DIMENSIONS: the output width and height in pixels.
|
|
// ROTATION: the counterclockwise rotation angle in degrees.
|
|
// These can also be specified as options.
|
|
// To enable horizontal or vertical flip, specify them in options.
|
|
// The flipping is applied after rotation.
|
|
class GlScalerCalculator : public CalculatorBase {
|
|
public:
|
|
GlScalerCalculator() {}
|
|
~GlScalerCalculator();
|
|
|
|
static absl::Status GetContract(CalculatorContract* cc);
|
|
|
|
absl::Status Open(CalculatorContext* cc) override;
|
|
absl::Status Process(CalculatorContext* cc) override;
|
|
|
|
absl::Status GlSetup();
|
|
absl::Status GlRender(const GlTexture& src, const GlTexture& dst);
|
|
void GetOutputDimensions(int src_width, int src_height, int* dst_width,
|
|
int* dst_height);
|
|
void GetOutputPadding(int src_width, int src_height, int dst_width,
|
|
int dst_height, float* top_bottom_padding,
|
|
float* left_right_padding);
|
|
GpuBufferFormat GetOutputFormat() { return GpuBufferFormat::kBGRA32; }
|
|
|
|
private:
|
|
GlCalculatorHelper helper_;
|
|
int dst_width_ = 0;
|
|
int dst_height_ = 0;
|
|
float dst_scale_ = -1.f;
|
|
FrameRotation rotation_;
|
|
std::unique_ptr<QuadRenderer> rgb_renderer_;
|
|
std::unique_ptr<QuadRenderer> yuv_renderer_;
|
|
#ifdef __ANDROID__
|
|
std::unique_ptr<QuadRenderer> ext_rgb_renderer_;
|
|
#endif
|
|
bool vertical_flip_output_;
|
|
bool horizontal_flip_output_;
|
|
FrameScaleMode scale_mode_ = FrameScaleMode::kStretch;
|
|
};
|
|
REGISTER_CALCULATOR(GlScalerCalculator);
|
|
|
|
// static
|
|
absl::Status GlScalerCalculator::GetContract(CalculatorContract* cc) {
|
|
if (cc->Inputs().HasTag(kImageTag)) {
|
|
cc->Inputs().Tag(kImageTag).Set<Image>();
|
|
} else {
|
|
TagOrIndex(&cc->Inputs(), "VIDEO", 0).Set<GpuBuffer>();
|
|
}
|
|
if (cc->Outputs().HasTag(kImageTag)) {
|
|
cc->Outputs().Tag(kImageTag).Set<Image>();
|
|
} else {
|
|
TagOrIndex(&cc->Outputs(), "VIDEO", 0).Set<GpuBuffer>();
|
|
}
|
|
|
|
if (cc->Inputs().HasTag(kRotationTag)) {
|
|
cc->Inputs().Tag(kRotationTag).Set<int>();
|
|
}
|
|
if (cc->Inputs().HasTag(kOutputDimensionsTag)) {
|
|
cc->Inputs().Tag(kOutputDimensionsTag).Set<DimensionsPacketType>();
|
|
}
|
|
MP_RETURN_IF_ERROR(GlCalculatorHelper::UpdateContract(cc));
|
|
|
|
if (cc->InputSidePackets().HasTag(kOptionsTag)) {
|
|
cc->InputSidePackets().Tag(kOptionsTag).Set<GlScalerCalculatorOptions>();
|
|
}
|
|
if (HasTagOrIndex(&cc->InputSidePackets(), "OUTPUT_DIMENSIONS", 1)) {
|
|
TagOrIndex(&cc->InputSidePackets(), "OUTPUT_DIMENSIONS", 1)
|
|
.Set<DimensionsPacketType>();
|
|
}
|
|
if (cc->InputSidePackets().HasTag(kRotationTag)) {
|
|
// Counterclockwise rotation.
|
|
cc->InputSidePackets().Tag(kRotationTag).Set<int>();
|
|
}
|
|
|
|
if (cc->Outputs().HasTag(kTopBottomPaddingTag) &&
|
|
cc->Outputs().HasTag(kLeftRightPaddingTag)) {
|
|
cc->Outputs().Tag(kTopBottomPaddingTag).Set<float>();
|
|
cc->Outputs().Tag(kLeftRightPaddingTag).Set<float>();
|
|
}
|
|
return absl::OkStatus();
|
|
}
|
|
|
|
// Initializes the GL helper and resolves the calculator configuration.
// Values from the OUTPUT_DIMENSIONS and ROTATION side packets are applied
// after, and therefore override, the corresponding option fields.
absl::Status GlScalerCalculator::Open(CalculatorContext* cc) {
  // Outputs always carry the timestamp of the packet that produced them.
  cc->SetOffset(mediapipe::TimestampDiff(0));

  // Give the helper access to the GL context information.
  MP_RETURN_IF_ERROR(helper_.Open(cc));

  const auto& options =
      tool::RetrieveOptions(cc->Options<GlScalerCalculatorOptions>(),
                            cc->InputSidePackets(), "OPTIONS");

  // Size-related fields keep their member defaults unless explicitly set.
  if (options.has_output_width()) dst_width_ = options.output_width();
  if (options.has_output_height()) dst_height_ = options.output_height();
  if (options.has_output_scale()) dst_scale_ = options.output_scale();

  int rotation_ccw = 0;
  if (options.has_rotation()) rotation_ccw = options.rotation();

  // An unset flip field means "do not flip".
  vertical_flip_output_ =
      options.has_flip_vertical() && options.flip_vertical();
  horizontal_flip_output_ =
      options.has_flip_horizontal() && options.flip_horizontal();

  if (options.has_scale_mode()) {
    scale_mode_ =
        FrameScaleModeFromProto(options.scale_mode(), FrameScaleMode::kStretch);
  }

  // Side packets take precedence over the option values read above.
  const auto& side_packets = cc->InputSidePackets();
  if (HasTagOrIndex(side_packets, "OUTPUT_DIMENSIONS", 1)) {
    const auto& dimensions = TagOrIndex(side_packets, "OUTPUT_DIMENSIONS", 1)
                                 .Get<DimensionsPacketType>();
    dst_width_ = dimensions[0];
    dst_height_ = dimensions[1];
  }
  if (side_packets.HasTag(kRotationTag)) {
    rotation_ccw = side_packets.Tag(kRotationTag).Get<int>();
  }

  MP_RETURN_IF_ERROR(FrameRotationFromInt(&rotation_, rotation_ccw));

  return absl::OkStatus();
}
|
|
|
|
// Renders the current input frame into a newly created destination texture
// and emits it at the input timestamp. When both padding output streams are
// connected, the padding values for this frame are emitted as well.
//
// If the OUTPUT_DIMENSIONS input stream is connected but has no packet at
// this timestamp, nothing is rendered and OK is returned.
// If the ROTATION input stream is connected but has no packet at this
// timestamp, the most recently configured rotation is reused.
absl::Status GlScalerCalculator::Process(CalculatorContext* cc) {
  if (cc->Inputs().HasTag(kOutputDimensionsTag)) {
    if (cc->Inputs().Tag(kOutputDimensionsTag).IsEmpty()) {
      // OUTPUT_DIMENSIONS input stream is specified, but value is missing.
      return absl::OkStatus();
    }

    const auto& dimensions =
        cc->Inputs().Tag(kOutputDimensionsTag).Get<DimensionsPacketType>();
    dst_width_ = dimensions[0];
    dst_height_ = dimensions[1];
  }

  return helper_.RunInGlContext([this, cc]() -> absl::Status {
    const auto& input =
        cc->Inputs().HasTag(kImageTag)
            ? cc->Inputs().Tag(kImageTag).Get<Image>().GetGpuBuffer()
            : TagOrIndex(cc->Inputs(), "VIDEO", 0).Get<GpuBuffer>();
    QuadRenderer* renderer = nullptr;
    GlTexture src1;
    GlTexture src2;

    // Pick (and lazily create) the renderer that matches the input texture.
#ifdef __APPLE__
    if (input.format() == GpuBufferFormat::kBiPlanar420YpCbCr8VideoRange ||
        input.format() == GpuBufferFormat::kBiPlanar420YpCbCr8FullRange) {
      // Bi-planar YUV input: one source texture per plane.
      if (!yuv_renderer_) {
        yuv_renderer_ = absl::make_unique<QuadRenderer>();
        MP_RETURN_IF_ERROR(yuv_renderer_->GlSetup(
            kYUV2TexToRGBFragmentShader, {"video_frame_y", "video_frame_uv"}));
      }
      renderer = yuv_renderer_.get();
      src1 = helper_.CreateSourceTexture(input, 0);
      src2 = helper_.CreateSourceTexture(input, 1);
    } else  // NOLINT(readability/braces)
#endif  // __APPLE__
    {
      src1 = helper_.CreateSourceTexture(input);
#ifdef __ANDROID__
      if (src1.target() == GL_TEXTURE_EXTERNAL_OES) {
        // External OES textures need a dedicated fragment shader.
        if (!ext_rgb_renderer_) {
          ext_rgb_renderer_ = absl::make_unique<QuadRenderer>();
          MP_RETURN_IF_ERROR(ext_rgb_renderer_->GlSetup(
              kBasicTexturedFragmentShaderOES, {"video_frame"}));
        }
        renderer = ext_rgb_renderer_.get();
      } else  // NOLINT(readability/braces)
#endif  // __ANDROID__
      {
        if (!rgb_renderer_) {
          rgb_renderer_ = absl::make_unique<QuadRenderer>();
          MP_RETURN_IF_ERROR(rgb_renderer_->GlSetup());
        }
        renderer = rgb_renderer_.get();
      }
    }
    RET_CHECK(renderer) << "Unsupported input texture type";

    // Override input side packet if ROTATION input packet is provided.
    // The stream may be connected without carrying a packet at this
    // timestamp; reading an empty packet is invalid, so in that case the
    // previously configured rotation stays in effect.
    if (cc->Inputs().HasTag(kRotationTag) &&
        !cc->Inputs().Tag(kRotationTag).IsEmpty()) {
      int rotation_ccw = cc->Inputs().Tag(kRotationTag).Get<int>();
      MP_RETURN_IF_ERROR(FrameRotationFromInt(&rotation_, rotation_ccw));
    }

    int dst_width = 0;
    int dst_height = 0;
    GetOutputDimensions(src1.width(), src1.height(), &dst_width, &dst_height);

    // Padding packets are only produced when both streams are connected.
    if (cc->Outputs().HasTag(kTopBottomPaddingTag) &&
        cc->Outputs().HasTag(kLeftRightPaddingTag)) {
      float top_bottom_padding = 0.0f;
      float left_right_padding = 0.0f;
      GetOutputPadding(src1.width(), src1.height(), dst_width, dst_height,
                       &top_bottom_padding, &left_right_padding);
      cc->Outputs()
          .Tag(kTopBottomPaddingTag)
          .AddPacket(
              MakePacket<float>(top_bottom_padding).At(cc->InputTimestamp()));
      cc->Outputs()
          .Tag(kLeftRightPaddingTag)
          .AddPacket(
              MakePacket<float>(left_right_padding).At(cc->InputTimestamp()));
    }

    auto dst = helper_.CreateDestinationTexture(dst_width, dst_height,
                                                GetOutputFormat());

    // Bind the destination framebuffer and the source texture(s).
    helper_.BindFramebuffer(dst);
    glActiveTexture(GL_TEXTURE1);
    glBindTexture(src1.target(), src1.name());
    if (src2.name()) {
      glActiveTexture(GL_TEXTURE2);
      glBindTexture(src2.target(), src2.name());
    }

    MP_RETURN_IF_ERROR(renderer->GlRender(
        src1.width(), src1.height(), dst.width(), dst.height(), scale_mode_,
        rotation_, horizontal_flip_output_, vertical_flip_output_,
        /*flip_texture*/ false));

    // Unbind the source texture(s) again.
    glActiveTexture(GL_TEXTURE1);
    glBindTexture(src1.target(), 0);
    if (src2.name()) {
      glActiveTexture(GL_TEXTURE2);
      glBindTexture(src2.target(), 0);
    }

    glFlush();

    // Emit the rendered frame in the same representation family as the
    // connected output stream.
    if (cc->Outputs().HasTag(kImageTag)) {
      auto output = dst.GetFrame<Image>();
      cc->Outputs().Tag(kImageTag).Add(output.release(), cc->InputTimestamp());
    } else {
      auto output = dst.GetFrame<GpuBuffer>();
      TagOrIndex(&cc->Outputs(), "VIDEO", 0)
          .Add(output.release(), cc->InputTimestamp());
    }

    return absl::OkStatus();
  });
}
|
|
|
|
void GlScalerCalculator::GetOutputDimensions(int src_width, int src_height,
|
|
int* dst_width, int* dst_height) {
|
|
if (dst_width_ > 0 && dst_height_ > 0) {
|
|
*dst_width = dst_width_;
|
|
*dst_height = dst_height_;
|
|
return;
|
|
}
|
|
if (dst_scale_ > 0) {
|
|
// Scales the destination size, but just uses src size as a temporary for
|
|
// calculations.
|
|
src_width = static_cast<int>(src_width * dst_scale_);
|
|
src_height = static_cast<int>(src_height * dst_scale_);
|
|
// Round to nearest multiply of 4 for better memory alignment.
|
|
src_width = ((src_width + 2) >> 2) << 2;
|
|
src_height = ((src_height + 2) >> 2) << 2;
|
|
}
|
|
if (rotation_ == FrameRotation::k90 || rotation_ == FrameRotation::k270) {
|
|
*dst_width = src_height;
|
|
*dst_height = src_width;
|
|
} else {
|
|
*dst_width = src_width;
|
|
*dst_height = src_height;
|
|
}
|
|
}
|
|
|
|
void GlScalerCalculator::GetOutputPadding(int src_width, int src_height,
|
|
int dst_width, int dst_height,
|
|
float* top_bottom_padding,
|
|
float* left_right_padding) {
|
|
*top_bottom_padding = 0.0f;
|
|
*left_right_padding = 0.0f;
|
|
if (rotation_ == FrameRotation::k90 || rotation_ == FrameRotation::k270) {
|
|
const int tmp = src_width;
|
|
src_width = src_height;
|
|
src_height = tmp;
|
|
}
|
|
if (scale_mode_ == FrameScaleMode::kFit) {
|
|
const float src_scale = 1.0f * src_width / src_height;
|
|
const float dst_scale = 1.0f * dst_width / dst_height;
|
|
if (src_scale - dst_scale > 1e-5) {
|
|
// Total padding on top and bottom sides.
|
|
*top_bottom_padding =
|
|
1.0f - 1.0f * dst_width / src_width * src_height / dst_height;
|
|
// Get padding on each side.
|
|
*top_bottom_padding /= 2.0f;
|
|
|
|
} else if (dst_scale - src_scale > 1e-5) {
|
|
// Total padding on left and right sides.
|
|
*left_right_padding =
|
|
1.0f - 1.0f / dst_width * src_width / src_height * dst_height;
|
|
// Get padding on each side.
|
|
*left_right_padding /= 2.0f;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Releases the GL resources held by every renderer that was created.
// GlTeardown() is run inside the GL context, so ownership is handed to the
// lambda passed to RunInGlContext, which also deletes the renderers. The
// Android-only external-texture renderer is included so that it is torn down
// the same way as the other two instead of being deleted without
// GlTeardown().
GlScalerCalculator::~GlScalerCalculator() {
  // TODO: use move capture when we have C++14 or better.
  QuadRenderer* rgb_renderer = rgb_renderer_.release();
  QuadRenderer* yuv_renderer = yuv_renderer_.release();
  QuadRenderer* ext_rgb_renderer = nullptr;
#ifdef __ANDROID__
  ext_rgb_renderer = ext_rgb_renderer_.release();
#endif

  // Nothing was ever created: avoid entering the GL context at all.
  if (!rgb_renderer && !yuv_renderer && !ext_rgb_renderer) {
    return;
  }

  helper_.RunInGlContext([rgb_renderer, yuv_renderer, ext_rgb_renderer] {
    QuadRenderer* const renderers[] = {rgb_renderer, yuv_renderer,
                                       ext_rgb_renderer};
    for (QuadRenderer* renderer : renderers) {
      if (renderer) {
        renderer->GlTeardown();
        delete renderer;
      }
    }
  });
}
|
|
|
|
} // namespace mediapipe
|