// Copyright 2019 The MediaPipe Authors. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "mediapipe/calculators/image/mask_overlay_calculator.pb.h" #include "mediapipe/framework/calculator_framework.h" #include "mediapipe/framework/port/ret_check.h" #include "mediapipe/framework/port/status.h" #include "mediapipe/gpu/gl_calculator_helper.h" #include "mediapipe/gpu/gl_simple_shaders.h" #include "mediapipe/gpu/shader_util.h" enum { ATTRIB_VERTEX, ATTRIB_TEXTURE_POSITION, NUM_ATTRIBUTES }; namespace mediapipe { using ::mediapipe::MaskOverlayCalculatorOptions_MaskChannel_ALPHA; using ::mediapipe::MaskOverlayCalculatorOptions_MaskChannel_RED; using ::mediapipe::MaskOverlayCalculatorOptions_MaskChannel_UNKNOWN; // Mixes two frames using a third mask frame or constant value. // // Inputs: // VIDEO:[0,1] (GpuBuffer): // Two inputs should be provided. // MASK (GpuBuffer): // Optional. // Where the mask is 0, VIDEO:0 will be used. Where it is 1, VIDEO:1. // Intermediate values will blend. // If not specified, CONST_MASK float must be present. // CONST_MASK (float): // Optional. // If not specified, MASK GpuBuffer must be present. // Similar to MASK GpuBuffer, but applied globally to every pixel. // // Outputs: // OUTPUT (GpuBuffer): // The mix. class MaskOverlayCalculator : public CalculatorBase { public: MaskOverlayCalculator() {} ~MaskOverlayCalculator(); static absl::Status GetContract(CalculatorContract* cc); absl::Status Open(CalculatorContext* cc) override; absl::Status Process(CalculatorContext* cc) override; absl::Status GlSetup( const MaskOverlayCalculatorOptions::MaskChannel mask_channel); absl::Status GlRender(const float mask_const); private: GlCalculatorHelper helper_; bool initialized_ = false; bool use_mask_tex_ = false; // Otherwise, use constant float value. GLuint program_ = 0; GLint unif_frame1_; GLint unif_frame2_; GLint unif_mask_; }; REGISTER_CALCULATOR(MaskOverlayCalculator); // static absl::Status MaskOverlayCalculator::GetContract(CalculatorContract* cc) { MP_RETURN_IF_ERROR(GlCalculatorHelper::UpdateContract(cc)); cc->Inputs().Get("VIDEO", 0).Set(); cc->Inputs().Get("VIDEO", 1).Set(); if (cc->Inputs().HasTag("MASK")) cc->Inputs().Tag("MASK").Set(); else if (cc->Inputs().HasTag("CONST_MASK")) cc->Inputs().Tag("CONST_MASK").Set(); else return absl::Status(absl::StatusCode::kNotFound, "At least one mask input stream must be present."); cc->Outputs().Tag("OUTPUT").Set(); return absl::OkStatus(); } absl::Status MaskOverlayCalculator::Open(CalculatorContext* cc) { cc->SetOffset(TimestampDiff(0)); if (cc->Inputs().HasTag("MASK")) { use_mask_tex_ = true; } return helper_.Open(cc); } absl::Status MaskOverlayCalculator::Process(CalculatorContext* cc) { return helper_.RunInGlContext([this, &cc]() -> absl::Status { if (!initialized_) { const auto& options = cc->Options(); const auto mask_channel = options.mask_channel(); MP_RETURN_IF_ERROR(GlSetup(mask_channel)); initialized_ = true; } glDisable(GL_BLEND); const Packet& input1_packet = cc->Inputs().Get("VIDEO", 1).Value(); const Packet& mask_packet = use_mask_tex_ ? cc->Inputs().Tag("MASK").Value() : cc->Inputs().Tag("CONST_MASK").Value(); if (mask_packet.IsEmpty()) { cc->Outputs().Tag("OUTPUT").AddPacket(input1_packet); return absl::OkStatus(); } const auto& input0_buffer = cc->Inputs().Get("VIDEO", 0).Get(); const auto& input1_buffer = input1_packet.Get(); auto src1 = helper_.CreateSourceTexture(input0_buffer); auto src2 = helper_.CreateSourceTexture(input1_buffer); GlTexture mask_tex; if (use_mask_tex_) { const auto& mask_buffer = mask_packet.Get(); mask_tex = helper_.CreateSourceTexture(mask_buffer); } auto dst = helper_.CreateDestinationTexture(src1.width(), src1.height()); helper_.BindFramebuffer(dst); glActiveTexture(GL_TEXTURE1); glBindTexture(src1.target(), src1.name()); glActiveTexture(GL_TEXTURE2); glBindTexture(src2.target(), src2.name()); if (use_mask_tex_) { const float mask_const = -1; glActiveTexture(GL_TEXTURE3); glBindTexture(mask_tex.target(), mask_tex.name()); MP_RETURN_IF_ERROR(GlRender(mask_const)); glActiveTexture(GL_TEXTURE3); glBindTexture(mask_tex.target(), 0); } else { const float mask_const = mask_packet.Get(); MP_RETURN_IF_ERROR(GlRender(mask_const)); } glActiveTexture(GL_TEXTURE2); glBindTexture(src2.target(), 0); glActiveTexture(GL_TEXTURE1); glBindTexture(src1.target(), 0); glFlush(); auto output = dst.GetFrame(); src1.Release(); src2.Release(); if (use_mask_tex_) mask_tex.Release(); dst.Release(); cc->Outputs().Tag("OUTPUT").Add(output.release(), cc->InputTimestamp()); return absl::OkStatus(); }); } absl::Status MaskOverlayCalculator::GlSetup( const MaskOverlayCalculatorOptions::MaskChannel mask_channel) { // Load vertex and fragment shaders const GLint attr_location[NUM_ATTRIBUTES] = { ATTRIB_VERTEX, ATTRIB_TEXTURE_POSITION, }; const GLchar* attr_name[NUM_ATTRIBUTES] = { "position", "texture_coordinate", }; std::string mask_component; switch (mask_channel) { case MaskOverlayCalculatorOptions_MaskChannel_UNKNOWN: case MaskOverlayCalculatorOptions_MaskChannel_RED: mask_component = "r"; break; case MaskOverlayCalculatorOptions_MaskChannel_ALPHA: mask_component = "a"; break; } const std::string frag_src_tex = std::string(kMediaPipeFragmentShaderPreamble) + R"( DEFAULT_PRECISION(highp, float) in vec2 sample_coordinate; uniform sampler2D frame1; uniform sampler2D frame2; uniform sampler2D mask; void main() { vec4 color1 = texture2D(frame1, sample_coordinate); vec4 color2 = texture2D(frame2, sample_coordinate); vec4 weight = texture2D(mask, sample_coordinate); #define MASK_COMPONENT )" + mask_component + R"( gl_FragColor = mix(color1, color2, weight.MASK_COMPONENT); } )"; const GLchar* frag_src_const = R"( precision highp float; varying vec2 sample_coordinate; uniform sampler2D frame1; uniform sampler2D frame2; uniform float mask; void main() { vec4 color1 = texture2D(frame1, sample_coordinate); vec4 color2 = texture2D(frame2, sample_coordinate); float weight = mask; gl_FragColor = mix(color1, color2, weight); } )"; // shader program GlhCreateProgram(kBasicVertexShader, use_mask_tex_ ? frag_src_tex.c_str() : frag_src_const, NUM_ATTRIBUTES, &attr_name[0], attr_location, &program_); RET_CHECK(program_) << "Problem initializing the program."; unif_frame1_ = glGetUniformLocation(program_, "frame1"); unif_frame2_ = glGetUniformLocation(program_, "frame2"); unif_mask_ = glGetUniformLocation(program_, "mask"); return absl::OkStatus(); } absl::Status MaskOverlayCalculator::GlRender(const float mask_const) { glUseProgram(program_); glVertexAttribPointer(ATTRIB_VERTEX, 2, GL_FLOAT, 0, 0, kBasicSquareVertices); glEnableVertexAttribArray(ATTRIB_VERTEX); glVertexAttribPointer(ATTRIB_TEXTURE_POSITION, 2, GL_FLOAT, 0, 0, kBasicTextureVertices); glEnableVertexAttribArray(ATTRIB_TEXTURE_POSITION); glUniform1i(unif_frame1_, 1); glUniform1i(unif_frame2_, 2); if (use_mask_tex_) glUniform1i(unif_mask_, 3); else glUniform1f(unif_mask_, mask_const); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); return absl::OkStatus(); } MaskOverlayCalculator::~MaskOverlayCalculator() { helper_.RunInGlContext([this] { if (program_) { glDeleteProgram(program_); program_ = 0; } }); } } // namespace mediapipe