161 lines
6.7 KiB
161 lines
6.7 KiB
// Copyright 2021 The MediaPipe Authors.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/calculators/image/affine_transformation_runner_opencv.h"
#include <memory>
#include "absl/memory/memory.h"
#include "absl/status/statusor.h"
#include "mediapipe/calculators/image/affine_transformation.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/image_frame_opencv.h"
#include "mediapipe/framework/port/opencv_core_inc.h"
#include "mediapipe/framework/port/opencv_imgproc_inc.h"
#include "mediapipe/framework/port/ret_check.h"
namespace mediapipe {
namespace {
// Translates an AffineTransformation border mode into the matching OpenCV
// border type, suitable for the `borderMode` argument of cv::warpAffine.
//
//   kZero      -> cv::BORDER_CONSTANT  (out-of-image pixels take the constant
//                                       border value, which cv::warpAffine
//                                       defaults to zero)
//   kReplicate -> cv::BORDER_REPLICATE (out-of-image pixels repeat the nearest
//                                       edge pixel)
//
// The switch intentionally has no `default:` so the compiler can warn when a
// new BorderMode enumerator is added without a mapping here.
cv::BorderTypes GetBorderModeForOpenCv(
    AffineTransformation::BorderMode border_mode) {
  switch (border_mode) {
    case AffineTransformation::BorderMode::kZero:
      return cv::BORDER_CONSTANT;
    case AffineTransformation::BorderMode::kReplicate:
      return cv::BORDER_REPLICATE;
  }
}
// AffineTransformation runner that operates on ImageFrame and performs the
// warp with OpenCV's cv::warpAffine.
class OpenCvRunner
    : public AffineTransformation::Runner<ImageFrame, ImageFrame> {
 public:
  // Warps `input` into a newly allocated ImageFrame.
  //
  // Args:
  //   input:       source image; the output uses the same image format.
  //   matrix:      16 floats read as a row-major 4x4 transform. It operates on
  //                relative coordinates and is applied with
  //                cv::WARP_INVERSE_MAP, i.e. it maps destination coordinates
  //                to source coordinates.
  //   size:        width and height of the output image.
  //   border_mode: how pixels that map outside of `input` are filled.
  //
  // Returns the transformed image.
  absl::StatusOr<ImageFrame> Run(
      const ImageFrame& input, const std::array<float, 16>& matrix,
      const AffineTransformation::Size& size,
      AffineTransformation::BorderMode border_mode) override {
    // OpenCV warpAffine works in absolute coordinates, so the transform (which
    // accepts and produces relative coordinates) should be adjusted to first
    // normalize coordinates and then scale them.
    // clang-format off
    cv::Matx44f normalize_dst_coordinate({
        1.0f / size.width, 0.0f,               0.0f, 0.0f,
        0.0f,              1.0f / size.height, 0.0f, 0.0f,
        0.0f,              0.0f,               1.0f, 0.0f,
        0.0f,              0.0f,               0.0f, 1.0f});
    cv::Matx44f scale_src_coordinate({
        1.0f * input.Width(), 0.0f,                  0.0f, 0.0f,
        0.0f,                 1.0f * input.Height(), 0.0f, 0.0f,
        0.0f,                 0.0f,                  1.0f, 0.0f,
        0.0f,                 0.0f,                  0.0f, 1.0f});
    // clang-format on

    cv::Matx44f adjust_dst_coordinate;
    cv::Matx44f adjust_src_coordinate;
    // TODO: update to always use accurate implementation.
    constexpr bool kOpenCvCompatibility = true;
    if (kOpenCvCompatibility) {
      adjust_dst_coordinate = normalize_dst_coordinate;
      adjust_src_coordinate = scale_src_coordinate;
    } else {
      // To do an accurate affine image transformation and make "on-cpu" and
      // "on-gpu" calculations aligned - extra offset is required to select
      // correct pixels.
      //
      // Each destination pixel corresponds to some pixels region from source
      // image. (In case of downscaling there can be more than one pixel.) The
      // offset for x and y is calculated in the way, so pixel in the middle of
      // the region is selected.
      //
      // For simplicity's sake, let's consider downscaling from 100x50 to 10x10
      // without a rotation:
      // 1. Each destination pixel corresponds to 10x5 region
      //    X range: [0, .. , 9]
      //    Y range: [0, .. , 4]
      // 2. Considering we have __discrete__ pixels, the center of the region is
      //    between (4, 2) and (5, 2) pixels, let's assume it's a "pixel"
      //    (4.5, 2).
      // 3. When using the above as an offset for every pixel select while
      //    downscaling, resulting pixels are:
      //      (4.5, 2),  (14.5, 2),  .. , (94.5, 2)
      //      (4.5, 7),  (14.5, 7),  .. , (94.5, 7)
      //      ..
      //      (4.5, 47), (14.5, 47), .. , (94.5, 47)
      //    instead of:
      //      (0, 0),  (10, 0),  .. , (90, 0)
      //      (0, 5),  (10, 5),  .. , (90, 5)
      //      ..
      //      (0, 45), (10, 45), .. , (90, 45)
      //    The latter looks shifted.
      //
      // Offsets are needed, so that __discrete__ pixel at (0, 0) corresponds to
      // the same pixel as would __non discrete__ pixel at (0.5, 0.5). Hence,
      // transformation matrix should shift coordinates by (0.5, 0.5) as the
      // very first step.
      //
      // Due to the above shift, transformed coordinates would be valid for
      // float coordinates where pixel (0, 0) spans [0.0, 1.0) x [0.0, 1.0).
      // To make it valid for __discrete__ pixels, transformation matrix should
      // shift coordinate by (-0.5f, -0.5f) as the very last step. (E.g. if we
      // get (0.5f, 0.5f), then it's (0, 0) __discrete__ pixel.)
      // clang-format off
      cv::Matx44f shift_dst({1.0f, 0.0f, 0.0f, 0.5f,
                             0.0f, 1.0f, 0.0f, 0.5f,
                             0.0f, 0.0f, 1.0f, 0.0f,
                             0.0f, 0.0f, 0.0f, 1.0f});
      cv::Matx44f shift_src({1.0f, 0.0f, 0.0f, -0.5f,
                             0.0f, 1.0f, 0.0f, -0.5f,
                             0.0f, 0.0f, 1.0f, 0.0f,
                             0.0f, 0.0f, 0.0f, 1.0f});
      // clang-format on
      adjust_dst_coordinate = normalize_dst_coordinate * shift_dst;
      adjust_src_coordinate = shift_src * scale_src_coordinate;
    }

    // Compose right-to-left: destination pixel -> relative destination
    // coordinate -> relative source coordinate -> source pixel.
    cv::Matx44f transform(matrix.data());
    cv::Matx44f transform_absolute =
        adjust_src_coordinate * transform * adjust_dst_coordinate;

    cv::Mat in_mat = formats::MatView(&input);

    // cv::warpAffine takes a 2x3 matrix: keep the x/y rows of the 4x4
    // transform and, from each row, the x and y coefficients plus the
    // translation term (columns 0, 1 and 3; the z column is dropped).
    cv::Mat cv_affine_transform(2, 3, CV_32F);
    cv_affine_transform.at<float>(0, 0) = transform_absolute.val[0];
    cv_affine_transform.at<float>(0, 1) = transform_absolute.val[1];
    cv_affine_transform.at<float>(0, 2) = transform_absolute.val[3];
    cv_affine_transform.at<float>(1, 0) = transform_absolute.val[4];
    cv_affine_transform.at<float>(1, 1) = transform_absolute.val[5];
    cv_affine_transform.at<float>(1, 2) = transform_absolute.val[7];

    // `out_mat` is a view over `out_image`, so warpAffine writes straight into
    // the frame that is returned.
    ImageFrame out_image(input.Format(), size.width, size.height);
    cv::Mat out_mat = formats::MatView(&out_image);

    // WARP_INVERSE_MAP: the matrix is used as-is as the destination -> source
    // mapping instead of being inverted by OpenCV first.
    cv::warpAffine(in_mat, out_mat, cv_affine_transform,
                   cv::Size(out_mat.cols, out_mat.rows),
                   /*flags=*/cv::INTER_LINEAR | cv::WARP_INVERSE_MAP,
                   GetBorderModeForOpenCv(border_mode));

    return out_image;
  }
};
} // namespace
std::unique_ptr<AffineTransformation::Runner<ImageFrame, ImageFrame>>>
CreateAffineTransformationOpenCvRunner() {
return absl::make_unique<OpenCvRunner>();
} // namespace mediapipe