mediapipe/mediapipe2/examples/desktop/autoflip/quality/scene_cropper.cc
2021-06-10 23:01:19 +00:00

172 lines
7.0 KiB
C++

// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/examples/desktop/autoflip/quality/scene_cropper.h"
#include <memory>
#include "absl/memory/memory.h"
#include "mediapipe/examples/desktop/autoflip/quality/polynomial_regression_path_solver.h"
#include "mediapipe/examples/desktop/autoflip/quality/utils.h"
#include "mediapipe/framework/port/opencv_core_inc.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
// TODO: Parameterize FOV based on camera specs.
constexpr float kWidthFieldOfView = 60;
namespace mediapipe {
namespace autoflip {
absl::Status SceneCropper::ProcessKinematicPathSolver(
const SceneKeyFrameCropSummary& scene_summary,
const std::vector<int64>& scene_timestamps,
const std::vector<bool>& is_key_frames,
const std::vector<FocusPointFrame>& focus_point_frames,
const bool continue_last_scene, std::vector<cv::Mat>* all_xforms) {
// TODO: Fix upstream calculators to not crop beyond portrait target
// value.
/*
RET_CHECK(scene_summary.scene_frame_height() ==
scene_summary.crop_window_height())
<< "Kinematic path solver does not yet support horizontal cropping.";
*/
RET_CHECK(scene_timestamps.size() == focus_point_frames.size())
<< "Kinematic path solver does not yet support downsampled detections.";
if (!path_solver_initalized_ || !continue_last_scene) {
int min_location = scene_summary.crop_window_width() / 2;
int max_location = scene_summary.scene_frame_width() -
scene_summary.crop_window_width() / 2;
kinematic_path_solver_ = std::make_unique<KinematicPathSolver>(
camera_motion_options_.kinematic_options(), min_location, max_location,
static_cast<float>(frame_width_) / kWidthFieldOfView);
path_solver_initalized_ = true;
}
int keyframe_counter = 0;
for (int i = 0; i < is_key_frames.size(); i++) {
if (is_key_frames[i]) {
RET_CHECK_EQ(focus_point_frames[keyframe_counter].point().size(), 2)
<< "Expected focus_points to equal 2";
int observed_x = std::round(
focus_point_frames[keyframe_counter].point(0).norm_point_x() *
scene_summary.scene_frame_width());
MP_RETURN_IF_ERROR(kinematic_path_solver_->AddObservation(
observed_x, scene_timestamps[i]));
keyframe_counter++;
} else {
MP_RETURN_IF_ERROR(
kinematic_path_solver_->UpdatePrediction(scene_timestamps[i]));
}
int x_path;
MP_RETURN_IF_ERROR(kinematic_path_solver_->GetState(&x_path));
cv::Mat transform = cv::Mat::eye(2, 3, CV_32FC1);
transform.at<float>(0, 2) =
-(x_path - scene_summary.crop_window_width() / 2);
all_xforms->push_back(transform);
}
return absl::OkStatus();
}
absl::Status SceneCropper::CropFrames(
const SceneKeyFrameCropSummary& scene_summary,
const std::vector<int64>& scene_timestamps,
const std::vector<bool>& is_key_frames,
const std::vector<cv::Mat>& scene_frames_or_empty,
const std::vector<FocusPointFrame>& focus_point_frames,
const std::vector<FocusPointFrame>& prior_focus_point_frames,
int top_static_border_size, int bottom_static_border_size,
const bool continue_last_scene, std::vector<cv::Rect>* crop_from_location,
std::vector<cv::Mat>* cropped_frames) {
const int num_scene_frames = scene_timestamps.size();
RET_CHECK_GT(num_scene_frames, 0) << "No scene frames.";
RET_CHECK_EQ(focus_point_frames.size(), num_scene_frames)
<< "Wrong size of FocusPointFrames.";
const int frame_width = scene_summary.scene_frame_width();
const int frame_height = scene_summary.scene_frame_height();
const int crop_width = scene_summary.crop_window_width();
const int crop_height = scene_summary.crop_window_height();
RET_CHECK_GT(crop_width, 0) << "Crop width is non-positive.";
RET_CHECK_GT(crop_height, 0) << "Crop height is non-positive.";
RET_CHECK_LE(crop_width, frame_width) << "Crop width exceeds frame width.";
RET_CHECK_LE(crop_height, frame_height)
<< "Crop height exceeds frame height.";
RET_CHECK(camera_motion_options_.has_polynomial_path_solver() ||
camera_motion_options_.has_kinematic_options())
<< "No camera motion model selected.";
// Computes transforms.
std::vector<cv::Mat> scene_frame_xforms;
int num_prior = 0;
if (camera_motion_options_.has_polynomial_path_solver()) {
num_prior = prior_focus_point_frames.size();
std::vector<cv::Mat> all_xforms;
PolynomialRegressionPathSolver solver;
RET_CHECK_OK(solver.ComputeCameraPath(
focus_point_frames, prior_focus_point_frames, frame_width, frame_height,
crop_width, crop_height, &all_xforms));
scene_frame_xforms =
std::vector<cv::Mat>(all_xforms.begin() + num_prior, all_xforms.end());
// Convert the matrix from center-aligned to upper-left aligned.
for (cv::Mat& xform : scene_frame_xforms) {
cv::Mat affine_opencv = cv::Mat::eye(2, 3, CV_32FC1);
affine_opencv.at<float>(0, 2) =
-(xform.at<float>(0, 2) + frame_width / 2 - crop_width / 2);
affine_opencv.at<float>(1, 2) =
-(xform.at<float>(1, 2) + frame_height / 2 - crop_height / 2);
xform = affine_opencv;
}
} else if (camera_motion_options_.has_kinematic_options()) {
num_prior = 0;
MP_RETURN_IF_ERROR(ProcessKinematicPathSolver(
scene_summary, scene_timestamps, is_key_frames, focus_point_frames,
continue_last_scene, &scene_frame_xforms));
}
// Store the "crop from" location on the input frame for use with an external
// renderer.
for (int i = 0; i < num_scene_frames; i++) {
const int left = -(scene_frame_xforms[i].at<float>(0, 2));
const int top =
top_static_border_size - (scene_frame_xforms[i].at<float>(1, 2));
crop_from_location->push_back(cv::Rect(left, top, crop_width, crop_height));
}
// If no cropped_frames is passed in, return directly.
if (!cropped_frames) {
return absl::OkStatus();
}
RET_CHECK(!scene_frames_or_empty.empty())
<< "If |cropped_frames| != nullptr, scene_frames_or_empty must not be "
"empty.";
// Prepares cropped frames.
cropped_frames->resize(num_scene_frames);
for (int i = 0; i < num_scene_frames; ++i) {
(*cropped_frames)[i] = cv::Mat::zeros(crop_height, crop_width,
scene_frames_or_empty[i].type());
}
return AffineRetarget(cv::Size(crop_width, crop_height),
scene_frames_or_empty, scene_frame_xforms,
cropped_frames);
}
} // namespace autoflip
} // namespace mediapipe