172 lines
7.0 KiB
C++
172 lines
7.0 KiB
C++
// Copyright 2019 The MediaPipe Authors.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
#include "mediapipe/examples/desktop/autoflip/quality/scene_cropper.h"
|
|
|
|
#include <memory>
|
|
|
|
#include "absl/memory/memory.h"
|
|
#include "mediapipe/examples/desktop/autoflip/quality/polynomial_regression_path_solver.h"
|
|
#include "mediapipe/examples/desktop/autoflip/quality/utils.h"
|
|
#include "mediapipe/framework/port/opencv_core_inc.h"
|
|
#include "mediapipe/framework/port/ret_check.h"
|
|
#include "mediapipe/framework/port/status.h"
|
|
|
|
// TODO: Parameterize FOV based on camera specs.
|
|
constexpr float kWidthFieldOfView = 60;
|
|
|
|
namespace mediapipe {
|
|
namespace autoflip {
|
|
|
|
absl::Status SceneCropper::ProcessKinematicPathSolver(
|
|
const SceneKeyFrameCropSummary& scene_summary,
|
|
const std::vector<int64>& scene_timestamps,
|
|
const std::vector<bool>& is_key_frames,
|
|
const std::vector<FocusPointFrame>& focus_point_frames,
|
|
const bool continue_last_scene, std::vector<cv::Mat>* all_xforms) {
|
|
// TODO: Fix upstream calculators to not crop beyond portrait target
|
|
// value.
|
|
/*
|
|
RET_CHECK(scene_summary.scene_frame_height() ==
|
|
scene_summary.crop_window_height())
|
|
<< "Kinematic path solver does not yet support horizontal cropping.";
|
|
*/
|
|
|
|
RET_CHECK(scene_timestamps.size() == focus_point_frames.size())
|
|
<< "Kinematic path solver does not yet support downsampled detections.";
|
|
|
|
if (!path_solver_initalized_ || !continue_last_scene) {
|
|
int min_location = scene_summary.crop_window_width() / 2;
|
|
int max_location = scene_summary.scene_frame_width() -
|
|
scene_summary.crop_window_width() / 2;
|
|
kinematic_path_solver_ = std::make_unique<KinematicPathSolver>(
|
|
camera_motion_options_.kinematic_options(), min_location, max_location,
|
|
static_cast<float>(frame_width_) / kWidthFieldOfView);
|
|
path_solver_initalized_ = true;
|
|
}
|
|
int keyframe_counter = 0;
|
|
for (int i = 0; i < is_key_frames.size(); i++) {
|
|
if (is_key_frames[i]) {
|
|
RET_CHECK_EQ(focus_point_frames[keyframe_counter].point().size(), 2)
|
|
<< "Expected focus_points to equal 2";
|
|
int observed_x = std::round(
|
|
focus_point_frames[keyframe_counter].point(0).norm_point_x() *
|
|
scene_summary.scene_frame_width());
|
|
MP_RETURN_IF_ERROR(kinematic_path_solver_->AddObservation(
|
|
observed_x, scene_timestamps[i]));
|
|
keyframe_counter++;
|
|
} else {
|
|
MP_RETURN_IF_ERROR(
|
|
kinematic_path_solver_->UpdatePrediction(scene_timestamps[i]));
|
|
}
|
|
int x_path;
|
|
MP_RETURN_IF_ERROR(kinematic_path_solver_->GetState(&x_path));
|
|
cv::Mat transform = cv::Mat::eye(2, 3, CV_32FC1);
|
|
transform.at<float>(0, 2) =
|
|
-(x_path - scene_summary.crop_window_width() / 2);
|
|
all_xforms->push_back(transform);
|
|
}
|
|
return absl::OkStatus();
|
|
}
|
|
|
|
absl::Status SceneCropper::CropFrames(
|
|
const SceneKeyFrameCropSummary& scene_summary,
|
|
const std::vector<int64>& scene_timestamps,
|
|
const std::vector<bool>& is_key_frames,
|
|
const std::vector<cv::Mat>& scene_frames_or_empty,
|
|
const std::vector<FocusPointFrame>& focus_point_frames,
|
|
const std::vector<FocusPointFrame>& prior_focus_point_frames,
|
|
int top_static_border_size, int bottom_static_border_size,
|
|
const bool continue_last_scene, std::vector<cv::Rect>* crop_from_location,
|
|
std::vector<cv::Mat>* cropped_frames) {
|
|
const int num_scene_frames = scene_timestamps.size();
|
|
RET_CHECK_GT(num_scene_frames, 0) << "No scene frames.";
|
|
RET_CHECK_EQ(focus_point_frames.size(), num_scene_frames)
|
|
<< "Wrong size of FocusPointFrames.";
|
|
|
|
const int frame_width = scene_summary.scene_frame_width();
|
|
const int frame_height = scene_summary.scene_frame_height();
|
|
const int crop_width = scene_summary.crop_window_width();
|
|
const int crop_height = scene_summary.crop_window_height();
|
|
RET_CHECK_GT(crop_width, 0) << "Crop width is non-positive.";
|
|
RET_CHECK_GT(crop_height, 0) << "Crop height is non-positive.";
|
|
RET_CHECK_LE(crop_width, frame_width) << "Crop width exceeds frame width.";
|
|
RET_CHECK_LE(crop_height, frame_height)
|
|
<< "Crop height exceeds frame height.";
|
|
|
|
RET_CHECK(camera_motion_options_.has_polynomial_path_solver() ||
|
|
camera_motion_options_.has_kinematic_options())
|
|
<< "No camera motion model selected.";
|
|
|
|
// Computes transforms.
|
|
|
|
std::vector<cv::Mat> scene_frame_xforms;
|
|
int num_prior = 0;
|
|
if (camera_motion_options_.has_polynomial_path_solver()) {
|
|
num_prior = prior_focus_point_frames.size();
|
|
std::vector<cv::Mat> all_xforms;
|
|
PolynomialRegressionPathSolver solver;
|
|
RET_CHECK_OK(solver.ComputeCameraPath(
|
|
focus_point_frames, prior_focus_point_frames, frame_width, frame_height,
|
|
crop_width, crop_height, &all_xforms));
|
|
|
|
scene_frame_xforms =
|
|
std::vector<cv::Mat>(all_xforms.begin() + num_prior, all_xforms.end());
|
|
|
|
// Convert the matrix from center-aligned to upper-left aligned.
|
|
for (cv::Mat& xform : scene_frame_xforms) {
|
|
cv::Mat affine_opencv = cv::Mat::eye(2, 3, CV_32FC1);
|
|
affine_opencv.at<float>(0, 2) =
|
|
-(xform.at<float>(0, 2) + frame_width / 2 - crop_width / 2);
|
|
affine_opencv.at<float>(1, 2) =
|
|
-(xform.at<float>(1, 2) + frame_height / 2 - crop_height / 2);
|
|
xform = affine_opencv;
|
|
}
|
|
} else if (camera_motion_options_.has_kinematic_options()) {
|
|
num_prior = 0;
|
|
MP_RETURN_IF_ERROR(ProcessKinematicPathSolver(
|
|
scene_summary, scene_timestamps, is_key_frames, focus_point_frames,
|
|
continue_last_scene, &scene_frame_xforms));
|
|
}
|
|
|
|
// Store the "crop from" location on the input frame for use with an external
|
|
// renderer.
|
|
for (int i = 0; i < num_scene_frames; i++) {
|
|
const int left = -(scene_frame_xforms[i].at<float>(0, 2));
|
|
const int top =
|
|
top_static_border_size - (scene_frame_xforms[i].at<float>(1, 2));
|
|
crop_from_location->push_back(cv::Rect(left, top, crop_width, crop_height));
|
|
}
|
|
|
|
// If no cropped_frames is passed in, return directly.
|
|
if (!cropped_frames) {
|
|
return absl::OkStatus();
|
|
}
|
|
RET_CHECK(!scene_frames_or_empty.empty())
|
|
<< "If |cropped_frames| != nullptr, scene_frames_or_empty must not be "
|
|
"empty.";
|
|
// Prepares cropped frames.
|
|
cropped_frames->resize(num_scene_frames);
|
|
for (int i = 0; i < num_scene_frames; ++i) {
|
|
(*cropped_frames)[i] = cv::Mat::zeros(crop_height, crop_width,
|
|
scene_frames_or_empty[i].type());
|
|
}
|
|
return AffineRetarget(cv::Size(crop_width, crop_height),
|
|
scene_frames_or_empty, scene_frame_xforms,
|
|
cropped_frames);
|
|
}
|
|
|
|
} // namespace autoflip
|
|
} // namespace mediapipe
|