// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "mediapipe/examples/desktop/autoflip/quality/utils.h"

#include <math.h>

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <limits>
#include <memory>
#include <utility>
#include <vector>

#include "absl/log/absl_log.h"
#include "absl/memory/memory.h"
#include "mediapipe/examples/desktop/autoflip/quality/math_utils.h"
#include "mediapipe/framework/port/opencv_imgproc_inc.h"
#include "mediapipe/framework/port/ret_check.h"

namespace mediapipe {
namespace autoflip {
namespace {

// Returns true if the first pair should be considered greater than the second.
// This is used to sort detections by scores (from high to low).
bool PairCompare(const std::pair<float, int>& pair1,
                 const std::pair<float, int>& pair2) {
  return pair1.first > pair2.first;
}

}  // namespace
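// Scales a rectangle (either a pixel-space Rect or a normalized RectF) by the
// given horizontal and vertical factors and rounds the result to integer
// pixel coordinates.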
template <typename T>
void ScaleRect(const T& original_location, const double scale_x,
               const double scale_y, Rect* scaled_location) {
  scaled_location->set_x(round(original_location.x() * scale_x));
  scaled_location->set_y(round(original_location.y() * scale_y));
  scaled_location->set_width(round(original_location.width() * scale_x));
  scaled_location->set_height(round(original_location.height() * scale_y));
}
template void ScaleRect<Rect>(const Rect&, const double, const double, Rect*);
template void ScaleRect<RectF>(const RectF&, const double, const double,
                               Rect*);
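// Converts a rectangle in normalized [0, 1] coordinates into pixel
// coordinates for a frame of the given width and height.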
void NormalizedRectToRect(const RectF& normalized_location, const int width,
                          const int height, Rect* location) {
  ScaleRect(normalized_location, width, height, location);
}
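// Clamps a rectangle to the given bounds and fails if the rectangle lies
// entirely outside of them. Illustrative example (hypothetical values): a
// 40x40 box at (90, 50) clamped to a 100x100 frame becomes a 10x40 box at
// (90, 50).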
absl::Status ClampRect(const int width, const int height, Rect* location) {
  return ClampRect(0, 0, width, height, location);
}

absl::Status ClampRect(const int x0, const int y0, const int x1, const int y1,
                       Rect* location) {
  RET_CHECK(!(location->x() >= x1 || location->x() + location->width() <= x0 ||
              location->y() >= y1 || location->y() + location->height() <= y0));

  int clamped_left, clamped_right, clamped_top, clamped_bottom;
  RET_CHECK(MathUtil::Clamp(x0, x1, location->x(), &clamped_left));
  RET_CHECK(MathUtil::Clamp(x0, x1, location->x() + location->width(),
                            &clamped_right));
  RET_CHECK(MathUtil::Clamp(y0, y1, location->y(), &clamped_top));
  RET_CHECK(MathUtil::Clamp(y0, y1, location->y() + location->height(),
                            &clamped_bottom));
  location->set_x(clamped_left);
  location->set_y(clamped_top);
  location->set_width(std::max(0, clamped_right - clamped_left));
  location->set_height(std::max(0, clamped_bottom - clamped_top));
  return absl::OkStatus();
}
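// Expands *rect in place to the smallest rectangle that contains both the
// original *rect and rect_to_add.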
void RectUnion(const Rect& rect_to_add, Rect* rect) {
  const int x1 = std::min(rect->x(), rect_to_add.x());
  const int y1 = std::min(rect->y(), rect_to_add.y());
  const int x2 = std::max(rect->x() + rect->width(),
                          rect_to_add.x() + rect_to_add.width());
  const int y2 = std::max(rect->y() + rect->height(),
                          rect_to_add.y() + rect_to_add.height());
  rect->set_x(x1);
  rect->set_y(y1);
  rect->set_width(x2 - x1);
  rect->set_height(y2 - y1);
}
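// Packs detections into a KeyFrameInfo at the given timestamp: normalized
// locations are mapped to original-frame pixels, feature-frame locations are
// rescaled by the original/feature frame size ratio, and detections without a
// usable bounding box are dropped.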
absl::Status PackKeyFrameInfo(const int64_t frame_timestamp_ms,
                              const DetectionSet& detections,
                              const int original_frame_width,
                              const int original_frame_height,
                              const int feature_frame_width,
                              const int feature_frame_height,
                              KeyFrameInfo* key_frame_info) {
  RET_CHECK(key_frame_info != nullptr) << "KeyFrameInfo is null";
  RET_CHECK(original_frame_width > 0 && original_frame_height > 0 &&
            feature_frame_width > 0 && feature_frame_height > 0)
      << "Invalid frame size.";

  const double scale_x =
      static_cast<double>(original_frame_width) / feature_frame_width;
  const double scale_y =
      static_cast<double>(original_frame_height) / feature_frame_height;

  key_frame_info->set_timestamp_ms(frame_timestamp_ms);

  // Scales detections and filters out the ones with no bounding boxes.
  auto* processed_detections = key_frame_info->mutable_detections();
  for (const auto& original_detection : detections.detections()) {
    bool has_valid_location = true;
    Rect location;
    if (original_detection.has_location_normalized()) {
      NormalizedRectToRect(original_detection.location_normalized(),
                           original_frame_width, original_frame_height,
                           &location);
    } else if (original_detection.has_location()) {
      ScaleRect(original_detection.location(), scale_x, scale_y, &location);
    } else {
      has_valid_location = false;
      ABSL_LOG(ERROR) << "Detection missing a bounding box, skipped.";
    }
    if (has_valid_location) {
      if (!ClampRect(original_frame_width, original_frame_height, &location)
               .ok()) {
        ABSL_LOG(ERROR) << "Invalid detection bounding box, skipped.";
        continue;
      }
      auto* detection = processed_detections->add_detections();
      *detection = original_detection;
      *(detection->mutable_location()) = location;
    }
  }

  return absl::OkStatus();
}
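// Splits the detections into required and non-required salient regions and
// sorts each group by score from high to low. std::stable_sort keeps the
// original order for detections with equal scores.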
absl::Status SortDetections(const DetectionSet& detections,
                            std::vector<SalientRegion>* required_regions,
                            std::vector<SalientRegion>* non_required_regions) {
  required_regions->clear();
  non_required_regions->clear();

  // Makes pairs of score and index.
  std::vector<std::pair<float, int>> required_score_idx_pairs;
  std::vector<std::pair<float, int>> non_required_score_idx_pairs;
  for (int i = 0; i < detections.detections_size(); ++i) {
    const auto& detection = detections.detections(i);
    const auto pair = std::make_pair(detection.score(), i);
    if (detection.is_required()) {
      required_score_idx_pairs.push_back(pair);
    } else {
      non_required_score_idx_pairs.push_back(pair);
    }
  }

  // Sorts required regions by score.
  std::stable_sort(required_score_idx_pairs.begin(),
                   required_score_idx_pairs.end(), PairCompare);
  for (int i = 0; i < required_score_idx_pairs.size(); ++i) {
    const int original_idx = required_score_idx_pairs[i].second;
    required_regions->push_back(detections.detections(original_idx));
  }

  // Sorts non-required regions by score.
  std::stable_sort(non_required_score_idx_pairs.begin(),
                   non_required_score_idx_pairs.end(), PairCompare);
  for (int i = 0; i < non_required_score_idx_pairs.size(); ++i) {
    const int original_idx = non_required_score_idx_pairs[i].second;
    non_required_regions->push_back(detections.detections(original_idx));
  }

  return absl::OkStatus();
}
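// Computes the largest crop window of the target aspect ratio that fits inside
// the frame. For example (illustrative numbers), a 1920x1080 frame with a 9:16
// target aspect ratio (0.5625) yields a 608x1080 crop target, since
// std::round(1080 * 0.5625) == 608.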
absl::Status SetKeyFrameCropTarget(const int frame_width,
                                   const int frame_height,
                                   const double target_aspect_ratio,
                                   KeyFrameCropOptions* crop_options) {
  RET_CHECK_NE(crop_options, nullptr) << "KeyFrameCropOptions is null.";
  RET_CHECK_GT(frame_width, 0) << "Frame width is non-positive.";
  RET_CHECK_GT(frame_height, 0) << "Frame height is non-positive.";
  RET_CHECK_GT(target_aspect_ratio, 0)
      << "Target aspect ratio is non-positive.";
  const double input_aspect_ratio =
      static_cast<double>(frame_width) / frame_height;
  const int crop_target_width =
      target_aspect_ratio < input_aspect_ratio
          ? std::round(frame_height * target_aspect_ratio)
          : frame_width;
  const int crop_target_height =
      target_aspect_ratio < input_aspect_ratio
          ? frame_height
          : std::round(frame_width / target_aspect_ratio);
  crop_options->set_target_width(crop_target_width);
  crop_options->set_target_height(crop_target_height);
  return absl::OkStatus();
}
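// Aggregates per-key-frame crop results into a SceneKeyFrameCropSummary:
// clamped region centers, the score range, the union of required crop regions,
// a crop window large enough to cover the largest key-frame region, the
// fraction of key frames whose required regions are covered at the target
// size, and the horizontal/vertical motion of the region centers.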
absl::Status AggregateKeyFrameResults(
    const KeyFrameCropOptions& key_frame_crop_options,
    const std::vector<KeyFrameCropResult>& key_frame_crop_results,
    const int scene_frame_width, const int scene_frame_height,
    SceneKeyFrameCropSummary* scene_summary) {
  RET_CHECK_NE(scene_summary, nullptr)
      << "Output SceneKeyFrameCropSummary is null.";

  const int num_key_frames = key_frame_crop_results.size();

  RET_CHECK_GT(scene_frame_width, 0) << "Non-positive frame width.";
  RET_CHECK_GT(scene_frame_height, 0) << "Non-positive frame height.";

  const int target_width = key_frame_crop_options.target_width();
  const int target_height = key_frame_crop_options.target_height();
  RET_CHECK_GT(target_width, 0) << "Non-positive target width.";
  RET_CHECK_GT(target_height, 0) << "Non-positive target height.";
  RET_CHECK_LE(target_width, scene_frame_width)
      << "Target width exceeds frame width.";
  RET_CHECK_LE(target_height, scene_frame_height)
      << "Target height exceeds frame height.";

  scene_summary->set_scene_frame_width(scene_frame_width);
  scene_summary->set_scene_frame_height(scene_frame_height);
  scene_summary->set_crop_window_width(target_width);
  scene_summary->set_crop_window_height(target_height);

  // Handles the corner case of no key frames.
  if (num_key_frames == 0) {
    scene_summary->set_has_salient_region(false);
    return absl::OkStatus();
  }
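  // Seeds the running min/max center and score trackers with sentinel values
  // (mins start at the frame size / max float, maxes start at 0) so that the
  // first non-empty key frame overwrites them.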
  scene_summary->set_num_key_frames(num_key_frames);
  scene_summary->set_key_frame_center_min_x(scene_frame_width);
  scene_summary->set_key_frame_center_max_x(0);
  scene_summary->set_key_frame_center_min_y(scene_frame_height);
  scene_summary->set_key_frame_center_max_y(0);
  scene_summary->set_key_frame_min_score(std::numeric_limits<float>::max());
  scene_summary->set_key_frame_max_score(0.0);

  const float half_height = target_height / 2.0f;
  const float half_width = target_width / 2.0f;
  bool has_salient_region = false;
  int num_success_frames = 0;
  std::unique_ptr<Rect> required_crop_region_union = nullptr;
  for (int i = 0; i < num_key_frames; ++i) {
    auto* key_frame_compact_info = scene_summary->add_key_frame_compact_infos();
    const auto& result = key_frame_crop_results[i];
    key_frame_compact_info->set_timestamp_ms(result.timestamp_ms());
    if (result.are_required_regions_covered_in_target_size()) {
      num_success_frames++;
    }
    if (result.region_is_empty()) {
      key_frame_compact_info->set_center_x(-1.0);
      key_frame_compact_info->set_center_y(-1.0);
      key_frame_compact_info->set_score(-1.0);
      continue;
    }

    has_salient_region = true;
    if (!result.required_region_is_empty()) {
      if (required_crop_region_union == nullptr) {
        required_crop_region_union =
            absl::make_unique<Rect>(result.required_region());
      } else {
        RectUnion(result.required_region(), required_crop_region_union.get());
      }
    }

    const auto& region = result.region();
    float original_center_x = region.x() + region.width() / 2.0f;
    float original_center_y = region.y() + region.height() / 2.0f;
    RET_CHECK_GE(original_center_x, 0) << "Negative horizontal center.";
    RET_CHECK_GE(original_center_y, 0) << "Negative vertical center.";
    // Ensure that centered region of target size does not exceed frame size.
    float center_x, center_y;
    RET_CHECK(MathUtil::Clamp(half_width, scene_frame_width - half_width,
                              original_center_x, &center_x));
    RET_CHECK(MathUtil::Clamp(half_height, scene_frame_height - half_height,
                              original_center_y, &center_y));
    key_frame_compact_info->set_center_x(center_x);
    key_frame_compact_info->set_center_y(center_y);
    scene_summary->set_key_frame_center_min_x(
        std::min(scene_summary->key_frame_center_min_x(), center_x));
    scene_summary->set_key_frame_center_max_x(
        std::max(scene_summary->key_frame_center_max_x(), center_x));
    scene_summary->set_key_frame_center_min_y(
        std::min(scene_summary->key_frame_center_min_y(), center_y));
    scene_summary->set_key_frame_center_max_y(
        std::max(scene_summary->key_frame_center_max_y(), center_y));

    scene_summary->set_crop_window_width(
        std::max(scene_summary->crop_window_width(), region.width()));
    scene_summary->set_crop_window_height(
        std::max(scene_summary->crop_window_height(), region.height()));

    const float score = result.region_score();
    RET_CHECK_GE(score, 0.0) << "Negative score.";
    key_frame_compact_info->set_score(result.region_score());
    scene_summary->set_key_frame_min_score(
        std::min(scene_summary->key_frame_min_score(), score));
    scene_summary->set_key_frame_max_score(
        std::max(scene_summary->key_frame_max_score(), score));
  }

  scene_summary->set_has_salient_region(has_salient_region);
  scene_summary->set_has_required_salient_region(required_crop_region_union !=
                                                 nullptr);
  if (required_crop_region_union) {
    *(scene_summary->mutable_key_frame_required_crop_region_union()) =
        *required_crop_region_union;
  }
  const float success_rate =
      static_cast<float>(num_success_frames) / num_key_frames;
  scene_summary->set_frame_success_rate(success_rate);
  const float motion_x =
      static_cast<float>(scene_summary->key_frame_center_max_x() -
                         scene_summary->key_frame_center_min_x()) /
      scene_frame_width;
  scene_summary->set_horizontal_motion_amount(motion_x);
  const float motion_y =
      static_cast<float>(scene_summary->key_frame_center_max_y() -
                         scene_summary->key_frame_center_min_y()) /
      scene_frame_height;
  scene_summary->set_vertical_motion_amount(motion_y);
  return absl::OkStatus();
}
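// Computes the scene-level static border sizes as the minimum top/bottom
// static border height observed across the frames; a side's border size is 0
// if any frame lacks a static border on that side (or if there are no frames).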
absl::Status ComputeSceneStaticBordersSize(
    const std::vector<StaticFeatures>& static_features, int* top_border_size,
    int* bottom_border_size) {
  RET_CHECK(top_border_size) << "Output top border size is null.";
  RET_CHECK(bottom_border_size) << "Output bottom border size is null.";

  *top_border_size = -1;
  for (int i = 0; i < static_features.size(); ++i) {
    bool has_static_top_border = false;
    for (const auto& feature : static_features[i].border()) {
      if (feature.relative_position() == Border::TOP) {
        has_static_top_border = true;
        const int static_size = feature.border_position().height();
        *top_border_size = (*top_border_size > 0)
                               ? std::min(*top_border_size, static_size)
                               : static_size;
      }
    }
    if (!has_static_top_border) {
      *top_border_size = 0;
      break;
    }
  }

  *bottom_border_size = -1;
  for (int i = 0; i < static_features.size(); ++i) {
    bool has_static_bottom_border = false;
    for (const auto& feature : static_features[i].border()) {
      if (feature.relative_position() == Border::BOTTOM) {
        has_static_bottom_border = true;
        const int static_size = feature.border_position().height();
        *bottom_border_size = (*bottom_border_size > 0)
                                  ? std::min(*bottom_border_size, static_size)
                                  : static_size;
      }
    }
    if (!has_static_bottom_border) {
      *bottom_border_size = 0;
      break;
    }
  }

  *top_border_size = std::max(0, *top_border_size);
  *bottom_border_size = std::max(0, *bottom_border_size);
  return absl::OkStatus();
}
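// Detects whether the scene has a (near-)constant background color. When the
// fraction of frames with a solid background reaches
// min_fraction_solid_background_color, the background color over time is
// reported as piecewise linear functions of the CIE L*a*b* components.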
absl::Status FindSolidBackgroundColor(
    const std::vector<StaticFeatures>& static_features,
    const std::vector<int64_t>& static_features_timestamps,
    const double min_fraction_solid_background_color,
    bool* has_solid_background,
    PiecewiseLinearFunction* background_color_l_function,
    PiecewiseLinearFunction* background_color_a_function,
    PiecewiseLinearFunction* background_color_b_function) {
  RET_CHECK(has_solid_background) << "Output boolean is null.";
  RET_CHECK(background_color_l_function) << "Output color l function is null.";
  RET_CHECK(background_color_a_function) << "Output color a function is null.";
  RET_CHECK(background_color_b_function) << "Output color b function is null.";

  *has_solid_background = false;
  int solid_background_frames = 0;
  for (int i = 0; i < static_features.size(); ++i) {
    if (static_features[i].has_solid_background()) {
      solid_background_frames++;
      const auto& color = static_features[i].solid_background();
      const int64_t timestamp = static_features_timestamps[i];
      // BorderDetectionCalculator sets color assuming the input frame is
      // BGR, but in reality we have RGB, so we need to revert it here.
      // TODO remove this custom logic in BorderDetectionCalculator,
      // original CroppingCalculator, and this calculator.
      cv::Mat3f rgb_mat(1, 1, cv::Vec3b(color.b(), color.g(), color.r()));
      // Necessary scaling of the RGB values from [0, 255] to [0, 1] based on:
      // https://docs.opencv.org/2.4/modules/imgproc/doc/miscellaneous_transformations.html#cvtcolor
      rgb_mat *= 1.0 / 255;
      cv::Mat3f lab_mat(1, 1);
      cv::cvtColor(rgb_mat, lab_mat, cv::COLOR_RGB2Lab);
      // TODO change to piecewise constant interpolation if there is
      // visual artifact. We can simply add one more point right before the
      // next point with same value to mimic piecewise constant behavior.
      const auto lab = lab_mat.at<cv::Vec3f>(0, 0);
      background_color_l_function->AddPoint(timestamp, lab[0]);
      background_color_a_function->AddPoint(timestamp, lab[1]);
      background_color_b_function->AddPoint(timestamp, lab[2]);
    }
  }

  if (!static_features.empty() &&
      static_cast<float>(solid_background_frames) / static_features.size() >=
          min_fraction_solid_background_color) {
    *has_solid_background = true;
  }
  return absl::OkStatus();
}
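// Applies each 2x3 affine transform to crop/retarget the corresponding frame
// into a preallocated output image via cv::warpAffine. The input and output
// vectors must be the same length, and each output must already have the same
// type as its input frame.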
absl::Status AffineRetarget(const cv::Size& output_size,
                            const std::vector<cv::Mat>& frames,
                            const std::vector<cv::Mat>& affine_projection,
                            std::vector<cv::Mat>* cropped_frames) {
  RET_CHECK(frames.size() == affine_projection.size())
      << "number of frames and retarget offsets must be the same.";
  RET_CHECK(cropped_frames->size() == frames.size())
      << "Output vector cropped_frames must be populated with output images of "
         "the same type, size and count.";
  for (int i = 0; i < frames.size(); i++) {
    RET_CHECK(frames[i].type() == (*cropped_frames)[i].type())
        << "input and output images must be the same type.";
    const auto affine = affine_projection[i];
    RET_CHECK(affine.cols == 3) << "Affine matrix must be 2x3";
    RET_CHECK(affine.rows == 2) << "Affine matrix must be 2x3";
    cv::warpAffine(frames[i], (*cropped_frames)[i], affine, output_size);
  }
  return absl::OkStatus();
}

}  // namespace autoflip
}  // namespace mediapipe