Project import generated by Copybara.

GitOrigin-RevId: 0517756260533d374df93679965ca662d0ec6943
This commit is contained in:
MediaPipe Team 2020-01-09 17:51:05 -08:00 committed by Hadon Nash
parent 38ee2603a7
commit ae6be10afe
192 changed files with 16410 additions and 592 deletions

View File

@ -12,7 +12,7 @@ build --copt='-Wno-comment'
build --copt='-Wno-return-type'
build --copt='-Wno-unused-local-typedefs'
build --copt='-Wno-ignored-attributes'
# Temporarily set the incompatiblity flag for Bazel 0.27.0 and above
# Temporarily set the incompatibility flag for Bazel 0.27.0 and above
build --incompatible_disable_deprecated_attr_params=false
build --incompatible_depset_is_not_iterable=false

View File

@ -15,6 +15,7 @@
* [Hair Segmentation](mediapipe/docs/hair_segmentation_mobile_gpu.md)
* [Object Detection](mediapipe/docs/object_detection_mobile_gpu.md)
* [Object Detection and Tracking](mediapipe/docs/object_tracking_mobile_gpu.md)
* [AutoFlip](mediapipe/docs/autoflip.md)
![hand_tracking](mediapipe/docs/images/mobile/hand_tracking_3d_android_gpu_small.gif)
![multi-hand_tracking](mediapipe/docs/images/mobile/multi_hand_tracking_android_gpu_small.gif)

View File

@ -10,7 +10,9 @@ http_archive(
sha256 = "2ef429f5d7ce7111263289644d233707dba35e39696377ebab8b0bc701f7818e",
)
load("@bazel_skylib//lib:versions.bzl", "versions")
versions.check(minimum_bazel_version = "0.24.1")
versions.check(minimum_bazel_version = "0.24.1",
maximum_bazel_version = "1.2.1")
# ABSL cpp library lts_2019_08_08.
http_archive(

View File

@ -47,6 +47,13 @@ proto_library(
deps = ["//mediapipe/framework:calculator_proto"],
)
proto_library(
name = "packet_thinner_calculator_proto",
srcs = ["packet_thinner_calculator.proto"],
visibility = ["//visibility:public"],
deps = ["//mediapipe/framework:calculator_proto"],
)
proto_library(
name = "split_vector_calculator_proto",
srcs = ["split_vector_calculator.proto"],
@ -102,6 +109,14 @@ mediapipe_cc_proto_library(
deps = [":packet_resampler_calculator_proto"],
)
mediapipe_cc_proto_library(
name = "packet_thinner_calculator_cc_proto",
srcs = ["packet_thinner_calculator.proto"],
cc_deps = ["//mediapipe/framework:calculator_cc_proto"],
visibility = ["//visibility:public"],
deps = [":packet_thinner_calculator_proto"],
)
mediapipe_cc_proto_library(
name = "split_vector_calculator_cc_proto",
srcs = ["split_vector_calculator.proto"],
@ -284,7 +299,6 @@ cc_test(
srcs = ["concatenate_vector_calculator_test.cc"],
deps = [
":concatenate_vector_calculator",
"//mediapipe/calculators/core:packet_resampler_calculator_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework:calculator_runner",
"//mediapipe/framework:timestamp",
@ -451,6 +465,37 @@ cc_test(
],
)
cc_library(
name = "packet_thinner_calculator",
srcs = ["packet_thinner_calculator.cc"],
visibility = ["//visibility:public"],
deps = [
"//mediapipe/calculators/core:packet_thinner_calculator_cc_proto",
"//mediapipe/framework:calculator_context",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/formats:video_stream_header",
"//mediapipe/framework/port:integral_types",
"//mediapipe/framework/port:logging",
"//mediapipe/framework/port:status",
],
alwayslink = 1,
)
cc_test(
name = "packet_thinner_calculator_test",
srcs = ["packet_thinner_calculator_test.cc"],
deps = [
":packet_thinner_calculator",
"//mediapipe/calculators/core:packet_thinner_calculator_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework:calculator_runner",
"//mediapipe/framework/formats:video_stream_header",
"//mediapipe/framework/port:gtest_main",
"//mediapipe/framework/port:integral_types",
"@com_google_absl//absl/strings",
],
)
cc_library(
name = "pass_through_calculator",
srcs = ["pass_through_calculator.cc"],
@ -572,6 +617,7 @@ cc_test(
cc_library(
name = "packet_resampler_calculator",
srcs = ["packet_resampler_calculator.cc"],
hdrs = ["packet_resampler_calculator.h"],
visibility = [
"//visibility:public",
],
@ -595,17 +641,17 @@ cc_library(
cc_test(
name = "packet_resampler_calculator_test",
timeout = "short",
srcs = ["packet_resampler_calculator_test.cc"],
srcs = [
"packet_resampler_calculator_test.cc",
],
deps = [
":packet_resampler_calculator",
"//mediapipe/calculators/core:packet_resampler_calculator_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework:calculator_runner",
"//mediapipe/framework:timestamp",
"//mediapipe/framework/formats:video_stream_header",
"//mediapipe/framework/port:gtest_main",
"//mediapipe/framework/port:parse_text_proto",
"//mediapipe/framework/port:status",
"@com_google_absl//absl/strings",
],
)
@ -698,7 +744,13 @@ cc_library(
"//mediapipe/util:resource_util",
"@org_tensorflow//tensorflow/lite:framework",
"@org_tensorflow//tensorflow/lite/kernels:builtin_ops",
],
] + select({
"//mediapipe/gpu:disable_gpu": [],
"//mediapipe:ios": [],
"//conditions:default": [
"@org_tensorflow//tensorflow/lite/delegates/gpu/gl:gl_buffer",
],
}),
alwayslink = 1,
)

View File

@ -12,23 +12,9 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include <cstdlib>
#include <memory>
#include <string>
#include "mediapipe/calculators/core/packet_resampler_calculator.h"
#include "absl/strings/str_cat.h"
#include "mediapipe/calculators/core/packet_resampler_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/collection_item_id.h"
#include "mediapipe/framework/deps/mathutil.h"
#include "mediapipe/framework/deps/random_base.h"
#include "mediapipe/framework/formats/video_stream_header.h"
#include "mediapipe/framework/port/integral_types.h"
#include "mediapipe/framework/port/logging.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/framework/port/status_macros.h"
#include "mediapipe/framework/tool/options_util.h"
#include <memory>
namespace {
@ -45,120 +31,7 @@ std::unique_ptr<RandomBase> CreateSecureRandom(const std::string& seed) {
namespace mediapipe {
// This calculator is used to normalize the frequency of the packets
// out of a stream. Given a desired frame rate, packets are going to be
// removed or added to achieve it.
//
// The jitter feature is disabled by default. To enable it, you need to
// implement CreateSecureRandom(const std::string&).
//
// The data stream may be either specified as the only stream (by index)
// or as the stream with tag "DATA".
//
// The input and output streams may be accompanied by a VIDEO_HEADER
// stream. This stream includes a VideoHeader at Timestamp::PreStream().
// The input VideoHeader on the VIDEO_HEADER stream will always be updated
// with the resampler frame rate no matter what the options value for
// output_header is before being output on the output VIDEO_HEADER stream.
// If the input VideoHeader is not available, then only the frame rate
// value will be set in the output.
//
// Related:
// packet_downsampler_calculator.cc: skips packets regardless of timestamps.
class PacketResamplerCalculator : public CalculatorBase {
public:
static ::mediapipe::Status GetContract(CalculatorContract* cc);
::mediapipe::Status Open(CalculatorContext* cc) override;
::mediapipe::Status Close(CalculatorContext* cc) override;
::mediapipe::Status Process(CalculatorContext* cc) override;
private:
// Calculates the first sampled timestamp that incorporates a jittering
// offset.
void InitializeNextOutputTimestampWithJitter();
// Calculates the next sampled timestamp that incorporates a jittering offset.
void UpdateNextOutputTimestampWithJitter();
// Logic for Process() when jitter_ != 0.0.
::mediapipe::Status ProcessWithJitter(CalculatorContext* cc);
// Logic for Process() when jitter_ == 0.0.
::mediapipe::Status ProcessWithoutJitter(CalculatorContext* cc);
// Given the current count of periods that have passed, this returns
// the next valid timestamp of the middle point of the next period:
// if count is 0, it returns the first_timestamp_.
// if count is 1, it returns the first_timestamp_ + period (corresponding
// to the first tick using exact fps)
// e.g. for frame_rate=30 and first_timestamp_=0:
// 0: 0
// 1: 33333
// 2: 66667
// 3: 100000
//
// Can only be used if jitter_ equals zero.
Timestamp PeriodIndexToTimestamp(int64 index) const;
// Given a Timestamp, finds the closest sync Timestamp based on
// first_timestamp_ and the desired fps.
//
// Can only be used if jitter_ equals zero.
int64 TimestampToPeriodIndex(Timestamp timestamp) const;
// Outputs a packet if it is in range (start_time_, end_time_).
void OutputWithinLimits(CalculatorContext* cc, const Packet& packet) const;
// The timestamp of the first packet received.
Timestamp first_timestamp_;
// Number of frames per second (desired output frequency).
double frame_rate_;
// Inverse of frame_rate_.
int64 frame_time_usec_;
// Number of periods that have passed (= #packets sent to the output).
//
// Can only be used if jitter_ equals zero.
int64 period_count_;
// The last packet that was received.
Packet last_packet_;
VideoHeader video_header_;
// The "DATA" input stream.
CollectionItemId input_data_id_;
// The "DATA" output stream.
CollectionItemId output_data_id_;
// Indicator whether to flush last packet even if its timestamp is greater
// than the final stream timestamp. Set to false when jitter_ is non-zero.
bool flush_last_packet_;
// Jitter-related variables.
std::unique_ptr<RandomBase> random_;
double jitter_ = 0.0;
Timestamp next_output_timestamp_;
// If specified, output timestamps are aligned with base_timestamp.
// Otherwise, they are aligned with the first input timestamp.
Timestamp base_timestamp_;
// If specified, only outputs at/after start_time are included.
Timestamp start_time_;
// If specified, only outputs before end_time are included.
Timestamp end_time_;
// If set, the output timestamps nearest to start_time and end_time
// are included in the output, even if the nearest timestamp is not
// between start_time and end_time.
bool round_limits_;
};
REGISTER_CALCULATOR(PacketResamplerCalculator);
namespace {
// Returns a TimestampDiff (assuming microseconds) corresponding to the
// given time in seconds.
@ -279,7 +152,10 @@ TimestampDiff TimestampDiffFromSeconds(double seconds) {
"SecureRandom is not available. With \"jitter\" specified, "
"PacketResamplerCalculator processing cannot proceed.");
}
packet_reservoir_random_ = CreateSecureRandom(seed);
}
packet_reservoir_ =
std::make_unique<PacketReservoir>(packet_reservoir_random_.get());
return ::mediapipe::OkStatus();
}
@ -294,6 +170,14 @@ TimestampDiff TimestampDiffFromSeconds(double seconds) {
}
}
if (jitter_ != 0.0 && random_ != nullptr) {
// Packet reservoir is used to make sure there's an output for every period,
// e.g. a partial period at the end of the stream.
if (packet_reservoir_->IsEnabled() &&
(first_timestamp_ == Timestamp::Unset() ||
(cc->InputTimestamp() - next_output_timestamp_min_).Value() >= 0)) {
auto curr_packet = cc->Inputs().Get(input_data_id_).Value();
packet_reservoir_->AddSample(curr_packet);
}
MP_RETURN_IF_ERROR(ProcessWithJitter(cc));
} else {
MP_RETURN_IF_ERROR(ProcessWithoutJitter(cc));
@ -303,11 +187,14 @@ TimestampDiff TimestampDiffFromSeconds(double seconds) {
}
void PacketResamplerCalculator::InitializeNextOutputTimestampWithJitter() {
next_output_timestamp_min_ = first_timestamp_;
next_output_timestamp_ =
first_timestamp_ + frame_time_usec_ * random_->RandFloat();
}
void PacketResamplerCalculator::UpdateNextOutputTimestampWithJitter() {
packet_reservoir_->Clear();
packet_reservoir_->Disable();
next_output_timestamp_ +=
frame_time_usec_ *
((1.0 - jitter_) + 2.0 * jitter_ * random_->RandFloat());
@ -339,10 +226,10 @@ void PacketResamplerCalculator::UpdateNextOutputTimestampWithJitter() {
while (true) {
const int64 last_diff =
(next_output_timestamp_ - last_packet_.Timestamp()).Value();
RET_CHECK_GT(last_diff, 0.0);
RET_CHECK_GT(last_diff, 0);
const int64 curr_diff =
(next_output_timestamp_ - cc->InputTimestamp()).Value();
if (curr_diff > 0.0) {
if (curr_diff > 0) {
break;
}
OutputWithinLimits(cc, (std::abs(curr_diff) > last_diff
@ -431,6 +318,9 @@ void PacketResamplerCalculator::UpdateNextOutputTimestampWithJitter() {
OutputWithinLimits(cc,
last_packet_.At(PeriodIndexToTimestamp(period_count_)));
}
if (!packet_reservoir_->IsEmpty()) {
OutputWithinLimits(cc, packet_reservoir_->GetSample());
}
return ::mediapipe::OkStatus();
}

View File

@ -0,0 +1,168 @@
#ifndef MEDIAPIPE_CALCULATORS_CORE_PACKET_RESAMPLER_CALCULATOR_H_
#define MEDIAPIPE_CALCULATORS_CORE_PACKET_RESAMPLER_CALCULATOR_H_
#include <cstdlib>
#include <memory>
#include <string>
#include "absl/strings/str_cat.h"
#include "mediapipe/calculators/core/packet_resampler_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/collection_item_id.h"
#include "mediapipe/framework/deps/mathutil.h"
#include "mediapipe/framework/deps/random_base.h"
#include "mediapipe/framework/formats/video_stream_header.h"
#include "mediapipe/framework/port/integral_types.h"
#include "mediapipe/framework/port/logging.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/framework/port/status_macros.h"
#include "mediapipe/framework/tool/options_util.h"
namespace mediapipe {
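// Reservoir-samples a single packet from a stream: each AddSample() call
// keeps the new packet with probability 1/count_, so every packet seen so
// far is equally likely to be the retained sample.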
class PacketReservoir {
public:
PacketReservoir(RandomBase* rng) : rng_(rng) {}
// Replaces the candidate with the current packet with probability 1/count_.
void AddSample(Packet sample) {
if (rng_->UnbiasedUniform(++count_) == 0) {
reservoir_ = sample;
}
}
bool IsEnabled() { return rng_ && enabled_; }
void Disable() {
if (enabled_) enabled_ = false;
}
void Clear() { count_ = 0; }
bool IsEmpty() { return count_ == 0; }
Packet GetSample() { return reservoir_; }
private:
RandomBase* rng_;
bool enabled_ = true;
int32 count_ = 0;
Packet reservoir_;
};
// This calculator is used to normalize the frequency of the packets
// out of a stream. Given a desired frame rate, packets are going to be
// removed or added to achieve it.
//
// The jitter feature is disabled by default. To enable it, you need to
// implement CreateSecureRandom(const std::string&).
//
// The data stream may be either specified as the only stream (by index)
// or as the stream with tag "DATA".
//
// The input and output streams may be accompanied by a VIDEO_HEADER
// stream. This stream includes a VideoHeader at Timestamp::PreStream().
// The input VideoHeader on the VIDEO_HEADER stream will always be updated
// with the resampler frame rate no matter what the options value for
// output_header is before being output on the output VIDEO_HEADER stream.
// If the input VideoHeader is not available, then only the frame rate
// value will be set in the output.
//
// Related:
// packet_downsampler_calculator.cc: skips packets regardless of timestamps.
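//
// Example config (illustrative; assumes the options proto exposes a
// frame_rate field matching the frame_rate_ member below):
// node {
//   calculator: "PacketResamplerCalculator"
//   input_stream: "DATA:input_frames"
//   output_stream: "DATA:sampled_frames"
//   options {
//     [mediapipe.PacketResamplerCalculatorOptions.ext] {
//       frame_rate: 30
//     }
//   }
// }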
class PacketResamplerCalculator : public CalculatorBase {
public:
static ::mediapipe::Status GetContract(CalculatorContract* cc);
::mediapipe::Status Open(CalculatorContext* cc) override;
::mediapipe::Status Close(CalculatorContext* cc) override;
::mediapipe::Status Process(CalculatorContext* cc) override;
private:
// Calculates the first sampled timestamp that incorporates a jittering
// offset.
void InitializeNextOutputTimestampWithJitter();
// Calculates the next sampled timestamp that incorporates a jittering offset.
void UpdateNextOutputTimestampWithJitter();
// Logic for Process() when jitter_ != 0.0.
::mediapipe::Status ProcessWithJitter(CalculatorContext* cc);
// Logic for Process() when jitter_ == 0.0.
::mediapipe::Status ProcessWithoutJitter(CalculatorContext* cc);
// Given the current count of periods that have passed, this returns
// the next valid timestamp of the middle point of the next period:
// if count is 0, it returns the first_timestamp_.
// if count is 1, it returns the first_timestamp_ + period (corresponding
// to the first tick using exact fps)
// e.g. for frame_rate=30 and first_timestamp_=0:
// 0: 0
// 1: 33333
// 2: 66667
// 3: 100000
//
// Can only be used if jitter_ equals zero.
Timestamp PeriodIndexToTimestamp(int64 index) const;
// Given a Timestamp, finds the closest sync Timestamp based on
// first_timestamp_ and the desired fps.
//
// Can only be used if jitter_ equals zero.
int64 TimestampToPeriodIndex(Timestamp timestamp) const;
// Outputs a packet if it is in range (start_time_, end_time_).
void OutputWithinLimits(CalculatorContext* cc, const Packet& packet) const;
// The timestamp of the first packet received.
Timestamp first_timestamp_;
// Number of frames per second (desired output frequency).
double frame_rate_;
// Inverse of frame_rate_.
int64 frame_time_usec_;
// Number of periods that have passed (= #packets sent to the output).
//
// Can only be used if jitter_ equals zero.
int64 period_count_;
// The last packet that was received.
Packet last_packet_;
VideoHeader video_header_;
// The "DATA" input stream.
CollectionItemId input_data_id_;
// The "DATA" output stream.
CollectionItemId output_data_id_;
// Indicator whether to flush last packet even if its timestamp is greater
// than the final stream timestamp. Set to false when jitter_ is non-zero.
bool flush_last_packet_;
// Jitter-related variables.
std::unique_ptr<RandomBase> random_;
double jitter_ = 0.0;
Timestamp next_output_timestamp_;
Timestamp next_output_timestamp_min_;
// If specified, output timestamps are aligned with base_timestamp.
// Otherwise, they are aligned with the first input timestamp.
Timestamp base_timestamp_;
// If specified, only outputs at/after start_time are included.
Timestamp start_time_;
// If specified, only outputs before end_time are included.
Timestamp end_time_;
// If set, the output timestamps nearest to start_time and end_time
// are included in the output, even if the nearest timestamp is not
// between start_time and end_time.
bool round_limits_;
// Packet reservoir used for sampling a random packet out of a partial
// period when jitter is enabled.
std::unique_ptr<PacketReservoir> packet_reservoir_;
// Random number generator used in packet_reservoir_.
std::unique_ptr<RandomBase> packet_reservoir_random_;
};
} // namespace mediapipe
#endif // MEDIAPIPE_CALCULATORS_CORE_PACKET_RESAMPLER_CALCULATOR_H_

View File

@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/calculators/core/packet_resampler_calculator.h"
#include <memory>
#include <string>
#include <vector>
@ -29,7 +31,6 @@
namespace mediapipe {
namespace {
// A simple version of CalculatorRunner with built-in convenience
// methods for setting inputs from a vector and checking outputs
// against expected outputs (both timestamps and contents).

View File

@ -0,0 +1,304 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Declaration of PacketThinnerCalculator.
#include <cmath> // for ceil
#include <memory>
#include "mediapipe/calculators/core/packet_thinner_calculator.pb.h"
#include "mediapipe/framework/calculator_context.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/video_stream_header.h"
#include "mediapipe/framework/port/integral_types.h"
#include "mediapipe/framework/port/logging.h"
#include "mediapipe/framework/port/status.h"
namespace mediapipe {
namespace {
const double kTimebaseUs = 1000000; // Microseconds.
const char* const kPeriodTag = "PERIOD";
} // namespace
// This calculator is used to thin an input stream of Packets.
// An example application would be to sample decoded frames of video
// at a coarser temporal resolution. Unless otherwise stated, all
// timestamps are in units of microseconds.
//
// Thinning can be accomplished in one of two ways:
// 1) asynchronous thinning (known below as async):
// Algorithm does not rely on a master clock and is parameterized only
// by a single option -- the period. Once a packet is emitted, the
// thinner will discard subsequent packets for the duration of the period
// [Analogous to a refractory period during which packet emission is
// suppressed.]
// Packets arriving before start_time are discarded, as are packets
// arriving at or after end_time.
// 2) synchronous thinning (known below as sync):
// There are two variants of this algorithm, both parameterized by a
// start_time and a period. As in (1), packets arriving before start_time
// or at/after end_time are discarded. Otherwise, at most one packet is
// emitted during a period, centered at timestamps generated by the
// expression:
// start_time + i * period [where i is a non-negative integer]
// During each period, the packet closest to the generated timestamp is
// emitted (latest in the case of ties). In the first variant
// (sync_output_timestamps = true), the emitted packet is output at the
// generated timestamp. In the second variant, the packet is output at
// its original timestamp. Both variants emit exactly the same packets,
// but at different timestamps.
//
// Thinning period can be provided in the calculator options or via a
// side packet with the tag "PERIOD".
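//
// For example (values mirror packet_thinner_calculator_test.cc): a sync
// thinner with start_time 0 and period 5 applied to packets at timestamps
// {2, 4, 6, 8, 10, 12, 14} emits the packets originally stamped
// {2, 6, 10, 14}; with sync_output_timestamps they are re-timestamped to
// {0, 5, 10, 15}.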
//
// Example config:
// node {
// calculator: "PacketThinnerCalculator"
// input_stream: "signal"
// output_stream: "output"
// options {
// [mediapipe.PacketThinnerCalculatorOptions.ext] {
// thinner_type: SYNC
// period: 10
// sync_output_timestamps: true
// update_frame_rate: false
// }
// }
// }
class PacketThinnerCalculator : public CalculatorBase {
public:
PacketThinnerCalculator() {}
~PacketThinnerCalculator() override {}
static ::mediapipe::Status GetContract(CalculatorContract* cc) {
cc->Inputs().Index(0).SetAny();
cc->Outputs().Index(0).SetSameAs(&cc->Inputs().Index(0));
if (cc->InputSidePackets().HasTag(kPeriodTag)) {
cc->InputSidePackets().Tag(kPeriodTag).Set<int64>();
}
return ::mediapipe::OkStatus();
}
::mediapipe::Status Open(CalculatorContext* cc) override;
::mediapipe::Status Close(CalculatorContext* cc) override;
::mediapipe::Status Process(CalculatorContext* cc) override {
if (cc->InputTimestamp() < start_time_) {
return ::mediapipe::OkStatus(); // Drop packets before start_time_.
} else if (cc->InputTimestamp() >= end_time_) {
if (!cc->Outputs().Index(0).IsClosed()) {
cc->Outputs()
.Index(0)
.Close(); // No more Packets will be output after end_time_.
}
return ::mediapipe::OkStatus();
} else {
return thinner_type_ == PacketThinnerCalculatorOptions::ASYNC
? AsyncThinnerProcess(cc)
: SyncThinnerProcess(cc);
}
}
private:
// Implementation of ASYNC and SYNC versions of thinner algorithm.
::mediapipe::Status AsyncThinnerProcess(CalculatorContext* cc);
::mediapipe::Status SyncThinnerProcess(CalculatorContext* cc);
// Cached option.
PacketThinnerCalculatorOptions::ThinnerType thinner_type_;
// Given a Timestamp, finds the closest sync Timestamp
// based on start_time_ and period_. This can be earlier or
// later than given Timestamp, but is guaranteed to be within
// half a period_.
Timestamp NearestSyncTimestamp(Timestamp now) const;
// Cached option used by both async and sync thinners.
TimestampDiff period_; // Interval during which only one packet is emitted.
Timestamp start_time_; // Cached option - default Timestamp::Min()
Timestamp end_time_; // Cached option - default Timestamp::Max()
// Only used by async thinner:
Timestamp next_valid_timestamp_; // Suppress packets until this timestamp.
// Only used by sync thinner:
Packet saved_packet_; // Best packet not yet emitted.
bool sync_output_timestamps_; // Cached option.
};
REGISTER_CALCULATOR(PacketThinnerCalculator);
namespace {
TimestampDiff abs(TimestampDiff t) { return t < 0 ? -t : t; }
} // namespace
::mediapipe::Status PacketThinnerCalculator::Open(CalculatorContext* cc) {
auto& options = cc->Options<PacketThinnerCalculatorOptions>();
thinner_type_ = options.thinner_type();
// This check enables us to assume only two thinner types exist in Process()
CHECK(thinner_type_ == PacketThinnerCalculatorOptions::ASYNC ||
thinner_type_ == PacketThinnerCalculatorOptions::SYNC)
<< "Unsupported thinner type.";
if (thinner_type_ == PacketThinnerCalculatorOptions::ASYNC) {
// ASYNC thinner outputs packets with the same timestamp as their input so
// it's safe to SetOffset(0). SYNC thinner manipulates timestamps of its
// output so we don't do this for that case.
cc->SetOffset(0);
}
if (cc->InputSidePackets().HasTag(kPeriodTag)) {
period_ =
TimestampDiff(cc->InputSidePackets().Tag(kPeriodTag).Get<int64>());
} else {
period_ = TimestampDiff(options.period());
}
CHECK_LT(TimestampDiff(0), period_) << "Specified period must be positive.";
if (options.has_start_time()) {
start_time_ = Timestamp(options.start_time());
} else if (thinner_type_ == PacketThinnerCalculatorOptions::ASYNC) {
start_time_ = Timestamp::Min();
} else {
start_time_ = Timestamp(0);
}
end_time_ =
options.has_end_time() ? Timestamp(options.end_time()) : Timestamp::Max();
CHECK_LT(start_time_, end_time_)
<< "Invalid PacketThinner: start_time must be earlier than end_time";
sync_output_timestamps_ = options.sync_output_timestamps();
next_valid_timestamp_ = start_time_;
// Drop packets until this time.
cc->Outputs().Index(0).SetNextTimestampBound(start_time_);
if (!cc->Inputs().Index(0).Header().IsEmpty()) {
if (options.update_frame_rate()) {
const VideoHeader& video_header =
cc->Inputs().Index(0).Header().Get<VideoHeader>();
double new_frame_rate;
if (thinner_type_ == PacketThinnerCalculatorOptions::ASYNC) {
new_frame_rate =
video_header.frame_rate /
ceil(video_header.frame_rate * options.period() / kTimebaseUs);
} else {
const double sampling_rate = kTimebaseUs / options.period();
new_frame_rate = video_header.frame_rate < sampling_rate
? video_header.frame_rate
: sampling_rate;
}
std::unique_ptr<VideoHeader> header(new VideoHeader);
header->format = video_header.format;
header->width = video_header.width;
header->height = video_header.height;
header->frame_rate = new_frame_rate;
cc->Outputs().Index(0).SetHeader(Adopt(header.release()));
} else {
cc->Outputs().Index(0).SetHeader(cc->Inputs().Index(0).Header());
}
}
return ::mediapipe::OkStatus();
}
::mediapipe::Status PacketThinnerCalculator::Close(CalculatorContext* cc) {
// Emit any saved packets before quitting.
if (!saved_packet_.IsEmpty()) {
// Only sync thinner should have saved packets.
CHECK_EQ(PacketThinnerCalculatorOptions::SYNC, thinner_type_);
if (sync_output_timestamps_) {
cc->Outputs().Index(0).AddPacket(
saved_packet_.At(NearestSyncTimestamp(saved_packet_.Timestamp())));
} else {
cc->Outputs().Index(0).AddPacket(saved_packet_);
}
}
return ::mediapipe::OkStatus();
}
::mediapipe::Status PacketThinnerCalculator::AsyncThinnerProcess(
CalculatorContext* cc) {
if (cc->InputTimestamp() >= next_valid_timestamp_) {
cc->Outputs().Index(0).AddPacket(
cc->Inputs().Index(0).Value()); // Emit current packet.
next_valid_timestamp_ = cc->InputTimestamp() + period_;
// Guaranteed not to emit packets seen during refractory period.
cc->Outputs().Index(0).SetNextTimestampBound(next_valid_timestamp_);
}
return ::mediapipe::OkStatus();
}
::mediapipe::Status PacketThinnerCalculator::SyncThinnerProcess(
CalculatorContext* cc) {
if (saved_packet_.IsEmpty()) {
// If no packet has been saved, store the current packet.
saved_packet_ = cc->Inputs().Index(0).Value();
cc->Outputs().Index(0).SetNextTimestampBound(
sync_output_timestamps_ ? NearestSyncTimestamp(cc->InputTimestamp())
: cc->InputTimestamp());
} else {
// Saved packet exists -- update or emit.
const Timestamp saved = saved_packet_.Timestamp();
const Timestamp saved_sync = NearestSyncTimestamp(saved);
const Timestamp now = cc->InputTimestamp();
const Timestamp now_sync = NearestSyncTimestamp(now);
CHECK_LE(saved_sync, now_sync);
if (saved_sync == now_sync) {
// Saved Packet is in same interval as current packet.
// Replace saved packet with current if it is at least as
// central as the saved packet wrt temporal interval.
// [We break ties in favor of fresher packets]
if (abs(now - now_sync) <= abs(saved - saved_sync)) {
saved_packet_ = cc->Inputs().Index(0).Value();
}
} else {
// Saved packet is the best packet from earlier interval: emit!
if (sync_output_timestamps_) {
cc->Outputs().Index(0).AddPacket(saved_packet_.At(saved_sync));
cc->Outputs().Index(0).SetNextTimestampBound(now_sync);
} else {
cc->Outputs().Index(0).AddPacket(saved_packet_);
cc->Outputs().Index(0).SetNextTimestampBound(now);
}
// Current packet is the first one we've seen from new interval -- save!
saved_packet_ = cc->Inputs().Index(0).Value();
}
}
return ::mediapipe::OkStatus();
}
Timestamp PacketThinnerCalculator::NearestSyncTimestamp(Timestamp now) const {
CHECK_NE(start_time_, Timestamp::Unset())
<< "Method only valid for sync thinner calculator.";
// Computation is done using int64 arithmetic. No easy way to avoid
// since Timestamps don't support div and multiply.
const int64 now64 = now.Value();
const int64 start64 = start_time_.Value();
const int64 period64 = period_.Value();
CHECK_LE(0, period64);
// Round now64 to its closest interval (units of period64).
int64 sync64 =
(now64 - start64 + period64 / 2) / period64 * period64 + start64;
CHECK_LE(abs(now64 - sync64), period64 / 2)
<< "start64: " << start64 << "; now64: " << now64
<< "; sync64: " << sync64;
return Timestamp(sync64);
}
} // namespace mediapipe

View File

@ -0,0 +1,66 @@
// Copyright 2018 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe;
import "mediapipe/framework/calculator.proto";
message PacketThinnerCalculatorOptions {
extend CalculatorOptions {
optional PacketThinnerCalculatorOptions ext = 288533508;
}
enum ThinnerType {
ASYNC = 1; // Asynchronous thinner, described below [default].
SYNC = 2; // Synchronous thinner, also described below.
}
optional ThinnerType thinner_type = 1 [default = ASYNC];
// The period (in microseconds) specifies the temporal interval during which
// only a single packet is emitted in the output stream. Has subtly different
// semantics depending on the thinner type, as follows.
//
// Async thinner: this option is a refractory period -- once a packet is
// emitted, we guarantee that no packets will be emitted for period ticks.
//
// Sync thinner: the period specifies a temporal interval during which
// only one packet is emitted. The emitted packet is guaranteed to be
// the one closest to the center of the temporal interval (no guarantee on
// how ties are broken). More specifically,
// intervals are centered at start_time + i * period
// (for non-negative integers i).
// Thus, each interval extends period/2 ticks before and after its center.
// Additionally, in the sync thinner any packets earlier than start_time
// are discarded and the thinner calls Close() once timestamp equals or
// exceeds end_time.
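//
// For example (illustrative), with start_time = 0 and period = 5 the
// intervals are centered at 0, 5, 10, ...; a packet at timestamp 7 falls in
// the interval centered at 5, while a packet at timestamp 8 falls in the
// interval centered at 10.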
optional int64 period = 2 [default = 1];
// Packets before start_time and at/after end_time are discarded.
// Additionally, for a sync thinner, start time specifies the center of
// time intervals as described above and therefore should be set explicitly.
optional int64 start_time = 3; // If not specified, set to 0 for SYNC type,
// and set to Timestamp::Min() for ASYNC type.
optional int64 end_time = 4; // Set to Timestamp::Max() if not specified.
// Whether the timestamps of packets emitted by sync thinner should
// correspond to the center of their corresponding temporal interval.
// If false, packets are emitted at their original timestamps (as in the
// async thinner).
optional bool sync_output_timestamps = 5 [default = true];
// If true, update the frame rate in the header, if it's available, to an
// estimated frame rate due to the sampling.
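// For example (mirrors FrameRateTest1 in the calculator's tests): an async
// thinner with period 5 applied to a header reporting 1000000/2 frames per
// second (one packet every 2 us) rewrites the header frame rate to
// 1000000/6, since the effective sampling period rounds up to three input
// intervals (6 us).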
optional bool update_frame_rate = 6 [default = false];
}

View File

@ -0,0 +1,357 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <memory>
#include <string>
#include <vector>
#include "absl/strings/str_cat.h"
#include "mediapipe/calculators/core/packet_thinner_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_runner.h"
#include "mediapipe/framework/formats/video_stream_header.h"
#include "mediapipe/framework/port/gmock.h"
#include "mediapipe/framework/port/gtest.h"
#include "mediapipe/framework/port/integral_types.h"
#include "mediapipe/framework/port/status_matchers.h"
namespace mediapipe {
namespace {
// A simple version of CalculatorRunner with built-in convenience methods for
// setting inputs from a vector and checking outputs against a vector of
// expected outputs.
class SimpleRunner : public CalculatorRunner {
public:
explicit SimpleRunner(const CalculatorOptions& options)
: CalculatorRunner("PacketThinnerCalculator", options) {
SetNumInputs(1);
SetNumOutputs(1);
SetNumInputSidePackets(0);
}
explicit SimpleRunner(const CalculatorGraphConfig::Node& node)
: CalculatorRunner(node) {}
void SetInput(const std::vector<int>& timestamp_list) {
MutableInputs()->Index(0).packets.clear();
for (const int ts : timestamp_list) {
MutableInputs()->Index(0).packets.push_back(
MakePacket<std::string>(absl::StrCat("Frame #", ts))
.At(Timestamp(ts)));
}
}
void SetFrameRate(const double frame_rate) {
auto video_header = absl::make_unique<VideoHeader>();
video_header->frame_rate = frame_rate;
MutableInputs()->Index(0).header = Adopt(video_header.release());
}
std::vector<int64> GetOutputTimestamps() const {
std::vector<int64> timestamps;
for (const Packet& packet : Outputs().Index(0).packets) {
timestamps.emplace_back(packet.Timestamp().Value());
}
return timestamps;
}
double GetFrameRate() const {
CHECK(!Outputs().Index(0).header.IsEmpty());
return Outputs().Index(0).header.Get<VideoHeader>().frame_rate;
}
};
// Check that thinner respects start_time and end_time options.
// We only test with one thinner because the logic for start & end time
// handling is shared across both types of thinner in Process().
TEST(PacketThinnerCalculatorTest, StartAndEndTimeTest) {
CalculatorOptions options;
auto* extension =
options.MutableExtension(PacketThinnerCalculatorOptions::ext);
extension->set_thinner_type(PacketThinnerCalculatorOptions::ASYNC);
extension->set_period(5);
extension->set_start_time(4);
extension->set_end_time(12);
SimpleRunner runner(options);
runner.SetInput({2, 3, 5, 7, 11, 13, 17, 19, 23, 29});
MP_ASSERT_OK(runner.Run());
const std::vector<int64> expected_timestamps = {5, 11};
EXPECT_EQ(expected_timestamps, runner.GetOutputTimestamps());
}
TEST(PacketThinnerCalculatorTest, AsyncUniformStreamThinningTest) {
CalculatorOptions options;
auto* extension =
options.MutableExtension(PacketThinnerCalculatorOptions::ext);
extension->set_thinner_type(PacketThinnerCalculatorOptions::ASYNC);
extension->set_period(5);
SimpleRunner runner(options);
runner.SetInput({2, 4, 6, 8, 10, 12, 14});
MP_ASSERT_OK(runner.Run());
const std::vector<int64> expected_timestamps = {2, 8, 14};
EXPECT_EQ(expected_timestamps, runner.GetOutputTimestamps());
}
TEST(PacketThinnerCalculatorTest, ASyncUniformStreamThinningTestBySidePacket) {
// Note: async thinner with the period provided via a side packet.
CalculatorGraphConfig::Node node;
node.set_calculator("PacketThinnerCalculator");
node.add_input_side_packet("PERIOD:period");
node.add_input_stream("input_stream");
node.add_output_stream("output_stream");
auto* extension = node.mutable_options()->MutableExtension(
PacketThinnerCalculatorOptions::ext);
extension->set_thinner_type(PacketThinnerCalculatorOptions::ASYNC);
extension->set_start_time(0);
extension->set_sync_output_timestamps(false);
SimpleRunner runner(node);
runner.SetInput({2, 4, 6, 8, 10, 12, 14});
runner.MutableSidePackets()->Tag("PERIOD") = MakePacket<int64>(5);
MP_ASSERT_OK(runner.Run());
const std::vector<int64> expected_timestamps = {2, 8, 14};
EXPECT_EQ(expected_timestamps, runner.GetOutputTimestamps());
}
TEST(PacketThinnerCalculatorTest, SyncUniformStreamThinningTest1) {
// Note: sync runner but outputting *original* timestamps.
CalculatorOptions options;
auto* extension =
options.MutableExtension(PacketThinnerCalculatorOptions::ext);
extension->set_thinner_type(PacketThinnerCalculatorOptions::SYNC);
extension->set_start_time(0);
extension->set_period(5);
extension->set_sync_output_timestamps(false);
SimpleRunner runner(options);
runner.SetInput({2, 4, 6, 8, 10, 12, 14});
MP_ASSERT_OK(runner.Run());
const std::vector<int64> expected_timestamps = {2, 6, 10, 14};
EXPECT_EQ(expected_timestamps, runner.GetOutputTimestamps());
}
TEST(PacketThinnerCalculatorTest, SyncUniformStreamThinningTestBySidePacket1) {
// Note: sync runner but outputting *original* timestamps.
CalculatorGraphConfig::Node node;
node.set_calculator("PacketThinnerCalculator");
node.add_input_side_packet("PERIOD:period");
node.add_input_stream("input_stream");
node.add_output_stream("output_stream");
auto* extension = node.mutable_options()->MutableExtension(
PacketThinnerCalculatorOptions::ext);
extension->set_thinner_type(PacketThinnerCalculatorOptions::SYNC);
extension->set_start_time(0);
extension->set_sync_output_timestamps(false);
SimpleRunner runner(node);
runner.SetInput({2, 4, 6, 8, 10, 12, 14});
runner.MutableSidePackets()->Tag("PERIOD") = MakePacket<int64>(5);
MP_ASSERT_OK(runner.Run());
const std::vector<int64> expected_timestamps = {2, 6, 10, 14};
EXPECT_EQ(expected_timestamps, runner.GetOutputTimestamps());
}
TEST(PacketThinnerCalculatorTest, SyncUniformStreamThinningTest2) {
// Same test but now with synced timestamps.
CalculatorOptions options;
auto* extension =
options.MutableExtension(PacketThinnerCalculatorOptions::ext);
extension->set_thinner_type(PacketThinnerCalculatorOptions::SYNC);
extension->set_start_time(0);
extension->set_period(5);
extension->set_sync_output_timestamps(true);
SimpleRunner runner(options);
runner.SetInput({2, 4, 6, 8, 10, 12, 14});
MP_ASSERT_OK(runner.Run());
const std::vector<int64> expected_timestamps = {0, 5, 10, 15};
EXPECT_EQ(expected_timestamps, runner.GetOutputTimestamps());
}
// Test: Given a stream with timestamps corresponding to the first ten prime
// numbers and a period of 5, confirm that the timestamps of the thinned
// stream match expectations.
TEST(PacketThinnerCalculatorTest, PrimeStreamThinningTest1) {
// ASYNC thinner.
CalculatorOptions options;
auto* extension =
options.MutableExtension(PacketThinnerCalculatorOptions::ext);
extension->set_thinner_type(PacketThinnerCalculatorOptions::ASYNC);
extension->set_period(5);
SimpleRunner runner(options);
runner.SetInput({2, 3, 5, 7, 11, 13, 17, 19, 23, 29});
MP_ASSERT_OK(runner.Run());
const std::vector<int64> expected_timestamps = {2, 7, 13, 19, 29};
EXPECT_EQ(expected_timestamps, runner.GetOutputTimestamps());
}
TEST(PacketThinnerCalculatorTest, PrimeStreamThinningTest2) {
// SYNC with original timestamps.
CalculatorOptions options;
auto* extension =
options.MutableExtension(PacketThinnerCalculatorOptions::ext);
extension->set_thinner_type(PacketThinnerCalculatorOptions::SYNC);
extension->set_start_time(0);
extension->set_period(5);
extension->set_sync_output_timestamps(false);
SimpleRunner runner(options);
runner.SetInput({2, 3, 5, 7, 11, 13, 17, 19, 23, 29});
MP_ASSERT_OK(runner.Run());
const std::vector<int64> expected_timestamps = {2, 5, 11, 17, 19, 23, 29};
EXPECT_EQ(expected_timestamps, runner.GetOutputTimestamps());
}
// Confirm that Calculator correctly handles boundary cases.
TEST(PacketThinnerCalculatorTest, BoundaryTimestampTest1) {
// Odd period, negative start_time
CalculatorOptions options;
auto* extension =
options.MutableExtension(PacketThinnerCalculatorOptions::ext);
extension->set_thinner_type(PacketThinnerCalculatorOptions::SYNC);
extension->set_start_time(-10);
extension->set_period(5);
extension->set_sync_output_timestamps(true);
SimpleRunner runner(options);
// Two timestamps falling on either side of a period boundary.
runner.SetInput({2, 3});
MP_ASSERT_OK(runner.Run());
const std::vector<int64> expected_timestamps = {0, 5};
EXPECT_EQ(expected_timestamps, runner.GetOutputTimestamps());
}
TEST(PacketThinnerCalculatorTest, BoundaryTimestampTest2) {
// Even period, negative start_time, negative packet timestamps.
CalculatorOptions options;
auto* extension =
options.MutableExtension(PacketThinnerCalculatorOptions::ext);
extension->set_thinner_type(PacketThinnerCalculatorOptions::SYNC);
extension->set_start_time(-144);
extension->set_period(6);
extension->set_sync_output_timestamps(true);
SimpleRunner runner(options);
// Two pairs of timestamps, each falling on either side of a period boundary.
runner.SetInput({-4, -3, 8, 9});
MP_ASSERT_OK(runner.Run());
const std::vector<int64> expected_timestamps = {-6, 0, 6, 12};
EXPECT_EQ(expected_timestamps, runner.GetOutputTimestamps());
}
TEST(PacketThinnerCalculatorTest, FrameRateTest1) {
CalculatorOptions options;
auto* extension =
options.MutableExtension(PacketThinnerCalculatorOptions::ext);
extension->set_thinner_type(PacketThinnerCalculatorOptions::ASYNC);
extension->set_period(5);
extension->set_update_frame_rate(true);
SimpleRunner runner(options);
runner.SetInput({2, 4, 6, 8, 10, 12, 14});
runner.SetFrameRate(1000000.0 / 2);
MP_ASSERT_OK(runner.Run());
const std::vector<int64> expected_timestamps = {2, 8, 14};
EXPECT_EQ(expected_timestamps, runner.GetOutputTimestamps());
// The true sampling period is 6.
EXPECT_DOUBLE_EQ(1000000.0 / 6, runner.GetFrameRate());
}
TEST(PacketThinnerCalculatorTest, FrameRateTest2) {
CalculatorOptions options;
auto* extension =
options.MutableExtension(PacketThinnerCalculatorOptions::ext);
extension->set_thinner_type(PacketThinnerCalculatorOptions::ASYNC);
extension->set_period(5);
extension->set_update_frame_rate(true);
SimpleRunner runner(options);
runner.SetInput({8, 16, 24, 32, 40, 48, 56});
runner.SetFrameRate(1000000.0 / 8);
MP_ASSERT_OK(runner.Run());
const std::vector<int64> expected_timestamps = {8, 16, 24, 32, 40, 48, 56};
EXPECT_EQ(expected_timestamps, runner.GetOutputTimestamps());
// The true sampling period is still 8.
EXPECT_DOUBLE_EQ(1000000.0 / 8, runner.GetFrameRate());
}
TEST(PacketThinnerCalculatorTest, FrameRateTest3) {
// Note: sync runner but outputting *original* timestamps.
CalculatorOptions options;
auto* extension =
options.MutableExtension(PacketThinnerCalculatorOptions::ext);
extension->set_thinner_type(PacketThinnerCalculatorOptions::SYNC);
extension->set_start_time(0);
extension->set_period(5);
extension->set_sync_output_timestamps(false);
extension->set_update_frame_rate(true);
SimpleRunner runner(options);
runner.SetInput({2, 4, 6, 8, 10, 12, 14});
runner.SetFrameRate(1000000.0 / 2);
MP_ASSERT_OK(runner.Run());
const std::vector<int64> expected_timestamps = {2, 6, 10, 14};
EXPECT_EQ(expected_timestamps, runner.GetOutputTimestamps());
// The true (long-run) sampling period is 5.
EXPECT_DOUBLE_EQ(1000000.0 / 5, runner.GetFrameRate());
}
TEST(PacketThinnerCalculatorTest, FrameRateTest4) {
// Same test but now with synced timestamps.
CalculatorOptions options;
auto* extension =
options.MutableExtension(PacketThinnerCalculatorOptions::ext);
extension->set_thinner_type(PacketThinnerCalculatorOptions::SYNC);
extension->set_start_time(0);
extension->set_period(5);
extension->set_sync_output_timestamps(true);
extension->set_update_frame_rate(true);
SimpleRunner runner(options);
runner.SetInput({2, 4, 6, 8, 10, 12, 14});
runner.SetFrameRate(1000000.0 / 2);
MP_ASSERT_OK(runner.Run());
const std::vector<int64> expected_timestamps = {0, 5, 10, 15};
EXPECT_EQ(expected_timestamps, runner.GetOutputTimestamps());
// The true (long-run) sampling period is 5.
EXPECT_DOUBLE_EQ(1000000.0 / 5, runner.GetFrameRate());
}
TEST(PacketThinnerCalculatorTest, FrameRateTest5) {
CalculatorOptions options;
auto* extension =
options.MutableExtension(PacketThinnerCalculatorOptions::ext);
extension->set_thinner_type(PacketThinnerCalculatorOptions::SYNC);
extension->set_start_time(0);
extension->set_period(5);
extension->set_sync_output_timestamps(true);
extension->set_update_frame_rate(true);
SimpleRunner runner(options);
runner.SetInput({8, 16, 24, 32, 40, 48, 56});
runner.SetFrameRate(1000000.0 / 8);
MP_ASSERT_OK(runner.Run());
const std::vector<int64> expected_timestamps = {10, 15, 25, 30, 40, 50, 55};
EXPECT_EQ(expected_timestamps, runner.GetOutputTimestamps());
// The true (long-run) sampling period is 8.
EXPECT_DOUBLE_EQ(1000000.0 / 8, runner.GetFrameRate());
}
} // namespace
} // namespace mediapipe

View File

@ -17,6 +17,7 @@
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/framework/timestamp.h"
namespace mediapipe {
@ -86,6 +87,7 @@ class PreviousLoopbackCalculator : public CalculatorBase {
main_ts_.pop_front();
}
}
auto& loop_out = cc->Outputs().Get(loop_out_id_);
while (!main_ts_.empty() && !loopback_packets_.empty()) {
Timestamp main_timestamp = main_ts_.front();
@ -95,18 +97,31 @@ class PreviousLoopbackCalculator : public CalculatorBase {
if (previous_loopback.IsEmpty()) {
// TODO: SetCompleteTimestampBound would be more useful.
cc->Outputs()
.Get(loop_out_id_)
.SetNextTimestampBound(main_timestamp + 1);
loop_out.SetNextTimestampBound(main_timestamp + 1);
} else {
cc->Outputs().Get(loop_out_id_).AddPacket(std::move(previous_loopback));
loop_out.AddPacket(std::move(previous_loopback));
}
}
// In case of an empty loopback input, the next timestamp bound for
// loopback input is the loopback timestamp + 1. The next timestamp bound
// for output is set and the main_ts_ vector is truncated accordingly.
if (loopback_packet.IsEmpty() &&
loopback_packet.Timestamp() != Timestamp::Unstarted()) {
Timestamp loopback_bound =
loopback_packet.Timestamp().NextAllowedInStream();
while (!main_ts_.empty() && main_ts_.front() <= loopback_bound) {
main_ts_.pop_front();
}
if (main_ts_.empty()) {
loop_out.SetNextTimestampBound(loopback_bound.NextAllowedInStream());
}
}
if (!main_ts_.empty()) {
cc->Outputs().Get(loop_out_id_).SetNextTimestampBound(main_ts_.front());
loop_out.SetNextTimestampBound(main_ts_.front());
}
if (cc->Inputs().Get(main_id_).IsDone() && main_ts_.empty()) {
cc->Outputs().Get(loop_out_id_).Close();
loop_out.Close();
}
return ::mediapipe::OkStatus();
}

View File

@ -207,5 +207,64 @@ TEST(PreviousLoopbackCalculator, ClosesCorrectly) {
MP_EXPECT_OK(graph_.WaitUntilDone());
}
// Demonstrates that downstream calculators won't be blocked by
// always-empty-LOOP-stream.
TEST(PreviousLoopbackCalculator, EmptyLoopForever) {
std::vector<Packet> outputs;
CalculatorGraphConfig graph_config_ =
ParseTextProtoOrDie<CalculatorGraphConfig>(R"(
input_stream: 'in'
node {
calculator: 'PreviousLoopbackCalculator'
input_stream: 'MAIN:in'
input_stream: 'LOOP:previous'
input_stream_info: { tag_index: 'LOOP' back_edge: true }
output_stream: 'PREV_LOOP:previous'
}
# This calculator synchronizes its inputs as normal, so it is used
# to check that both "in" and "previous" are ready.
node {
calculator: 'PassThroughCalculator'
input_stream: 'in'
input_stream: 'previous'
output_stream: 'out'
output_stream: 'previous2'
}
node {
calculator: 'PacketOnCloseCalculator'
input_stream: 'out'
output_stream: 'close_out'
}
)");
tool::AddVectorSink("close_out", &graph_config_, &outputs);
CalculatorGraph graph_;
MP_ASSERT_OK(graph_.Initialize(graph_config_, {}));
MP_ASSERT_OK(graph_.StartRun({}));
auto send_packet = [&graph_](const std::string& input_name, int n) {
MP_EXPECT_OK(graph_.AddPacketToInputStream(
input_name, MakePacket<int>(n).At(Timestamp(n))));
};
send_packet("in", 0);
MP_EXPECT_OK(graph_.WaitUntilIdle());
EXPECT_EQ(TimestampValues(outputs), (std::vector<int64>{0}));
for (int main_ts = 1; main_ts < 50; ++main_ts) {
send_packet("in", main_ts);
MP_EXPECT_OK(graph_.WaitUntilIdle());
std::vector<int64> ts_values = TimestampValues(outputs);
EXPECT_EQ(ts_values.size(), main_ts + 1);
for (int j = 0; j < main_ts; ++j) {
EXPECT_EQ(ts_values[j], j);
}
}
MP_EXPECT_OK(graph_.CloseAllInputStreams());
MP_EXPECT_OK(graph_.WaitUntilIdle());
MP_EXPECT_OK(graph_.WaitUntilDone());
}
} // anonymous namespace
} // namespace mediapipe

View File

@ -20,6 +20,10 @@
#include "mediapipe/framework/formats/rect.pb.h"
#include "tensorflow/lite/interpreter.h"
#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE)
#include "tensorflow/lite/delegates/gpu/gl/gl_buffer.h"
#endif  // !MEDIAPIPE_DISABLE_GL_COMPUTE
namespace mediapipe {
// Example config:
@ -36,14 +40,21 @@ namespace mediapipe {
// }
// }
// }
typedef SplitVectorCalculator<TfLiteTensor> SplitTfLiteTensorVectorCalculator;
typedef SplitVectorCalculator<TfLiteTensor, false>
SplitTfLiteTensorVectorCalculator;
REGISTER_CALCULATOR(SplitTfLiteTensorVectorCalculator);
typedef SplitVectorCalculator<::mediapipe::NormalizedLandmark>
typedef SplitVectorCalculator<::mediapipe::NormalizedLandmark, false>
SplitLandmarkVectorCalculator;
REGISTER_CALCULATOR(SplitLandmarkVectorCalculator);
typedef SplitVectorCalculator<::mediapipe::NormalizedRect>
typedef SplitVectorCalculator<::mediapipe::NormalizedRect, false>
SplitNormalizedRectVectorCalculator;
REGISTER_CALCULATOR(SplitNormalizedRectVectorCalculator);
#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE)
typedef SplitVectorCalculator<::tflite::gpu::gl::GlBuffer, true>
MovableSplitGlBufferVectorCalculator;
REGISTER_CALCULATOR(MovableSplitGlBufferVectorCalculator);
#endif
} // namespace mediapipe

View File

@ -15,12 +15,14 @@
#ifndef MEDIAPIPE_CALCULATORS_CORE_SPLIT_VECTOR_CALCULATOR_H_
#define MEDIAPIPE_CALCULATORS_CORE_SPLIT_VECTOR_CALCULATOR_H_
#include <type_traits>
#include <vector>
#include "mediapipe/calculators/core/split_vector_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/canonical_errors.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/util/resource_util.h"
#include "tensorflow/lite/error_reporter.h"
#include "tensorflow/lite/interpreter.h"
@ -29,6 +31,20 @@
namespace mediapipe {
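// SFINAE helpers: each alias is well-formed only when T is (or is not)
// copy- or move-constructible, and is used to select the matching
// Process*Elements overload below.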
template <typename T>
using IsCopyable = std::enable_if_t<std::is_copy_constructible<T>::value, bool>;
template <typename T>
using IsNotCopyable =
std::enable_if_t<!std::is_copy_constructible<T>::value, bool>;
template <typename T>
using IsMovable = std::enable_if_t<std::is_move_constructible<T>::value, bool>;
template <typename T>
using IsNotMovable =
std::enable_if_t<!std::is_move_constructible<T>::value, bool>;
// Splits an input packet with std::vector<T> into multiple std::vector<T>
// output packets using the [begin, end) ranges specified in
// SplitVectorCalculatorOptions. If the option "element_only" is set to true,
@ -39,7 +55,7 @@ namespace mediapipe {
// combined into one vector.
// To use this class for a particular type T, register a calculator using
// SplitVectorCalculator<T>.
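//
// Example options (illustrative; field names as used elsewhere in this file
// and in the tests):
//   [mediapipe.SplitVectorCalculatorOptions.ext] {
//     ranges: { begin: 0 end: 4 }
//     ranges: { begin: 4 end: 8 }
//     combine_outputs: true
//   }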
template <typename T>
template <typename T, bool move_elements>
class SplitVectorCalculator : public CalculatorBase {
public:
static ::mediapipe::Status GetContract(CalculatorContract* cc) {
@ -51,23 +67,16 @@ class SplitVectorCalculator : public CalculatorBase {
const auto& options =
cc->Options<::mediapipe::SplitVectorCalculatorOptions>();
if (!std::is_copy_constructible<T>::value || move_elements) {
// Ranges of elements shouldn't overlap when the vector contains
// non-copyable elements.
RET_CHECK_OK(checkRangesDontOverlap(options));
}
if (options.combine_outputs()) {
RET_CHECK_EQ(cc->Outputs().NumEntries(), 1);
cc->Outputs().Index(0).Set<std::vector<T>>();
for (int i = 0; i < options.ranges_size() - 1; ++i) {
for (int j = i + 1; j < options.ranges_size(); ++j) {
const auto& range_0 = options.ranges(i);
const auto& range_1 = options.ranges(j);
if ((range_0.begin() >= range_1.begin() &&
range_0.begin() < range_1.end()) ||
(range_1.begin() >= range_0.begin() &&
range_1.begin() < range_0.end())) {
return ::mediapipe::InvalidArgumentError(
"Ranges must be non-overlapping when using combine_outputs "
"option.");
}
}
}
RET_CHECK_OK(checkRangesDontOverlap(options));
} else {
if (cc->Outputs().NumEntries() != options.ranges_size()) {
return ::mediapipe::InvalidArgumentError(
@ -117,14 +126,26 @@ class SplitVectorCalculator : public CalculatorBase {
}
::mediapipe::Status Process(CalculatorContext* cc) override {
const auto& input = cc->Inputs().Index(0).Get<std::vector<T>>();
RET_CHECK_GE(input.size(), max_range_end_);
if (cc->Inputs().Index(0).IsEmpty()) return ::mediapipe::OkStatus();
if (move_elements) {
return ProcessMovableElements<T>(cc);
} else {
return ProcessCopyableElements<T>(cc);
}
}
template <typename U, IsCopyable<U> = true>
::mediapipe::Status ProcessCopyableElements(CalculatorContext* cc) {
// static_assert(std::is_copy_constructible<U>::value,
// "Cannot copy non-copyable elements");
const auto& input = cc->Inputs().Index(0).Get<std::vector<U>>();
RET_CHECK_GE(input.size(), max_range_end_);
if (combine_outputs_) {
auto output = absl::make_unique<std::vector<T>>();
auto output = absl::make_unique<std::vector<U>>();
output->reserve(total_elements_);
for (int i = 0; i < ranges_.size(); ++i) {
auto elements = absl::make_unique<std::vector<T>>(
auto elements = absl::make_unique<std::vector<U>>(
input.begin() + ranges_[i].first,
input.begin() + ranges_[i].second);
output->insert(output->end(), elements->begin(), elements->end());
@ -134,7 +155,7 @@ class SplitVectorCalculator : public CalculatorBase {
if (element_only_) {
for (int i = 0; i < ranges_.size(); ++i) {
cc->Outputs().Index(i).AddPacket(
MakePacket<T>(input[ranges_[i].first]).At(cc->InputTimestamp()));
MakePacket<U>(input[ranges_[i].first]).At(cc->InputTimestamp()));
}
} else {
for (int i = 0; i < ranges_.size(); ++i) {
@ -149,7 +170,78 @@ class SplitVectorCalculator : public CalculatorBase {
return ::mediapipe::OkStatus();
}
template <typename U, IsNotCopyable<U> = true>
::mediapipe::Status ProcessCopyableElements(CalculatorContext* cc) {
return ::mediapipe::InternalError("Cannot copy non-copyable elements.");
}
template <typename U, IsMovable<U> = true>
::mediapipe::Status ProcessMovableElements(CalculatorContext* cc) {
::mediapipe::StatusOr<std::unique_ptr<std::vector<U>>> input_status =
cc->Inputs().Index(0).Value().Consume<std::vector<U>>();
if (!input_status.ok()) return input_status.status();
std::unique_ptr<std::vector<U>> input_vector =
std::move(input_status).ValueOrDie();
RET_CHECK_GE(input_vector->size(), max_range_end_);
if (combine_outputs_) {
auto output = absl::make_unique<std::vector<U>>();
output->reserve(total_elements_);
for (int i = 0; i < ranges_.size(); ++i) {
output->insert(
output->end(),
std::make_move_iterator(input_vector->begin() + ranges_[i].first),
std::make_move_iterator(input_vector->begin() + ranges_[i].second));
}
cc->Outputs().Index(0).Add(output.release(), cc->InputTimestamp());
} else {
if (element_only_) {
for (int i = 0; i < ranges_.size(); ++i) {
cc->Outputs().Index(i).AddPacket(
MakePacket<U>(std::move(input_vector->at(ranges_[i].first)))
.At(cc->InputTimestamp()));
}
} else {
for (int i = 0; i < ranges_.size(); ++i) {
auto output = absl::make_unique<std::vector<U>>();
output->insert(
output->end(),
std::make_move_iterator(input_vector->begin() + ranges_[i].first),
std::make_move_iterator(input_vector->begin() +
ranges_[i].second));
cc->Outputs().Index(i).Add(output.release(), cc->InputTimestamp());
}
}
}
return ::mediapipe::OkStatus();
}
template <typename U, IsNotMovable<U> = true>
::mediapipe::Status ProcessMovableElements(CalculatorContext* cc) {
return ::mediapipe::InternalError("Cannot move non-movable elements.");
}
private:
static ::mediapipe::Status checkRangesDontOverlap(
const ::mediapipe::SplitVectorCalculatorOptions& options) {
for (int i = 0; i < options.ranges_size() - 1; ++i) {
for (int j = i + 1; j < options.ranges_size(); ++j) {
const auto& range_0 = options.ranges(i);
const auto& range_1 = options.ranges(j);
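// Two half-open ranges [b0, e0) and [b1, e1) overlap iff each one begins
// before the other one ends.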
if ((range_0.begin() >= range_1.begin() &&
range_0.begin() < range_1.end()) ||
(range_1.begin() >= range_0.begin() &&
range_1.begin() < range_0.end())) {
return ::mediapipe::InvalidArgumentError(
"Ranges must be non-overlapping when using combine_outputs "
"option.");
}
}
}
return ::mediapipe::OkStatus();
}
std::vector<std::pair<int32, int32>> ranges_;
int32 max_range_end_ = -1;
int32 total_elements_ = 0;

View File

@ -452,4 +452,243 @@ TEST_F(SplitTfLiteTensorVectorCalculatorTest,
ASSERT_FALSE(graph.Initialize(graph_config).ok());
}
typedef SplitVectorCalculator<std::unique_ptr<int>, true>
MovableSplitUniqueIntPtrCalculator;
REGISTER_CALCULATOR(MovableSplitUniqueIntPtrCalculator);
class MovableSplitUniqueIntPtrCalculatorTest : public ::testing::Test {
protected:
void ValidateVectorOutput(std::vector<Packet>& output_packets,
int expected_elements, int input_begin_index) {
ASSERT_EQ(1, output_packets.size());
const std::vector<std::unique_ptr<int>>& output_vec =
output_packets[0].Get<std::vector<std::unique_ptr<int>>>();
ASSERT_EQ(expected_elements, output_vec.size());
for (int i = 0; i < expected_elements; ++i) {
const int expected_value = input_begin_index + i;
const std::unique_ptr<int>& result = output_vec[i];
ASSERT_NE(result, nullptr);
ASSERT_EQ(expected_value, *result);
}
}
void ValidateElementOutput(std::vector<Packet>& output_packets,
int expected_value) {
ASSERT_EQ(1, output_packets.size());
const std::unique_ptr<int>& result =
output_packets[0].Get<std::unique_ptr<int>>();
ASSERT_NE(result, nullptr);
ASSERT_EQ(expected_value, *result);
}
void ValidateCombinedVectorOutput(std::vector<Packet>& output_packets,
int expected_elements,
std::vector<int>& input_begin_indices,
std::vector<int>& input_end_indices) {
ASSERT_EQ(1, output_packets.size());
ASSERT_EQ(input_begin_indices.size(), input_end_indices.size());
const std::vector<std::unique_ptr<int>>& output_vector =
output_packets[0].Get<std::vector<std::unique_ptr<int>>>();
ASSERT_EQ(expected_elements, output_vector.size());
const int num_ranges = input_begin_indices.size();
int element_id = 0;
for (int range_id = 0; range_id < num_ranges; ++range_id) {
for (int i = input_begin_indices[range_id];
i < input_end_indices[range_id]; ++i) {
const int expected_value = i;
const std::unique_ptr<int>& result = output_vector[element_id];
ASSERT_NE(result, nullptr);
ASSERT_EQ(expected_value, *result);
++element_id;
}
}
}
};
TEST_F(MovableSplitUniqueIntPtrCalculatorTest, InvalidOverlappingRangesTest) {
// Prepare a graph to use the MovableSplitUniqueIntPtrCalculator.
CalculatorGraphConfig graph_config =
::mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(
R"(
input_stream: "input_vector"
node {
calculator: "MovableSplitUniqueIntPtrCalculator"
input_stream: "input_vector"
output_stream: "range_0"
options {
[mediapipe.SplitVectorCalculatorOptions.ext] {
ranges: { begin: 0 end: 3 }
ranges: { begin: 1 end: 4 }
}
}
}
)");
// Run the graph.
CalculatorGraph graph;
// The graph should fail to initialize because the ranges overlap.
ASSERT_FALSE(graph.Initialize(graph_config).ok());
}
TEST_F(MovableSplitUniqueIntPtrCalculatorTest, SmokeTest) {
// Prepare a graph to use the MovableSplitUniqueIntPtrCalculator.
CalculatorGraphConfig graph_config =
::mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(
R"(
input_stream: "input_vector"
node {
calculator: "MovableSplitUniqueIntPtrCalculator"
input_stream: "input_vector"
output_stream: "range_0"
output_stream: "range_1"
output_stream: "range_2"
options {
[mediapipe.SplitVectorCalculatorOptions.ext] {
ranges: { begin: 0 end: 1 }
ranges: { begin: 1 end: 4 }
ranges: { begin: 4 end: 5 }
}
}
}
)");
std::vector<Packet> range_0_packets;
tool::AddVectorSink("range_0", &graph_config, &range_0_packets);
std::vector<Packet> range_1_packets;
tool::AddVectorSink("range_1", &graph_config, &range_1_packets);
std::vector<Packet> range_2_packets;
tool::AddVectorSink("range_2", &graph_config, &range_2_packets);
// Run the graph.
CalculatorGraph graph;
MP_ASSERT_OK(graph.Initialize(graph_config));
MP_ASSERT_OK(graph.StartRun({}));
// input_vector : {0, 1, 2, 3, 4, 5}
std::unique_ptr<std::vector<std::unique_ptr<int>>> input_vector =
absl::make_unique<std::vector<std::unique_ptr<int>>>(6);
for (int i = 0; i < 6; ++i) {
input_vector->at(i) = absl::make_unique<int>(i);
}
MP_ASSERT_OK(graph.AddPacketToInputStream(
"input_vector", Adopt(input_vector.release()).At(Timestamp(1))));
MP_ASSERT_OK(graph.WaitUntilIdle());
MP_ASSERT_OK(graph.CloseAllPacketSources());
MP_ASSERT_OK(graph.WaitUntilDone());
ValidateVectorOutput(range_0_packets, /*expected_elements=*/1,
/*input_begin_index=*/0);
ValidateVectorOutput(range_1_packets, /*expected_elements=*/3,
/*input_begin_index=*/1);
ValidateVectorOutput(range_2_packets, /*expected_elements=*/1,
/*input_begin_index=*/4);
}
TEST_F(MovableSplitUniqueIntPtrCalculatorTest, SmokeTestElementOnly) {
// Prepare a graph to use the MovableSplitUniqueIntPtrCalculator.
CalculatorGraphConfig graph_config =
::mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(
R"(
input_stream: "input_vector"
node {
calculator: "MovableSplitUniqueIntPtrCalculator"
input_stream: "input_vector"
output_stream: "range_0"
output_stream: "range_1"
output_stream: "range_2"
options {
[mediapipe.SplitVectorCalculatorOptions.ext] {
ranges: { begin: 0 end: 1 }
ranges: { begin: 2 end: 3 }
ranges: { begin: 4 end: 5 }
element_only: true
}
}
}
)");
std::vector<Packet> range_0_packets;
tool::AddVectorSink("range_0", &graph_config, &range_0_packets);
std::vector<Packet> range_1_packets;
tool::AddVectorSink("range_1", &graph_config, &range_1_packets);
std::vector<Packet> range_2_packets;
tool::AddVectorSink("range_2", &graph_config, &range_2_packets);
// Run the graph.
CalculatorGraph graph;
MP_ASSERT_OK(graph.Initialize(graph_config));
MP_ASSERT_OK(graph.StartRun({}));
// input_vector : {0, 1, 2, 3, 4, 5}
std::unique_ptr<std::vector<std::unique_ptr<int>>> input_vector =
absl::make_unique<std::vector<std::unique_ptr<int>>>(6);
for (int i = 0; i < 6; ++i) {
input_vector->at(i) = absl::make_unique<int>(i);
}
MP_ASSERT_OK(graph.AddPacketToInputStream(
"input_vector", Adopt(input_vector.release()).At(Timestamp(1))));
MP_ASSERT_OK(graph.WaitUntilIdle());
MP_ASSERT_OK(graph.CloseAllPacketSources());
MP_ASSERT_OK(graph.WaitUntilDone());
ValidateElementOutput(range_0_packets, /*expected_value=*/0);
ValidateElementOutput(range_1_packets, /*expected_value=*/2);
ValidateElementOutput(range_2_packets, /*expected_value=*/4);
}
TEST_F(MovableSplitUniqueIntPtrCalculatorTest, SmokeTestCombiningOutputs) {
// Prepare a graph to use the MovableSplitUniqueIntPtrCalculator.
CalculatorGraphConfig graph_config =
::mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(
R"(
input_stream: "input_vector"
node {
calculator: "MovableSplitUniqueIntPtrCalculator"
input_stream: "input_vector"
output_stream: "range_0"
options {
[mediapipe.SplitVectorCalculatorOptions.ext] {
ranges: { begin: 0 end: 1 }
ranges: { begin: 2 end: 3 }
ranges: { begin: 4 end: 5 }
combine_outputs: true
}
}
}
)");
std::vector<Packet> range_0_packets;
tool::AddVectorSink("range_0", &graph_config, &range_0_packets);
// Run the graph.
CalculatorGraph graph;
MP_ASSERT_OK(graph.Initialize(graph_config));
MP_ASSERT_OK(graph.StartRun({}));
// input_vector : {0, 1, 2, 3, 4, 5}
std::unique_ptr<std::vector<std::unique_ptr<int>>> input_vector =
absl::make_unique<std::vector<std::unique_ptr<int>>>(6);
for (int i = 0; i < 6; ++i) {
input_vector->at(i) = absl::make_unique<int>(i);
}
MP_ASSERT_OK(graph.AddPacketToInputStream(
"input_vector", Adopt(input_vector.release()).At(Timestamp(1))));
MP_ASSERT_OK(graph.WaitUntilIdle());
MP_ASSERT_OK(graph.CloseAllPacketSources());
MP_ASSERT_OK(graph.WaitUntilDone());
std::vector<int> input_begin_indices = {0, 2, 4};
std::vector<int> input_end_indices = {1, 3, 5};
ValidateCombinedVectorOutput(range_0_packets, /*expected_elements=*/3,
input_begin_indices, input_end_indices);
}
} // namespace mediapipe

View File

@ -80,7 +80,9 @@ mediapipe_cc_proto_library(
name = "opencv_image_encoder_calculator_cc_proto",
srcs = ["opencv_image_encoder_calculator.proto"],
cc_deps = ["//mediapipe/framework:calculator_cc_proto"],
visibility = ["//visibility:public"],
visibility = [
"//visibility:public",
],
deps = [":opencv_image_encoder_calculator_proto"],
)

View File

@ -474,13 +474,20 @@ ScaleImageCalculator::~ScaleImageCalculator() {}
input_width_, "x", input_height_));
}
if (input_format_ != image_frame.Format()) {
std::string image_frame_format_desc, input_format_desc;
#ifdef MEDIAPIPE_MOBILE
image_frame_format_desc = std::to_string(image_frame.Format());
input_format_desc = std::to_string(input_format_);
#else
const proto_ns::EnumDescriptor* desc = ImageFormat::Format_descriptor();
image_frame_format_desc =
desc->FindValueByNumber(image_frame.Format())->DebugString();
input_format_desc = desc->FindValueByNumber(input_format_)->DebugString();
#endif // MEDIAPIPE_MOBILE
return tool::StatusFail(absl::StrCat(
"If a header specifies a format, then image frames on "
"the stream must have that format. Actual format ",
desc->FindValueByNumber(image_frame.Format())->DebugString(),
" but expected ",
desc->FindValueByNumber(input_format_)->DebugString()));
image_frame_format_desc, " but expected ", input_format_desc));
}
}
return ::mediapipe::OkStatus();

View File

@ -264,7 +264,7 @@ class PackMediaSequenceCalculator : public CalculatorBase {
if (options.output_only_if_all_present()) {
::mediapipe::Status status = VerifySequence();
if (!status.ok()) {
cc->GetCounter(status.error_message())->Increment();
cc->GetCounter(status.ToString())->Increment();
return status;
}
}

View File

@ -454,7 +454,7 @@ class TensorFlowInferenceCalculator : public CalculatorBase {
// RET_CHECK on the tf::Status object itself in order to print an
// informative error message.
RET_CHECK(tf_status.ok()) << "Run failed: " << tf_status.error_message();
RET_CHECK(tf_status.ok()) << "Run failed: " << tf_status.ToString();
const int64 run_end_time = absl::ToUnixMicros(clock_->TimeNow());
cc->GetCounter(kTotalSessionRunsTimeUsecsCounterSuffix)

View File

@ -109,7 +109,7 @@ class TensorFlowSessionFromFrozenGraphCalculator : public CalculatorBase {
RET_CHECK(graph_def.ParseFromString(graph_def_serialized));
const tf::Status tf_status = session->session->Create(graph_def);
RET_CHECK(tf_status.ok()) << "Create failed: " << tf_status.error_message();
RET_CHECK(tf_status.ok()) << "Create failed: " << tf_status.ToString();
for (const auto& key_value : options.tag_to_tensor_names()) {
session->tag_to_tensor_map[key_value.first] = key_value.second;
@ -119,7 +119,7 @@ class TensorFlowSessionFromFrozenGraphCalculator : public CalculatorBase {
session->session->Run({}, {}, initialization_op_names, {});
// RET_CHECK on the tf::Status object itself in order to print an
// informative error message.
RET_CHECK(tf_status.ok()) << "Run failed: " << tf_status.error_message();
RET_CHECK(tf_status.ok()) << "Run failed: " << tf_status.ToString();
}
cc->OutputSidePackets().Tag("SESSION").Set(Adopt(session.release()));

View File

@ -109,7 +109,7 @@ class TensorFlowSessionFromFrozenGraphGenerator : public PacketGenerator {
RET_CHECK(graph_def.ParseFromString(graph_def_serialized));
const tf::Status tf_status = session->session->Create(graph_def);
RET_CHECK(tf_status.ok()) << "Create failed: " << tf_status.error_message();
RET_CHECK(tf_status.ok()) << "Create failed: " << tf_status.ToString();
for (const auto& key_value : options.tag_to_tensor_names()) {
session->tag_to_tensor_map[key_value.first] = key_value.second;
@ -119,7 +119,7 @@ class TensorFlowSessionFromFrozenGraphGenerator : public PacketGenerator {
session->session->Run({}, {}, initialization_op_names, {});
// RET_CHECK on the tf::Status object itself in order to print an
// informative error message.
RET_CHECK(tf_status.ok()) << "Run failed: " << tf_status.error_message();
RET_CHECK(tf_status.ok()) << "Run failed: " << tf_status.ToString();
}
output_side_packets->Tag("SESSION") = Adopt(session.release());

View File

@ -140,7 +140,7 @@ class TensorFlowSessionFromSavedModelCalculator : public CalculatorBase {
if (!status.ok()) {
return ::mediapipe::Status(
static_cast<::mediapipe::StatusCode>(status.code()),
status.error_message());
status.ToString());
}
auto session = absl::make_unique<TensorFlowSession>();

View File

@ -135,7 +135,7 @@ class TensorFlowSessionFromSavedModelGenerator : public PacketGenerator {
if (!status.ok()) {
return ::mediapipe::Status(
static_cast<::mediapipe::StatusCode>(status.code()),
status.error_message());
status.ToString());
}
auto session = absl::make_unique<TensorFlowSession>();

View File

@ -81,7 +81,7 @@ class TFRecordReaderCalculator : public CalculatorBase {
auto tf_status = tensorflow::Env::Default()->NewRandomAccessFile(
cc->InputSidePackets().Tag(kTFRecordPath).Get<std::string>(), &file);
RET_CHECK(tf_status.ok())
<< "Failed to open tfrecord file: " << tf_status.error_message();
<< "Failed to open tfrecord file: " << tf_status.ToString();
tensorflow::io::RecordReader reader(file.get(),
tensorflow::io::RecordReaderOptions());
tensorflow::uint64 offset = 0;
@ -94,7 +94,7 @@ class TFRecordReaderCalculator : public CalculatorBase {
while (current_idx <= target_idx) {
tf_status = reader.ReadRecord(&offset, &example_str);
RET_CHECK(tf_status.ok())
<< "Failed to read tfrecord: " << tf_status.error_message();
<< "Failed to read tfrecord: " << tf_status.ToString();
if (current_idx == target_idx) {
if (cc->OutputSidePackets().HasTag(kExampleTag)) {
tensorflow::Example tf_example;

View File

@ -294,11 +294,15 @@ REGISTER_CALCULATOR(TfLiteConverterCalculator);
if (use_quantized_tensors_) {
RET_CHECK(image_frame.Format() != mediapipe::ImageFormat::VEC32F1)
<< "Only 8-bit input images are supported for quantization.";
quant.type = kTfLiteAffineQuantization;
quant.params = nullptr;
// Optional: Set 'quant' quantization params here if needed.
interpreter_->SetTensorParametersReadWrite(0, kTfLiteUInt8, "",
{channels_preserved}, quant);
} else {
// Default TfLiteQuantization used for no quantization.
// Initialize structure for no quantization.
quant.type = kTfLiteNoQuantization;
quant.params = nullptr;
interpreter_->SetTensorParametersReadWrite(0, kTfLiteFloat32, "",
{channels_preserved}, quant);
}
@ -422,40 +426,35 @@ REGISTER_CALCULATOR(TfLiteConverterCalculator);
#elif defined(MEDIAPIPE_IOS)
// GpuBuffer to id<MTLBuffer> conversion.
const auto& input = cc->Inputs().Tag("IMAGE_GPU").Get<mediapipe::GpuBuffer>();
{
id<MTLTexture> src_texture = [gpu_helper_ metalTextureWithGpuBuffer:input];
id<MTLCommandBuffer> command_buffer = [gpu_helper_ commandBuffer];
command_buffer.label = @"TfLiteConverterCalculatorConvert";
id<MTLComputeCommandEncoder> compute_encoder =
[command_buffer computeCommandEncoder];
[compute_encoder setComputePipelineState:gpu_data_out_->pipeline_state];
[compute_encoder setTexture:src_texture atIndex:0];
[compute_encoder setBuffer:gpu_data_out_->buffer offset:0 atIndex:1];
MTLSize threads_per_group = MTLSizeMake(kWorkgroupSize, kWorkgroupSize, 1);
MTLSize threadgroups =
MTLSizeMake(NumGroups(input.width(), kWorkgroupSize),
NumGroups(input.height(), kWorkgroupSize), 1);
[compute_encoder dispatchThreadgroups:threadgroups
threadsPerThreadgroup:threads_per_group];
[compute_encoder endEncoding];
[command_buffer commit];
[command_buffer waitUntilCompleted];
}
id<MTLCommandBuffer> command_buffer = [gpu_helper_ commandBuffer];
id<MTLTexture> src_texture = [gpu_helper_ metalTextureWithGpuBuffer:input];
command_buffer.label = @"TfLiteConverterCalculatorConvertAndBlit";
id<MTLComputeCommandEncoder> compute_encoder =
[command_buffer computeCommandEncoder];
[compute_encoder setComputePipelineState:gpu_data_out_->pipeline_state];
[compute_encoder setTexture:src_texture atIndex:0];
[compute_encoder setBuffer:gpu_data_out_->buffer offset:0 atIndex:1];
MTLSize threads_per_group = MTLSizeMake(kWorkgroupSize, kWorkgroupSize, 1);
MTLSize threadgroups =
MTLSizeMake(NumGroups(input.width(), kWorkgroupSize),
NumGroups(input.height(), kWorkgroupSize), 1);
[compute_encoder dispatchThreadgroups:threadgroups
threadsPerThreadgroup:threads_per_group];
[compute_encoder endEncoding];
// Copy into outputs.
// TODO Avoid this copy.
auto output_tensors = absl::make_unique<std::vector<GpuTensor>>();
output_tensors->resize(1);
{
id<MTLDevice> device = gpu_helper_.mtlDevice;
output_tensors->at(0) =
[device newBufferWithLength:gpu_data_out_->elements * sizeof(float)
options:MTLResourceStorageModeShared];
[MPPMetalUtil blitMetalBufferTo:output_tensors->at(0)
from:gpu_data_out_->buffer
blocking:true
commandBuffer:[gpu_helper_ commandBuffer]];
}
id<MTLDevice> device = gpu_helper_.mtlDevice;
output_tensors->at(0) =
[device newBufferWithLength:gpu_data_out_->elements * sizeof(float)
options:MTLResourceStorageModeShared];
[MPPMetalUtil blitMetalBufferTo:output_tensors->at(0)
from:gpu_data_out_->buffer
blocking:false
commandBuffer:command_buffer];
cc->Outputs()
.Tag("TENSORS_GPU")

View File

@ -56,6 +56,10 @@
#endif // ANDROID
namespace {
// Commonly used to compute the number of blocks to launch in a kernel.
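// For example, NumGroups(130, 64) == 3 (ceiling division).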
int NumGroups(const int size, const int group_size) { // NOLINT
return (size + group_size - 1) / group_size;
}
#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE)
typedef ::tflite::gpu::gl::GlBuffer GpuTensor;
@ -176,12 +180,13 @@ class TfLiteInferenceCalculator : public CalculatorBase {
#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE)
mediapipe::GlCalculatorHelper gpu_helper_;
std::unique_ptr<GPUData> gpu_data_in_;
std::vector<std::unique_ptr<GPUData>> gpu_data_in_;
std::vector<std::unique_ptr<GPUData>> gpu_data_out_;
#elif defined(MEDIAPIPE_IOS)
MPPMetalHelper* gpu_helper_ = nullptr;
std::unique_ptr<GPUData> gpu_data_in_;
std::vector<std::unique_ptr<GPUData>> gpu_data_in_;
std::vector<std::unique_ptr<GPUData>> gpu_data_out_;
id<MTLComputePipelineState> fp32_to_fp16_program_;
TFLBufferConvert* converter_from_BPHWC4_ = nil;
#endif
@ -308,22 +313,41 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator);
#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE)
const auto& input_tensors =
cc->Inputs().Tag("TENSORS_GPU").Get<std::vector<GpuTensor>>();
RET_CHECK_EQ(input_tensors.size(), 1);
RET_CHECK_GT(input_tensors.size(), 0);
MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext(
[this, &input_tensors]() -> ::mediapipe::Status {
// Explicit copy input.
RET_CHECK_CALL(CopyBuffer(input_tensors[0], gpu_data_in_->buffer));
gpu_data_in_.resize(input_tensors.size());
for (int i = 0; i < input_tensors.size(); ++i) {
RET_CHECK_CALL(
CopyBuffer(input_tensors[i], gpu_data_in_[i]->buffer));
}
return ::mediapipe::OkStatus();
}));
#elif defined(MEDIAPIPE_IOS)
const auto& input_tensors =
cc->Inputs().Tag("TENSORS_GPU").Get<std::vector<GpuTensor>>();
RET_CHECK_EQ(input_tensors.size(), 1);
// Explicit copy input.
[MPPMetalUtil blitMetalBufferTo:gpu_data_in_->buffer
from:input_tensors[0]
blocking:true
commandBuffer:[gpu_helper_ commandBuffer]];
RET_CHECK_GT(input_tensors.size(), 0);
// Explicit copy input with conversion float 32 bits to 16 bits.
gpu_data_in_.resize(input_tensors.size());
id<MTLCommandBuffer> command_buffer = [gpu_helper_ commandBuffer];
command_buffer.label = @"TfLiteInferenceCalculatorConvert";
id<MTLComputeCommandEncoder> compute_encoder =
[command_buffer computeCommandEncoder];
[compute_encoder setComputePipelineState:fp32_to_fp16_program_];
for (int i = 0; i < input_tensors.size(); ++i) {
[compute_encoder setBuffer:input_tensors[i] offset:0 atIndex:0];
[compute_encoder setBuffer:gpu_data_in_[i]->buffer offset:0 atIndex:1];
constexpr int kWorkgroupSize = 64; // Block size for GPU shader.
MTLSize threads_per_group = MTLSizeMake(kWorkgroupSize, 1, 1);
const int threadgroups =
NumGroups(gpu_data_in_[i]->elements, kWorkgroupSize);
[compute_encoder dispatchThreadgroups:MTLSizeMake(threadgroups, 1, 1)
threadsPerThreadgroup:threads_per_group];
}
[compute_encoder endEncoding];
[command_buffer commit];
#else
RET_CHECK_FAIL() << "GPU processing not enabled.";
#endif
@ -404,7 +428,6 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator);
}
[convert_command endEncoding];
[command_buffer commit];
[command_buffer waitUntilCompleted];
cc->Outputs()
.Tag("TENSORS_GPU")
.Add(output_tensors.release(), cc->InputTimestamp());
@ -432,7 +455,9 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator);
#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE)
MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext([this]() -> Status {
TfLiteGpuDelegateDelete(delegate_);
gpu_data_in_.reset();
for (int i = 0; i < gpu_data_in_.size(); ++i) {
gpu_data_in_[i].reset();
}
for (int i = 0; i < gpu_data_out_.size(); ++i) {
gpu_data_out_[i].reset();
}
@ -440,7 +465,9 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator);
}));
#elif defined(MEDIAPIPE_IOS)
TFLGpuDelegateDelete(delegate_);
gpu_data_in_.reset();
for (int i = 0; i < gpu_data_in_.size(); ++i) {
gpu_data_in_[i].reset();
}
for (int i = 0; i < gpu_data_out_.size(); ++i) {
gpu_data_out_[i].reset();
}
@ -545,24 +572,24 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator);
if (gpu_input_) {
// Get input image sizes.
gpu_data_in_ = absl::make_unique<GPUData>();
const auto& input_indices = interpreter_->inputs();
RET_CHECK_EQ(input_indices.size(), 1); // TODO accept > 1.
const TfLiteTensor* tensor = interpreter_->tensor(input_indices[0]);
gpu_data_in_->elements = 1;
for (int d = 0; d < tensor->dims->size; ++d) {
gpu_data_in_->elements *= tensor->dims->data[d];
gpu_data_in_.resize(input_indices.size());
for (int i = 0; i < input_indices.size(); ++i) {
const TfLiteTensor* tensor = interpreter_->tensor(input_indices[0]);
gpu_data_in_[i] = absl::make_unique<GPUData>();
gpu_data_in_[i]->elements = 1;
for (int d = 0; d < tensor->dims->size; ++d) {
gpu_data_in_[i]->elements *= tensor->dims->data[d];
}
// Create and bind input buffer.
RET_CHECK_CALL(
::tflite::gpu::gl::CreateReadWriteShaderStorageBuffer<float>(
gpu_data_in_[i]->elements, &gpu_data_in_[i]->buffer));
RET_CHECK_EQ(TfLiteGpuDelegateBindBufferToTensor(
delegate_, gpu_data_in_[i]->buffer.id(),
interpreter_->inputs()[i]),
kTfLiteOk);
}
CHECK_GE(tensor->dims->data[3], 1);
CHECK_LE(tensor->dims->data[3], 4);
CHECK_NE(tensor->dims->data[3], 2);
// Create and bind input buffer.
RET_CHECK_CALL(::tflite::gpu::gl::CreateReadWriteShaderStorageBuffer<float>(
gpu_data_in_->elements, &gpu_data_in_->buffer));
RET_CHECK_EQ(TfLiteGpuDelegateBindBufferToTensor(
delegate_, gpu_data_in_->buffer.id(),
interpreter_->inputs()[0]), // First tensor only
kTfLiteOk);
}
if (gpu_output_) {
// Get output image sizes.
@ -594,41 +621,68 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator);
#endif // OpenGL
#if defined(MEDIAPIPE_IOS)
const int kHalfSize = 2; // sizeof(half)
// Configure and create the delegate.
TFLGpuDelegateOptions options;
options.allow_precision_loss = false; // Must match converter, F=float/T=half
options.allow_precision_loss = true;
options.wait_type = TFLGpuDelegateWaitType::TFLGpuDelegateWaitTypePassive;
if (!delegate_) delegate_ = TFLGpuDelegateCreate(&options);
id<MTLDevice> device = gpu_helper_.mtlDevice;
if (gpu_input_) {
// Get input image sizes.
gpu_data_in_ = absl::make_unique<GPUData>();
const auto& input_indices = interpreter_->inputs();
RET_CHECK_EQ(input_indices.size(), 1);
const TfLiteTensor* tensor = interpreter_->tensor(input_indices[0]);
gpu_data_in_->elements = 1;
// On iOS GPU, input must be 4 channels, regardless of what model expects.
{
gpu_data_in_->elements *= tensor->dims->data[0]; // batch
gpu_data_in_->elements *= tensor->dims->data[1]; // height
gpu_data_in_->elements *= tensor->dims->data[2]; // width
gpu_data_in_->elements *= 4; // channels
gpu_data_in_.resize(input_indices.size());
for (int i = 0; i < input_indices.size(); ++i) {
const TfLiteTensor* tensor = interpreter_->tensor(input_indices[i]);
gpu_data_in_[i] = absl::make_unique<GPUData>();
gpu_data_in_[i]->shape.b = tensor->dims->data[0];
gpu_data_in_[i]->shape.h = tensor->dims->data[1];
gpu_data_in_[i]->shape.w = tensor->dims->data[2];
// On iOS GPU, input must be 4 channels, regardless of what model expects.
gpu_data_in_[i]->shape.c = 4;
gpu_data_in_[i]->elements =
gpu_data_in_[i]->shape.b * gpu_data_in_[i]->shape.h *
gpu_data_in_[i]->shape.w * gpu_data_in_[i]->shape.c;
// Input to model can be RGBA only.
if (tensor->dims->data[3] != 4) {
LOG(WARNING) << "Please ensure input GPU tensor is 4 channels.";
}
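// Tiny compute shader that casts the incoming fp32 tensor values to fp16
// (half), four values per thread, into the buffer later bound to the delegate.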
const std::string shader_source =
absl::Substitute(R"(#include <metal_stdlib>
using namespace metal;
kernel void convertKernel(device float4* const input_buffer [[buffer(0)]],
device half4* output_buffer [[buffer(1)]],
uint gid [[thread_position_in_grid]]) {
if (gid >= $0) return;
output_buffer[gid] = half4(input_buffer[gid]);
})",
gpu_data_in_[i]->elements / 4);
NSString* library_source =
[NSString stringWithUTF8String:shader_source.c_str()];
NSError* error = nil;
id<MTLLibrary> library =
[device newLibraryWithSource:library_source options:nil error:&error];
RET_CHECK(library != nil) << "Couldn't create shader library "
<< [[error localizedDescription] UTF8String];
id<MTLFunction> kernel_func = nil;
kernel_func = [library newFunctionWithName:@"convertKernel"];
RET_CHECK(kernel_func != nil) << "Couldn't create kernel function.";
fp32_to_fp16_program_ =
[device newComputePipelineStateWithFunction:kernel_func error:&error];
RET_CHECK(fp32_to_fp16_program_ != nil)
<< "Couldn't create pipeline state "
<< [[error localizedDescription] UTF8String];
// Create and bind input buffer.
gpu_data_in_[i]->buffer =
[device newBufferWithLength:gpu_data_in_[i]->elements * kHalfSize
options:MTLResourceStorageModeShared];
RET_CHECK_EQ(interpreter_->ModifyGraphWithDelegate(delegate_), kTfLiteOk);
RET_CHECK_EQ(TFLGpuDelegateBindMetalBufferToTensor(
delegate_, input_indices[i], gpu_data_in_[i]->buffer),
true);
}
// Input to model can be RGBA only.
if (tensor->dims->data[3] != 4) {
LOG(WARNING) << "Please ensure input GPU tensor is 4 channels.";
}
// Create and bind input buffer.
gpu_data_in_->buffer =
[device newBufferWithLength:gpu_data_in_->elements * sizeof(float)
options:MTLResourceStorageModeShared];
RET_CHECK_EQ(interpreter_->ModifyGraphWithDelegate(delegate_), kTfLiteOk);
RET_CHECK_EQ(TFLGpuDelegateBindMetalBufferToTensor(
delegate_,
input_indices[0], // First tensor only
gpu_data_in_->buffer),
true);
}
if (gpu_output_) {
// Get output image sizes.
@ -669,15 +723,16 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator);
interpreter_->SetAllowBufferHandleOutput(true);
for (int i = 0; i < gpu_data_out_.size(); ++i) {
gpu_data_out_[i]->buffer =
[device newBufferWithLength:gpu_data_out_[i]->elements * sizeof(float)
[device newBufferWithLength:gpu_data_out_[i]->elements * kHalfSize
options:MTLResourceStorageModeShared];
RET_CHECK_EQ(TFLGpuDelegateBindMetalBufferToTensor(
delegate_, output_indices[i], gpu_data_out_[i]->buffer),
true);
}
// Create converter for GPU output.
converter_from_BPHWC4_ = [[TFLBufferConvert alloc] initWithDevice:device
isFloat16:false
isFloat16:true
convertToPBHWC4:false];
if (converter_from_BPHWC4_ == nil) {
return mediapipe::InternalError(

View File

@ -472,11 +472,11 @@ REGISTER_CALCULATOR(TfLiteTensorsToDetectionsCalculator);
// Copy inputs.
[MPPMetalUtil blitMetalBufferTo:gpu_data_->raw_boxes_buffer
from:input_tensors[0]
blocking:true
blocking:false
commandBuffer:[gpu_helper_ commandBuffer]];
[MPPMetalUtil blitMetalBufferTo:gpu_data_->raw_scores_buffer
from:input_tensors[1]
blocking:true
blocking:false
commandBuffer:[gpu_helper_ commandBuffer]];
if (!anchors_init_) {
if (side_packet_anchors_) {
@ -491,48 +491,37 @@ REGISTER_CALCULATOR(TfLiteTensorsToDetectionsCalculator);
RET_CHECK_EQ(input_tensors.size(), kNumInputTensorsWithAnchors);
[MPPMetalUtil blitMetalBufferTo:gpu_data_->raw_anchors_buffer
from:input_tensors[2]
blocking:true
blocking:false
commandBuffer:[gpu_helper_ commandBuffer]];
}
anchors_init_ = true;
}
// Run shaders.
{
id<MTLCommandBuffer> command_buffer = [gpu_helper_ commandBuffer];
command_buffer.label = @"TfLiteDecodeBoxes";
id<MTLComputeCommandEncoder> decode_command =
[command_buffer computeCommandEncoder];
[decode_command setComputePipelineState:gpu_data_->decode_program];
[decode_command setBuffer:gpu_data_->decoded_boxes_buffer
offset:0
atIndex:0];
[decode_command setBuffer:gpu_data_->raw_boxes_buffer offset:0 atIndex:1];
[decode_command setBuffer:gpu_data_->raw_anchors_buffer offset:0 atIndex:2];
MTLSize decode_threads_per_group = MTLSizeMake(1, 1, 1);
MTLSize decode_threadgroups = MTLSizeMake(num_boxes_, 1, 1);
[decode_command dispatchThreadgroups:decode_threadgroups
threadsPerThreadgroup:decode_threads_per_group];
[decode_command endEncoding];
[command_buffer commit];
[command_buffer waitUntilCompleted];
}
{
id<MTLCommandBuffer> command_buffer = [gpu_helper_ commandBuffer];
command_buffer.label = @"TfLiteScoreBoxes";
id<MTLComputeCommandEncoder> score_command =
[command_buffer computeCommandEncoder];
[score_command setComputePipelineState:gpu_data_->score_program];
[score_command setBuffer:gpu_data_->scored_boxes_buffer offset:0 atIndex:0];
[score_command setBuffer:gpu_data_->raw_scores_buffer offset:0 atIndex:1];
MTLSize score_threads_per_group = MTLSizeMake(1, num_classes_, 1);
MTLSize score_threadgroups = MTLSizeMake(num_boxes_, 1, 1);
[score_command dispatchThreadgroups:score_threadgroups
id<MTLCommandBuffer> command_buffer = [gpu_helper_ commandBuffer];
command_buffer.label = @"TfLiteDecodeAndScoreBoxes";
id<MTLComputeCommandEncoder> command_encoder =
[command_buffer computeCommandEncoder];
[command_encoder setComputePipelineState:gpu_data_->decode_program];
[command_encoder setBuffer:gpu_data_->decoded_boxes_buffer
offset:0
atIndex:0];
[command_encoder setBuffer:gpu_data_->raw_boxes_buffer offset:0 atIndex:1];
[command_encoder setBuffer:gpu_data_->raw_anchors_buffer offset:0 atIndex:2];
MTLSize decode_threads_per_group = MTLSizeMake(1, 1, 1);
MTLSize decode_threadgroups = MTLSizeMake(num_boxes_, 1, 1);
[command_encoder dispatchThreadgroups:decode_threadgroups
threadsPerThreadgroup:decode_threads_per_group];
[command_encoder setComputePipelineState:gpu_data_->score_program];
[command_encoder setBuffer:gpu_data_->scored_boxes_buffer offset:0 atIndex:0];
[command_encoder setBuffer:gpu_data_->raw_scores_buffer offset:0 atIndex:1];
MTLSize score_threads_per_group = MTLSizeMake(1, num_classes_, 1);
MTLSize score_threadgroups = MTLSizeMake(num_boxes_, 1, 1);
[command_encoder dispatchThreadgroups:score_threadgroups
threadsPerThreadgroup:score_threads_per_group];
[score_command endEncoding];
[command_buffer commit];
[command_buffer waitUntilCompleted];
}
[command_encoder endEncoding];
[MPPMetalUtil commitCommandBufferAndWait:command_buffer];
// Copy decoded boxes from GPU to CPU.
std::vector<float> boxes(num_boxes_ * num_coords_);

View File

@ -65,6 +65,15 @@ proto_library(
],
)
proto_library(
name = "video_pre_stream_calculator_proto",
srcs = ["video_pre_stream_calculator.proto"],
visibility = ["//visibility:public"],
deps = [
"//mediapipe/framework:calculator_proto",
],
)
mediapipe_cc_proto_library(
name = "motion_analysis_calculator_cc_proto",
srcs = ["motion_analysis_calculator.proto"],
@ -98,6 +107,16 @@ mediapipe_cc_proto_library(
deps = [":box_tracker_calculator_proto"],
)
mediapipe_cc_proto_library(
name = "video_pre_stream_calculator_cc_proto",
srcs = ["video_pre_stream_calculator.proto"],
cc_deps = [
"//mediapipe/framework:calculator_cc_proto",
],
visibility = ["//visibility:public"],
deps = [":video_pre_stream_calculator_proto"],
)
mediapipe_cc_proto_library(
name = "flow_to_image_calculator_cc_proto",
srcs = ["flow_to_image_calculator.proto"],
@ -280,6 +299,19 @@ cc_library(
alwayslink = 1,
)
cc_library(
name = "video_pre_stream_calculator",
srcs = ["video_pre_stream_calculator.cc"],
visibility = ["//visibility:public"],
deps = [
":video_pre_stream_calculator_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/formats:image_frame",
"//mediapipe/framework/formats:video_stream_header",
],
alwayslink = 1,
)
filegroup(
name = "test_videos",
srcs = [
@ -411,3 +443,17 @@ cc_test(
"//mediapipe/util/tracking:tracking_cc_proto",
],
)
cc_test(
name = "video_pre_stream_calculator_test",
srcs = ["video_pre_stream_calculator_test.cc"],
deps = [
":video_pre_stream_calculator",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/formats:image_frame",
"//mediapipe/framework/formats:video_stream_header",
"//mediapipe/framework/port:gtest_main",
"//mediapipe/framework/port:parse_text_proto",
"//mediapipe/framework/port:status",
],
)

View File

@ -72,6 +72,8 @@ ImageFormat::Format GetImageFormat(int num_channels) {
// OpenCV's VideoCapture doesn't decode audio tracks. If the audio tracks need
// to be saved, specify an output side packet with tag "SAVED_AUDIO_PATH".
// The calculator will call FFmpeg binary to save audio tracks as an aac file.
// If the audio tracks can't be extracted by FFmpeg, the output side packet
// will contain an empty std::string.
//
// Example config:
// node {
@ -150,13 +152,23 @@ class OpenCvVideoDecoderCalculator : public CalculatorBase {
if (cc->OutputSidePackets().HasTag("SAVED_AUDIO_PATH")) {
#ifdef HAVE_FFMPEG
std::string saved_audio_path = std::tmpnam(nullptr);
system(absl::StrCat("ffmpeg -nostats -loglevel 0 -i ", input_file_path,
" -vn -f adts ", saved_audio_path)
.c_str());
cc->OutputSidePackets()
.Tag("SAVED_AUDIO_PATH")
.Set(MakePacket<std::string>(saved_audio_path));
std::string ffmpeg_command =
absl::StrCat("ffmpeg -nostats -loglevel 0 -i ", input_file_path,
" -vn -f adts ", saved_audio_path);
system(ffmpeg_command.c_str());
int status_code = system(absl::StrCat("ls ", saved_audio_path).c_str());
if (status_code == 0) {
cc->OutputSidePackets()
.Tag("SAVED_AUDIO_PATH")
.Set(MakePacket<std::string>(saved_audio_path));
} else {
LOG(WARNING) << "FFmpeg can't extract audio from " << input_file_path
<< " by executing the following command: "
<< ffmpeg_command;
cc->OutputSidePackets()
.Tag("SAVED_AUDIO_PATH")
.Set(MakePacket<std::string>(std::string()));
}
#else
return ::mediapipe::InvalidArgumentErrorBuilder(MEDIAPIPE_LOC)
<< "OpenCVVideoDecoderCalculator can't save the audio file "

View File

@ -55,8 +55,12 @@ TEST(OpenCvVideoDecoderCalculatorTest, TestMp4Avc720pVideo) {
EXPECT_EQ(640, header.height);
EXPECT_FLOAT_EQ(6.0f, header.duration);
EXPECT_FLOAT_EQ(30.0f, header.frame_rate);
EXPECT_EQ(180, runner.Outputs().Tag("VIDEO").packets.size());
for (int i = 0; i < 180; ++i) {
// The number of output packets should be 180.
// Some OpenCV versions return the first two frames with the same timestamp on
// macOS, so we might miss one frame here.
int num_of_packets = runner.Outputs().Tag("VIDEO").packets.size();
EXPECT_GE(num_of_packets, 179);
for (int i = 0; i < num_of_packets; ++i) {
Packet image_frame_packet = runner.Outputs().Tag("VIDEO").packets[i];
cv::Mat output_mat =
formats::MatView(&(image_frame_packet.Get<ImageFrame>()));
@ -141,8 +145,12 @@ TEST(OpenCvVideoDecoderCalculatorTest, TestMkvVp8Video) {
EXPECT_EQ(320, header.height);
EXPECT_FLOAT_EQ(6.0f, header.duration);
EXPECT_FLOAT_EQ(30.0f, header.frame_rate);
EXPECT_EQ(180, runner.Outputs().Tag("VIDEO").packets.size());
for (int i = 0; i < 180; ++i) {
// The number of output packets should be 180.
// Some OpenCV versions return the first two frames with the same timestamp on
// macOS, so we might miss one frame here.
int num_of_packets = runner.Outputs().Tag("VIDEO").packets.size();
EXPECT_GE(num_of_packets, 179);
for (int i = 0; i < num_of_packets; ++i) {
Packet image_frame_packet = runner.Outputs().Tag("VIDEO").packets[i];
cv::Mat output_mat =
formats::MatView(&(image_frame_packet.Get<ImageFrame>()));

View File

@ -183,14 +183,20 @@ class OpenCvVideoEncoderCalculator : public CalculatorBase {
#ifdef HAVE_FFMPEG
const std::string& audio_file_path =
cc->InputSidePackets().Tag("AUDIO_FILE_PATH").Get<std::string>();
// A temp output file is needed because FFmpeg can't do in-place editing.
const std::string temp_file_path = std::tmpnam(nullptr);
system(absl::StrCat("mv ", output_file_path_, " ", temp_file_path,
"&& ffmpeg -nostats -loglevel 0 -i ", temp_file_path,
" -i ", audio_file_path,
" -c copy -map 0:v:0 -map 1:a:0 ", output_file_path_,
"&& rm ", temp_file_path)
.c_str());
if (audio_file_path.empty()) {
LOG(WARNING) << "OpenCvVideoEncoderCalculator isn't able to attach the "
"audio tracks to the generated video because the audio "
"file path is not specified.";
} else {
// A temp output file is needed because FFmpeg can't do in-place editing.
const std::string temp_file_path = std::tmpnam(nullptr);
system(absl::StrCat("mv ", output_file_path_, " ", temp_file_path,
"&& ffmpeg -nostats -loglevel 0 -i ", temp_file_path,
" -i ", audio_file_path,
" -c copy -map 0:v:0 -map 1:a:0 ", output_file_path_,
"&& rm ", temp_file_path)
.c_str());
}
#else
return ::mediapipe::InvalidArgumentErrorBuilder(MEDIAPIPE_LOC)

View File

@ -210,8 +210,8 @@ TEST(OpenCvVideoEncoderCalculatorTest, TestMkvVp8Video) {
EXPECT_EQ(video_header.frame_rate,
static_cast<double>(cap.get(cv::CAP_PROP_FPS)));
EXPECT_EQ(video_header.duration,
static_cast<int>(cap.get(cv::CAP_PROP_FRAME_COUNT) /
cap.get(cv::CAP_PROP_FPS)));
static_cast<int>(std::round(cap.get(cv::CAP_PROP_FRAME_COUNT) /
cap.get(cv::CAP_PROP_FPS))));
}
} // namespace

View File

@ -0,0 +1,142 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/calculators/video/video_pre_stream_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/video_stream_header.h"
namespace mediapipe {
// Sets up VideoHeader based on the 1st ImageFrame and emits it with timestamp
// PreStream. Note that this calculator only fills in format, width, and height,
// i.e. frame_rate and duration will not be filled, unless:
// 1) an existing VideoHeader is provided at PreStream(). In such case, the
// frame_rate and duration, if they exist, will be copied from the existing
// VideoHeader.
// 2) you specify frame_rate through the options. In this case, the
// options will overwrite the existing VideoHeader if it is available.
//
// Example config:
// node {
// calculator: "VideoPreStreamCalculator"
// input_stream: "FRAME:cropped_frames"
// input_stream: "VIDEO_PRESTREAM:original_video_header"
// output_stream: "cropped_frames_video_header"
// }
//
// or
//
// node {
// calculator: "VideoPreStreamCalculator"
// input_stream: "cropped_frames"
// output_stream: "video_header"
// }
class VideoPreStreamCalculator : public CalculatorBase {
public:
static ::mediapipe::Status GetContract(CalculatorContract* cc);
::mediapipe::Status Open(CalculatorContext* cc) override;
::mediapipe::Status Process(CalculatorContext* cc) override;
private:
::mediapipe::Status ProcessWithFrameRateInPreStream(CalculatorContext* cc);
::mediapipe::Status ProcessWithFrameRateInOptions(CalculatorContext* cc);
std::unique_ptr<VideoHeader> header_;
bool frame_rate_in_prestream_ = false;
bool emitted_ = false;
};
REGISTER_CALCULATOR(VideoPreStreamCalculator);
::mediapipe::Status VideoPreStreamCalculator::GetContract(
CalculatorContract* cc) {
if (!cc->Inputs().UsesTags()) {
cc->Inputs().Index(0).Set<ImageFrame>();
} else {
cc->Inputs().Tag("FRAME").Set<ImageFrame>();
cc->Inputs().Tag("VIDEO_PRESTREAM").Set<VideoHeader>();
}
cc->Outputs().Index(0).Set<VideoHeader>();
return ::mediapipe::OkStatus();
}
::mediapipe::Status VideoPreStreamCalculator::Open(CalculatorContext* cc) {
frame_rate_in_prestream_ = cc->Inputs().UsesTags() &&
cc->Inputs().HasTag("FRAME") &&
cc->Inputs().HasTag("VIDEO_PRESTREAM");
header_ = absl::make_unique<VideoHeader>();
return ::mediapipe::OkStatus();
}
::mediapipe::Status VideoPreStreamCalculator::ProcessWithFrameRateInPreStream(
CalculatorContext* cc) {
cc->GetCounter("ProcessWithFrameRateInPreStream")->Increment();
if (cc->InputTimestamp() == Timestamp::PreStream()) {
RET_CHECK(cc->Inputs().Tag("FRAME").IsEmpty());
RET_CHECK(!cc->Inputs().Tag("VIDEO_PRESTREAM").IsEmpty());
*header_ = cc->Inputs().Tag("VIDEO_PRESTREAM").Get<VideoHeader>();
RET_CHECK_NE(header_->frame_rate, 0.0) << "frame rate should be non-zero";
} else {
RET_CHECK(cc->Inputs().Tag("VIDEO_PRESTREAM").IsEmpty())
<< "Packet on VIDEO_PRESTREAM must come in at Timestamp::PreStream().";
RET_CHECK(!cc->Inputs().Tag("FRAME").IsEmpty());
const auto& frame = cc->Inputs().Tag("FRAME").Get<ImageFrame>();
header_->format = frame.Format();
header_->width = frame.Width();
header_->height = frame.Height();
RET_CHECK_NE(header_->frame_rate, 0.0) << "frame rate should be non-zero";
cc->Outputs().Index(0).Add(header_.release(), Timestamp::PreStream());
emitted_ = true;
}
return ::mediapipe::OkStatus();
}
::mediapipe::Status VideoPreStreamCalculator::Process(CalculatorContext* cc) {
cc->GetCounter("Process")->Increment();
if (emitted_) {
return ::mediapipe::OkStatus();
}
if (frame_rate_in_prestream_) {
return ProcessWithFrameRateInPreStream(cc);
} else {
return ProcessWithFrameRateInOptions(cc);
}
}
::mediapipe::Status VideoPreStreamCalculator::ProcessWithFrameRateInOptions(
CalculatorContext* cc) {
cc->GetCounter("ProcessWithFrameRateInOptions")->Increment();
RET_CHECK_NE(cc->InputTimestamp(), Timestamp::PreStream());
const auto& frame = cc->Inputs().Index(0).Get<ImageFrame>();
header_->format = frame.Format();
header_->width = frame.Width();
header_->height = frame.Height();
const auto& options = cc->Options<VideoPreStreamCalculatorOptions>();
if (options.fps().has_value()) {
header_->frame_rate = options.fps().value();
} else if (options.fps().has_ratio()) {
const VideoPreStreamCalculatorOptions::Fps::Rational32& ratio =
options.fps().ratio();
if (ratio.numerator() > 0 && ratio.denominator() > 0) {
header_->frame_rate =
static_cast<double>(ratio.numerator()) / ratio.denominator();
}
}
RET_CHECK_NE(header_->frame_rate, 0.0) << "frame rate should be non-zero";
cc->Outputs().Index(0).Add(header_.release(), Timestamp::PreStream());
emitted_ = true;
return ::mediapipe::OkStatus();
}
} // namespace mediapipe

View File

@ -0,0 +1,43 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe;
import "mediapipe/framework/calculator.proto";
message VideoPreStreamCalculatorOptions {
extend CalculatorOptions {
optional VideoPreStreamCalculatorOptions ext = 151386123;
}
// An arbitrary number of frames per second.
// Prefer the StandardFps enum to store industry-standard, safe FPS values.
message Fps {
// The possibly approximated value of the frame rate, in frames per second.
// Unsafe to use in accurate computations because prone to rounding errors.
// For example, the 23.976 FPS value has no exact representation as a
// double.
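// For example, the NTSC film rate usually quoted as 23.976 fps is exactly
// 24000/1001, which the `ratio` field below can represent without rounding.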
optional double value = 1;
message Rational32 {
optional int32 numerator = 1;
optional int32 denominator = 2;
}
// The exact value of the frame rate, as a rational number.
optional Rational32 ratio = 2;
}
optional Fps fps = 1;
}

View File

@ -0,0 +1,186 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/video_stream_header.h"
#include "mediapipe/framework/port/gmock.h"
#include "mediapipe/framework/port/gtest.h"
#include "mediapipe/framework/port/parse_text_proto.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/framework/port/status_matchers.h"
namespace mediapipe {
namespace {
TEST(VideoPreStreamCalculatorTest, ProcessesWithFrameRateInOptions) {
auto config = ParseTextProtoOrDie<CalculatorGraphConfig>(R"(
input_stream: "input"
node {
calculator: "VideoPreStreamCalculator"
input_stream: "input"
output_stream: "output"
options {
[mediapipe.VideoPreStreamCalculatorOptions.ext] { fps { value: 3 } }
}
})");
CalculatorGraph graph;
MP_ASSERT_OK(graph.Initialize(config));
auto poller_status = graph.AddOutputStreamPoller("output");
MP_ASSERT_OK(poller_status.status());
OutputStreamPoller& poller = poller_status.ValueOrDie();
MP_ASSERT_OK(graph.StartRun({}));
MP_ASSERT_OK(graph.AddPacketToInputStream(
"input",
Adopt(new ImageFrame(ImageFormat::SRGB, 1, 2)).At(Timestamp(0))));
// It is *not* VideoPreStreamCalculator's job to detect errors in an
// ImageFrame stream. It just waits for the 1st ImageFrame, extracts info for
// VideoHeader, and emits it. Thus, the following is fine.
MP_ASSERT_OK(graph.AddPacketToInputStream(
"input",
Adopt(new ImageFrame(ImageFormat::SRGBA, 3, 4)).At(Timestamp(1))));
MP_ASSERT_OK(graph.CloseInputStream("input"));
Packet packet;
ASSERT_TRUE(poller.Next(&packet));
const auto& video_header = packet.Get<VideoHeader>();
EXPECT_EQ(video_header.format, ImageFormat::SRGB);
EXPECT_EQ(video_header.width, 1);
EXPECT_EQ(video_header.height, 2);
EXPECT_EQ(video_header.frame_rate, 3);
EXPECT_EQ(packet.Timestamp(), Timestamp::PreStream());
ASSERT_FALSE(poller.Next(&packet));
MP_EXPECT_OK(graph.WaitUntilDone());
}
TEST(VideoPreStreamCalculatorTest, ProcessesWithFrameRateInPreStream) {
auto config = ParseTextProtoOrDie<CalculatorGraphConfig>(R"(
input_stream: "frame"
input_stream: "input_header"
node {
calculator: "VideoPreStreamCalculator"
input_stream: "FRAME:frame"
input_stream: "VIDEO_PRESTREAM:input_header"
output_stream: "output_header"
})");
CalculatorGraph graph;
MP_ASSERT_OK(graph.Initialize(config));
auto poller_status = graph.AddOutputStreamPoller("output_header");
MP_ASSERT_OK(poller_status.status());
OutputStreamPoller& poller = poller_status.ValueOrDie();
MP_ASSERT_OK(graph.StartRun({}));
auto input_header = absl::make_unique<VideoHeader>();
input_header->frame_rate = 3.0;
MP_ASSERT_OK(graph.AddPacketToInputStream(
"input_header",
Adopt(input_header.release()).At(Timestamp::PreStream())));
MP_ASSERT_OK(graph.CloseInputStream("input_header"));
MP_ASSERT_OK(graph.AddPacketToInputStream(
"frame",
Adopt(new ImageFrame(ImageFormat::SRGB, 1, 2)).At(Timestamp(0))));
MP_ASSERT_OK(graph.CloseInputStream("frame"));
Packet packet;
ASSERT_TRUE(poller.Next(&packet));
const auto& output_header = packet.Get<VideoHeader>();
EXPECT_EQ(output_header.format, ImageFormat::SRGB);
EXPECT_EQ(output_header.width, 1);
EXPECT_EQ(output_header.height, 2);
EXPECT_EQ(output_header.frame_rate, 3.0);
EXPECT_EQ(packet.Timestamp(), Timestamp::PreStream());
ASSERT_FALSE(poller.Next(&packet));
MP_EXPECT_OK(graph.WaitUntilDone());
}
TEST(VideoPreStreamCalculatorTest, FailsWithoutFrameRateInOptions) {
auto config = ParseTextProtoOrDie<CalculatorGraphConfig>(R"(
input_stream: "frame"
node {
calculator: "VideoPreStreamCalculator"
input_stream: "frame"
output_stream: "output_header"
})");
CalculatorGraph graph;
MP_ASSERT_OK(graph.Initialize(config));
MP_ASSERT_OK(graph.StartRun({}));
MP_ASSERT_OK(graph.AddPacketToInputStream(
"frame",
Adopt(new ImageFrame(ImageFormat::SRGB, 1, 2)).At(Timestamp(0))));
MP_ASSERT_OK(graph.CloseInputStream("frame"));
::mediapipe::Status status = graph.WaitUntilDone();
EXPECT_FALSE(status.ok());
EXPECT_THAT(status.ToString(),
testing::HasSubstr("frame rate should be non-zero"));
}
// Input header missing.
TEST(VideoPreStreamCalculatorTest, FailsWithoutFrameRateInPreStream1) {
auto config = ParseTextProtoOrDie<CalculatorGraphConfig>(R"(
input_stream: "frame"
input_stream: "input_header"
node {
calculator: "VideoPreStreamCalculator"
input_stream: "FRAME:frame"
input_stream: "VIDEO_PRESTREAM:input_header"
output_stream: "output_header"
}
)");
CalculatorGraph graph;
MP_ASSERT_OK(graph.Initialize(config));
MP_ASSERT_OK(graph.StartRun({}));
MP_ASSERT_OK(graph.AddPacketToInputStream(
"frame",
Adopt(new ImageFrame(ImageFormat::SRGB, 1, 2)).At(Timestamp(0))));
MP_ASSERT_OK(graph.CloseInputStream("frame"));
MP_ASSERT_OK(graph.CloseInputStream("input_header"));
::mediapipe::Status status = graph.WaitUntilDone();
EXPECT_FALSE(status.ok());
EXPECT_THAT(status.ToString(),
testing::HasSubstr("frame rate should be non-zero"));
}
// Input header not at prestream (before, with, and after frame data).
TEST(VideoPreStreamCalculatorTest, FailsWithoutFrameRateInPreStream2) {
auto config = ParseTextProtoOrDie<CalculatorGraphConfig>(R"(
input_stream: "frame"
input_stream: "input_header"
node {
calculator: "VideoPreStreamCalculator"
input_stream: "FRAME:frame"
input_stream: "VIDEO_PRESTREAM:input_header"
output_stream: "output_header"
}
)");
for (int64 timestamp = -1; timestamp < 2; ++timestamp) {
CalculatorGraph graph;
MP_ASSERT_OK(graph.Initialize(config));
MP_ASSERT_OK(graph.StartRun({}));
auto input_header = absl::make_unique<VideoHeader>();
input_header->frame_rate = 3.0;
MP_ASSERT_OK(graph.AddPacketToInputStream(
"input_header",
Adopt(input_header.release()).At(Timestamp(timestamp))));
MP_ASSERT_OK(graph.CloseInputStream("input_header"));
MP_ASSERT_OK(graph.AddPacketToInputStream(
"frame",
Adopt(new ImageFrame(ImageFormat::SRGB, 1, 2)).At(Timestamp(0))));
MP_ASSERT_OK(graph.CloseInputStream("frame"));
::mediapipe::Status status = graph.WaitUntilDone();
EXPECT_FALSE(status.ok());
}
}
} // namespace
} // namespace mediapipe

mediapipe/docs/autoflip.md (new file, 321 lines)
View File

@ -0,0 +1,321 @@
# Saliency-Aware Video Cropping using AutoFlip
## Introduction
AutoFlip is an automatic video cropping pipeline built on top of MediaPipe. This
example focuses on demonstrating how to use AutoFlip to convert an input video
to arbitrary aspect ratios.
For overall context on AutoFlip, please read this
[Google AI Blog post](https://mediapipe.page.link/autoflip).
![AutoFlip example](images/autoflip_edited_example.gif)
## Building
Run the following command to build the AutoFlip pipeline:
```bash
bazel build -c opt --define MEDIAPIPE_DISABLE_GPU=1 mediapipe/examples/desktop/autoflip:run_autoflip
```
## Running
```bash
GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/autoflip/run_autoflip \
--calculator_graph_config_file=mediapipe/examples/desktop/autoflip/autoflip_graph.pbtxt \
--input_side_packets=input_video_path=/absolute/path/to/the/local/video/file,\
output_video_path=/absolute/path/to/save/the/output/video/file,\
aspect_ratio=9:16
```
Use the `aspect_ratio` flag to provide the output aspect ratio. The format
should be `width:height`, where `width` and `height` are two positive integers.
AutoFlip supports both landscape-to-portrait and portrait-to-landscape
conversions. The pipeline internally compares the target aspect ratio against
the original one and determines the correct conversion automatically.
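For example, `aspect_ratio=1:1` produces a square output, while
`aspect_ratio=16:9` converts a portrait input into a landscape crop.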
We have put a couple of test videos in this
[Google Drive folder](https://drive.google.com/corp/drive/u/0/folders/1KK9LV--Ey0UEVpxssVLhVl7dypgJSQgk).
You can download the videos to your local file system, then modify the command
above accordingly to run AutoFlip on them.
## MediaPipe Graph
![graph visualization](images/autoflip_graph.png)
To visualize the graph as shown above, copy the text specification of the graph
below and paste it into [MediaPipe Visualizer](https://viz.mediapipe.dev).
```bash
# Autoflip graph that only renders the final cropped video. For use with
# end user applications.
max_queue_size: -1
# VIDEO_PREP: Decodes an input video file into images and a video header.
node {
calculator: "OpenCvVideoDecoderCalculator"
input_side_packet: "INPUT_FILE_PATH:input_video_path"
output_stream: "VIDEO:video_raw"
output_stream: "VIDEO_PRESTREAM:video_header"
output_side_packet: "SAVED_AUDIO_PATH:audio_path"
}
# VIDEO_PREP: Scale the input video before feature extraction.
node {
calculator: "ScaleImageCalculator"
input_stream: "FRAMES:video_raw"
input_stream: "VIDEO_HEADER:video_header"
output_stream: "FRAMES:video_frames_scaled"
node_options: {
[type.googleapis.com/mediapipe.ScaleImageCalculatorOptions]: {
preserve_aspect_ratio: true
output_format: SRGB
target_width: 480
algorithm: DEFAULT_WITHOUT_UPSCALE
}
}
}
# VIDEO_PREP: Create a low frame rate stream for feature extraction.
node {
calculator: "PacketThinnerCalculator"
input_stream: "video_frames_scaled"
output_stream: "video_frames_scaled_downsampled"
node_options: {
[type.googleapis.com/mediapipe.PacketThinnerCalculatorOptions]: {
thinner_type: ASYNC
period: 500000
}
}
}
# DETECTION: find borders around the video and major background color.
node {
calculator: "BorderDetectionCalculator"
input_stream: "VIDEO:video_raw"
output_stream: "DETECTED_BORDERS:borders"
}
# DETECTION: find shot/scene boundaries on the full frame rate stream.
node {
calculator: "ShotBoundaryCalculator"
input_stream: "VIDEO:video_frames_scaled"
output_stream: "IS_SHOT_CHANGE:shot_change"
options {
[type.googleapis.com/mediapipe.autoflip.ShotBoundaryCalculatorOptions] {
min_shot_span: 0.2
min_motion: 0.3
window_size: 15
min_shot_measure: 10
min_motion_with_shot_measure: 0.05
}
}
}
# DETECTION: find faces on the down sampled stream
node {
calculator: "AutoFlipFaceDetectionSubgraph"
input_stream: "VIDEO:video_frames_scaled_downsampled"
output_stream: "DETECTIONS:face_detections"
}
node {
calculator: "FaceToRegionCalculator"
input_stream: "VIDEO:video_frames_scaled_downsampled"
input_stream: "FACES:face_detections"
output_stream: "REGIONS:face_regions"
}
# DETECTION: find objects on the down sampled stream
node {
calculator: "AutoFlipObjectDetectionSubgraph"
input_stream: "VIDEO:video_frames_scaled_downsampled"
output_stream: "DETECTIONS:object_detections"
}
node {
calculator: "LocalizationToRegionCalculator"
input_stream: "DETECTIONS:object_detections"
output_stream: "REGIONS:object_regions"
options {
[type.googleapis.com/mediapipe.autoflip.LocalizationToRegionCalculatorOptions] {
output_all_signals: true
}
}
}
# SIGNAL FUSION: Combine detections (with weights) on each frame
node {
calculator: "SignalFusingCalculator"
input_stream: "shot_change"
input_stream: "face_regions"
input_stream: "object_regions"
output_stream: "salient_regions"
options {
[type.googleapis.com/mediapipe.autoflip.SignalFusingCalculatorOptions] {
signal_settings {
type { standard: FACE_CORE_LANDMARKS }
min_score: 0.85
max_score: 0.9
is_required: false
}
signal_settings {
type { standard: FACE_ALL_LANDMARKS }
min_score: 0.8
max_score: 0.85
is_required: false
}
signal_settings {
type { standard: FACE_FULL }
min_score: 0.8
max_score: 0.85
is_required: false
}
signal_settings {
type: { standard: HUMAN }
min_score: 0.75
max_score: 0.8
is_required: false
}
signal_settings {
type: { standard: PET }
min_score: 0.7
max_score: 0.75
is_required: false
}
signal_settings {
type: { standard: CAR }
min_score: 0.7
max_score: 0.75
is_required: false
}
signal_settings {
type: { standard: OBJECT }
min_score: 0.1
max_score: 0.2
is_required: false
}
}
}
}
# CROPPING: make decisions about how to crop each frame.
node {
calculator: "SceneCroppingCalculator"
input_side_packet: "EXTERNAL_ASPECT_RATIO:aspect_ratio"
input_stream: "VIDEO_FRAMES:video_raw"
input_stream: "KEY_FRAMES:video_frames_scaled_downsampled"
input_stream: "DETECTION_FEATURES:salient_regions"
input_stream: "STATIC_FEATURES:borders"
input_stream: "SHOT_BOUNDARIES:shot_change"
output_stream: "CROPPED_FRAMES:cropped_frames"
node_options: {
[type.googleapis.com/mediapipe.autoflip.SceneCroppingCalculatorOptions]: {
max_scene_size: 600
key_frame_crop_options: {
score_aggregation_type: CONSTANT
}
scene_camera_motion_analyzer_options: {
motion_stabilization_threshold_percent: 0.3
salient_point_bound: 0.499
}
padding_parameters: {
blur_cv_size: 200
overlay_opacity: 0.6
}
target_size_type: MAXIMIZE_TARGET_DIMENSION
}
}
}
# ENCODING(required): encode the video stream for the final cropped output.
node {
calculator: "VideoPreStreamCalculator"
# Fetch frame format and dimension from input frames.
input_stream: "FRAME:cropped_frames"
# Copying frame rate and duration from original video.
input_stream: "VIDEO_PRESTREAM:video_header"
output_stream: "output_frames_video_header"
}
node {
calculator: "OpenCvVideoEncoderCalculator"
input_stream: "VIDEO:cropped_frames"
input_stream: "VIDEO_PRESTREAM:output_frames_video_header"
input_side_packet: "OUTPUT_FILE_PATH:output_video_path"
input_side_packet: "AUDIO_FILE_PATH:audio_path"
node_options: {
[type.googleapis.com/mediapipe.OpenCvVideoEncoderCalculatorOptions]: {
codec: "avc1"
video_format: "mp4"
}
}
}
```
## Advanced Parameters
### Required vs. Best-Effort Saliency Features
AutoFlip allows users to implement and specify custom features to be used in the
camera trajectory computation. If the user would like to detect and preserve
scenes of lions in a wildlife protection video, for example, they could
implement and add a feature detection calculator for lions into the pipeline.
Refer to `AutoFlipFaceDetectionSubgraph` and `FaceToRegionCalculator`, or
`AutoFlipObjectDetectionSubgraph` and `LocalizationToRegionCalculator` for
examples of how to create new feature detection calculators.
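For example, a hypothetical lion detector could be wired in alongside the face
and object subgraphs as sketched below; `LionDetectionSubgraph` and
`LionToRegionCalculator` are illustrative placeholder names, not calculators
provided by MediaPipe.
```bash
# DETECTION (hypothetical): find lions on the down-sampled stream.
node {
  calculator: "LionDetectionSubgraph"
  input_stream: "VIDEO:video_frames_scaled_downsampled"
  output_stream: "DETECTIONS:lion_detections"
}
# Convert the detections into SalientRegion messages with a custom signal type.
node {
  calculator: "LionToRegionCalculator"
  input_stream: "VIDEO:video_frames_scaled_downsampled"
  input_stream: "DETECTIONS:lion_detections"
  output_stream: "REGIONS:lion_regions"
}
```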
After adding different feature signals into the graph, use the
`SignalFusingCalculator` node to specify types and weights for different feature
signals. For example, in the graph above, we specified the `face_regions` and
`object_regions` input streams to represent face signals and generic object
signals, respectively.
The larger the weight, the more heavily the feature is weighted when AutoFlip
computes the camera trajectory. Use the `is_required` flag to mark a feature as
a hard constraint, in which case the computed camera trajectory will try its
best to cover these feature types in the cropped video. If for some reason the
required features cannot all be covered (for example, when they are too spread
out in the video), AutoFlip will apply a padding effect to cover as much
salient content as possible. See the illustration below.
![graph is_required](images/autoflip_is_required.gif)
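As a sketch, a custom signal could then be registered in the
`SignalFusingCalculator` with its own weight range and marked as required. The
`lion_regions` stream and the `custom: "lion"` type below are illustrative
assumptions that match the hypothetical detector sketched above, not settings
that ship with AutoFlip.
```bash
node {
  calculator: "SignalFusingCalculator"
  input_stream: "shot_change"
  input_stream: "face_regions"
  input_stream: "object_regions"
  # Hypothetical custom signal produced by a user-added detector.
  input_stream: "lion_regions"
  output_stream: "salient_regions"
  options {
    [type.googleapis.com/mediapipe.autoflip.SignalFusingCalculatorOptions] {
      signal_settings {
        # Custom type emitted by the hypothetical lion detector.
        type { custom: "lion" }
        min_score: 0.9
        max_score: 0.95
        # Hard constraint: keep this content inside the crop whenever possible.
        is_required: true
      }
      # ... signal_settings entries for faces, humans, pets, cars and objects
      # as in the graph above ...
    }
  }
}
```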
### Visualization to Facilitate Debugging
`SceneCroppingCalculator` provides two extra output streams,
`KEY_FRAME_CROP_REGION_VIZ_FRAMES` and `SALIENT_POINT_FRAME_VIZ_FRAMES`, to
visualize the cropping window as well as the salient points detected on each
frame. You can modify the `SceneCroppingCalculator` node as shown below to
enable these two output streams.
```bash
node {
calculator: "SceneCroppingCalculator"
input_side_packet: "EXTERNAL_ASPECT_RATIO:aspect_ratio"
input_stream: "VIDEO_FRAMES:video_raw"
input_stream: "KEY_FRAMES:video_frames_scaled_downsampled"
input_stream: "DETECTION_FEATURES:salient_regions"
input_stream: "STATIC_FEATURES:borders"
input_stream: "SHOT_BOUNDARIES:shot_change"
output_stream: "CROPPED_FRAMES:cropped_frames"
output_stream: "KEY_FRAME_CROP_REGION_VIZ_FRAMES:key_frame_crop_viz_frames"
output_stream: "SALIENT_POINT_FRAME_VIZ_FRAMES:salient_point_viz_frames"
node_options: {
[type.googleapis.com/mediapipe.autoflip.SceneCroppingCalculatorOptions]: {
max_scene_size: 600
key_frame_crop_options: {
score_aggregation_type: CONSTANT
}
scene_camera_motion_analyzer_options: {
motion_stabilization_threshold_percent: 0.3
salient_point_bound: 0.499
}
padding_parameters: {
blur_cv_size: 200
overlay_opacity: 0.6
}
target_size_type: MAXIMIZE_TARGET_DIMENSION
}
}
}
```

View File

@ -151,7 +151,6 @@ node {
input_stream: "room_video_tick_signal"
output_stream: "cloned_room_mic_signal"
output_stream: "cloned_lighting_sensor"
output_stream: "cloned_video_tick_signal"
}
```

View File

@ -108,9 +108,9 @@ and model details are described in the
[Hello World for C++](./hello_world_desktop.md) shows how to run a simple graph
using the MediaPipe C++ APIs.
### Feature Extration for YouTube-8M Challenge
### Feature Extraction and Model Inference for YouTube-8M Challenge
[Feature Extration and Model Inference for YouTube-8M Challenge](./youtube_8m.md)
[Feature Extraction and Model Inference for YouTube-8M Challenge](./youtube_8m.md)
shows how to use MediaPipe to prepare training data for the YouTube-8M Challenge
and do the model inference with the baseline model.
@ -120,6 +120,11 @@ and do the model inference with the baseline model.
MediaPipe for media processing to prepare video data sets for training a
TensorFlow model.
### Automatic video cropping
[AutoFlip](./autoflip.md) shows how to use MediaPipe to build an automatic video
cropping pipeline that can convert an input video to arbitrary aspect ratios.
### Object Detection on Desktop
[Object Detection on Desktop](./object_detection_desktop.md) shows how to run

View File

@ -2,9 +2,10 @@
MediaPipe is able to run cross platform across device types like desktop, mobile
and edge devices. Here is an example of running MediaPipe
[face detection pipeline](./face_detection_desktop.md) on edge device like
[Google Coral dev board](https://coral.withgoogle.com/products/dev-board) with
[Edge TPU](https://cloud.google.com/edge-tpu/). This MediaPipe Coral face
[face detection pipeline](./face_detection_desktop.md) on edge device like the
[Coral Dev Board](https://coral.ai/products/dev-board).
This MediaPipe Coral face
detection pipeline is running [coral specific quantized version](https://github.com/google/mediapipe/blob/master/mediapipe/examples/coral/models/face-detector-quantized_edgetpu.tflite)
of the [MediaPipe face detection TFLite model](https://github.com/google/mediapipe/blob/master/mediapipe/models/face_detection_front.tflite)
accelerated on Edge TPU.
@ -13,8 +14,10 @@ accelerated on Edge TPU.
We recommend building the MediaPipe binaries not on the edge device due to
limited compute resulting in long build times. Instead, we will build MediaPipe
binaries using Docker containers on a more powerful host machine. For step by
step details of cross compiling and running MediaPipe binaries on Coral dev
board, please refer to [README.md in MediaPipe Coral example folder](https://github.com/google/mediapipe/blob/master/mediapipe/examples/coral/README.md).
binaries using Docker containers on a more powerful host machine.
![Face Detection running on Coral](images/face_detection_demo_coral.jpg)
For step by
step details of cross compiling and running MediaPipe binaries on the Coral Dev
Board, please refer to [README.md in MediaPipe Coral example folder](https://github.com/google/mediapipe/tree/master/mediapipe/examples/coral).
![Face Detection running on Coral](images/face_detection_demo_coral.gif)

View File

@ -24,9 +24,9 @@ Mediapipe. At a minimum, a new calculator must implement the below four methods
* `GetContract()`
* Calculator authors can specify the expected types of inputs and outputs of a calculator in GetContract(). When a graph is initialized, the framework calls a static method to verify if the packet types of the connected inputs and outputs match the information in this specification.
* `Open()`
* After a graph starts, the framework calls `Open()`. The input side packets are available to the calculator at this point. `Open()` interprets the node configuration (see Section \ref{graph_config}) operations and prepares the calculator's per-graph-run state. This function may also write packets to calculator outputs. An error during `Open()` can terminate the graph run.
* After a graph starts, the framework calls `Open()`. The input side packets are available to the calculator at this point. `Open()` interprets the node configuration operations (see Section [GraphConfig](#graphconfig)) and prepares the calculator's per-graph-run state. This function may also write packets to calculator outputs. An error during `Open()` can terminate the graph run.
* `Process()`
* For a calculator with inputs, the framework calls `Process()` repeatedly whenever at least one input stream has a packet available. The framework by default guarantees that all inputs have the same timestamp (see Section \ref{scheduling} for more information). Multiple `Process()` calls can be invoked simultaneously when parallel execution is enabled. If an error occurs during `Process()`, the framework calls `Close()` and the graph run terminates.
* For a calculator with inputs, the framework calls `Process()` repeatedly whenever at least one input stream has a packet available. The framework by default guarantees that all inputs have the same timestamp (see [Framework Architecture](scheduling_sync.md) for more information). Multiple `Process()` calls can be invoked simultaneously when parallel execution is enabled. If an error occurs during `Process()`, the framework calls `Close()` and the graph run terminates.
* `Close()`
* After all calls to `Process()` finish or when all input streams close, the framework calls `Close()`. This function is always called if `Open()` was called and succeeded, even if the graph run terminated because of an error. No inputs are available via any input streams during `Close()`, but it still has access to input side packets and therefore may write outputs. After `Close()` returns, the calculator should be considered a dead node. The calculator object is destroyed as soon as the graph finishes running.
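As a rough illustration of this lifecycle, a minimal pass-through calculator
might look like the following. The `PassThroughExampleCalculator` name and the
single-stream layout are illustrative assumptions, not code from this commit.
```c++
#include "mediapipe/framework/calculator_framework.h"

namespace mediapipe {

// Minimal sketch: forwards each packet from its single input stream to its
// single output stream, preserving timestamps.
class PassThroughExampleCalculator : public CalculatorBase {
 public:
  static ::mediapipe::Status GetContract(CalculatorContract* cc) {
    // Declare one input and one output stream; the output type mirrors the
    // input type, so the calculator works with any packet type.
    cc->Inputs().Index(0).SetAny();
    cc->Outputs().Index(0).SetSameAs(&cc->Inputs().Index(0));
    return ::mediapipe::OkStatus();
  }

  ::mediapipe::Status Open(CalculatorContext* cc) override {
    // Per-graph-run setup would go here; nothing is needed for pass-through.
    return ::mediapipe::OkStatus();
  }

  ::mediapipe::Status Process(CalculatorContext* cc) override {
    // Called whenever an input packet is available; forward it unchanged.
    cc->Outputs().Index(0).AddPacket(cc->Inputs().Index(0).Value());
    return ::mediapipe::OkStatus();
  }

  ::mediapipe::Status Close(CalculatorContext* cc) override {
    // No per-run state to release.
    return ::mediapipe::OkStatus();
  }
};
REGISTER_CALCULATOR(PassThroughExampleCalculator);

}  // namespace mediapipe
```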

Binary image files changed in this commit (previews not shown).

View File

@ -39,19 +39,12 @@ To build and run iOS apps:
$ cd mediapipe
```
2. Install Bazel (0.24.1 and above required).
2. Install Bazel (version between 0.24.1 and 1.2.1).
Option 1. Use package manager tool to install the latest version of Bazel.
```bash
$ sudo apt-get install bazel
# Run 'bazel version' to check version of bazel installed
```
Option 2. Follow the official
Follow the official
[Bazel documentation](https://docs.bazel.build/versions/master/install-ubuntu.html)
to install any version of Bazel manually.
to install Bazel manually. Note that MediaPipe doesn't support Bazel 2.0.0+
yet.
3. Install OpenCV and FFmpeg.
@ -158,11 +151,12 @@ To build and run iOS apps:
$ cd mediapipe
```
2. Install Bazel (0.24.1 and above required).
2. Install Bazel (version between 0.24.1 and 1.2.1).
Follow the official
[Bazel documentation](https://docs.bazel.build/versions/master/install-redhat.html)
to install Bazel manually.
to install Bazel manually. Note that MediaPipe doesn't support Bazel 2.0.0+
yet.
3. Install OpenCV.
@ -361,6 +355,9 @@ To build and run iOS apps:
### Installing on Windows Subsystem for Linux (WSL)
Note: WSL has historically not provided access to USB cameras. Mediapipe can use
a video file as input.
1. Follow the
[instructions](https://docs.microsoft.com/en-us/windows/wsl/install-win10) to
install Windows Subsystem for Linux (Ubuntu).
@ -383,7 +380,7 @@ To build and run iOS apps:
username@DESKTOP-TMVLBJ1:~$ sudo apt-get update && sudo apt-get install -y build-essential git python zip adb openjdk-8-jdk
```
5. Install Bazel (0.24.1 and above required).
5. Install Bazel (version between 0.24.1 and 1.2.1).
```bash
username@DESKTOP-TMVLBJ1:~$ curl -sLO --retry 5 --retry-max-time 10 \

View File

@ -3,8 +3,9 @@
MediaPipe is able to run cross platform across device types like desktop, mobile
and edge devices. Here is an example of running MediaPipe
[object detection pipeline](./object_detection_desktop.md) on edge device like
[Google Coral dev board](https://coral.withgoogle.com/products/dev-board) with
[Edge TPU](https://cloud.google.com/edge-tpu/). This MediaPipe Coral object
the [Coral Dev Board](https://coral.ai/products/dev-board).
This MediaPipe Coral object
detection pipeline is running [coral specific quantized version](https://github.com/google/mediapipe/blob/master/mediapipe/examples/coral/models/object-detector-quantized_edgetpu.tflite)
of the [MediaPipe object detection TFLite model](https://github.com/google/mediapipe/blob/master/mediapipe/models/object_detection_front.tflite)
accelerated on Edge TPU.
@ -13,8 +14,10 @@ accelerated on Edge TPU.
We recommend building the MediaPipe binaries not on the edge device due to
limited compute resulting in long build times. Instead, we will build MediaPipe
binaries using Docker containers on a more powerful host machine. For step by
step details of cross compiling and running MediaPipe binaries on Coral dev
board, please refer to [README.md in MediaPipe Coral example folder](https://github.com/google/mediapipe/blob/master/mediapipe/examples/coral/README.md).
binaries using Docker containers on a more powerful host machine.
For step by
step details of cross compiling and running MediaPipe binaries on the Coral Dev
Board, please refer to [README.md in MediaPipe Coral example folder](https://github.com/google/mediapipe/tree/master/mediapipe/examples/coral).
![Object Detection running on Coral](images/object_detection_demo_coral.jpg)

View File

@ -6,7 +6,7 @@ that performs object detection and tracking.
Note that object detection is using TensorFlow Lite on GPU while tracking is using CPU.
For overall context on object detection and tracking, please read this
[Google Developer Blog](https://mediapipe.page.link/objecttrackingblog).
[Google Developers Blog](https://mediapipe.page.link/objecttrackingblog).
![object_tracking_android_gpu_gif](images/mobile/object_tracking_android_gpu.gif)
@ -238,7 +238,7 @@ tracking library that can be used for other use cases.
![object_tracking_subgraph](images/mobile/object_tracking_subgraph.png)
[Source pbtxt file](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/tracking/subgraphs/object_tracking.pbtxt)
[Source pbtxt file](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/tracking/subgraphs/object_tracking_gpu.pbtxt)
```bash
# MediaPipe object tracking subgraph.
@ -304,7 +304,7 @@ node: {
![box_tracking_subgraph](images/mobile/box_tracking_subgraph.png)
[Source pbtxt file](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/tracking/subgraphs/box_tracking.pbtxt)
[Source pbtxt file](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/tracking/subgraphs/box_tracking_gpu.pbtxt)
```bash
# MediaPipe box tracking subgraph.
@ -473,3 +473,19 @@ node {
}
```
## Desktop
[Source](https://github.com/google/mediapipe/tree/master/mediapipe/examples/desktop/object_tracking)
Note that object detection is using TensorFlow Lite on CPU and tracking is using
CPU.
To build and run the app:
```bash
bazel build -c opt mediapipe/examples/desktop/object_tracking:object_tracking_cpu
bazel-bin/mediapipe/examples/desktop/object_tracking/object_tracking_cpu \
--calculator_graph_config_file=mediapipe/graphs/tracking/object_detection_tracking_desktop_live.pbtxt
```

View File

@ -1,4 +1,4 @@
# Feature Extration and Model Inference for YouTube-8M Challenge
# Feature Extraction and Model Inference for YouTube-8M Challenge
MediaPipe is a useful and general framework for media processing that can assist
with research, development, and deployment of ML models. This example focuses on
@ -31,7 +31,9 @@ videos.
### Steps to run the YouTube-8M feature extraction graph
1. Checkout the mediapipe repository.
1. Checkout the repository and follow
[the installation instructions](https://github.com/google/mediapipe/blob/master/mediapipe/docs/install.md)
to set up MediaPipe.
```bash
git clone https://github.com/google/mediapipe.git

View File

@ -62,7 +62,6 @@ android_library(
"//third_party:androidx_appcompat",
"//third_party:androidx_constraint_layout",
"//third_party:androidx_legacy_support_v4",
"//third_party:androidx_material",
"//third_party:androidx_recyclerview",
"//third_party:opencv",
"@androidx_concurrent_futures//jar",

View File

@ -62,7 +62,6 @@ android_library(
"//third_party:androidx_appcompat",
"//third_party:androidx_constraint_layout",
"//third_party:androidx_legacy_support_v4",
"//third_party:androidx_material",
"//third_party:androidx_recyclerview",
"//third_party:opencv",
"@androidx_concurrent_futures//jar",

View File

@ -61,7 +61,6 @@ android_library(
"//third_party:androidx_appcompat",
"//third_party:androidx_constraint_layout",
"//third_party:androidx_legacy_support_v4",
"//third_party:androidx_material",
"//third_party:androidx_recyclerview",
"//third_party:opencv",
"@androidx_concurrent_futures//jar",

View File

@ -62,7 +62,6 @@ android_library(
"//third_party:androidx_appcompat",
"//third_party:androidx_constraint_layout",
"//third_party:androidx_legacy_support_v4",
"//third_party:androidx_material",
"//third_party:androidx_recyclerview",
"//third_party:opencv",
"@androidx_concurrent_futures//jar",

View File

@ -83,7 +83,6 @@ android_library(
"//third_party:androidx_appcompat",
"//third_party:androidx_constraint_layout",
"//third_party:androidx_legacy_support_v4",
"//third_party:androidx_material",
"//third_party:androidx_recyclerview",
"//third_party:opencv",
"@androidx_concurrent_futures//jar",

View File

@ -83,7 +83,6 @@ android_library(
"//third_party:androidx_appcompat",
"//third_party:androidx_constraint_layout",
"//third_party:androidx_legacy_support_v4",
"//third_party:androidx_material",
"//third_party:androidx_recyclerview",
"//third_party:opencv",
"@androidx_concurrent_futures//jar",

View File

@ -62,7 +62,6 @@ android_library(
"//third_party:androidx_appcompat",
"//third_party:androidx_constraint_layout",
"//third_party:androidx_legacy_support_v4",
"//third_party:androidx_material",
"//third_party:androidx_recyclerview",
"//third_party:opencv",
"@androidx_concurrent_futures//jar",

View File

@ -55,6 +55,7 @@ android_library(
resource_files = glob(["res/**"]),
deps = [
":mediapipe_jni_lib",
"//mediapipe/framework/formats:detection_java_proto_lite",
"//mediapipe/java/com/google/mediapipe/components:android_camerax_helper",
"//mediapipe/java/com/google/mediapipe/components:android_components",
"//mediapipe/java/com/google/mediapipe/framework:android_framework",
@ -62,7 +63,6 @@ android_library(
"//third_party:androidx_appcompat",
"//third_party:androidx_constraint_layout",
"//third_party:androidx_legacy_support_v4",
"//third_party:androidx_material",
"//third_party:androidx_recyclerview",
"//third_party:opencv",
"@androidx_concurrent_futures//jar",

View File

@ -17,18 +17,22 @@ package com.google.mediapipe.apps.objectdetectiongpu;
import android.graphics.SurfaceTexture;
import android.os.Bundle;
import androidx.appcompat.app.AppCompatActivity;
import android.util.Log;
import android.util.Size;
import android.view.SurfaceHolder;
import android.view.SurfaceView;
import android.view.View;
import android.view.ViewGroup;
import com.google.mediapipe.formats.proto.DetectionProto.Detection;
import com.google.mediapipe.components.CameraHelper;
import com.google.mediapipe.components.CameraXPreviewHelper;
import com.google.mediapipe.components.ExternalTextureConverter;
import com.google.mediapipe.components.FrameProcessor;
import com.google.mediapipe.components.PermissionHelper;
import com.google.mediapipe.framework.AndroidAssetUtil;
import com.google.mediapipe.framework.PacketGetter;
import com.google.mediapipe.glutil.EglManager;
import java.util.List;
/** Main activity of MediaPipe example apps. */
public class MainActivity extends AppCompatActivity {
@ -37,6 +41,7 @@ public class MainActivity extends AppCompatActivity {
private static final String BINARY_GRAPH_NAME = "objectdetectiongpu.binarypb";
private static final String INPUT_VIDEO_STREAM_NAME = "input_video";
private static final String OUTPUT_VIDEO_STREAM_NAME = "output_video";
private static final String OUTPUT_DETECTIONS_STREAM_NAME = "output_detections";
private static final CameraHelper.CameraFacing CAMERA_FACING = CameraHelper.CameraFacing.BACK;
// Flips the camera-preview frames vertically before sending them into FrameProcessor to be
@ -90,6 +95,14 @@ public class MainActivity extends AppCompatActivity {
OUTPUT_VIDEO_STREAM_NAME);
processor.getVideoSurfaceOutput().setFlipY(FLIP_FRAMES_VERTICALLY);
processor.addPacketCallback(
OUTPUT_DETECTIONS_STREAM_NAME,
(packet) -> {
Log.d(TAG, "Received detections packet.");
List<Detection> detections = PacketGetter.getProtoVector(packet, Detection.parser());
Log.d(TAG, "[TS:" + packet.getTimestamp() + "] " + getDetectionsDebugString(detections));
});
PermissionHelper.checkAndRequestCameraPermissions(this);
}
@ -164,4 +177,22 @@ public class MainActivity extends AppCompatActivity {
});
cameraHelper.startCamera(this, CAMERA_FACING, /*surfaceTexture=*/ null);
}
private static String getDetectionsDebugString(List<Detection> detections) {
if (detections.isEmpty()) {
return "No detections";
}
String detectionsStr = "Number of objects detected: " + detections.size() + "\n";
int objectIndex = 0;
for (Detection detection : detections) {
detectionsStr += "\t#Object[" + objectIndex + "]: \n";
List<String> labels = detection.getLabelList();
List<Float> scores = detection.getScoreList();
for (int i = 0; i < labels.size(); ++i) {
detectionsStr += "\t\tLabel [" + i + "]: " + labels.get(i) + ", " + scores.get(i) + "\n";
}
++objectIndex;
}
return detectionsStr;
}
}

View File

@ -1,8 +1,12 @@
# Coral Dev Board Setup (experimental)
**Dislaimer**: Running MediaPipe on Coral is experimental, and this process may not be exact and is subject to change. These instructions have only been tested on the coral dev board with OS version _mendel day_, and may vary for different devices and workstations.
**Disclaimer**: Running MediaPipe on Coral is experimental, and this process may
not be exact and is subject to change. These instructions have only been tested
on the [Coral Dev Board](https://coral.ai/products/dev-board/) with Mendel 4.0,
and may vary for different devices and workstations.
This file describes how to prepare a Google Coral Dev Board and setup a linux Docker container for building MediaPipe applications that run on Edge TPU.
This file describes how to prepare a Coral Dev Board and set up a Linux
Docker container for building MediaPipe applications that run on Edge TPU.
## Before creating the Docker

View File

@ -13,6 +13,7 @@
// limitations under the License.
//
// An example of sending OpenCV webcam frames into a MediaPipe graph.
#include <cstdlib>
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/image_frame.h"
@ -89,7 +90,6 @@ DEFINE_string(output_video_path, "",
MP_RETURN_IF_ERROR(graph.StartRun({}));
LOG(INFO) << "Start grabbing and processing frames.";
size_t frame_timestamp = 0;
bool grab_frames = true;
while (grab_frames) {
// Capture opencv camera or video frame.
@ -110,9 +110,11 @@ DEFINE_string(output_video_path, "",
camera_frame.copyTo(input_frame_mat);
// Send image packet into the graph.
size_t frame_timestamp_us =
(double)cv::getTickCount() / (double)cv::getTickFrequency() * 1e6;
MP_RETURN_IF_ERROR(graph.AddPacketToInputStream(
kInputStream, mediapipe::Adopt(input_frame.release())
.At(mediapipe::Timestamp(frame_timestamp++))));
.At(mediapipe::Timestamp(frame_timestamp_us))));
// Get the graph result packet, or stop if that fails.
mediapipe::Packet packet;
@ -144,8 +146,9 @@ int main(int argc, char** argv) {
::mediapipe::Status run_status = RunMPPGraph();
if (!run_status.ok()) {
LOG(ERROR) << "Failed to run the graph: " << run_status.message();
return EXIT_FAILURE;
} else {
LOG(INFO) << "Success!";
}
return 0;
return EXIT_SUCCESS;
}

View File

@ -0,0 +1,56 @@
load("//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library")
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
licenses(["notice"]) # Apache 2.0
package(default_visibility = ["//mediapipe/examples:__subpackages__"])
proto_library(
name = "autoflip_messages_proto",
srcs = ["autoflip_messages.proto"],
deps = [
"//mediapipe/framework:calculator_proto",
],
)
mediapipe_cc_proto_library(
name = "autoflip_messages_cc_proto",
srcs = ["autoflip_messages.proto"],
cc_deps = ["//mediapipe/framework:calculator_cc_proto"],
visibility = ["//mediapipe/examples:__subpackages__"],
deps = [":autoflip_messages_proto"],
)
cc_binary(
name = "run_autoflip",
deps = [
"//mediapipe/calculators/core:packet_thinner_calculator",
"//mediapipe/calculators/image:scale_image_calculator",
"//mediapipe/calculators/video:opencv_video_decoder_calculator",
"//mediapipe/calculators/video:opencv_video_encoder_calculator",
"//mediapipe/calculators/video:video_pre_stream_calculator",
"//mediapipe/examples/desktop:simple_run_graph_main",
"//mediapipe/examples/desktop/autoflip/calculators:border_detection_calculator",
"//mediapipe/examples/desktop/autoflip/calculators:face_to_region_calculator",
"//mediapipe/examples/desktop/autoflip/calculators:localization_to_region_calculator",
"//mediapipe/examples/desktop/autoflip/calculators:scene_cropping_calculator",
"//mediapipe/examples/desktop/autoflip/calculators:shot_boundary_calculator",
"//mediapipe/examples/desktop/autoflip/calculators:signal_fusing_calculator",
"//mediapipe/examples/desktop/autoflip/calculators:video_filtering_calculator",
"//mediapipe/examples/desktop/autoflip/subgraph:autoflip_face_detection_subgraph",
"//mediapipe/examples/desktop/autoflip/subgraph:autoflip_object_detection_subgraph",
],
)

View File

@ -0,0 +1,25 @@
### Steps to run the AutoFlip video cropping graph
1. Checkout the repository and follow
[the installation instructions](https://github.com/google/mediapipe/blob/master/mediapipe/docs/install.md)
to set up MediaPipe.
```bash
git clone https://github.com/google/mediapipe.git
cd mediapipe
```
2. Build and run the run_autoflip binary to process a local video.
```bash
bazel build -c opt --define MEDIAPIPE_DISABLE_GPU=1 \
mediapipe/examples/desktop/autoflip:run_autoflip
GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/autoflip/run_autoflip \
--calculator_graph_config_file=mediapipe/examples/desktop/autoflip/autoflip_graph.pbtxt \
--input_side_packets=input_video_path=/absolute/path/to/the/local/video/file,\
output_video_path=/absolute/path/to/save/the/output/video/file,\
aspect_ratio=width:height
```
3. View the cropped video.

View File

@ -0,0 +1,202 @@
# Autoflip graph that only renders the final cropped video. For use with
# end user applications.
max_queue_size: -1
# VIDEO_PREP: Decodes an input video file into images and a video header.
node {
calculator: "OpenCvVideoDecoderCalculator"
input_side_packet: "INPUT_FILE_PATH:input_video_path"
output_stream: "VIDEO:video_raw"
output_stream: "VIDEO_PRESTREAM:video_header"
output_side_packet: "SAVED_AUDIO_PATH:audio_path"
}
# VIDEO_PREP: Scale the input video before feature extraction.
node {
calculator: "ScaleImageCalculator"
input_stream: "FRAMES:video_raw"
input_stream: "VIDEO_HEADER:video_header"
output_stream: "FRAMES:video_frames_scaled"
options: {
[mediapipe.ScaleImageCalculatorOptions.ext]: {
preserve_aspect_ratio: true
output_format: SRGB
target_width: 480
algorithm: DEFAULT_WITHOUT_UPSCALE
}
}
}
# VIDEO_PREP: Create a low frame rate stream for feature extraction.
node {
calculator: "PacketThinnerCalculator"
input_stream: "video_frames_scaled"
output_stream: "video_frames_scaled_downsampled"
options: {
[mediapipe.PacketThinnerCalculatorOptions.ext]: {
thinner_type: ASYNC
period: 500000
}
}
}
# DETECTION: find borders around the video and major background color.
node {
calculator: "BorderDetectionCalculator"
input_stream: "VIDEO:video_raw"
output_stream: "DETECTED_BORDERS:borders"
}
# DETECTION: find shot/scene boundaries on the full frame rate stream.
node {
calculator: "ShotBoundaryCalculator"
input_stream: "VIDEO:video_frames_scaled"
output_stream: "IS_SHOT_CHANGE:shot_change"
options {
[mediapipe.autoflip.ShotBoundaryCalculatorOptions.ext] {
min_shot_span: 0.2
min_motion: 0.3
window_size: 15
min_shot_measure: 10
min_motion_with_shot_measure: 0.05
}
}
}
# DETECTION: find faces on the down sampled stream
node {
calculator: "AutoFlipFaceDetectionSubgraph"
input_stream: "VIDEO:video_frames_scaled_downsampled"
output_stream: "DETECTIONS:face_detections"
}
node {
calculator: "FaceToRegionCalculator"
input_stream: "VIDEO:video_frames_scaled_downsampled"
input_stream: "FACES:face_detections"
output_stream: "REGIONS:face_regions"
}
# DETECTION: find objects on the down sampled stream
node {
calculator: "AutoFlipObjectDetectionSubgraph"
input_stream: "VIDEO:video_frames_scaled_downsampled"
output_stream: "DETECTIONS:object_detections"
}
node {
calculator: "LocalizationToRegionCalculator"
input_stream: "DETECTIONS:object_detections"
output_stream: "REGIONS:object_regions"
options {
[mediapipe.autoflip.LocalizationToRegionCalculatorOptions.ext] {
output_all_signals: true
}
}
}
# SIGNAL FUSION: Combine detections (with weights) on each frame
node {
calculator: "SignalFusingCalculator"
input_stream: "shot_change"
input_stream: "face_regions"
input_stream: "object_regions"
output_stream: "salient_regions"
options {
[mediapipe.autoflip.SignalFusingCalculatorOptions.ext] {
signal_settings {
type { standard: FACE_CORE_LANDMARKS }
min_score: 0.85
max_score: 0.9
is_required: false
}
signal_settings {
type { standard: FACE_ALL_LANDMARKS }
min_score: 0.8
max_score: 0.85
is_required: false
}
signal_settings {
type { standard: FACE_FULL }
min_score: 0.8
max_score: 0.85
is_required: false
}
signal_settings {
type: { standard: HUMAN }
min_score: 0.75
max_score: 0.8
is_required: false
}
signal_settings {
type: { standard: PET }
min_score: 0.7
max_score: 0.75
is_required: false
}
signal_settings {
type: { standard: CAR }
min_score: 0.7
max_score: 0.75
is_required: false
}
signal_settings {
type: { standard: OBJECT }
min_score: 0.1
max_score: 0.2
is_required: false
}
}
}
}
# CROPPING: make decisions about how to crop each frame.
node {
calculator: "SceneCroppingCalculator"
input_side_packet: "EXTERNAL_ASPECT_RATIO:aspect_ratio"
input_stream: "VIDEO_FRAMES:video_raw"
input_stream: "KEY_FRAMES:video_frames_scaled_downsampled"
input_stream: "DETECTION_FEATURES:salient_regions"
input_stream: "STATIC_FEATURES:borders"
input_stream: "SHOT_BOUNDARIES:shot_change"
output_stream: "CROPPED_FRAMES:cropped_frames"
options: {
[mediapipe.autoflip.SceneCroppingCalculatorOptions.ext]: {
max_scene_size: 600
key_frame_crop_options: {
score_aggregation_type: CONSTANT
}
scene_camera_motion_analyzer_options: {
motion_stabilization_threshold_percent: 0.3
salient_point_bound: 0.499
}
padding_parameters: {
blur_cv_size: 200
overlay_opacity: 0.6
}
target_size_type: MAXIMIZE_TARGET_DIMENSION
}
}
}
# ENCODING(required): encode the video stream for the final cropped output.
node {
calculator: "VideoPreStreamCalculator"
# Fetch frame format and dimension from input frames.
input_stream: "FRAME:cropped_frames"
# Copying frame rate and duration from original video.
input_stream: "VIDEO_PRESTREAM:video_header"
output_stream: "output_frames_video_header"
}
node {
calculator: "OpenCvVideoEncoderCalculator"
input_stream: "VIDEO:cropped_frames"
input_stream: "VIDEO_PRESTREAM:output_frames_video_header"
input_side_packet: "OUTPUT_FILE_PATH:output_video_path"
input_side_packet: "AUDIO_FILE_PATH:audio_path"
options: {
[mediapipe.OpenCvVideoEncoderCalculatorOptions.ext]: {
codec: "avc1"
video_format: "mp4"
}
}
}

View File

@ -0,0 +1,252 @@
# Autoflip graph that renders the final cropped video and debugging videos.
# For use by developers who may be adding signals and adjusting weights.
max_queue_size: -1
# VIDEO_PREP: Decodes an input video file into images and a video header.
node {
calculator: "OpenCvVideoDecoderCalculator"
input_side_packet: "INPUT_FILE_PATH:input_video_path"
output_stream: "VIDEO:video_raw"
output_stream: "VIDEO_PRESTREAM:video_header"
output_side_packet: "SAVED_AUDIO_PATH:audio_path"
}
# VIDEO_PREP: Scale the input video before feature extraction.
node {
calculator: "ScaleImageCalculator"
input_stream: "FRAMES:video_raw"
input_stream: "VIDEO_HEADER:video_header"
output_stream: "FRAMES:video_frames_scaled"
options: {
[mediapipe.ScaleImageCalculatorOptions.ext]: {
preserve_aspect_ratio: true
output_format: SRGB
target_width: 480
algorithm: DEFAULT_WITHOUT_UPSCALE
}
}
}
# VIDEO_PREP: Create a low frame rate stream for feature extraction.
node {
calculator: "PacketThinnerCalculator"
input_stream: "video_frames_scaled"
output_stream: "video_frames_scaled_downsampled"
options: {
[mediapipe.PacketThinnerCalculatorOptions.ext]: {
thinner_type: ASYNC
period: 500000
}
}
}
# DETECTION: find borders around the video and major background color.
node {
calculator: "BorderDetectionCalculator"
input_stream: "VIDEO:video_raw"
output_stream: "DETECTED_BORDERS:borders"
}
# DETECTION: find shot/scene boundaries on the full frame rate stream.
node {
calculator: "ShotBoundaryCalculator"
input_stream: "VIDEO:video_frames_scaled"
output_stream: "IS_SHOT_CHANGE:shot_change"
options {
[mediapipe.autoflip.ShotBoundaryCalculatorOptions.ext] {
min_shot_span: 0.2
min_motion: 0.3
window_size: 15
min_shot_measure: 10
min_motion_with_shot_measure: 0.05
}
}
}
# DETECTION: find faces on the down sampled stream
node {
calculator: "AutoFlipFaceDetectionSubgraph"
input_stream: "VIDEO:video_frames_scaled_downsampled"
output_stream: "DETECTIONS:face_detections"
}
node {
calculator: "FaceToRegionCalculator"
input_stream: "VIDEO:video_frames_scaled_downsampled"
input_stream: "FACES:face_detections"
output_stream: "REGIONS:face_regions"
}
# DETECTION: find objects on the down sampled stream
node {
calculator: "AutoFlipObjectDetectionSubgraph"
input_stream: "VIDEO:video_frames_scaled_downsampled"
output_stream: "DETECTIONS:object_detections"
}
node {
calculator: "LocalizationToRegionCalculator"
input_stream: "DETECTIONS:object_detections"
output_stream: "REGIONS:object_regions"
options {
[mediapipe.autoflip.LocalizationToRegionCalculatorOptions.ext] {
output_all_signals: true
}
}
}
# SIGNAL FUSION: Combine detections (with weights) on each frame
node {
calculator: "SignalFusingCalculator"
input_stream: "shot_change"
input_stream: "face_regions"
input_stream: "object_regions"
output_stream: "salient_regions"
options {
[mediapipe.autoflip.SignalFusingCalculatorOptions.ext] {
signal_settings {
type { standard: FACE_CORE_LANDMARKS }
min_score: 0.85
max_score: 0.9
is_required: false
}
signal_settings {
type { standard: FACE_ALL_LANDMARKS }
min_score: 0.8
max_score: 0.85
is_required: false
}
signal_settings {
type { standard: FACE_FULL }
min_score: 0.8
max_score: 0.85
is_required: false
}
signal_settings {
type: { standard: HUMAN }
min_score: 0.75
max_score: 0.8
is_required: false
}
signal_settings {
type: { standard: PET }
min_score: 0.7
max_score: 0.75
is_required: false
}
signal_settings {
type: { standard: CAR }
min_score: 0.7
max_score: 0.75
is_required: false
}
signal_settings {
type: { standard: OBJECT }
min_score: 0.1
max_score: 0.2
is_required: false
}
}
}
}
# CROPPING: make decisions about how to crop each frame.
node {
calculator: "SceneCroppingCalculator"
input_side_packet: "EXTERNAL_ASPECT_RATIO:aspect_ratio"
input_stream: "VIDEO_FRAMES:video_raw"
input_stream: "KEY_FRAMES:video_frames_scaled_downsampled"
input_stream: "DETECTION_FEATURES:salient_regions"
input_stream: "STATIC_FEATURES:borders"
input_stream: "SHOT_BOUNDARIES:shot_change"
output_stream: "CROPPED_FRAMES:cropped_frames"
output_stream: "KEY_FRAME_CROP_REGION_VIZ_FRAMES:key_frame_crop_viz_frames"
output_stream: "SALIENT_POINT_FRAME_VIZ_FRAMES:salient_point_viz_frames"
options: {
[mediapipe.autoflip.SceneCroppingCalculatorOptions.ext]: {
max_scene_size: 600
key_frame_crop_options: {
score_aggregation_type: CONSTANT
}
scene_camera_motion_analyzer_options: {
motion_stabilization_threshold_percent: 0.3
salient_point_bound: 0.499
}
padding_parameters: {
blur_cv_size: 200
overlay_opacity: 0.6
}
target_size_type: MAXIMIZE_TARGET_DIMENSION
}
}
}
# ENCODING(required): encode the video stream for the final cropped output.
node {
calculator: "VideoPreStreamCalculator"
# Fetch frame format and dimension from input frames.
input_stream: "FRAME:cropped_frames"
# Copying frame rate and duration from original video.
input_stream: "VIDEO_PRESTREAM:video_header"
output_stream: "output_frames_video_header"
}
node {
calculator: "OpenCvVideoEncoderCalculator"
input_stream: "VIDEO:cropped_frames"
input_stream: "VIDEO_PRESTREAM:output_frames_video_header"
input_side_packet: "OUTPUT_FILE_PATH:output_video_path"
input_side_packet: "AUDIO_FILE_PATH:audio_path"
options: {
[mediapipe.OpenCvVideoEncoderCalculatorOptions.ext]: {
codec: "avc1"
video_format: "mp4"
}
}
}
# ENCODING(optional): encode the video stream for the key_frame_crop_viz_frames
# output. Draws boxes around required and non-required objects.
node {
calculator: "VideoPreStreamCalculator"
# Fetch frame format and dimension from input frames.
input_stream: "FRAME:key_frame_crop_viz_frames"
# Copying frame rate and duration from original video.
input_stream: "VIDEO_PRESTREAM:video_header"
output_stream: "key_frame_crop_viz_frames_header"
}
node {
calculator: "OpenCvVideoEncoderCalculator"
input_stream: "VIDEO:key_frame_crop_viz_frames"
input_stream: "VIDEO_PRESTREAM:key_frame_crop_viz_frames_header"
input_side_packet: "OUTPUT_FILE_PATH:key_frame_crop_viz_frames_path"
options: {
[mediapipe.OpenCvVideoEncoderCalculatorOptions.ext]: {
codec: "avc1"
video_format: "mp4"
}
}
}
# ENCODING(optional): encode the video stream for the salient_point_viz_frames
# output. Draws the focus points and the scene crop window (red).
node {
calculator: "VideoPreStreamCalculator"
# Fetch frame format and dimension from input frames.
input_stream: "FRAME:salient_point_viz_frames"
# Copying frame rate and duration from original video.
input_stream: "VIDEO_PRESTREAM:video_header"
output_stream: "salient_point_viz_frames_header"
}
node {
calculator: "OpenCvVideoEncoderCalculator"
input_stream: "VIDEO:salient_point_viz_frames"
input_stream: "VIDEO_PRESTREAM:salient_point_viz_frames_header"
input_side_packet: "OUTPUT_FILE_PATH:salient_point_viz_frames_path"
options: {
[mediapipe.OpenCvVideoEncoderCalculatorOptions.ext]: {
codec: "avc1"
video_format: "mp4"
}
}
}

View File

@ -0,0 +1,153 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Proto messages used for the AutoFlip Pipeline.
syntax = "proto2";
package mediapipe.autoflip;
import "mediapipe/framework/calculator.proto";
// Borders detected on the frame as well as non-border color (if present).
// Next tag: 4
message StaticFeatures {
// A list of the static parts for a frame.
repeated Border border = 1;
// The background color (only set if solid color).
optional Color solid_background = 2;
// Area of the image that is not a border.
optional Rect non_static_area = 3;
}
// A static border area within the video.
// Next tag: 3
message Border {
// Original location within the input frame.
optional Rect border_position = 1;
// Position for static area.
// Next tag: 3
enum RelativePosition {
TOP = 1;
BOTTOM = 2;
}
// Top or bottom position.
optional RelativePosition relative_position = 2;
}
// Rectangle (opencv format).
// Next tag: 5
message Rect {
optional int32 x = 1;
optional int32 y = 2;
optional int32 width = 3;
optional int32 height = 4;
}
// Color (RGB 8bit)
// Next tag: 4
message Color {
optional int32 r = 1;
optional int32 g = 2;
optional int32 b = 3;
}
// Rectangle (opencv format).
// Next tag: 5
message RectF {
optional float x = 1;
optional float y = 2;
optional float width = 3;
optional float height = 4;
}
// An image region of interest (eg a detected face or object), accompanied by an
// importance score.
// Next tag: 9
message SalientRegion {
reserved 3;
// The bounding box for this region in the image.
optional Rect location = 1;
// The bounding box for this region in the image normalized.
optional RectF location_normalized = 8;
// A score indicating the importance of this region.
optional float score = 2;
// A tracking id used to identify this region across video frames. Not always
// set.
optional int64 tracking_id = 4;
// If true, this region is required to be present in the final video (eg it
// contains text that cannot be cropped).
optional bool is_required = 5 [default = false];
// Type of signal carried in this message.
optional SignalType signal_type = 6;
// If true, object cannot move in the output window (e.g. text would look
// strange moving around).
optional bool requires_static_location = 7 [default = false];
}
// Stores the message type, including standard types (face, object) and custom
// types defined by a string id.
// Next tag: 3
message SignalType {
enum StandardType {
UNSET = 0;
// Full face bounding boxed detected.
FACE_FULL = 1;
// Face landmarks for eyes, nose, chin only.
FACE_CORE_LANDMARKS = 2;
// All face landmarks (eyes, ears, nose, chin).
FACE_ALL_LANDMARKS = 3;
// A specific face landmark.
FACE_LANDMARK = 4;
HUMAN = 5;
CAR = 6;
PET = 7;
OBJECT = 8;
MOTION = 9;
TEXT = 10;
LOGO = 11;
USER_HINT = 12;
}
oneof Signal {
StandardType standard = 1;
string custom = 2;
}
}
// Features extracted from a image.
// Next tag: 3
message DetectionSet {
// Mask image showing pixel-wise values at a given location.
optional string encoded_mask = 1;
// List of rectangle detections.
repeated SalientRegion detections = 2;
}
// General settings needed for multiple calculators.
message ConversionOptions {
extend mediapipe.CalculatorOptions {
optional ConversionOptions ext = 284806832;
}
// Target output width of the conversion.
optional int32 target_width = 1;
// Target output height of the conversion.
optional int32 target_height = 2;
}
// TODO: Move other autoflip messages into this area.

View File

@ -0,0 +1,426 @@
load("//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library")
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
licenses(["notice"]) # Apache 2.0
package(default_visibility = ["//mediapipe/examples:__subpackages__"])
cc_library(
name = "border_detection_calculator",
srcs = ["border_detection_calculator.cc"],
deps = [
":border_detection_calculator_cc_proto",
"//mediapipe/examples/desktop/autoflip:autoflip_messages_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/formats:image_frame",
"//mediapipe/framework/formats:image_frame_opencv",
"//mediapipe/framework/port:opencv_core",
"//mediapipe/framework/port:opencv_imgproc",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
],
alwayslink = 1,
)
proto_library(
name = "border_detection_calculator_proto",
srcs = ["border_detection_calculator.proto"],
deps = [
"//mediapipe/framework:calculator_proto",
],
)
mediapipe_cc_proto_library(
name = "border_detection_calculator_cc_proto",
srcs = ["border_detection_calculator.proto"],
cc_deps = ["//mediapipe/framework:calculator_cc_proto"],
visibility = ["//mediapipe/examples:__subpackages__"],
deps = [":border_detection_calculator_proto"],
)
cc_test(
name = "border_detection_calculator_test",
srcs = [
"border_detection_calculator_test.cc",
],
linkstatic = 1,
deps = [
":border_detection_calculator",
":border_detection_calculator_cc_proto",
"//mediapipe/examples/desktop/autoflip:autoflip_messages_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework:calculator_runner",
"//mediapipe/framework/formats:image_frame",
"//mediapipe/framework/formats:image_frame_opencv",
"//mediapipe/framework/port:benchmark",
"//mediapipe/framework/port:gtest_main",
"//mediapipe/framework/port:opencv_core",
"//mediapipe/framework/port:parse_text_proto",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"@com_google_absl//absl/strings",
],
)
cc_library(
name = "video_filtering_calculator",
srcs = ["video_filtering_calculator.cc"],
copts = ["-fexceptions"],
features = ["-use_header_modules"], # Incompatible with -fexceptions.
deps = [
":video_filtering_calculator_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/formats:image_frame",
"//mediapipe/framework/formats:image_frame_opencv",
"//mediapipe/framework/port:status",
"@com_google_absl//absl/strings",
],
alwayslink = 1,
)
proto_library(
name = "video_filtering_calculator_proto",
srcs = ["video_filtering_calculator.proto"],
visibility = ["//visibility:public"],
deps = [
"//mediapipe/framework:calculator_proto",
],
)
mediapipe_cc_proto_library(
name = "video_filtering_calculator_cc_proto",
srcs = ["video_filtering_calculator.proto"],
cc_deps = ["//mediapipe/framework:calculator_cc_proto"],
visibility = ["//visibility:public"],
deps = [":video_filtering_calculator_proto"],
)
cc_test(
name = "video_filtering_calculator_test",
srcs = ["video_filtering_calculator_test.cc"],
deps = [
":video_filtering_calculator",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework:calculator_runner",
"//mediapipe/framework/formats:image_frame",
"//mediapipe/framework/port:gtest_main",
"//mediapipe/framework/port:parse_text_proto",
"//mediapipe/framework/port:status",
"@com_google_absl//absl/strings",
],
)
proto_library(
name = "scene_cropping_calculator_proto",
srcs = ["scene_cropping_calculator.proto"],
visibility = ["//visibility:public"],
deps = [
"//mediapipe/examples/desktop/autoflip/quality:cropping_proto",
"//mediapipe/framework:calculator_proto",
],
)
mediapipe_cc_proto_library(
name = "scene_cropping_calculator_cc_proto",
srcs = ["scene_cropping_calculator.proto"],
cc_deps = [
"//mediapipe/examples/desktop/autoflip/quality:cropping_cc_proto",
"//mediapipe/framework:calculator_cc_proto",
],
visibility = ["//visibility:public"],
deps = [":scene_cropping_calculator_proto"],
)
cc_library(
name = "scene_cropping_calculator",
srcs = ["scene_cropping_calculator.cc"],
hdrs = ["scene_cropping_calculator.h"],
deps = [
":scene_cropping_calculator_cc_proto",
"//mediapipe/examples/desktop/autoflip:autoflip_messages_cc_proto",
"//mediapipe/examples/desktop/autoflip/quality:cropping_cc_proto",
"//mediapipe/examples/desktop/autoflip/quality:focus_point_cc_proto",
"//mediapipe/examples/desktop/autoflip/quality:frame_crop_region_computer",
"//mediapipe/examples/desktop/autoflip/quality:padding_effect_generator",
"//mediapipe/examples/desktop/autoflip/quality:piecewise_linear_function",
"//mediapipe/examples/desktop/autoflip/quality:polynomial_regression_path_solver",
"//mediapipe/examples/desktop/autoflip/quality:scene_camera_motion_analyzer",
"//mediapipe/examples/desktop/autoflip/quality:scene_cropper",
"//mediapipe/examples/desktop/autoflip/quality:scene_cropping_viz",
"//mediapipe/examples/desktop/autoflip/quality:utils",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework:timestamp",
"//mediapipe/framework/formats:image_frame",
"//mediapipe/framework/formats:image_frame_opencv",
"//mediapipe/framework/port:opencv_core",
"//mediapipe/framework/port:opencv_imgproc",
"//mediapipe/framework/port:parse_text_proto",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"@com_google_absl//absl/memory",
"@com_google_absl//absl/strings:str_format",
],
alwayslink = 1, # buildozer: disable=alwayslink-with-hdrs
)
cc_test(
name = "scene_cropping_calculator_test",
size = "large",
timeout = "long",
srcs = ["scene_cropping_calculator_test.cc"],
deps = [
":scene_cropping_calculator",
"//mediapipe/examples/desktop/autoflip:autoflip_messages_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework:calculator_runner",
"//mediapipe/framework/formats:image_frame",
"//mediapipe/framework/formats:image_frame_opencv",
"//mediapipe/framework/port:gtest_main",
"//mediapipe/framework/port:opencv_core",
"//mediapipe/framework/port:parse_text_proto",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
],
)
cc_library(
name = "signal_fusing_calculator",
srcs = ["signal_fusing_calculator.cc"],
deps = [
":signal_fusing_calculator_cc_proto",
"//mediapipe/examples/desktop/autoflip:autoflip_messages_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/formats:image_frame",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
],
alwayslink = 1,
)
proto_library(
name = "signal_fusing_calculator_proto",
srcs = ["signal_fusing_calculator.proto"],
deps = [
"//mediapipe/examples/desktop/autoflip:autoflip_messages_proto",
"//mediapipe/framework:calculator_proto",
],
)
mediapipe_cc_proto_library(
name = "signal_fusing_calculator_cc_proto",
srcs = ["signal_fusing_calculator.proto"],
cc_deps = [
"//mediapipe/examples/desktop/autoflip:autoflip_messages_cc_proto",
"//mediapipe/framework:calculator_cc_proto",
],
visibility = ["//mediapipe/examples:__subpackages__"],
deps = [":signal_fusing_calculator_proto"],
)
cc_test(
name = "signal_fusing_calculator_test",
srcs = ["signal_fusing_calculator_test.cc"],
linkstatic = 1,
deps = [
":signal_fusing_calculator",
":signal_fusing_calculator_cc_proto",
"//mediapipe/examples/desktop/autoflip:autoflip_messages_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework:calculator_runner",
"//mediapipe/framework/formats:image_frame",
"//mediapipe/framework/formats:image_frame_opencv",
"//mediapipe/framework/port:gtest_main",
"//mediapipe/framework/port:parse_text_proto",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"@com_google_absl//absl/strings",
],
)
cc_library(
name = "shot_boundary_calculator",
srcs = ["shot_boundary_calculator.cc"],
visibility = ["//visibility:public"],
deps = [
":shot_boundary_calculator_cc_proto",
"//mediapipe/examples/desktop/autoflip:autoflip_messages_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework:timestamp",
"//mediapipe/framework/formats:image_frame",
"//mediapipe/framework/formats:image_frame_opencv",
"//mediapipe/framework/port:opencv_imgproc",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
],
alwayslink = 1,
)
proto_library(
name = "shot_boundary_calculator_proto",
srcs = ["shot_boundary_calculator.proto"],
deps = [
"//mediapipe/examples/desktop/autoflip:autoflip_messages_proto",
"//mediapipe/framework:calculator_proto",
],
)
mediapipe_cc_proto_library(
name = "shot_boundary_calculator_cc_proto",
srcs = ["shot_boundary_calculator.proto"],
cc_deps = ["//mediapipe/framework:calculator_cc_proto"],
visibility = ["//mediapipe/examples:__subpackages__"],
deps = [":shot_boundary_calculator_proto"],
)
cc_test(
name = "shot_boundary_calculator_test",
srcs = ["shot_boundary_calculator_test.cc"],
data = ["//mediapipe/examples/desktop/autoflip/calculators/testdata:test_images"],
linkstatic = 1,
deps = [
":shot_boundary_calculator",
":shot_boundary_calculator_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework:calculator_runner",
"//mediapipe/framework/deps:file_path",
"//mediapipe/framework/formats:image_frame",
"//mediapipe/framework/formats:image_frame_opencv",
"//mediapipe/framework/port:gtest_main",
"//mediapipe/framework/port:opencv_core",
"//mediapipe/framework/port:opencv_imgcodecs",
"//mediapipe/framework/port:opencv_imgproc",
"//mediapipe/framework/port:parse_text_proto",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"@com_google_absl//absl/strings",
],
)
cc_library(
name = "face_to_region_calculator",
srcs = ["face_to_region_calculator.cc"],
deps = [
":face_to_region_calculator_cc_proto",
"//mediapipe/examples/desktop/autoflip:autoflip_messages_cc_proto",
"//mediapipe/examples/desktop/autoflip/quality:visual_scorer",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/formats:detection_cc_proto",
"//mediapipe/framework/formats:image_frame",
"//mediapipe/framework/formats:image_frame_opencv",
"//mediapipe/framework/formats:location_data_cc_proto",
"//mediapipe/framework/port:opencv_core",
"//mediapipe/framework/port:opencv_imgproc",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"@com_google_absl//absl/memory",
],
alwayslink = 1,
)
proto_library(
name = "face_to_region_calculator_proto",
srcs = ["face_to_region_calculator.proto"],
deps = [
"//mediapipe/examples/desktop/autoflip/quality:visual_scorer_proto",
"//mediapipe/framework:calculator_proto",
],
)
mediapipe_cc_proto_library(
name = "face_to_region_calculator_cc_proto",
srcs = ["face_to_region_calculator.proto"],
cc_deps = [
"//mediapipe/examples/desktop/autoflip/quality:visual_scorer_cc_proto",
"//mediapipe/framework:calculator_cc_proto",
],
visibility = ["//mediapipe/examples:__subpackages__"],
deps = [":face_to_region_calculator_proto"],
)
cc_test(
name = "face_to_region_calculator_test",
srcs = ["face_to_region_calculator_test.cc"],
linkstatic = 1,
deps = [
":face_to_region_calculator",
":face_to_region_calculator_cc_proto",
"//mediapipe/examples/desktop/autoflip:autoflip_messages_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework:calculator_runner",
"//mediapipe/framework/formats:detection_cc_proto",
"//mediapipe/framework/formats:image_frame",
"//mediapipe/framework/formats:image_frame_opencv",
"//mediapipe/framework/formats:location_data_cc_proto",
"//mediapipe/framework/port:gtest_main",
"//mediapipe/framework/port:parse_text_proto",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"@com_google_absl//absl/strings",
],
)
proto_library(
name = "localization_to_region_calculator_proto",
srcs = ["localization_to_region_calculator.proto"],
deps = [
"//mediapipe/framework:calculator_proto",
],
)
mediapipe_cc_proto_library(
name = "localization_to_region_calculator_cc_proto",
srcs = ["localization_to_region_calculator.proto"],
cc_deps = ["//mediapipe/framework:calculator_cc_proto"],
visibility = ["//mediapipe/examples:__subpackages__"],
deps = [":localization_to_region_calculator_proto"],
)
cc_library(
name = "localization_to_region_calculator",
srcs = ["localization_to_region_calculator.cc"],
visibility = ["//visibility:public"],
deps = [
":localization_to_region_calculator_cc_proto",
"//mediapipe/examples/desktop/autoflip:autoflip_messages_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/formats:detection_cc_proto",
"//mediapipe/framework/formats:location_data_cc_proto",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"@com_google_absl//absl/memory",
],
alwayslink = 1,
)
cc_test(
name = "localization_to_region_calculator_test",
srcs = ["localization_to_region_calculator_test.cc"],
linkstatic = 1,
deps = [
":localization_to_region_calculator",
":localization_to_region_calculator_cc_proto",
"//mediapipe/examples/desktop/autoflip:autoflip_messages_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework:calculator_runner",
"//mediapipe/framework/formats:detection_cc_proto",
"//mediapipe/framework/formats:location_data_cc_proto",
"//mediapipe/framework/port:gtest_main",
"//mediapipe/framework/port:parse_text_proto",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"@com_google_absl//absl/strings",
],
)

View File

@ -0,0 +1,302 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This calculator takes an ImageFrame and detects solid-color borders in it.
#include <algorithm>
#include <memory>
#include <vector>
#include "mediapipe/examples/desktop/autoflip/autoflip_messages.pb.h"
#include "mediapipe/examples/desktop/autoflip/calculators/border_detection_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/image_frame_opencv.h"
#include "mediapipe/framework/port/opencv_core_inc.h"
#include "mediapipe/framework/port/opencv_imgproc_inc.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
using mediapipe::Adopt;
using mediapipe::CalculatorBase;
using mediapipe::ImageFrame;
using mediapipe::PacketTypeSet;
using mediapipe::autoflip::Border;
constexpr char kDetectedBorders[] = "DETECTED_BORDERS";
constexpr int kMinBorderDistance = 5;
constexpr int kKMeansClusterCount = 4;
constexpr int kMaxPixelsToProcess = 300000;
constexpr char kVideoInputTag[] = "VIDEO";
namespace mediapipe {
namespace autoflip {
namespace {
// Sets rect values into a proto.
void SetRect(const cv::Rect& region,
const Border::RelativePosition& relative_position, Border* part) {
part->mutable_border_position()->set_x(region.x);
part->mutable_border_position()->set_y(region.y);
part->mutable_border_position()->set_width(region.width);
part->mutable_border_position()->set_height(region.height);
part->set_relative_position(relative_position);
}
} // namespace
// This calculator takes a sequence of images (video) and detects solid color
// borders as well as the dominant color of the non-border area. This per-frame
// information is passed to downstream calculators.
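// Example config (stream names are illustrative; this mirrors the
// calculator's unit tests):
//   calculator: "BorderDetectionCalculator"
//   input_stream: "VIDEO:camera_frames"
//   output_stream: "DETECTED_BORDERS:regions"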
class BorderDetectionCalculator : public CalculatorBase {
public:
BorderDetectionCalculator() : frame_width_(-1), frame_height_(-1) {}
~BorderDetectionCalculator() override {}
BorderDetectionCalculator(const BorderDetectionCalculator&) = delete;
BorderDetectionCalculator& operator=(const BorderDetectionCalculator&) =
delete;
static mediapipe::Status GetContract(mediapipe::CalculatorContract* cc);
mediapipe::Status Open(mediapipe::CalculatorContext* cc) override;
mediapipe::Status Process(mediapipe::CalculatorContext* cc) override;
private:
// Given a color and image direction, check to see if a border of that color
// exists.
void DetectBorder(const cv::Mat& frame, const Color& color,
const Border::RelativePosition& direction,
StaticFeatures* features);
  // Returns the fraction of pixels in a given image that match this color
  // (within the configured color tolerance).
double ColorCount(const Color& mask_color, const cv::Mat& image) const;
// Set member vars (image size) and confirm no changes frame-to-frame.
mediapipe::Status SetAndCheckInputs(const cv::Mat& frame);
  // Finds the dominant color of an input image and returns the fraction of
  // pixels covered by that color.
double FindDominantColor(const cv::Mat& image, Color* dominant_color);
// Frame width and height.
int frame_width_;
int frame_height_;
// Options for processing.
BorderDetectionCalculatorOptions options_;
};
REGISTER_CALCULATOR(BorderDetectionCalculator);
::mediapipe::Status BorderDetectionCalculator::Open(
mediapipe::CalculatorContext* cc) {
options_ = cc->Options<BorderDetectionCalculatorOptions>();
RET_CHECK_LT(options_.vertical_search_distance(), 0.5)
<< "Search distance must be less than half the full image.";
return ::mediapipe::OkStatus();
}
mediapipe::Status BorderDetectionCalculator::SetAndCheckInputs(
const cv::Mat& frame) {
if (frame_width_ < 0) {
frame_width_ = frame.cols;
}
if (frame_height_ < 0) {
frame_height_ = frame.rows;
}
RET_CHECK_EQ(frame.cols, frame_width_)
<< "Input frame dimensions must remain constant throughout the video.";
RET_CHECK_EQ(frame.rows, frame_height_)
<< "Input frame dimensions must remain constant throughout the video.";
RET_CHECK_EQ(frame.channels(), 3) << "Input video type must be 3-channel";
return ::mediapipe::OkStatus();
}
mediapipe::Status BorderDetectionCalculator::Process(
mediapipe::CalculatorContext* cc) {
if (!cc->Inputs().HasTag(kVideoInputTag) ||
cc->Inputs().Tag(kVideoInputTag).Value().IsEmpty()) {
return ::mediapipe::InvalidArgumentErrorBuilder(MEDIAPIPE_LOC)
<< "Input tag VIDEO not set or empty at timestamp: "
<< cc->InputTimestamp().Value();
}
cv::Mat frame = mediapipe::formats::MatView(
&cc->Inputs().Tag(kVideoInputTag).Get<ImageFrame>());
MP_RETURN_IF_ERROR(SetAndCheckInputs(frame));
// Initialize output and set default values.
std::unique_ptr<StaticFeatures> features =
absl::make_unique<StaticFeatures>();
features->mutable_non_static_area()->set_x(0);
features->mutable_non_static_area()->set_width(frame_width_);
features->mutable_non_static_area()->set_y(options_.default_padding_px());
features->mutable_non_static_area()->set_height(
std::max(0, frame_height_ - options_.default_padding_px() * 2));
// Check for border at the top of the frame.
Color seed_color_top;
FindDominantColor(frame(cv::Rect(0, 0, frame_width_, 1)), &seed_color_top);
DetectBorder(frame, seed_color_top, Border::TOP, features.get());
// Check for border at the bottom of the frame.
Color seed_color_bottom;
FindDominantColor(frame(cv::Rect(0, frame_height_ - 1, frame_width_, 1)),
&seed_color_bottom);
DetectBorder(frame, seed_color_bottom, Border::BOTTOM, features.get());
// Check the non-border area for a dominant color.
cv::Mat non_static_frame = frame(
cv::Rect(features->non_static_area().x(), features->non_static_area().y(),
features->non_static_area().width(),
features->non_static_area().height()));
Color dominant_color_nonborder;
double dominant_color_percent =
FindDominantColor(non_static_frame, &dominant_color_nonborder);
if (dominant_color_percent > options_.solid_background_tol_perc()) {
auto* bg_color = features->mutable_solid_background();
bg_color->set_r(dominant_color_nonborder.r());
bg_color->set_g(dominant_color_nonborder.g());
bg_color->set_b(dominant_color_nonborder.b());
}
// Output result.
cc->Outputs()
.Tag(kDetectedBorders)
.AddPacket(Adopt(features.release()).At(cc->InputTimestamp()));
return ::mediapipe::OkStatus();
}
// Find the dominant color within an image.
double BorderDetectionCalculator::FindDominantColor(const cv::Mat& image_raw,
Color* dominant_color) {
cv::Mat image;
if (image_raw.total() > kMaxPixelsToProcess) {
float resize = kMaxPixelsToProcess / static_cast<float>(image_raw.total());
cv::resize(image_raw, image, cv::Size(), resize, resize);
} else {
image = image_raw;
}
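  // Cluster the (possibly downsampled) pixels with k-means, treating each
  // pixel as a 3-channel color sample; the center of the most populated
  // cluster is reported as the dominant color, and the fraction of pixels
  // assigned to that cluster is returned.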
cv::Mat float_data, cluster, cluster_center;
image.convertTo(float_data, CV_32F);
cv::Mat reshaped = float_data.reshape(1, float_data.total());
cv::kmeans(reshaped, kKMeansClusterCount, cluster,
cv::TermCriteria(CV_TERMCRIT_ITER, 5, 1.0), 1,
cv::KMEANS_PP_CENTERS, cluster_center);
std::vector<int> count(kKMeansClusterCount, 0);
for (int i = 0; i < cluster.rows; i++) {
count[cluster.at<int>(i, 0)]++;
}
auto max_cluster_ptr = std::max_element(count.begin(), count.end());
double max_cluster_perc =
*max_cluster_ptr / static_cast<double>(cluster.rows);
int max_cluster_idx = std::distance(count.begin(), max_cluster_ptr);
dominant_color->set_r(cluster_center.at<float>(max_cluster_idx, 2));
dominant_color->set_g(cluster_center.at<float>(max_cluster_idx, 1));
dominant_color->set_b(cluster_center.at<float>(max_cluster_idx, 0));
return max_cluster_perc;
}
double BorderDetectionCalculator::ColorCount(const Color& mask_color,
const cv::Mat& image) const {
int background_count = 0;
for (int i = 0; i < image.rows; i++) {
const uint8* row_ptr = image.ptr<uint8>(i);
for (int j = 0; j < image.cols * 3; j += 3) {
if (std::abs(mask_color.r() - static_cast<int>(row_ptr[j + 2])) <=
options_.color_tolerance() &&
std::abs(mask_color.g() - static_cast<int>(row_ptr[j + 1])) <=
options_.color_tolerance() &&
std::abs(mask_color.b() - static_cast<int>(row_ptr[j])) <=
options_.color_tolerance()) {
background_count++;
}
}
}
return background_count / static_cast<double>(image.rows * image.cols);
}
void BorderDetectionCalculator::DetectBorder(
const cv::Mat& frame, const Color& color,
const Border::RelativePosition& direction, StaticFeatures* features) {
  // Search inward from the frame edge, up to the configured maximum search
  // distance.
int search_distance =
(direction == Border::TOP || direction == Border::BOTTOM) ? frame.rows
: frame.cols;
search_distance *= options_.vertical_search_distance();
  // Walk line by line, stopping when a line no longer matches the given
  // border color.
int last_border = -1;
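  // 'last_border' tracks the offset (in lines from the frame edge) of the
  // last line that still matched the border color; -1 means none did.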
for (int i = 0; i < search_distance; i++) {
cv::Rect current_row;
switch (direction) {
case Border::TOP:
current_row = cv::Rect(0, i, frame.cols, 1);
break;
case Border::BOTTOM:
current_row = cv::Rect(0, frame.rows - i - 1, frame.cols, 1);
break;
}
if (ColorCount(color, frame(current_row)) <
options_.border_color_pixel_perc()) {
break;
}
last_border = i;
}
// Reject results that are not borders (or too small).
if (last_border <= kMinBorderDistance || last_border == search_distance - 1) {
return;
}
// Apply defined padding.
last_border += options_.border_object_padding_px();
switch (direction) {
case Border::TOP:
SetRect(cv::Rect(0, 0, frame.cols, last_border), Border::TOP,
features->add_border());
features->mutable_non_static_area()->set_y(
last_border + features->non_static_area().y());
features->mutable_non_static_area()->set_height(
std::max(0, frame_height_ - (features->non_static_area().y() +
options_.default_padding_px())));
break;
case Border::BOTTOM:
SetRect(
cv::Rect(0, frame.rows - last_border - 1, frame.cols, last_border),
Border::BOTTOM, features->add_border());
features->mutable_non_static_area()->set_height(std::max(
0, frame.rows - (features->non_static_area().y() + last_border +
options_.default_padding_px())));
break;
}
}
::mediapipe::Status BorderDetectionCalculator::GetContract(
mediapipe::CalculatorContract* cc) {
cc->Inputs().Tag(kVideoInputTag).Set<ImageFrame>();
cc->Outputs().Tag(kDetectedBorders).Set<StaticFeatures>();
return ::mediapipe::OkStatus();
}
} // namespace autoflip
} // namespace mediapipe

View File

@ -0,0 +1,44 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe.autoflip;
import "mediapipe/framework/calculator.proto";
// Next tag: 7
message BorderDetectionCalculatorOptions {
extend mediapipe.CalculatorOptions {
optional BorderDetectionCalculatorOptions ext = 276599815;
}
// Max difference in color to be considered the same (per rgb channel).
optional int32 color_tolerance = 1 [default = 6];
// Amount of padding to add around any object within the border that is
// resized to fit into the new border.
optional int32 border_object_padding_px = 2 [default = 5];
  // Distance (as a fraction of frame height) to search for a border.
  optional float vertical_search_distance = 3 [default = .20];
  // Fraction of pixels in a line that must match the border color for the
  // line to be counted as part of a border.
  optional float border_color_pixel_perc = 4 [default = .995];
  // Fraction of non-border pixels that must match the dominant color for the
  // frame to be considered to have a solid background.
  optional float solid_background_tol_perc = 5 [default = .5];
// Force a border of this size in pixels on top and bottom.
optional int32 default_padding_px = 6 [default = 0];
}
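// A sketch of how these options can be overridden on a
// BorderDetectionCalculator node in a graph config (values are illustrative,
// mirroring the calculator's unit tests):
//   options: {
//     [mediapipe.autoflip.BorderDetectionCalculatorOptions.ext]: {
//       default_padding_px: 10
//       border_object_padding_px: 0
//     }
//   }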

View File

@ -0,0 +1,397 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/string_view.h"
#include "mediapipe/examples/desktop/autoflip/autoflip_messages.pb.h"
#include "mediapipe/examples/desktop/autoflip/calculators/border_detection_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_runner.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/image_frame_opencv.h"
#include "mediapipe/framework/port/benchmark.h"
#include "mediapipe/framework/port/gmock.h"
#include "mediapipe/framework/port/gtest.h"
#include "mediapipe/framework/port/opencv_core_inc.h"
#include "mediapipe/framework/port/parse_text_proto.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/framework/port/status_matchers.h"
using mediapipe::Adopt;
using mediapipe::CalculatorGraphConfig;
using mediapipe::CalculatorRunner;
using mediapipe::ImageFormat;
using mediapipe::ImageFrame;
using mediapipe::Packet;
using mediapipe::PacketTypeSet;
using mediapipe::ParseTextProtoOrDie;
using mediapipe::Timestamp;
using mediapipe::autoflip::Border;
namespace mediapipe {
namespace autoflip {
namespace {
const char kConfig[] = R"(
calculator: "BorderDetectionCalculator"
input_stream: "VIDEO:camera_frames"
output_stream: "DETECTED_BORDERS:regions"
options:{
[mediapipe.autoflip.BorderDetectionCalculatorOptions.ext]:{
border_object_padding_px: 0
}
})";
const char kConfigPad[] = R"(
calculator: "BorderDetectionCalculator"
input_stream: "VIDEO:camera_frames"
output_stream: "DETECTED_BORDERS:regions"
options:{
[mediapipe.autoflip.BorderDetectionCalculatorOptions.ext]:{
default_padding_px: 10
border_object_padding_px: 0
}
})";
const int kTestFrameWidth = 640;
const int kTestFrameHeight = 480;
const int kTestFrameLargeWidth = 1920;
const int kTestFrameLargeHeight = 1080;
const int kTestFrameWidthTall = 1200;
const int kTestFrameHeightTall = 2001;
TEST(BorderDetectionCalculatorTest, NoBorderTest) {
auto runner = ::absl::make_unique<CalculatorRunner>(
ParseTextProtoOrDie<CalculatorGraphConfig::Node>(kConfig));
auto input_frame = ::absl::make_unique<ImageFrame>(
ImageFormat::SRGB, kTestFrameWidth, kTestFrameHeight);
cv::Mat input_mat = mediapipe::formats::MatView(input_frame.get());
input_mat.setTo(cv::Scalar(0, 0, 0));
runner->MutableInputs()->Tag("VIDEO").packets.push_back(
Adopt(input_frame.release()).At(Timestamp::PostStream()));
// Run the calculator.
MP_ASSERT_OK(runner->Run());
const std::vector<Packet>& output_packets =
runner->Outputs().Tag("DETECTED_BORDERS").packets;
ASSERT_EQ(1, output_packets.size());
const auto& static_features = output_packets[0].Get<StaticFeatures>();
ASSERT_EQ(0, static_features.border().size());
EXPECT_EQ(0, static_features.non_static_area().x());
EXPECT_EQ(0, static_features.non_static_area().y());
EXPECT_EQ(kTestFrameWidth, static_features.non_static_area().width());
EXPECT_EQ(kTestFrameHeight, static_features.non_static_area().height());
EXPECT_TRUE(static_features.has_solid_background());
EXPECT_EQ(0, static_features.solid_background().r());
EXPECT_EQ(0, static_features.solid_background().g());
EXPECT_EQ(0, static_features.solid_background().b());
}
TEST(BorderDetectionCalculatorTest, TopBorderTest) {
auto runner = ::absl::make_unique<CalculatorRunner>(
ParseTextProtoOrDie<CalculatorGraphConfig::Node>(kConfig));
const int kTopBorderHeight = 50;
auto input_frame = ::absl::make_unique<ImageFrame>(
ImageFormat::SRGB, kTestFrameWidth, kTestFrameHeight);
cv::Mat input_mat = mediapipe::formats::MatView(input_frame.get());
input_mat.setTo(cv::Scalar(0, 0, 0));
cv::Mat sub_image =
input_mat(cv::Rect(0, 0, kTestFrameWidth, kTopBorderHeight));
sub_image.setTo(cv::Scalar(255, 0, 0));
runner->MutableInputs()->Tag("VIDEO").packets.push_back(
Adopt(input_frame.release()).At(Timestamp::PostStream()));
// Run the calculator.
MP_ASSERT_OK(runner->Run());
const std::vector<Packet>& output_packets =
runner->Outputs().Tag("DETECTED_BORDERS").packets;
ASSERT_EQ(1, output_packets.size());
const auto& static_features = output_packets[0].Get<StaticFeatures>();
ASSERT_EQ(1, static_features.border().size());
const auto& part = static_features.border(0);
EXPECT_EQ(part.border_position().x(), 0);
EXPECT_EQ(part.border_position().y(), 0);
EXPECT_EQ(part.border_position().width(), kTestFrameWidth);
EXPECT_LT(std::abs(part.border_position().height() - kTopBorderHeight), 2);
EXPECT_TRUE(static_features.has_solid_background());
EXPECT_EQ(0, static_features.solid_background().r());
EXPECT_EQ(0, static_features.solid_background().g());
EXPECT_EQ(0, static_features.solid_background().b());
EXPECT_EQ(0, static_features.non_static_area().x());
EXPECT_EQ(kTopBorderHeight - 1, static_features.non_static_area().y());
EXPECT_EQ(kTestFrameWidth, static_features.non_static_area().width());
EXPECT_EQ(kTestFrameHeight - kTopBorderHeight + 1,
static_features.non_static_area().height());
}
TEST(BorderDetectionCalculatorTest, TopBorderPadTest) {
auto runner = ::absl::make_unique<CalculatorRunner>(
ParseTextProtoOrDie<CalculatorGraphConfig::Node>(kConfigPad));
const int kTopBorderHeight = 50;
auto input_frame = ::absl::make_unique<ImageFrame>(
ImageFormat::SRGB, kTestFrameWidth, kTestFrameHeight);
cv::Mat input_mat = mediapipe::formats::MatView(input_frame.get());
input_mat.setTo(cv::Scalar(0, 0, 0));
cv::Mat sub_image =
input_mat(cv::Rect(0, 0, kTestFrameWidth, kTopBorderHeight));
sub_image.setTo(cv::Scalar(255, 0, 0));
runner->MutableInputs()->Tag("VIDEO").packets.push_back(
Adopt(input_frame.release()).At(Timestamp::PostStream()));
// Run the calculator.
MP_ASSERT_OK(runner->Run());
const std::vector<Packet>& output_packets =
runner->Outputs().Tag("DETECTED_BORDERS").packets;
ASSERT_EQ(1, output_packets.size());
const auto& static_features = output_packets[0].Get<StaticFeatures>();
ASSERT_EQ(1, static_features.border().size());
const auto& part = static_features.border(0);
EXPECT_EQ(part.border_position().x(), 0);
EXPECT_EQ(part.border_position().y(), 0);
EXPECT_EQ(part.border_position().width(), kTestFrameWidth);
EXPECT_LT(std::abs(part.border_position().height() - kTopBorderHeight), 2);
EXPECT_TRUE(static_features.has_solid_background());
EXPECT_EQ(0, static_features.solid_background().r());
EXPECT_EQ(0, static_features.solid_background().g());
EXPECT_EQ(0, static_features.solid_background().b());
EXPECT_EQ(Border::TOP, part.relative_position());
EXPECT_EQ(0, static_features.non_static_area().x());
EXPECT_EQ(9 + kTopBorderHeight, static_features.non_static_area().y());
EXPECT_EQ(kTestFrameWidth, static_features.non_static_area().width());
EXPECT_EQ(kTestFrameHeight - 19 - kTopBorderHeight,
static_features.non_static_area().height());
}
TEST(BorderDetectionCalculatorTest, BottomBorderTest) {
auto runner = ::absl::make_unique<CalculatorRunner>(
ParseTextProtoOrDie<CalculatorGraphConfig::Node>(kConfig));
const int kBottomBorderHeight = 50;
auto input_frame = ::absl::make_unique<ImageFrame>(
ImageFormat::SRGB, kTestFrameWidth, kTestFrameHeight);
cv::Mat input_mat = mediapipe::formats::MatView(input_frame.get());
input_mat.setTo(cv::Scalar(0, 0, 0));
cv::Mat bottom_image =
input_mat(cv::Rect(0, kTestFrameHeight - kBottomBorderHeight,
kTestFrameWidth, kBottomBorderHeight));
bottom_image.setTo(cv::Scalar(255, 0, 0));
runner->MutableInputs()->Tag("VIDEO").packets.push_back(
Adopt(input_frame.release()).At(Timestamp::PostStream()));
// Run the calculator.
MP_ASSERT_OK(runner->Run());
const std::vector<Packet>& output_packets =
runner->Outputs().Tag("DETECTED_BORDERS").packets;
ASSERT_EQ(1, output_packets.size());
const auto& static_features = output_packets[0].Get<StaticFeatures>();
ASSERT_EQ(1, static_features.border().size());
const auto& part = static_features.border(0);
EXPECT_EQ(part.border_position().x(), 0);
EXPECT_EQ(part.border_position().y(), kTestFrameHeight - kBottomBorderHeight);
EXPECT_EQ(part.border_position().width(), kTestFrameWidth);
EXPECT_LT(std::abs(part.border_position().height() - kBottomBorderHeight), 2);
EXPECT_TRUE(static_features.has_solid_background());
EXPECT_EQ(0, static_features.solid_background().r());
EXPECT_EQ(0, static_features.solid_background().g());
EXPECT_EQ(0, static_features.solid_background().b());
EXPECT_EQ(Border::BOTTOM, part.relative_position());
}
TEST(BorderDetectionCalculatorTest, TopBottomBorderTest) {
auto runner = ::absl::make_unique<CalculatorRunner>(
ParseTextProtoOrDie<CalculatorGraphConfig::Node>(kConfig));
const int kBottomBorderHeight = 50;
const int kTopBorderHeight = 25;
auto input_frame = ::absl::make_unique<ImageFrame>(
ImageFormat::SRGB, kTestFrameWidth, kTestFrameHeight);
cv::Mat input_mat = mediapipe::formats::MatView(input_frame.get());
input_mat.setTo(cv::Scalar(0, 0, 0));
cv::Mat top_image =
input_mat(cv::Rect(0, 0, kTestFrameWidth, kTopBorderHeight));
top_image.setTo(cv::Scalar(0, 255, 0));
cv::Mat bottom_image =
input_mat(cv::Rect(0, kTestFrameHeight - kBottomBorderHeight,
kTestFrameWidth, kBottomBorderHeight));
bottom_image.setTo(cv::Scalar(255, 0, 0));
runner->MutableInputs()->Tag("VIDEO").packets.push_back(
Adopt(input_frame.release()).At(Timestamp::PostStream()));
// Run the calculator.
MP_ASSERT_OK(runner->Run());
const std::vector<Packet>& output_packets =
runner->Outputs().Tag("DETECTED_BORDERS").packets;
ASSERT_EQ(1, output_packets.size());
const auto& static_features = output_packets[0].Get<StaticFeatures>();
ASSERT_EQ(2, static_features.border().size());
auto part = static_features.border(0);
EXPECT_EQ(part.border_position().x(), 0);
EXPECT_EQ(part.border_position().y(), 0);
EXPECT_EQ(part.border_position().width(), kTestFrameWidth);
EXPECT_LT(std::abs(part.border_position().height() - kTopBorderHeight), 2);
EXPECT_TRUE(static_features.has_solid_background());
EXPECT_EQ(0, static_features.solid_background().r());
EXPECT_EQ(0, static_features.solid_background().g());
EXPECT_EQ(0, static_features.solid_background().b());
EXPECT_EQ(0, static_features.non_static_area().x());
EXPECT_EQ(kTopBorderHeight - 1, static_features.non_static_area().y());
EXPECT_EQ(kTestFrameWidth, static_features.non_static_area().width());
EXPECT_EQ(kTestFrameHeight - kTopBorderHeight - kBottomBorderHeight + 2,
static_features.non_static_area().height());
EXPECT_EQ(Border::TOP, part.relative_position());
part = static_features.border(1);
EXPECT_EQ(part.border_position().x(), 0);
EXPECT_EQ(part.border_position().y(), kTestFrameHeight - kBottomBorderHeight);
EXPECT_EQ(part.border_position().width(), kTestFrameWidth);
EXPECT_LT(std::abs(part.border_position().height() - kBottomBorderHeight), 2);
EXPECT_EQ(Border::BOTTOM, part.relative_position());
}
TEST(BorderDetectionCalculatorTest, TopBottomBorderTestAspect2) {
auto runner = ::absl::make_unique<CalculatorRunner>(
ParseTextProtoOrDie<CalculatorGraphConfig::Node>(kConfig));
const int kBottomBorderHeight = 50;
const int kTopBorderHeight = 25;
auto input_frame = ::absl::make_unique<ImageFrame>(
ImageFormat::SRGB, kTestFrameWidthTall, kTestFrameHeightTall);
cv::Mat input_mat = mediapipe::formats::MatView(input_frame.get());
input_mat.setTo(cv::Scalar(0, 0, 0));
cv::Mat top_image =
input_mat(cv::Rect(0, 0, kTestFrameWidthTall, kTopBorderHeight));
top_image.setTo(cv::Scalar(0, 255, 0));
cv::Mat bottom_image =
input_mat(cv::Rect(0, kTestFrameHeightTall - kBottomBorderHeight,
kTestFrameWidthTall, kBottomBorderHeight));
bottom_image.setTo(cv::Scalar(255, 0, 0));
runner->MutableInputs()->Tag("VIDEO").packets.push_back(
Adopt(input_frame.release()).At(Timestamp::PostStream()));
// Run the calculator.
MP_ASSERT_OK(runner->Run());
const std::vector<Packet>& output_packets =
runner->Outputs().Tag("DETECTED_BORDERS").packets;
ASSERT_EQ(1, output_packets.size());
const auto& static_features = output_packets[0].Get<StaticFeatures>();
ASSERT_EQ(2, static_features.border().size());
auto part = static_features.border(0);
EXPECT_EQ(part.border_position().x(), 0);
EXPECT_EQ(part.border_position().y(), 0);
EXPECT_EQ(part.border_position().width(), kTestFrameWidthTall);
EXPECT_LT(std::abs(part.border_position().height() - kTopBorderHeight), 2);
EXPECT_TRUE(static_features.has_solid_background());
EXPECT_EQ(0, static_features.solid_background().r());
EXPECT_EQ(0, static_features.solid_background().g());
EXPECT_EQ(0, static_features.solid_background().b());
EXPECT_EQ(Border::TOP, part.relative_position());
part = static_features.border(1);
EXPECT_EQ(part.border_position().x(), 0);
EXPECT_EQ(part.border_position().y(),
kTestFrameHeightTall - kBottomBorderHeight);
EXPECT_EQ(part.border_position().width(), kTestFrameWidthTall);
EXPECT_LT(std::abs(part.border_position().height() - kBottomBorderHeight), 2);
EXPECT_TRUE(static_features.has_solid_background());
EXPECT_EQ(0, static_features.solid_background().r());
EXPECT_EQ(0, static_features.solid_background().g());
EXPECT_EQ(0, static_features.solid_background().b());
EXPECT_EQ(Border::BOTTOM, part.relative_position());
}
TEST(BorderDetectionCalculatorTest, DominantColor) {
CalculatorGraphConfig::Node node =
ParseTextProtoOrDie<CalculatorGraphConfig::Node>(kConfigPad);
node.mutable_options()
->MutableExtension(BorderDetectionCalculatorOptions::ext)
->set_solid_background_tol_perc(.25);
auto runner = ::absl::make_unique<CalculatorRunner>(node);
auto input_frame = ::absl::make_unique<ImageFrame>(
ImageFormat::SRGB, kTestFrameWidth, kTestFrameHeight);
cv::Mat input_mat = mediapipe::formats::MatView(input_frame.get());
input_mat.setTo(cv::Scalar(0, 0, 0));
cv::Mat sub_image = input_mat(cv::Rect(
kTestFrameWidth / 2, 0, kTestFrameWidth / 2, kTestFrameHeight / 2));
sub_image.setTo(cv::Scalar(0, 255, 0));
sub_image = input_mat(cv::Rect(0, kTestFrameHeight / 2, kTestFrameWidth / 2,
kTestFrameHeight / 2));
sub_image.setTo(cv::Scalar(0, 0, 255));
sub_image =
input_mat(cv::Rect(0, 0, kTestFrameWidth / 2 + 50, kTestFrameHeight / 2));
sub_image.setTo(cv::Scalar(255, 0, 0));
runner->MutableInputs()->Tag("VIDEO").packets.push_back(
Adopt(input_frame.release()).At(Timestamp::PostStream()));
// Run the calculator.
MP_ASSERT_OK(runner->Run());
const std::vector<Packet>& output_packets =
runner->Outputs().Tag("DETECTED_BORDERS").packets;
ASSERT_EQ(1, output_packets.size());
const auto& static_features = output_packets[0].Get<StaticFeatures>();
ASSERT_EQ(0, static_features.border().size());
ASSERT_TRUE(static_features.has_solid_background());
EXPECT_EQ(0, static_features.solid_background().r());
EXPECT_EQ(0, static_features.solid_background().g());
EXPECT_EQ(255, static_features.solid_background().b());
}
void BM_Large(benchmark::State& state) {
for (auto _ : state) {
auto runner = ::absl::make_unique<CalculatorRunner>(
ParseTextProtoOrDie<CalculatorGraphConfig::Node>(kConfig));
const int kTopBorderHeight = 50;
auto input_frame = ::absl::make_unique<ImageFrame>(
ImageFormat::SRGB, kTestFrameLargeWidth, kTestFrameLargeHeight);
cv::Mat input_mat = mediapipe::formats::MatView(input_frame.get());
input_mat.setTo(cv::Scalar(0, 0, 0));
cv::Mat sub_image =
input_mat(cv::Rect(0, 0, kTestFrameLargeWidth, kTopBorderHeight));
sub_image.setTo(cv::Scalar(255, 0, 0));
runner->MutableInputs()->Tag("VIDEO").packets.push_back(
Adopt(input_frame.release()).At(Timestamp::PostStream()));
// Run the calculator.
MP_ASSERT_OK(runner->Run());
}
}
BENCHMARK(BM_Large);
} // namespace
} // namespace autoflip
} // namespace mediapipe

View File

@ -0,0 +1,269 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <algorithm>
#include <memory>
#include "mediapipe/examples/desktop/autoflip/autoflip_messages.pb.h"
#include "mediapipe/examples/desktop/autoflip/calculators/face_to_region_calculator.pb.h"
#include "mediapipe/examples/desktop/autoflip/quality/visual_scorer.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/detection.pb.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/image_frame_opencv.h"
#include "mediapipe/framework/formats/location_data.pb.h"
#include "mediapipe/framework/port/opencv_core_inc.h"
#include "mediapipe/framework/port/opencv_imgproc_inc.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/framework/port/status_builder.h"
namespace mediapipe {
namespace autoflip {
// This calculator converts detected faces to SalientRegion protos that can be
// used for downstream processing. Each SalientRegion is scored using image
// cues. Scoring can be controlled through
// FaceToRegionCalculatorOptions::scorer_options.
// Example:
// calculator: "FaceToRegionCalculator"
// input_stream: "VIDEO:frames"
// input_stream: "FACES:faces"
// output_stream: "REGIONS:regions"
// options:{
// [mediapipe.autoflip.FaceToRegionCalculatorOptions.ext]:{
// export_individual_face_landmarks: false
// export_whole_face: true
// }
// }
//
class FaceToRegionCalculator : public CalculatorBase {
public:
FaceToRegionCalculator();
~FaceToRegionCalculator() override {}
FaceToRegionCalculator(const FaceToRegionCalculator&) = delete;
FaceToRegionCalculator& operator=(const FaceToRegionCalculator&) = delete;
static ::mediapipe::Status GetContract(mediapipe::CalculatorContract* cc);
::mediapipe::Status Open(mediapipe::CalculatorContext* cc) override;
::mediapipe::Status Process(mediapipe::CalculatorContext* cc) override;
private:
double NormalizeX(const int pixel);
double NormalizeY(const int pixel);
// Extend the given SalientRegion to include the given point.
void ExtendSalientRegionWithPoint(const float x, const float y,
SalientRegion* region);
// Calculator options.
FaceToRegionCalculatorOptions options_;
// A scorer used to assign weights to faces.
std::unique_ptr<VisualScorer> scorer_;
// Dimensions of video frame
int frame_width_;
int frame_height_;
};
REGISTER_CALCULATOR(FaceToRegionCalculator);
FaceToRegionCalculator::FaceToRegionCalculator() {}
::mediapipe::Status FaceToRegionCalculator::GetContract(
mediapipe::CalculatorContract* cc) {
cc->Inputs().Tag("VIDEO").Set<ImageFrame>();
cc->Inputs().Tag("FACES").Set<std::vector<mediapipe::Detection>>();
cc->Outputs().Tag("REGIONS").Set<DetectionSet>();
return ::mediapipe::OkStatus();
}
::mediapipe::Status FaceToRegionCalculator::Open(
mediapipe::CalculatorContext* cc) {
options_ = cc->Options<FaceToRegionCalculatorOptions>();
scorer_ = absl::make_unique<VisualScorer>(options_.scorer_options());
frame_width_ = -1;
frame_height_ = -1;
return ::mediapipe::OkStatus();
}
inline double FaceToRegionCalculator::NormalizeX(const int pixel) {
return pixel / static_cast<double>(frame_width_);
}
inline double FaceToRegionCalculator::NormalizeY(const int pixel) {
return pixel / static_cast<double>(frame_height_);
}
void FaceToRegionCalculator::ExtendSalientRegionWithPoint(
const float x, const float y, SalientRegion* region) {
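  // Grow the box so it contains (x, y): width/height are extended against the
  // current origin first, and only afterwards is the origin itself moved
  // left/up if needed.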
auto* location = region->mutable_location_normalized();
if (!location->has_width()) {
location->set_width(NormalizeX(1));
} else if (x < location->x()) {
location->set_width(location->width() + location->x() - x);
} else if (x > location->x() + location->width()) {
location->set_width(x - location->x());
}
if (!location->has_height()) {
location->set_height(NormalizeY(1));
} else if (y < location->y()) {
location->set_height(location->height() + location->y() - y);
} else if (y > location->y() + location->height()) {
location->set_height(y - location->y());
}
if (!location->has_x()) {
location->set_x(x);
} else {
location->set_x(std::min(location->x(), x));
}
if (!location->has_y()) {
location->set_y(y);
} else {
location->set_y(std::min(location->y(), y));
}
}
::mediapipe::Status FaceToRegionCalculator::Process(
mediapipe::CalculatorContext* cc) {
if (cc->Inputs().Tag("VIDEO").Value().IsEmpty()) {
return ::mediapipe::UnknownErrorBuilder(MEDIAPIPE_LOC) << "No VIDEO input.";
}
cv::Mat frame =
mediapipe::formats::MatView(&cc->Inputs().Tag("VIDEO").Get<ImageFrame>());
frame_width_ = frame.cols;
frame_height_ = frame.rows;
auto region_set = ::absl::make_unique<DetectionSet>();
if (!cc->Inputs().Tag("FACES").Value().IsEmpty()) {
const auto& input_faces =
cc->Inputs().Tag("FACES").Get<std::vector<mediapipe::Detection>>();
for (const auto& input_face : input_faces) {
RET_CHECK(input_face.location_data().format() ==
mediapipe::LocationData::RELATIVE_BOUNDING_BOX)
<< "Face detection input is lacking required relative_bounding_box()";
// 6 landmarks should be provided, ordered as:
// Left eye, Right eye, Nose tip, Mouth center, Left ear tragion, Right
// ear tragion.
RET_CHECK(input_face.location_data().relative_keypoints().size() == 6)
<< "Face detection input expected 6 keypoints, has "
<< input_face.location_data().relative_keypoints().size();
const auto& location = input_face.location_data().relative_bounding_box();
      // Clamp the region so it only covers the part of the face that lies
      // inside the frame.
float x = std::max(0.0f, location.xmin());
float y = std::max(0.0f, location.ymin());
float width =
std::min(location.width() - abs(x - location.xmin()), 1 - x);
float height =
std::min(location.height() - abs(y - location.ymin()), 1 - y);
// Convert the face to a region.
if (options_.export_whole_face()) {
SalientRegion* region = region_set->add_detections();
region->mutable_location_normalized()->set_x(x);
region->mutable_location_normalized()->set_y(y);
region->mutable_location_normalized()->set_width(width);
region->mutable_location_normalized()->set_height(height);
region->mutable_signal_type()->set_standard(SignalType::FACE_FULL);
// Score the face based on image cues.
float visual_score = 1.0f;
if (options_.use_visual_scorer()) {
MP_RETURN_IF_ERROR(
scorer_->CalculateScore(frame, *region, &visual_score));
}
region->set_score(visual_score);
}
// Generate two more output regions from important face landmarks. One
// includes all exterior landmarks, such as ears and chin, and the
// other includes only interior landmarks, such as the eye edges and the
// mouth.
SalientRegion core_landmark_region, all_landmark_region;
// Keypoints are ordered: Left Eye, Right Eye, Nose Tip, Mouth Center,
// Left Ear Tragion, Right Ear Tragion.
// Set 'core' landmarks (Left Eye, Right Eye, Nose Tip, Mouth Center)
for (int i = 0; i < 4; i++) {
const auto& keypoint = input_face.location_data().relative_keypoints(i);
if (options_.export_individual_face_landmarks()) {
SalientRegion* region = region_set->add_detections();
region->mutable_location_normalized()->set_x(keypoint.x());
region->mutable_location_normalized()->set_y(keypoint.y());
region->mutable_location_normalized()->set_width(NormalizeX(1));
region->mutable_location_normalized()->set_height(NormalizeY(1));
region->mutable_signal_type()->set_standard(
SignalType::FACE_LANDMARK);
}
        // Extend the core/full landmark regions to include the new landmark.
ExtendSalientRegionWithPoint(keypoint.x(), keypoint.y(),
&core_landmark_region);
ExtendSalientRegionWithPoint(keypoint.x(), keypoint.y(),
&all_landmark_region);
}
// Set 'all' landmarks (Left Ear Tragion, Right Ear Tragion + core)
for (int i = 4; i < 6; i++) {
const auto& keypoint = input_face.location_data().relative_keypoints(i);
if (options_.export_individual_face_landmarks()) {
SalientRegion* region = region_set->add_detections();
          region->mutable_location_normalized()->set_x(keypoint.x());
          region->mutable_location_normalized()->set_y(keypoint.y());
          region->mutable_location_normalized()->set_width(NormalizeX(1));
          region->mutable_location_normalized()->set_height(NormalizeY(1));
region->mutable_signal_type()->set_standard(
SignalType::FACE_LANDMARK);
}
// Extend the full landmark region to include the new landmark.
ExtendSalientRegionWithPoint(keypoint.x(), keypoint.y(),
&all_landmark_region);
}
// Generate scores for the landmark bboxes and export them.
if (options_.export_bbox_from_landmarks() &&
core_landmark_region.has_location_normalized()) { // Not empty.
float visual_score = 1.0f;
if (options_.use_visual_scorer()) {
MP_RETURN_IF_ERROR(scorer_->CalculateScore(
frame, core_landmark_region, &visual_score));
}
core_landmark_region.set_score(visual_score);
core_landmark_region.mutable_signal_type()->set_standard(
SignalType::FACE_CORE_LANDMARKS);
*region_set->add_detections() = core_landmark_region;
}
if (options_.export_bbox_from_landmarks() &&
all_landmark_region.has_location_normalized()) { // Not empty.
float visual_score = 1.0f;
if (options_.use_visual_scorer()) {
MP_RETURN_IF_ERROR(scorer_->CalculateScore(frame, all_landmark_region,
&visual_score));
}
all_landmark_region.set_score(visual_score);
all_landmark_region.mutable_signal_type()->set_standard(
SignalType::FACE_ALL_LANDMARKS);
*region_set->add_detections() = all_landmark_region;
}
}
}
cc->Outputs().Tag("REGIONS").Add(region_set.release(), cc->InputTimestamp());
return ::mediapipe::OkStatus();
}
} // namespace autoflip
} // namespace mediapipe

View File

@ -0,0 +1,50 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe.autoflip;
import "mediapipe/examples/desktop/autoflip/quality/visual_scorer.proto";
import "mediapipe/framework/calculator.proto";
// Next tag: 6
message FaceToRegionCalculatorOptions {
extend mediapipe.CalculatorOptions {
optional FaceToRegionCalculatorOptions ext = 282401234;
}
// Options for generating a score for the entire face from its visual
// appearance. The generated score is used to modulate the detection scores
// for whole face and/or landmark bbox region types.
optional VisualScorerOptions scorer_options = 1;
// If true, export the large face bounding box generated by the face tracker.
// This bounding box is generally larger than the actual face and relatively
// inaccurate.
optional bool export_whole_face = 2 [default = false];
// If true, export a number of individual face landmarks (eyes, nose, mouth,
// ears etc) as separate SalientRegion protos.
optional bool export_individual_face_landmarks = 3 [default = false];
// If true, export two bounding boxes from landmarks (one for the core face
// landmarks like eyes and nose, and one for extended landmarks including ears
// and chin).
optional bool export_bbox_from_landmarks = 4 [default = true];
// If true, generate a score from the appearance of the face and use it to
// modulate the detection scores for whole face and/or landmark bboxes.
optional bool use_visual_scorer = 5 [default = true];
}
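// A sketch of how these options can be set on a FaceToRegionCalculator node
// in a graph config (values are illustrative, mirroring the calculator's
// documentation and unit tests):
//   options: {
//     [mediapipe.autoflip.FaceToRegionCalculatorOptions.ext]: {
//       export_individual_face_landmarks: false
//       export_whole_face: true
//     }
//   }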

View File

@ -0,0 +1,247 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/string_view.h"
#include "mediapipe/examples/desktop/autoflip/autoflip_messages.pb.h"
#include "mediapipe/examples/desktop/autoflip/calculators/face_to_region_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_runner.h"
#include "mediapipe/framework/formats/detection.pb.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/image_frame_opencv.h"
#include "mediapipe/framework/port/gmock.h"
#include "mediapipe/framework/port/gtest.h"
#include "mediapipe/framework/port/parse_text_proto.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/framework/port/status_matchers.h"
using mediapipe::Detection;
namespace mediapipe {
namespace autoflip {
namespace {
const char kConfig[] = R"(
calculator: "FaceToRegionCalculator"
input_stream: "VIDEO:frames"
input_stream: "FACES:faces"
output_stream: "REGIONS:regions"
)";
const char kFace1[] = R"(location_data {
format: RELATIVE_BOUNDING_BOX
relative_bounding_box {
xmin: -0.00375
ymin: 0.003333
width: 0.125
height: 0.33333
}
relative_keypoints { x: 0.03125 y: 0.05 }
relative_keypoints { x: 0.0875 y: 0.0666666 }
relative_keypoints { x: 0.03125 y: 0.05 }
relative_keypoints { x: 0.0875 y: 0.0666666 }
relative_keypoints { x: 0.0250 y: 0.0666666 }
relative_keypoints { x: 0.0950 y: 0.0666666 }
})";
const char kFace2[] = R"(location_data {
format: RELATIVE_BOUNDING_BOX
relative_bounding_box {
xmin: 0.0025
ymin: 0.005
width: 0.25
height: 0.5
}
relative_keypoints { x: 0 y: 0 }
relative_keypoints { x: 0 y: 0 }
relative_keypoints { x: 0 y: 0 }
relative_keypoints { x: 0 y: 0 }
relative_keypoints { x: 0 y: 0 }
relative_keypoints { x: 0 y: 0 }
})";
const char kFace3[] = R"(location_data {
format: RELATIVE_BOUNDING_BOX
relative_bounding_box {
xmin: 0.0
ymin: 0.0
width: 0.5
height: 0.5
}
relative_keypoints { x: 0 y: 0 }
relative_keypoints { x: 0 y: 0 }
relative_keypoints { x: 0 y: 0 }
relative_keypoints { x: 0 y: 0 }
relative_keypoints { x: 0 y: 0 }
relative_keypoints { x: 0 y: 0 }
})";
void SetInputs(CalculatorRunner* runner,
const std::vector<std::string>& faces) {
// Setup an input video frame.
auto input_frame =
::absl::make_unique<ImageFrame>(ImageFormat::SRGB, 800, 600);
runner->MutableInputs()->Tag("VIDEO").packets.push_back(
Adopt(input_frame.release()).At(Timestamp::PostStream()));
  // Set up the input faces.
  auto input_faces = ::absl::make_unique<std::vector<Detection>>();
  // Parse each face text proto into a Detection.
for (const auto& face : faces) {
input_faces->push_back(ParseTextProtoOrDie<Detection>(face));
}
runner->MutableInputs()->Tag("FACES").packets.push_back(
Adopt(input_faces.release()).At(Timestamp::PostStream()));
}
CalculatorGraphConfig::Node MakeConfig(bool whole_face, bool landmarks,
bool bb_from_landmarks) {
auto config = ParseTextProtoOrDie<CalculatorGraphConfig::Node>(kConfig);
config.mutable_options()
->MutableExtension(FaceToRegionCalculatorOptions::ext)
->set_export_whole_face(whole_face);
config.mutable_options()
->MutableExtension(FaceToRegionCalculatorOptions::ext)
->set_export_individual_face_landmarks(landmarks);
config.mutable_options()
->MutableExtension(FaceToRegionCalculatorOptions::ext)
->set_export_bbox_from_landmarks(bb_from_landmarks);
return config;
}
TEST(FaceToRegionCalculatorTest, FaceFullTypeSize) {
// Setup test
auto runner =
::absl::make_unique<CalculatorRunner>(MakeConfig(true, false, false));
SetInputs(runner.get(), {kFace1, kFace2});
// Run the calculator.
MP_ASSERT_OK(runner->Run());
// Check the output regions.
const std::vector<Packet>& output_packets =
runner->Outputs().Tag("REGIONS").packets;
ASSERT_EQ(1, output_packets.size());
const auto& regions = output_packets[0].Get<DetectionSet>();
ASSERT_EQ(2, regions.detections().size());
auto face_1 = regions.detections(0);
EXPECT_EQ(face_1.signal_type().standard(), SignalType::FACE_FULL);
EXPECT_FLOAT_EQ(face_1.location_normalized().x(), 0);
EXPECT_FLOAT_EQ(face_1.location_normalized().y(), 0.003333);
EXPECT_FLOAT_EQ(face_1.location_normalized().width(), 0.12125);
EXPECT_FLOAT_EQ(face_1.location_normalized().height(), 0.33333);
EXPECT_FLOAT_EQ(face_1.score(), 0.040214583);
auto face_2 = regions.detections(1);
EXPECT_EQ(face_2.signal_type().standard(), SignalType::FACE_FULL);
EXPECT_FLOAT_EQ(face_2.location_normalized().x(), 0.0025);
EXPECT_FLOAT_EQ(face_2.location_normalized().y(), 0.005);
EXPECT_FLOAT_EQ(face_2.location_normalized().width(), 0.25);
EXPECT_FLOAT_EQ(face_2.location_normalized().height(), 0.5);
EXPECT_FLOAT_EQ(face_2.score(), 0.125);
}
TEST(FaceToRegionCalculatorTest, FaceLandmarksTypeSize) {
// Setup test
auto runner =
::absl::make_unique<CalculatorRunner>(MakeConfig(false, true, false));
SetInputs(runner.get(), {kFace1});
// Run the calculator.
MP_ASSERT_OK(runner->Run());
// Check the output regions.
const std::vector<Packet>& output_packets =
runner->Outputs().Tag("REGIONS").packets;
ASSERT_EQ(1, output_packets.size());
const auto& regions = output_packets[0].Get<DetectionSet>();
ASSERT_EQ(6, regions.detections().size());
auto landmark_1 = regions.detections(0);
EXPECT_EQ(landmark_1.signal_type().standard(), SignalType::FACE_LANDMARK);
EXPECT_FLOAT_EQ(landmark_1.location_normalized().x(), 0.03125);
EXPECT_FLOAT_EQ(landmark_1.location_normalized().y(), 0.05);
EXPECT_FLOAT_EQ(landmark_1.location_normalized().width(), 0.00125);
EXPECT_FLOAT_EQ(landmark_1.location_normalized().height(), 0.0016666667);
auto landmark_2 = regions.detections(1);
EXPECT_EQ(landmark_2.signal_type().standard(), SignalType::FACE_LANDMARK);
EXPECT_FLOAT_EQ(landmark_2.location_normalized().x(), 0.0875);
EXPECT_FLOAT_EQ(landmark_2.location_normalized().y(), 0.0666666);
EXPECT_FLOAT_EQ(landmark_2.location_normalized().width(), 0.00125);
EXPECT_FLOAT_EQ(landmark_2.location_normalized().height(), 0.0016666667);
}
TEST(FaceToRegionCalculatorTest, FaceLandmarksBox) {
// Setup test
auto runner =
::absl::make_unique<CalculatorRunner>(MakeConfig(false, false, true));
SetInputs(runner.get(), {kFace1});
// Run the calculator.
MP_ASSERT_OK(runner->Run());
// Check the output regions.
const std::vector<Packet>& output_packets =
runner->Outputs().Tag("REGIONS").packets;
ASSERT_EQ(1, output_packets.size());
const auto& regions = output_packets[0].Get<DetectionSet>();
ASSERT_EQ(2, regions.detections().size());
auto landmark_1 = regions.detections(0);
EXPECT_EQ(landmark_1.signal_type().standard(),
SignalType::FACE_CORE_LANDMARKS);
EXPECT_FLOAT_EQ(landmark_1.location_normalized().x(), 0.03125);
EXPECT_FLOAT_EQ(landmark_1.location_normalized().y(), 0.05);
EXPECT_FLOAT_EQ(landmark_1.location_normalized().width(), 0.056249999);
EXPECT_FLOAT_EQ(landmark_1.location_normalized().height(), 0.016666602);
EXPECT_FLOAT_EQ(landmark_1.score(), 0.00084375002);
auto landmark_2 = regions.detections(1);
EXPECT_EQ(landmark_2.signal_type().standard(),
SignalType::FACE_ALL_LANDMARKS);
EXPECT_FLOAT_EQ(landmark_2.location_normalized().x(), 0.025);
EXPECT_FLOAT_EQ(landmark_2.location_normalized().y(), 0.050000001);
EXPECT_FLOAT_EQ(landmark_2.location_normalized().width(), 0.07);
EXPECT_FLOAT_EQ(landmark_2.location_normalized().height(), 0.016666602);
EXPECT_FLOAT_EQ(landmark_2.score(), 0.00105);
}
TEST(FaceToRegionCalculatorTest, FaceScore) {
// Setup test
auto runner =
::absl::make_unique<CalculatorRunner>(MakeConfig(true, false, false));
SetInputs(runner.get(), {kFace3});
// Run the calculator.
MP_ASSERT_OK(runner->Run());
// Check the output regions.
const std::vector<Packet>& output_packets =
runner->Outputs().Tag("REGIONS").packets;
ASSERT_EQ(1, output_packets.size());
const auto& regions = output_packets[0].Get<DetectionSet>();
ASSERT_EQ(1, regions.detections().size());
auto landmark_1 = regions.detections(0);
EXPECT_FLOAT_EQ(landmark_1.score(), 0.25);
}
} // namespace
} // namespace autoflip
} // namespace mediapipe

View File

@ -0,0 +1,126 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <memory>
#include <string>
#include "absl/memory/memory.h"
#include "mediapipe/examples/desktop/autoflip/autoflip_messages.pb.h"
#include "mediapipe/examples/desktop/autoflip/calculators/localization_to_region_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/detection.pb.h"
#include "mediapipe/framework/formats/location_data.pb.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
namespace mediapipe {
namespace autoflip {
// This calculator converts detections from ObjectLocalizationCalculator to
// SalientRegion protos that can be used for downstream processing.
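// Example config (stream names are illustrative; this mirrors the
// calculator's unit tests):
//   calculator: "LocalizationToRegionCalculator"
//   input_stream: "DETECTIONS:detections"
//   output_stream: "REGIONS:regions"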
class LocalizationToRegionCalculator : public mediapipe::CalculatorBase {
public:
LocalizationToRegionCalculator();
~LocalizationToRegionCalculator() override {}
LocalizationToRegionCalculator(const LocalizationToRegionCalculator&) =
delete;
LocalizationToRegionCalculator& operator=(
const LocalizationToRegionCalculator&) = delete;
static ::mediapipe::Status GetContract(mediapipe::CalculatorContract* cc);
::mediapipe::Status Open(mediapipe::CalculatorContext* cc) override;
::mediapipe::Status Process(mediapipe::CalculatorContext* cc) override;
private:
// Calculator options.
LocalizationToRegionCalculatorOptions options_;
};
REGISTER_CALCULATOR(LocalizationToRegionCalculator);
LocalizationToRegionCalculator::LocalizationToRegionCalculator() {}
namespace {
// Converts an object detection label to an autoflip SignalType. Returns true
// if the label maps to an autoflip type.
bool MatchType(const std::string& label, SignalType* type) {
if (label == "person") {
type->set_standard(SignalType::HUMAN);
return true;
}
if (label == "car" || label == "truck") {
type->set_standard(SignalType::CAR);
return true;
}
if (label == "dog" || label == "cat" || label == "bird" || label == "horse") {
type->set_standard(SignalType::PET);
return true;
}
return false;
}
// Converts a detection to a SalientRegion with a given label.
void FillSalientRegion(const mediapipe::Detection& detection,
const SignalType& label, SalientRegion* region) {
const auto& location = detection.location_data().relative_bounding_box();
region->mutable_location_normalized()->set_x(location.xmin());
region->mutable_location_normalized()->set_y(location.ymin());
region->mutable_location_normalized()->set_width(location.width());
region->mutable_location_normalized()->set_height(location.height());
region->set_score(1.0);
*region->mutable_signal_type() = label;
}
} // namespace
::mediapipe::Status LocalizationToRegionCalculator::GetContract(
mediapipe::CalculatorContract* cc) {
cc->Inputs().Tag("DETECTIONS").Set<std::vector<mediapipe::Detection>>();
cc->Outputs().Tag("REGIONS").Set<DetectionSet>();
return ::mediapipe::OkStatus();
}
::mediapipe::Status LocalizationToRegionCalculator::Open(
mediapipe::CalculatorContext* cc) {
options_ = cc->Options<LocalizationToRegionCalculatorOptions>();
return ::mediapipe::OkStatus();
}
::mediapipe::Status LocalizationToRegionCalculator::Process(
mediapipe::CalculatorContext* cc) {
const auto& annotations =
cc->Inputs().Tag("DETECTIONS").Get<std::vector<mediapipe::Detection>>();
auto regions = ::absl::make_unique<DetectionSet>();
for (const auto& detection : annotations) {
RET_CHECK_EQ(detection.label().size(), 1)
<< "Number of labels not equal to one.";
SignalType autoflip_label;
if (MatchType(detection.label(0), &autoflip_label) &&
options_.output_standard_signals()) {
FillSalientRegion(detection, autoflip_label, regions->add_detections());
}
if (options_.output_all_signals()) {
SignalType object;
object.set_standard(SignalType::OBJECT);
FillSalientRegion(detection, object, regions->add_detections());
}
}
cc->Outputs().Tag("REGIONS").Add(regions.release(), cc->InputTimestamp());
return ::mediapipe::OkStatus();
}
} // namespace autoflip
} // namespace mediapipe

View File

@ -0,0 +1,33 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe.autoflip;
import "mediapipe/framework/calculator.proto";
message LocalizationToRegionCalculatorOptions {
extend mediapipe.CalculatorOptions {
optional LocalizationToRegionCalculatorOptions ext = 284226721;
}
  // Output standard autoflip signals only (Human, Pet, Car, etc.) and apply
  // standard autoflip labels.
  optional bool output_standard_signals = 1 [default = true];
  // Output all signals (regardless of label) and set the autoflip label to
  // 'Object'. Can be combined with output_standard_signals, giving each
  // detection both an 'Object' label and an autoflip-specific label.
optional bool output_all_signals = 2 [default = false];
}
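// A sketch of how these options can be set on a
// LocalizationToRegionCalculator node in a graph config (values are
// illustrative, mirroring the calculator's unit tests):
//   options: {
//     [mediapipe.autoflip.LocalizationToRegionCalculatorOptions.ext]: {
//       output_standard_signals: true
//       output_all_signals: false
//     }
//   }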

View File

@ -0,0 +1,164 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/string_view.h"
#include "mediapipe/examples/desktop/autoflip/autoflip_messages.pb.h"
#include "mediapipe/examples/desktop/autoflip/calculators/localization_to_region_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_runner.h"
#include "mediapipe/framework/formats/detection.pb.h"
#include "mediapipe/framework/port/gmock.h"
#include "mediapipe/framework/port/gtest.h"
#include "mediapipe/framework/port/parse_text_proto.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/framework/port/status_matchers.h"
using mediapipe::Detection;
namespace mediapipe {
namespace autoflip {
namespace {
const char kConfig[] = R"(
calculator: "LocalizationToRegionCalculator"
input_stream: "DETECTIONS:detections"
output_stream: "REGIONS:regions"
)";
const char kCar[] = R"(
label: "car"
location_data {
format: RELATIVE_BOUNDING_BOX
relative_bounding_box {
xmin: -0.00375
ymin: 0.003333
width: 0.125
height: 0.33333
}
})";
const char kDog[] = R"(
label: "dog"
location_data {
format: RELATIVE_BOUNDING_BOX
relative_bounding_box {
xmin: 0.0025
ymin: 0.005
width: 0.25
height: 0.5
}
})";
const char kZebra[] = R"(
label: "zebra"
location_data {
format: RELATIVE_BOUNDING_BOX
relative_bounding_box {
xmin: 0.0
ymin: 0.0
width: 0.5
height: 0.5
}
})";
void SetInputs(CalculatorRunner* runner,
const std::vector<std::string>& detections) {
auto inputs = ::absl::make_unique<std::vector<Detection>>();
  // Parse each detection text proto into a Detection.
for (const auto& detection : detections) {
inputs->push_back(ParseTextProtoOrDie<Detection>(detection));
}
runner->MutableInputs()
->Tag("DETECTIONS")
.packets.push_back(Adopt(inputs.release()).At(Timestamp::PostStream()));
}
CalculatorGraphConfig::Node MakeConfig(bool output_standard, bool output_all) {
auto config = ParseTextProtoOrDie<CalculatorGraphConfig::Node>(kConfig);
config.mutable_options()
->MutableExtension(LocalizationToRegionCalculatorOptions::ext)
->set_output_standard_signals(output_standard);
config.mutable_options()
->MutableExtension(LocalizationToRegionCalculatorOptions::ext)
->set_output_all_signals(output_all);
return config;
}
TEST(LocalizationToRegionCalculatorTest, StandardTypes) {
// Setup test
auto runner = ::absl::make_unique<CalculatorRunner>(MakeConfig(true, false));
SetInputs(runner.get(), {kCar, kDog, kZebra});
// Run the calculator.
MP_ASSERT_OK(runner->Run());
// Check the output regions.
const std::vector<Packet>& output_packets =
runner->Outputs().Tag("REGIONS").packets;
ASSERT_EQ(1, output_packets.size());
const auto& regions = output_packets[0].Get<DetectionSet>();
ASSERT_EQ(2, regions.detections().size());
const auto& detection = regions.detections(0);
EXPECT_EQ(detection.signal_type().standard(), SignalType::CAR);
EXPECT_FLOAT_EQ(detection.location_normalized().x(), -0.00375);
EXPECT_FLOAT_EQ(detection.location_normalized().y(), 0.003333);
EXPECT_FLOAT_EQ(detection.location_normalized().width(), 0.125);
EXPECT_FLOAT_EQ(detection.location_normalized().height(), 0.33333);
const auto& detection_1 = regions.detections(1);
EXPECT_EQ(detection_1.signal_type().standard(), SignalType::PET);
EXPECT_FLOAT_EQ(detection_1.location_normalized().x(), 0.0025);
EXPECT_FLOAT_EQ(detection_1.location_normalized().y(), 0.005);
EXPECT_FLOAT_EQ(detection_1.location_normalized().width(), 0.25);
EXPECT_FLOAT_EQ(detection_1.location_normalized().height(), 0.5);
}
TEST(LocalizationToRegionCalculatorTest, AllTypes) {
// Setup test
auto runner = ::absl::make_unique<CalculatorRunner>(MakeConfig(false, true));
SetInputs(runner.get(), {kCar, kDog, kZebra});
// Run the calculator.
MP_ASSERT_OK(runner->Run());
// Check the output regions.
const std::vector<Packet>& output_packets =
runner->Outputs().Tag("REGIONS").packets;
ASSERT_EQ(1, output_packets.size());
const auto& regions = output_packets[0].Get<DetectionSet>();
ASSERT_EQ(3, regions.detections().size());
}
TEST(LocalizationToRegionCalculatorTest, BothTypes) {
// Setup test
auto runner = ::absl::make_unique<CalculatorRunner>(MakeConfig(true, true));
SetInputs(runner.get(), {kCar, kDog, kZebra});
// Run the calculator.
MP_ASSERT_OK(runner->Run());
// Check the output regions.
const std::vector<Packet>& output_packets =
runner->Outputs().Tag("REGIONS").packets;
ASSERT_EQ(1, output_packets.size());
const auto& regions = output_packets[0].Get<DetectionSet>();
ASSERT_EQ(5, regions.detections().size());
}
} // namespace
} // namespace autoflip
} // namespace mediapipe

View File

@ -0,0 +1,589 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/examples/desktop/autoflip/calculators/scene_cropping_calculator.h"
#include <cmath>
#include "absl/memory/memory.h"
#include "absl/strings/str_format.h"
#include "mediapipe/examples/desktop/autoflip/autoflip_messages.pb.h"
#include "mediapipe/examples/desktop/autoflip/quality/scene_cropping_viz.h"
#include "mediapipe/examples/desktop/autoflip/quality/utils.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/image_frame_opencv.h"
#include "mediapipe/framework/port/opencv_core_inc.h"
#include "mediapipe/framework/port/opencv_imgproc_inc.h"
#include "mediapipe/framework/port/parse_text_proto.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/framework/timestamp.h"
namespace mediapipe {
namespace autoflip {
constexpr char kInputVideoFrames[] = "VIDEO_FRAMES";
constexpr char kInputKeyFrames[] = "KEY_FRAMES";
constexpr char kInputDetections[] = "DETECTION_FEATURES";
constexpr char kInputStaticFeatures[] = "STATIC_FEATURES";
constexpr char kInputShotBoundaries[] = "SHOT_BOUNDARIES";
constexpr char kInputExternalSettings[] = "EXTERNAL_SETTINGS";
// This side packet must be used in conjunction with
// TargetSizeType::MAXIMIZE_TARGET_DIMENSION
constexpr char kAspectRatio[] = "EXTERNAL_ASPECT_RATIO";
constexpr char kOutputCroppedFrames[] = "CROPPED_FRAMES";
constexpr char kOutputKeyFrameCropViz[] = "KEY_FRAME_CROP_REGION_VIZ_FRAMES";
constexpr char kOutputFocusPointFrameViz[] = "SALIENT_POINT_FRAME_VIZ_FRAMES";
constexpr char kOutputSummary[] = "CROPPING_SUMMARY";
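// Declares the optional input side packets, the required/optional input
// streams, and the required/optional output streams for this calculator (see
// the header file for a description of each tag).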
::mediapipe::Status SceneCroppingCalculator::GetContract(
::mediapipe::CalculatorContract* cc) {
if (cc->InputSidePackets().HasTag(kInputExternalSettings)) {
cc->InputSidePackets().Tag(kInputExternalSettings).Set<std::string>();
}
if (cc->InputSidePackets().HasTag(kAspectRatio)) {
cc->InputSidePackets().Tag(kAspectRatio).Set<std::string>();
}
cc->Inputs().Tag(kInputVideoFrames).Set<ImageFrame>();
if (cc->Inputs().HasTag(kInputKeyFrames)) {
cc->Inputs().Tag(kInputKeyFrames).Set<ImageFrame>();
}
cc->Inputs().Tag(kInputDetections).Set<DetectionSet>();
if (cc->Inputs().HasTag(kInputStaticFeatures)) {
cc->Inputs().Tag(kInputStaticFeatures).Set<StaticFeatures>();
}
cc->Inputs().Tag(kInputShotBoundaries).Set<bool>();
cc->Outputs().Tag(kOutputCroppedFrames).Set<ImageFrame>();
if (cc->Outputs().HasTag(kOutputKeyFrameCropViz)) {
cc->Outputs().Tag(kOutputKeyFrameCropViz).Set<ImageFrame>();
}
if (cc->Outputs().HasTag(kOutputFocusPointFrameViz)) {
cc->Outputs().Tag(kOutputFocusPointFrameViz).Set<ImageFrame>();
}
if (cc->Outputs().HasTag(kOutputSummary)) {
cc->Outputs().Tag(kOutputSummary).Set<VideoCroppingSummary>();
}
return ::mediapipe::OkStatus();
}
::mediapipe::Status SceneCroppingCalculator::Open(CalculatorContext* cc) {
options_ = cc->Options<SceneCroppingCalculatorOptions>();
RET_CHECK_GT(options_.max_scene_size(), 0)
<< "Maximum scene size is non-positive.";
RET_CHECK_GE(options_.prior_frame_buffer_size(), 0)
<< "Prior frame buffer size is negative.";
RET_CHECK(options_.solid_background_frames_padding_fraction() >= 0.0 &&
options_.solid_background_frames_padding_fraction() <= 1.0)
<< "Solid background frames padding fraction is not in [0, 1].";
const auto& padding_params = options_.padding_parameters();
background_contrast_ = padding_params.background_contrast();
RET_CHECK(background_contrast_ >= 0.0 && background_contrast_ <= 1.0)
<< "Background contrast " << background_contrast_ << " is not in [0, 1].";
blur_cv_size_ = padding_params.blur_cv_size();
RET_CHECK_GT(blur_cv_size_, 0) << "Blur cv size is non-positive.";
overlay_opacity_ = padding_params.overlay_opacity();
RET_CHECK(overlay_opacity_ >= 0.0 && overlay_opacity_ <= 1.0)
<< "Overlay opacity " << overlay_opacity_ << " is not in [0, 1].";
scene_cropper_ = absl::make_unique<SceneCropper>();
if (cc->Outputs().HasTag(kOutputSummary)) {
summary_ = absl::make_unique<VideoCroppingSummary>();
}
return ::mediapipe::OkStatus();
}
namespace {
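// Parses an aspect ratio string of the form "width:height" (e.g. "1:1" or
// "5:4") into a single width/height ratio.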
::mediapipe::Status ParseAspectRatioString(
const std::string& aspect_ratio_string, double* aspect_ratio) {
std::string error_msg =
"Aspect ratio std::string must be in the format of 'width:height', e.g. "
"'1:1' or '5:4', your input was " +
aspect_ratio_string;
auto pos = aspect_ratio_string.find(":");
RET_CHECK(pos != std::string::npos) << error_msg;
double width_ratio;
RET_CHECK(absl::SimpleAtod(aspect_ratio_string.substr(0, pos), &width_ratio))
<< error_msg;
double height_ratio;
RET_CHECK(absl::SimpleAtod(
aspect_ratio_string.substr(pos + 1, aspect_ratio_string.size()),
&height_ratio))
<< error_msg;
*aspect_ratio = width_ratio / height_ratio;
return ::mediapipe::OkStatus();
}
} // namespace
::mediapipe::Status SceneCroppingCalculator::Process(
::mediapipe::CalculatorContext* cc) {
// Sets frame dimension and format.
if (frame_width_ < 0 &&
!cc->Inputs().Tag(kInputVideoFrames).Value().IsEmpty()) {
const auto& frame = cc->Inputs().Tag(kInputVideoFrames).Get<ImageFrame>();
frame_width_ = frame.Width();
RET_CHECK_GT(frame_width_, 0) << "Input frame width is non-positive.";
frame_height_ = frame.Height();
RET_CHECK_GT(frame_height_, 0) << "Input frame height is non-positive.";
frame_format_ = frame.Format();
target_width_ = options_.target_width();
target_height_ = options_.target_height();
if (cc->InputSidePackets().HasTag(kInputExternalSettings)) {
auto conversion_options = ParseTextProtoOrDie<ConversionOptions>(
cc->InputSidePackets()
.Tag(kInputExternalSettings)
.Get<std::string>());
target_width_ = conversion_options.target_width();
target_height_ = conversion_options.target_height();
}
target_aspect_ratio_ = static_cast<double>(target_width_) / target_height_;
RET_CHECK_NE(options_.target_size_type(),
SceneCroppingCalculatorOptions::UNKNOWN)
<< "TargetSizeType not set properly.";
// Resets target size if keep original height or width.
if (options_.target_size_type() ==
SceneCroppingCalculatorOptions::KEEP_ORIGINAL_HEIGHT) {
target_height_ = frame_height_;
target_width_ = std::round(target_height_ * target_aspect_ratio_);
} else if (options_.target_size_type() ==
SceneCroppingCalculatorOptions::KEEP_ORIGINAL_WIDTH) {
target_width_ = frame_width_;
target_height_ = std::round(target_width_ / target_aspect_ratio_);
} else if (options_.target_size_type() ==
SceneCroppingCalculatorOptions::MAXIMIZE_TARGET_DIMENSION) {
RET_CHECK(cc->InputSidePackets().HasTag(kAspectRatio))
<< "MAXIMIZE_TARGET_DIMENSION is set without an "
"external_aspect_ratio";
double requested_aspect_ratio;
MP_RETURN_IF_ERROR(ParseAspectRatioString(
cc->InputSidePackets().Tag(kAspectRatio).Get<std::string>(),
&requested_aspect_ratio));
const double original_aspect_ratio =
static_cast<double>(frame_width_) / frame_height_;
if (original_aspect_ratio > requested_aspect_ratio) {
target_height_ = frame_height_;
target_width_ = std::round(target_height_ * requested_aspect_ratio);
} else {
target_width_ = frame_width_;
target_height_ = std::round(target_width_ / requested_aspect_ratio);
}
}
// Makes sure that target size is even if keep original width or height.
if (options_.target_size_type() !=
SceneCroppingCalculatorOptions::USE_TARGET_DIMENSION) {
if (target_width_ % 2 == 1) {
target_width_ = std::max(2, target_width_ - 1);
}
if (target_height_ % 2 == 1) {
target_height_ = std::max(2, target_height_ - 1);
}
target_aspect_ratio_ =
static_cast<double>(target_width_) / target_height_;
}
// Set keyframe width/height for feature upscaling (overwritten by keyframe
// input if provided).
if (options_.has_video_features_width() &&
options_.has_video_features_height()) {
key_frame_width_ = options_.video_features_width();
key_frame_height_ = options_.video_features_height();
} else if (!cc->Inputs().HasTag(kInputKeyFrames)) {
key_frame_width_ = frame_width_;
key_frame_height_ = frame_height_;
}
// Check provided dimensions.
RET_CHECK_GT(target_width_, 0) << "Target width is non-positive.";
RET_CHECK_NE(target_width_ % 2, 1)
<< "Target width cannot be odd, because encoder expects dimension "
"values to be even.";
RET_CHECK_GT(target_height_, 0) << "Target height is non-positive.";
RET_CHECK_NE(target_height_ % 2, 1)
<< "Target height cannot be odd, because encoder expects dimension "
"values to be even.";
}
// Sets key frame dimension.
if (cc->Inputs().HasTag(kInputKeyFrames) &&
!cc->Inputs().Tag(kInputKeyFrames).Value().IsEmpty() &&
key_frame_width_ < 0) {
const auto& key_frame = cc->Inputs().Tag(kInputKeyFrames).Get<ImageFrame>();
key_frame_width_ = key_frame.Width();
key_frame_height_ = key_frame.Height();
}
// Processes a scene when shot boundary or buffer is full.
bool is_end_of_scene = false;
if (!cc->Inputs().Tag(kInputShotBoundaries).Value().IsEmpty()) {
is_end_of_scene = cc->Inputs().Tag(kInputShotBoundaries).Get<bool>();
}
const bool force_buffer_flush =
scene_frames_.size() >= options_.max_scene_size();
if (!scene_frames_.empty() && (is_end_of_scene || force_buffer_flush)) {
MP_RETURN_IF_ERROR(ProcessScene(is_end_of_scene, cc));
}
// Saves frame and timestamp and whether it is a key frame.
if (!cc->Inputs().Tag(kInputVideoFrames).Value().IsEmpty()) {
LOG_EVERY_N(ERROR, 10)
<< "------------------------ (Breathing) Time(s): "
<< cc->Inputs().Tag(kInputVideoFrames).Value().Timestamp().Seconds();
const auto& frame = cc->Inputs().Tag(kInputVideoFrames).Get<ImageFrame>();
const cv::Mat frame_mat = formats::MatView(&frame);
cv::Mat copy_mat;
frame_mat.copyTo(copy_mat);
scene_frames_.push_back(copy_mat);
scene_frame_timestamps_.push_back(cc->InputTimestamp().Value());
is_key_frames_.push_back(
!cc->Inputs().Tag(kInputDetections).Value().IsEmpty());
}
// Packs key frame info.
if (!cc->Inputs().Tag(kInputDetections).Value().IsEmpty()) {
const auto& detections =
cc->Inputs().Tag(kInputDetections).Get<DetectionSet>();
KeyFrameInfo key_frame_info;
MP_RETURN_IF_ERROR(PackKeyFrameInfo(
cc->InputTimestamp().Value(), detections, frame_width_, frame_height_,
key_frame_width_, key_frame_height_, &key_frame_info));
key_frame_infos_.push_back(key_frame_info);
}
// Buffers static features.
if (cc->Inputs().HasTag(kInputStaticFeatures) &&
!cc->Inputs().Tag(kInputStaticFeatures).Value().IsEmpty()) {
static_features_.push_back(
cc->Inputs().Tag(kInputStaticFeatures).Get<StaticFeatures>());
static_features_timestamps_.push_back(cc->InputTimestamp().Value());
}
return ::mediapipe::OkStatus();
}
::mediapipe::Status SceneCroppingCalculator::Close(
::mediapipe::CalculatorContext* cc) {
if (!scene_frames_.empty()) {
MP_RETURN_IF_ERROR(ProcessScene(/* is_end_of_scene = */ true, cc));
}
if (cc->Outputs().HasTag(kOutputSummary)) {
cc->Outputs()
.Tag(kOutputSummary)
.Add(summary_.release(), Timestamp::PostStream());
}
return ::mediapipe::OkStatus();
}
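// Detects static top/bottom borders from the buffered static features, crops
// them out of every buffered scene frame, and clamps/offsets the detection
// bounding boxes so that they stay within the de-bordered frames.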
::mediapipe::Status SceneCroppingCalculator::RemoveStaticBorders() {
int top_border_size = 0, bottom_border_size = 0;
MP_RETURN_IF_ERROR(ComputeSceneStaticBordersSize(
static_features_, &top_border_size, &bottom_border_size));
const double scale = static_cast<double>(frame_height_) / key_frame_height_;
top_border_distance_ = std::round(scale * top_border_size);
const int bottom_border_distance = std::round(scale * bottom_border_size);
effective_frame_height_ =
frame_height_ - top_border_distance_ - bottom_border_distance;
if (top_border_distance_ > 0 || bottom_border_distance > 0) {
VLOG(1) << "Remove top border " << top_border_distance_ << " bottom border "
<< bottom_border_distance;
// Remove borders from frames.
cv::Rect roi(0, top_border_distance_, frame_width_,
effective_frame_height_);
for (int i = 0; i < scene_frames_.size(); ++i) {
cv::Mat tmp;
scene_frames_[i](roi).copyTo(tmp);
scene_frames_[i] = tmp;
}
// Adjust detection bounding boxes.
for (int i = 0; i < key_frame_infos_.size(); ++i) {
DetectionSet adjusted_detections;
const auto& detections = key_frame_infos_[i].detections();
for (int j = 0; j < detections.detections_size(); ++j) {
const auto& detection = detections.detections(j);
SalientRegion adjusted_detection = detection;
// Clamp the box to be within the de-bordered frame.
if (!ClampRect(0, top_border_distance_, frame_width_,
top_border_distance_ + effective_frame_height_,
adjusted_detection.mutable_location())
.ok()) {
continue;
}
// Offset the y position.
adjusted_detection.mutable_location()->set_y(
adjusted_detection.location().y() - top_border_distance_);
*adjusted_detections.add_detections() = adjusted_detection;
}
*key_frame_infos_[i].mutable_detections() = adjusted_detections;
}
}
return ::mediapipe::OkStatus();
}
::mediapipe::Status
SceneCroppingCalculator::InitializeFrameCropRegionComputer() {
key_frame_crop_options_ = options_.key_frame_crop_options();
MP_RETURN_IF_ERROR(
SetKeyFrameCropTarget(frame_width_, effective_frame_height_,
target_aspect_ratio_, &key_frame_crop_options_));
VLOG(1) << "Target width " << key_frame_crop_options_.target_width();
VLOG(1) << "Target height " << key_frame_crop_options_.target_height();
frame_crop_region_computer_ =
absl::make_unique<FrameCropRegionComputer>(key_frame_crop_options_);
return ::mediapipe::OkStatus();
}
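// If user_hint_override is set and the scene contains at least one USER_HINT
// detection, keeps only the USER_HINT detections for every key frame;
// otherwise leaves the key frame detections untouched.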
void SceneCroppingCalculator::FilterKeyFrameInfo() {
if (!options_.user_hint_override()) {
return;
}
std::vector<KeyFrameInfo> user_hints_only;
bool has_user_hints = false;
for (auto key_frame : key_frame_infos_) {
DetectionSet user_hint_only_set;
for (const auto& detection : key_frame.detections().detections()) {
if (detection.signal_type().has_standard() &&
detection.signal_type().standard() == SignalType::USER_HINT) {
*user_hint_only_set.add_detections() = detection;
has_user_hints = true;
}
}
*key_frame.mutable_detections() = user_hint_only_set;
user_hints_only.push_back(key_frame);
}
if (has_user_hints) {
key_frame_infos_ = user_hints_only;
}
}
::mediapipe::Status SceneCroppingCalculator::ProcessScene(
const bool is_end_of_scene, CalculatorContext* cc) {
// Removes detections under special circumstances.
FilterKeyFrameInfo();
// Removes any static borders.
MP_RETURN_IF_ERROR(RemoveStaticBorders());
// Decides if solid background color padding is possible and sets up color
// interpolation functions in CIELAB. Uses linear interpolation by default.
MP_RETURN_IF_ERROR(FindSolidBackgroundColor(
static_features_, static_features_timestamps_,
options_.solid_background_frames_padding_fraction(),
&has_solid_background_, &background_color_l_function_,
&background_color_a_function_, &background_color_b_function_));
// Computes key frame crop regions.
MP_RETURN_IF_ERROR(InitializeFrameCropRegionComputer());
const int num_key_frames = key_frame_infos_.size();
std::vector<KeyFrameCropResult> key_frame_crop_results(num_key_frames);
for (int i = 0; i < num_key_frames; ++i) {
MP_RETURN_IF_ERROR(frame_crop_region_computer_->ComputeFrameCropRegion(
key_frame_infos_[i], &key_frame_crop_results[i]));
}
// Analyzes scene camera motion and generates FocusPointFrames.
auto analyzer_options = options_.scene_camera_motion_analyzer_options();
analyzer_options.set_allow_sweeping(analyzer_options.allow_sweeping() &&
!has_solid_background_);
scene_camera_motion_analyzer_ =
absl::make_unique<SceneCameraMotionAnalyzer>(analyzer_options);
SceneKeyFrameCropSummary scene_summary;
std::vector<FocusPointFrame> focus_point_frames;
SceneCameraMotion scene_camera_motion;
MP_RETURN_IF_ERROR(
scene_camera_motion_analyzer_->AnalyzeSceneAndPopulateFocusPointFrames(
key_frame_infos_, key_frame_crop_options_, key_frame_crop_results,
frame_width_, effective_frame_height_, scene_frame_timestamps_,
&scene_summary, &focus_point_frames, &scene_camera_motion));
// Crops scene frames.
std::vector<cv::Mat> cropped_frames;
MP_RETURN_IF_ERROR(scene_cropper_->CropFrames(
scene_summary, scene_frames_, focus_point_frames,
prior_focus_point_frames_, &cropped_frames));
// Formats and outputs cropped frames.
bool apply_padding = false;
float vertical_fill_precent;
MP_RETURN_IF_ERROR(FormatAndOutputCroppedFrames(
cropped_frames, &apply_padding, &vertical_fill_precent, cc));
// Caches prior FocusPointFrames if this was not the end of a scene.
prior_focus_point_frames_.clear();
if (!is_end_of_scene) {
const int start = std::max(0, static_cast<int>(scene_frames_.size()) -
options_.prior_frame_buffer_size());
for (int i = start; i < num_key_frames; ++i) {
prior_focus_point_frames_.push_back(focus_point_frames[i]);
}
}
// Optionally outputs visualization frames.
MP_RETURN_IF_ERROR(OutputVizFrames(key_frame_crop_results, focus_point_frames,
scene_summary.crop_window_width(),
scene_summary.crop_window_height(), cc));
const double start_sec = Timestamp(scene_frame_timestamps_.front()).Seconds();
const double end_sec = Timestamp(scene_frame_timestamps_.back()).Seconds();
VLOG(1) << absl::StrFormat("Processed a scene from %.2f sec to %.2f sec",
start_sec, end_sec);
// Optionally makes summary.
if (cc->Outputs().HasTag(kOutputSummary)) {
auto* scene_summary = summary_->add_scene_summaries();
scene_summary->set_start_sec(start_sec);
scene_summary->set_end_sec(end_sec);
*(scene_summary->mutable_camera_motion()) = scene_camera_motion;
scene_summary->set_is_end_of_scene(is_end_of_scene);
scene_summary->set_is_padded(apply_padding);
}
key_frame_infos_.clear();
scene_frames_.clear();
scene_frame_timestamps_.clear();
is_key_frames_.clear();
static_features_.clear();
static_features_timestamps_.clear();
return ::mediapipe::OkStatus();
}
::mediapipe::Status SceneCroppingCalculator::FormatAndOutputCroppedFrames(
const std::vector<cv::Mat>& cropped_frames, bool* apply_padding,
float* vertical_fill_precent, CalculatorContext* cc) {
RET_CHECK(apply_padding) << "Has padding boolean is null.";
if (cropped_frames.empty()) {
return ::mediapipe::OkStatus();
}
// Computes scaling factor and decides if padding is needed.
const int crop_width = cropped_frames.front().cols;
const int crop_height = cropped_frames.front().rows;
VLOG(1) << "crop_width = " << crop_width << " crop_height = " << crop_height;
const double scaling =
std::max(static_cast<double>(target_width_) / crop_width,
static_cast<double>(target_height_) / crop_height);
int scaled_width = std::round(scaling * crop_width);
int scaled_height = std::round(scaling * crop_height);
RET_CHECK_GE(scaled_width, target_width_)
<< "Scaled width is less than target width - something is wrong.";
RET_CHECK_GE(scaled_height, target_height_)
<< "Scaled height is less than target height - something is wrong.";
if (scaled_width - target_width_ <= 1) scaled_width = target_width_;
if (scaled_height - target_height_ <= 1) scaled_height = target_height_;
*apply_padding =
scaled_width != target_width_ || scaled_height != target_height_;
*vertical_fill_precent = scaled_height / static_cast<float>(target_height_);
if (*apply_padding) {
padder_ = absl::make_unique<PaddingEffectGenerator>(
scaled_width, scaled_height, target_aspect_ratio_);
VLOG(1) << "Scene is padded: scaled width = " << scaled_width
<< " target width = " << target_width_
<< " scaled height = " << scaled_height
<< " target height = " << target_height_;
}
// Resizes cropped frames, pads frames, and output frames.
cv::Scalar* background_color = nullptr;
cv::Scalar interpolated_color;
const int num_frames = cropped_frames.size();
for (int i = 0; i < num_frames; ++i) {
const int64 time_ms = scene_frame_timestamps_[i];
const Timestamp timestamp(time_ms);
auto scaled_frame = absl::make_unique<ImageFrame>(
frame_format_, scaled_width, scaled_height);
auto destination = formats::MatView(scaled_frame.get());
if (scaled_width == crop_width && scaled_height == crop_height) {
cropped_frames[i].copyTo(destination);
} else {
// Cubic gives better quality for upscaling; area works well for downscaling.
const int interpolation_method =
scaling > 1 ? cv::INTER_CUBIC : cv::INTER_AREA;
cv::resize(cropped_frames[i], destination, destination.size(), 0, 0,
interpolation_method);
}
if (*apply_padding) {
if (has_solid_background_) {
double lab[3];
lab[0] = background_color_l_function_.Evaluate(time_ms);
lab[1] = background_color_a_function_.Evaluate(time_ms);
lab[2] = background_color_b_function_.Evaluate(time_ms);
cv::Mat3f lab_mat(1, 1, cv::Vec3f(lab[0], lab[1], lab[2]));
cv::Mat3f rgb_mat(1, 1);
// Necessary scaling of the RGB values from [0, 1] to [0, 255] based on:
// https://docs.opencv.org/2.4/modules/imgproc/doc/miscellaneous_transformations.html#cvtcolor
cv::cvtColor(lab_mat, rgb_mat, cv::COLOR_Lab2RGB);
rgb_mat *= 255.0;
auto k = rgb_mat.at<cv::Vec3f>(0, 0);
k[0] = k[0] < 0.0 ? 0.0 : k[0] > 255.0 ? 255.0 : k[0];
k[1] = k[1] < 0.0 ? 0.0 : k[1] > 255.0 ? 255.0 : k[1];
k[2] = k[2] < 0.0 ? 0.0 : k[2] > 255.0 ? 255.0 : k[2];
interpolated_color =
cv::Scalar(std::round(k[0]), std::round(k[1]), std::round(k[2]));
background_color = &interpolated_color;
}
auto padded_frame = absl::make_unique<ImageFrame>();
MP_RETURN_IF_ERROR(padder_->Process(
*scaled_frame, background_contrast_,
std::min({blur_cv_size_, scaled_width, scaled_height}),
overlay_opacity_, padded_frame.get(), background_color));
RET_CHECK_EQ(padded_frame->Width(), target_width_)
<< "Padded frame width is off.";
RET_CHECK_EQ(padded_frame->Height(), target_height_)
<< "Padded frame height is off.";
cc->Outputs()
.Tag(kOutputCroppedFrames)
.Add(padded_frame.release(), timestamp);
} else {
cc->Outputs()
.Tag(kOutputCroppedFrames)
.Add(scaled_frame.release(), timestamp);
}
}
return ::mediapipe::OkStatus();
}
mediapipe::Status SceneCroppingCalculator::OutputVizFrames(
const std::vector<KeyFrameCropResult>& key_frame_crop_results,
const std::vector<FocusPointFrame>& focus_point_frames,
const int crop_window_width, const int crop_window_height,
CalculatorContext* cc) const {
if (cc->Outputs().HasTag(kOutputKeyFrameCropViz)) {
std::vector<std::unique_ptr<ImageFrame>> viz_frames;
MP_RETURN_IF_ERROR(DrawDetectionsAndCropRegions(
scene_frames_, is_key_frames_, key_frame_infos_, key_frame_crop_results,
frame_format_, &viz_frames));
for (int i = 0; i < scene_frames_.size(); ++i) {
cc->Outputs()
.Tag(kOutputKeyFrameCropViz)
.Add(viz_frames[i].release(), Timestamp(scene_frame_timestamps_[i]));
}
}
if (cc->Outputs().HasTag(kOutputFocusPointFrameViz)) {
std::vector<std::unique_ptr<ImageFrame>> viz_frames;
MP_RETURN_IF_ERROR(DrawFocusPointAndCropWindow(
scene_frames_, focus_point_frames, options_.viz_overlay_opacity(),
crop_window_width, crop_window_height, frame_format_, &viz_frames));
for (int i = 0; i < scene_frames_.size(); ++i) {
cc->Outputs()
.Tag(kOutputFocusPointFrameViz)
.Add(viz_frames[i].release(), Timestamp(scene_frame_timestamps_[i]));
}
}
return ::mediapipe::OkStatus();
}
REGISTER_CALCULATOR(SceneCroppingCalculator);
} // namespace autoflip
} // namespace mediapipe

View File

@ -0,0 +1,249 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MEDIAPIPE_EXAMPLES_DESKTOP_AUTOFLIP_CALCULATORS_SCENE_CROPPING_CALCULATOR_H_
#define MEDIAPIPE_EXAMPLES_DESKTOP_AUTOFLIP_CALCULATORS_SCENE_CROPPING_CALCULATOR_H_
#include <memory>
#include <vector>
#include "mediapipe/examples/desktop/autoflip/autoflip_messages.pb.h"
#include "mediapipe/examples/desktop/autoflip/calculators/scene_cropping_calculator.pb.h"
#include "mediapipe/examples/desktop/autoflip/quality/cropping.pb.h"
#include "mediapipe/examples/desktop/autoflip/quality/focus_point.pb.h"
#include "mediapipe/examples/desktop/autoflip/quality/frame_crop_region_computer.h"
#include "mediapipe/examples/desktop/autoflip/quality/padding_effect_generator.h"
#include "mediapipe/examples/desktop/autoflip/quality/piecewise_linear_function.h"
#include "mediapipe/examples/desktop/autoflip/quality/polynomial_regression_path_solver.h"
#include "mediapipe/examples/desktop/autoflip/quality/scene_camera_motion_analyzer.h"
#include "mediapipe/examples/desktop/autoflip/quality/scene_cropper.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/port/opencv_core_inc.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
namespace mediapipe {
namespace autoflip {
// This calculator crops video scenes to target size, which can be of any aspect
// ratio. The calculator supports both "landscape -> portrait", and "portrait ->
// landscape" use cases. The two use cases are automatically determined by
// comparing the input and output frame's aspect ratios internally.
//
// The target (i.e. output) frame's dimension can be specified through the
// target_width(height) fields in the options. Both this target dimension and
// the input dimension should be even. If either keep_original_height or
// keep_original_width is set to true, the corresponding target dimension will
// only be used to compute the aspect ratio (as opposed to setting the actual
// dimension) of the output. If the output frame thus computed has an odd
// size, it will be rounded down to an even number.
//
// The calculator takes shot boundary signals to identify shot boundaries, and
// crops each scene independently. The cropping decisions are made based on
// detection features, which are a collection of focus regions detected from
// different signals, and then fused together by a SignalFusingCalculator. To
// add a new type of focus signals, it should be added in the input of the
// SignalFusingCalculator, which can take an arbitrary number of input streams.
//
// If after attempting to cover focus regions based on the cropping decisions
// made, the retained frame region's aspect ratio is still different from the
// target aspect ratio, padding will be applied. In this case, a seamless
// padding with a solid color would be preferred wherever possible, given
// information from the input static features; otherwise, a simple padding with
// centered foreground on blurred background will be applied.
//
// The main complexity of this calculator lies in stabilizing crop regions over
// the scene using a Retargeter, which solves linear programming problems
// through a L1 path solver (default) or least squares problems through a L2
// path solver.
// Input streams:
// - required tag VIDEO_FRAMES (type ImageFrame):
// Original scene frames to be cropped.
// - required tag DETECTION_FEATURES (type DetectionSet):
// Detected features on the key frames.
// - optional tag STATIC_FEATURES (type StaticFeatures):
// Static features (e.g. borders and solid background color) detected on the
// key frames, used for border removal and solid background padding.
// - required tag SHOT_BOUNDARIES (type bool):
// Indicators for shot boundaries (output of shot boundary detection).
// - optional tag KEY_FRAMES (type ImageFrame):
// Key frames on which features are detected. This is only used to set the
// detection features frame size, and when it is omitted, the features frame
// size is assumed to be the original scene frame size.
//
// Output streams:
// - required tag CROPPED_FRAMES (type ImageFrame):
// Cropped frames at target size and original frame rate.
// - optional tag KEY_FRAME_CROP_REGION_VIZ_FRAMES (type ImageFrame):
// Debug visualization frames at original frame size and frame rate. Draws
// the required (yellow) and non-required (cyan) detection features and the
// key frame crop regions (green).
// - optional tag SALIENT_POINT_FRAME_VIZ_FRAMES (type ImageFrame):
// Debug visualization frames at original frame size and frame rate. Draws
// the focus points and the scene crop window (red).
// - optional tag CROPPING_SUMMARY (type VideoCroppingSummary):
// Debug summary information for the video. Only generates one packet when
// calculator closes.
//
// Example config:
// node {
// calculator: "SceneCroppingCalculator"
// input_stream: "VIDEO_FRAMES:camera_frames_org"
// input_stream: "KEY_FRAMES:down_sampled_frames"
// input_stream: "DETECTION_FEATURES:focus_regions"
// input_stream: "STATIC_FEATURES:border_features"
// input_stream: "SHOT_BOUNDARIES:shot_boundary_frames"
// output_stream: "CROPPED_FRAMES:cropped_frames"
// options: {
// [mediapipe.autoflip.SceneCroppingCalculatorOptions.ext]: {
// target_width: 720
// target_height: 1124
// target_size_type: USE_TARGET_DIMENSION
// }
// }
// }
// Note that only the target size is required in the options, and all other
// fields are optional with default settings.
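//
// A sketch (not part of the original example) of using
// MAXIMIZE_TARGET_DIMENSION together with the EXTERNAL_ASPECT_RATIO input
// side packet; the side packet carries a "width:height" string such as "1:1"
// or "5:4", and the side-packet stream name below is illustrative:
// node {
//   calculator: "SceneCroppingCalculator"
//   input_side_packet: "EXTERNAL_ASPECT_RATIO:requested_aspect_ratio"
//   input_stream: "VIDEO_FRAMES:camera_frames_org"
//   input_stream: "DETECTION_FEATURES:focus_regions"
//   input_stream: "SHOT_BOUNDARIES:shot_boundary_frames"
//   output_stream: "CROPPED_FRAMES:cropped_frames"
//   options: {
//     [mediapipe.autoflip.SceneCroppingCalculatorOptions.ext]: {
//       target_width: 720
//       target_height: 720
//       target_size_type: MAXIMIZE_TARGET_DIMENSION
//     }
//   }
// }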
class SceneCroppingCalculator : public CalculatorBase {
public:
static ::mediapipe::Status GetContract(CalculatorContract* cc);
// Validates calculator options and initializes SceneCameraMotionAnalyzer and
// SceneCropper.
::mediapipe::Status Open(CalculatorContext* cc) override;
// Buffers each scene frame and its timestamp. Packs and stores KeyFrameInfo
// for key frames (a.k.a. frames with detection features). When a shot
// boundary is encountered or when the buffer is full, calls ProcessScene()
// to process the scene at once, and clears buffers.
::mediapipe::Status Process(CalculatorContext* cc) override;
// Calls ProcessScene() on remaining buffered frames. Optionally outputs a
// VideoCroppingSummary if the output stream CROPPING_SUMMARY is present.
::mediapipe::Status Close(::mediapipe::CalculatorContext* cc) override;
private:
// Removes any static borders from the scene frames before cropping.
::mediapipe::Status RemoveStaticBorders();
// Initializes a FrameCropRegionComputer given input and target frame sizes.
::mediapipe::Status InitializeFrameCropRegionComputer();
// Processes a scene using buffered scene frames and KeyFrameInfos:
// 1. Computes key frame crop regions using a FrameCropRegionComputer.
// 2. Analyzes scene camera motion and generates FocusPointFrames using a
// SceneCameraMotionAnalyzer.
// 3. Crops scene frames using a SceneCropper (wrapper around Retargeter).
// 4. Formats and outputs cropped frames.
// 5. Caches prior FocusPointFrames if this is not the end of a scene (due
// to force flush).
// 6. Optionally outputs visualization frames.
// 7. Optionally updates cropping summary.
::mediapipe::Status ProcessScene(const bool is_end_of_scene,
CalculatorContext* cc);
// Formats and outputs the cropped frames. Scales them to be at least as big
// as the target size. If the aspect ratio is different, applies padding. Uses
// solid background from static features if possible, otherwise uses blurred
// background. Sets apply_padding to true if the scene is padded.
::mediapipe::Status FormatAndOutputCroppedFrames(
const std::vector<cv::Mat>& cropped_frames, bool* apply_padding,
float* vertical_fill_precent, CalculatorContext* cc);
// Draws and outputs visualization frames if those streams are present.
::mediapipe::Status OutputVizFrames(
const std::vector<KeyFrameCropResult>& key_frame_crop_results,
const std::vector<FocusPointFrame>& focus_point_frames,
const int crop_window_width, const int crop_window_height,
CalculatorContext* cc) const;
// Filters detections based on USER_HINT under specific flag conditions.
void FilterKeyFrameInfo();
// Target frame size and aspect ratio passed in or computed from options.
int target_width_ = -1;
int target_height_ = -1;
double target_aspect_ratio_ = -1.0;
// Input video frame size and format.
int frame_width_ = -1;
int frame_height_ = -1;
ImageFormat::Format frame_format_ = ImageFormat::UNKNOWN;
// Key frame size (frame size for detections and border detections).
int key_frame_width_ = -1;
int key_frame_height_ = -1;
// Calculator options.
SceneCroppingCalculatorOptions options_;
// Buffered KeyFrameInfos for the current scene (size = number of key frames).
std::vector<KeyFrameInfo> key_frame_infos_;
// Buffered frames, timestamps, and indicators for key frames in the current
// scene (size = number of input video frames).
std::vector<cv::Mat> scene_frames_;
std::vector<int64> scene_frame_timestamps_;
std::vector<bool> is_key_frames_;
// Static border information for the scene.
int top_border_distance_ = -1;
int effective_frame_height_ = -1;
// Stored FocusPointFrames from prior scene when there was no actual scene
// change (due to forced flush when buffer is full).
std::vector<FocusPointFrame> prior_focus_point_frames_;
// KeyFrameCropOptions used by the FrameCropRegionComputer.
KeyFrameCropOptions key_frame_crop_options_;
// Object for computing key frame crop regions from detection features.
std::unique_ptr<FrameCropRegionComputer> frame_crop_region_computer_ =
nullptr;
// Object for analyzing scene camera motion from key frame crop regions and
// generating FocusPointFrames.
std::unique_ptr<SceneCameraMotionAnalyzer> scene_camera_motion_analyzer_ =
nullptr;
// Object for cropping a scene given FocusPointFrames.
std::unique_ptr<SceneCropper> scene_cropper_ = nullptr;
// Buffered static features and their timestamps used in padding with solid
// background color (size = number of frames with static features).
std::vector<StaticFeatures> static_features_;
std::vector<int64> static_features_timestamps_;
bool has_solid_background_ = false;
// CIELAB yields more natural color transitions than RGB and HSV: RGB tends to
// produce darker in-between colors and HSV can introduce new hues. See
// https://howaboutanorange.com/blog/2011/08/10/color_interpolation/ for
// visual comparisons of color transition in different spaces.
PiecewiseLinearFunction background_color_l_function_; // CIELAB - l
PiecewiseLinearFunction background_color_a_function_; // CIELAB - a
PiecewiseLinearFunction background_color_b_function_; // CIELAB - b
// Parameters for padding with blurred background passed in from options.
float background_contrast_ = -1.0;
int blur_cv_size_ = -1;
float overlay_opacity_ = -1.0;
// Object for padding an image to a target aspect ratio.
std::unique_ptr<PaddingEffectGenerator> padder_ = nullptr;
// Optional diagnostic summary output emitted in Close().
std::unique_ptr<VideoCroppingSummary> summary_ = nullptr;
};
} // namespace autoflip
} // namespace mediapipe
#endif // MEDIAPIPE_EXAMPLES_DESKTOP_AUTOFLIP_CALCULATORS_SCENE_CROPPING_CALCULATOR_H_

View File

@ -0,0 +1,101 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe.autoflip;
import "mediapipe/examples/desktop/autoflip/quality/cropping.proto";
import "mediapipe/framework/calculator.proto";
// Options for the SceneCroppingCalculator.
message SceneCroppingCalculatorOptions {
extend mediapipe.CalculatorOptions {
optional SceneCroppingCalculatorOptions ext = 284806831;
}
// Target frame size - this has to be even (for ffmpeg encoding).
optional int32 target_width = 1;
optional int32 target_height = 2;
// Choices for target size specification.
enum TargetSizeType {
// Unknown type (needed by ProtoBestPractices to ensure consistent behavior
// across proto2 and proto3). This type should not be used.
UNKNOWN = 0;
// Directly uses the target dimension given above.
USE_TARGET_DIMENSION = 1;
// Uses the target dimension to compute the target aspect ratio, but keeps
// original height/width. If the resulting size for the other dimension is
// odd, it is rounded down to an even size.
KEEP_ORIGINAL_HEIGHT = 2;
KEEP_ORIGINAL_WIDTH = 3;
// Used in conjunction with external_aspect_ratio; creates the largest
// possible output without upscaling the video.
MAXIMIZE_TARGET_DIMENSION = 4;
}
optional TargetSizeType target_size_type = 3 [default = USE_TARGET_DIMENSION];
// Forces a flush of the frame buffer after this number of frames even if
// there is not a shot boundary.
optional int32 max_scene_size = 4 [default = 600];
// Number of frames from the prior buffer used to smooth out the camera
// trajectory after a forced flush.
optional int32 prior_frame_buffer_size = 5 [default = 30];
// Options for computing key frame crop regions using the
// FrameCropRegionComputer.
// **** Note: You shall NOT manually set the target width and height fields
// inside this field as they will be overridden internally in the calculator
// (i.e. automatically computed from target aspect ratio).
optional KeyFrameCropOptions key_frame_crop_options = 6;
// Options for analyzing scene camera motion and populating SalientPointFrames
// using the SceneCameraMotionAnalyzer.
optional SceneCameraMotionAnalyzerOptions
scene_camera_motion_analyzer_options = 7;
// If the fraction of frames with solid background in one shot exceeds this
// threshold, use a solid color for background in padding for this shot.
optional float solid_background_frames_padding_fraction = 8 [default = 0.6];
// Options for padding using the PaddingEffectGenerator (copied from
// ad_creation/calculators/universal_padding_calculator.proto).
message PaddingEffectParameters {
// Contrast adjustment for padding background. This value should be between 0
// and 1. The smaller the value, the darker the background. 1 means no
// contrast change.
optional float background_contrast = 1 [default = 1.0];
// The cv::Size() parameter used in creating blurry effects for padding
// backgrounds.
optional int32 blur_cv_size = 2 [default = 200];
// The opacity of the black layer overlaid on top of the background. The
// value should be within [0, 1], in which 0 means totally transparent, and
// 1 means totally opaque.
optional float overlay_opacity = 3 [default = 0.6];
}
optional PaddingEffectParameters padding_parameters = 9;
// If set and the "KEY_FRAMES" input stream is not provided, uses these values
// as the key frame dimensions.
optional int32 video_features_width = 10;
optional int32 video_features_height = 11;
// If a user hint is provided on a scene, use only this signal for cropping
// and camera motion.
optional bool user_hint_override = 12;
// An opacity used to render cropping windows for visualization purposes.
optional float viz_overlay_opacity = 13 [default = 0.7];
}
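// Illustrative options snippet (a sketch; the numeric values are assumptions
// chosen only to show the fields, not recommendations):
// [mediapipe.autoflip.SceneCroppingCalculatorOptions.ext]: {
//   target_width: 720
//   target_height: 1280
//   solid_background_frames_padding_fraction: 0.6
//   padding_parameters: {
//     background_contrast: 0.75
//     blur_cv_size: 200
//     overlay_opacity: 0.6
//   }
// }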

View File

@ -0,0 +1,621 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/examples/desktop/autoflip/calculators/scene_cropping_calculator.h"
#include <random>
#include <utility>
#include <vector>
#include "mediapipe/examples/desktop/autoflip/autoflip_messages.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_runner.h"
#include "mediapipe/framework/formats/image_frame_opencv.h"
#include "mediapipe/framework/port/gmock.h"
#include "mediapipe/framework/port/gtest.h"
#include "mediapipe/framework/port/opencv_core_inc.h"
#include "mediapipe/framework/port/parse_text_proto.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/framework/port/status_matchers.h"
namespace mediapipe {
namespace autoflip {
namespace {
using ::testing::HasSubstr;
constexpr char kConfig[] = R"(
calculator: "SceneCroppingCalculator"
input_stream: "VIDEO_FRAMES:camera_frames_org"
input_stream: "KEY_FRAMES:down_sampled_frames"
input_stream: "DETECTION_FEATURES:salient_regions"
input_stream: "STATIC_FEATURES:border_features"
input_stream: "SHOT_BOUNDARIES:shot_boundary_frames"
output_stream: "CROPPED_FRAMES:cropped_frames"
options: {
[mediapipe.autoflip.SceneCroppingCalculatorOptions.ext]: {
target_width: $0
target_height: $1
target_size_type: $2
max_scene_size: $3
prior_frame_buffer_size: $4
}
})";
constexpr char kNoKeyFrameConfig[] = R"(
calculator: "SceneCroppingCalculator"
input_stream: "VIDEO_FRAMES:camera_frames_org"
input_stream: "DETECTION_FEATURES:salient_regions"
input_stream: "STATIC_FEATURES:border_features"
input_stream: "SHOT_BOUNDARIES:shot_boundary_frames"
output_stream: "CROPPED_FRAMES:cropped_frames"
options: {
[mediapipe.autoflip.SceneCroppingCalculatorOptions.ext]: {
target_width: $0
target_height: $1
}
})";
constexpr char kDebugConfig[] = R"(
calculator: "SceneCroppingCalculator"
input_stream: "VIDEO_FRAMES:camera_frames_org"
input_stream: "KEY_FRAMES:down_sampled_frames"
input_stream: "DETECTION_FEATURES:salient_regions"
input_stream: "STATIC_FEATURES:border_features"
input_stream: "SHOT_BOUNDARIES:shot_boundary_frames"
output_stream: "CROPPED_FRAMES:cropped_frames"
output_stream: "KEY_FRAME_CROP_REGION_VIZ_FRAMES:key_frame_crop_viz_frames"
output_stream: "SALIENT_POINT_FRAME_VIZ_FRAMES:salient_point_viz_frames"
output_stream: "CROPPING_SUMMARY:cropping_summaries"
options: {
[mediapipe.autoflip.SceneCroppingCalculatorOptions.ext]: {
target_width: $0
target_height: $1
}
})";
constexpr int kInputFrameWidth = 1280;
constexpr int kInputFrameHeight = 720;
constexpr int kKeyFrameWidth = 640;
constexpr int kKeyFrameHeight = 360;
constexpr int kTargetWidth = 720;
constexpr int kTargetHeight = 1124;
constexpr SceneCroppingCalculatorOptions::TargetSizeType kTargetSizeType =
SceneCroppingCalculatorOptions::USE_TARGET_DIMENSION;
constexpr int kNumScenes = 3;
constexpr int kSceneSize = 8;
constexpr int kMaxSceneSize = 10;
constexpr int kPriorFrameBufferSize = 5;
constexpr int kMinNumDetections = 0;
constexpr int kMaxNumDetections = 10;
constexpr int kDownSampleRate = 4;
constexpr int64 kTimestampDiff = 20000;
// Returns a singleton random engine for generating random values. The seed is
// fixed for reproducibility.
std::default_random_engine& GetGen() {
static std::default_random_engine generator{0};
return generator;
}
// Returns random color with r, g, b in the range of [0, 255].
cv::Scalar GetRandomColor() {
std::uniform_int_distribution<int> distribution(0, 255);
const int red = distribution(GetGen());
const int green = distribution(GetGen());
const int blue = distribution(GetGen());
return cv::Scalar(red, green, blue);
}
// Makes a detection set with the given number of detections. Each detection
// has a randomly generated region within the given width and height, a random
// score in [0, 1], and is randomly set to be required or non-required.
std::unique_ptr<DetectionSet> MakeDetections(const int num_detections,
const int width,
const int height) {
std::uniform_int_distribution<int> width_distribution(0, width);
std::uniform_int_distribution<int> height_distribution(0, height);
std::uniform_real_distribution<float> score_distribution(0.0, 1.0);
std::bernoulli_distribution is_required_distribution(0.5);
auto detections = absl::make_unique<DetectionSet>();
for (int i = 0; i < num_detections; ++i) {
auto* region = detections->add_detections();
const int x1 = width_distribution(GetGen());
const int x2 = width_distribution(GetGen());
const int y1 = height_distribution(GetGen());
const int y2 = height_distribution(GetGen());
const int x_min = std::min(x1, x2), x_max = std::max(x1, x2);
const int y_min = std::min(y1, y2), y_max = std::max(y1, y2);
auto* location = region->mutable_location();
location->set_x(x_min);
location->set_width(x_max - x_min);
location->set_y(y_min);
location->set_height(y_max - y_min);
region->set_score(score_distribution(GetGen()));
region->set_is_required(is_required_distribution(GetGen()));
}
return detections;
}
// Makes an image frame of solid color given color, width, and height.
std::unique_ptr<ImageFrame> MakeImageFrameFromColor(const cv::Scalar& color,
const int width,
const int height) {
auto image_frame =
absl::make_unique<ImageFrame>(ImageFormat::SRGB, width, height);
auto mat = formats::MatView(image_frame.get());
mat = color;
return image_frame;
}
// Adds key frame detection features at the given time (in ms) to the input
// stream. Randomly generates a number of detections in the range
// [kMinNumDetections, kMaxNumDetections]. Optionally adds a key image frame of
// random solid color and given size.
void AddKeyFrameFeatures(const int64 time_ms, const int key_frame_width,
const int key_frame_height,
CalculatorRunner::StreamContentsSet* inputs) {
Timestamp timestamp(time_ms);
if (inputs->HasTag("KEY_FRAMES")) {
auto key_frame = MakeImageFrameFromColor(GetRandomColor(), key_frame_width,
key_frame_height);
inputs->Tag("KEY_FRAMES")
.packets.push_back(Adopt(key_frame.release()).At(timestamp));
}
const int num_detections = std::uniform_int_distribution<int>(
kMinNumDetections, kMaxNumDetections)(GetGen());
auto detections =
MakeDetections(num_detections, key_frame_width, key_frame_height);
inputs->Tag("DETECTION_FEATURES")
.packets.push_back(Adopt(detections.release()).At(timestamp));
}
// Adds a scene with the given number of frames to the input stream. Spaces
// frames at the default timestamp interval starting from the given start frame
// index. The scene has empty static features.
void AddScene(const int start_frame_index, const int num_scene_frames,
const int frame_width, const int frame_height,
const int key_frame_width, const int key_frame_height,
CalculatorRunner::StreamContentsSet* inputs) {
int64 time_ms = start_frame_index * kTimestampDiff;
for (int i = 0; i < num_scene_frames; ++i) {
Timestamp timestamp(time_ms);
auto frame =
MakeImageFrameFromColor(GetRandomColor(), frame_width, frame_height);
inputs->Tag("VIDEO_FRAMES")
.packets.push_back(Adopt(frame.release()).At(timestamp));
auto static_features = absl::make_unique<StaticFeatures>();
inputs->Tag("STATIC_FEATURES")
.packets.push_back(Adopt(static_features.release()).At(timestamp));
if (i % kDownSampleRate == 0) { // is a key frame
AddKeyFrameFeatures(time_ms, key_frame_width, key_frame_height, inputs);
}
if (i == num_scene_frames - 1) { // adds shot boundary
inputs->Tag("SHOT_BOUNDARIES")
.packets.push_back(Adopt(new bool(true)).At(Timestamp(time_ms)));
}
time_ms += kTimestampDiff;
}
}
// Checks that the output stream for cropped frames has the correct number of
// frames, and that the size of each frame is correct.
void CheckCroppedFrames(const CalculatorRunner& runner, const int num_frames,
const int target_width, const int target_height) {
const auto& outputs = runner.Outputs();
EXPECT_TRUE(outputs.HasTag("CROPPED_FRAMES"));
const auto& cropped_frames_outputs = outputs.Tag("CROPPED_FRAMES").packets;
EXPECT_EQ(cropped_frames_outputs.size(), num_frames);
for (int i = 0; i < num_frames; ++i) {
const auto& cropped_frame = cropped_frames_outputs[i].Get<ImageFrame>();
EXPECT_EQ(cropped_frame.Width(), target_width);
EXPECT_EQ(cropped_frame.Height(), target_height);
}
}
// Checks that the calculator checks the maximum scene size is valid.
TEST(SceneCroppingCalculatorTest, ChecksMaxSceneSize) {
const CalculatorGraphConfig::Node config =
ParseTextProtoOrDie<CalculatorGraphConfig::Node>(
absl::Substitute(kConfig, kTargetWidth, kTargetHeight,
kTargetSizeType, 0, kPriorFrameBufferSize));
auto runner = absl::make_unique<CalculatorRunner>(config);
const auto status = runner->Run();
EXPECT_FALSE(status.ok());
EXPECT_THAT(status.ToString(),
HasSubstr("Maximum scene size is non-positive."));
}
// Checks that the calculator checks the prior frame buffer size is valid.
TEST(SceneCroppingCalculatorTest, ChecksPriorFrameBufferSize) {
const CalculatorGraphConfig::Node config =
ParseTextProtoOrDie<CalculatorGraphConfig::Node>(
absl::Substitute(kConfig, kTargetWidth, kTargetHeight,
kTargetSizeType, kMaxSceneSize, -1));
auto runner = absl::make_unique<CalculatorRunner>(config);
const auto status = runner->Run();
EXPECT_FALSE(status.ok());
EXPECT_THAT(status.ToString(),
HasSubstr("Prior frame buffer size is negative."));
}
// Checks that the calculator crops scene frames when there is no input key
// frames stream.
TEST(SceneCroppingCalculatorTest, HandlesNoKeyFrames) {
const CalculatorGraphConfig::Node config =
ParseTextProtoOrDie<CalculatorGraphConfig::Node>(
absl::Substitute(kNoKeyFrameConfig, kTargetWidth, kTargetHeight));
auto runner = absl::make_unique<CalculatorRunner>(config);
AddScene(0, kSceneSize, kInputFrameWidth, kInputFrameHeight, kKeyFrameWidth,
kKeyFrameHeight, runner->MutableInputs());
MP_EXPECT_OK(runner->Run());
CheckCroppedFrames(*runner, kSceneSize, kTargetWidth, kTargetHeight);
}
// Checks that the calculator handles scenes longer than the maximum scene size
// (force flush is triggered).
TEST(SceneCroppingCalculatorTest, HandlesLongScene) {
const CalculatorGraphConfig::Node config =
ParseTextProtoOrDie<CalculatorGraphConfig::Node>(absl::Substitute(
kConfig, kTargetWidth, kTargetHeight, kTargetSizeType, kMaxSceneSize,
kPriorFrameBufferSize));
auto runner = absl::make_unique<CalculatorRunner>(config);
AddScene(0, 2 * kMaxSceneSize, kInputFrameWidth, kInputFrameHeight,
kKeyFrameWidth, kKeyFrameHeight, runner->MutableInputs());
MP_EXPECT_OK(runner->Run());
CheckCroppedFrames(*runner, 2 * kMaxSceneSize, kTargetWidth, kTargetHeight);
}
// Checks that the calculator can optionally output debug streams.
TEST(SceneCroppingCalculatorTest, OutputsDebugStreams) {
const CalculatorGraphConfig::Node config =
ParseTextProtoOrDie<CalculatorGraphConfig::Node>(
absl::Substitute(kDebugConfig, kTargetWidth, kTargetHeight));
auto runner = absl::make_unique<CalculatorRunner>(config);
const int num_frames = kSceneSize;
AddScene(0, num_frames, kInputFrameWidth, kInputFrameHeight, kKeyFrameWidth,
kKeyFrameHeight, runner->MutableInputs());
MP_EXPECT_OK(runner->Run());
const auto& outputs = runner->Outputs();
EXPECT_TRUE(outputs.HasTag("KEY_FRAME_CROP_REGION_VIZ_FRAMES"));
EXPECT_TRUE(outputs.HasTag("SALIENT_POINT_FRAME_VIZ_FRAMES"));
EXPECT_TRUE(outputs.HasTag("CROPPING_SUMMARY"));
const auto& crop_region_viz_frames_outputs =
outputs.Tag("KEY_FRAME_CROP_REGION_VIZ_FRAMES").packets;
const auto& salient_point_viz_frames_outputs =
outputs.Tag("SALIENT_POINT_FRAME_VIZ_FRAMES").packets;
const auto& summary_output = outputs.Tag("CROPPING_SUMMARY").packets;
EXPECT_EQ(crop_region_viz_frames_outputs.size(), num_frames);
EXPECT_EQ(salient_point_viz_frames_outputs.size(), num_frames);
EXPECT_EQ(summary_output.size(), 1);
for (int i = 0; i < num_frames; ++i) {
const auto& crop_region_viz_frame =
crop_region_viz_frames_outputs[i].Get<ImageFrame>();
EXPECT_EQ(crop_region_viz_frame.Width(), kInputFrameWidth);
EXPECT_EQ(crop_region_viz_frame.Height(), kInputFrameHeight);
const auto& salient_point_viz_frame =
salient_point_viz_frames_outputs[i].Get<ImageFrame>();
EXPECT_EQ(salient_point_viz_frame.Width(), kInputFrameWidth);
EXPECT_EQ(salient_point_viz_frame.Height(), kInputFrameHeight);
}
const auto& summary = summary_output[0].Get<VideoCroppingSummary>();
EXPECT_EQ(summary.scene_summaries_size(), 2);
const auto& summary_0 = summary.scene_summaries(0);
EXPECT_TRUE(summary_0.is_padded());
EXPECT_TRUE(summary_0.camera_motion().has_steady_motion());
}
// Checks that the calculator handles the case of generating landscape frames.
TEST(SceneCroppingCalculatorTest, HandlesLandscapeTarget) {
const int input_width = 900;
const int input_height = 1600;
const int target_width = 1200;
const int target_height = 800;
const CalculatorGraphConfig::Node config =
ParseTextProtoOrDie<CalculatorGraphConfig::Node>(absl::Substitute(
kConfig, target_width, target_height, kTargetSizeType, kMaxSceneSize,
kPriorFrameBufferSize));
auto runner = absl::make_unique<CalculatorRunner>(config);
for (int i = 0; i < kNumScenes; ++i) {
AddScene(i * kSceneSize, kSceneSize, input_width, input_height,
kKeyFrameWidth, kKeyFrameHeight, runner->MutableInputs());
}
const int num_frames = kSceneSize * kNumScenes;
MP_EXPECT_OK(runner->Run());
CheckCroppedFrames(*runner, num_frames, target_width, target_height);
}
// Checks that the calculator crops scene frames to target size when the target
// size type is the default USE_TARGET_DIMENSION.
TEST(SceneCroppingCalculatorTest, CropsToTargetSize) {
const CalculatorGraphConfig::Node config =
ParseTextProtoOrDie<CalculatorGraphConfig::Node>(absl::Substitute(
kConfig, kTargetWidth, kTargetHeight, kTargetSizeType, kMaxSceneSize,
kPriorFrameBufferSize));
auto runner = absl::make_unique<CalculatorRunner>(config);
for (int i = 0; i < kNumScenes; ++i) {
AddScene(i * kSceneSize, kSceneSize, kInputFrameWidth, kInputFrameHeight,
kKeyFrameWidth, kKeyFrameHeight, runner->MutableInputs());
}
const int num_frames = kSceneSize * kNumScenes;
MP_EXPECT_OK(runner->Run());
CheckCroppedFrames(*runner, num_frames, kTargetWidth, kTargetHeight);
}
// Checks that the calculator keeps original height if the target size type is
// set to KEEP_ORIGINAL_HEIGHT.
TEST(SceneCroppingCalculatorTest, KeepsOriginalHeight) {
const auto target_size_type =
SceneCroppingCalculatorOptions::KEEP_ORIGINAL_HEIGHT;
const int target_height = kInputFrameHeight;
const double target_aspect_ratio =
static_cast<double>(kTargetWidth) / kTargetHeight;
int target_width = std::round(target_height * target_aspect_ratio);
if (target_width % 2 == 1) target_width--;
const CalculatorGraphConfig::Node config =
ParseTextProtoOrDie<CalculatorGraphConfig::Node>(absl::Substitute(
kConfig, kTargetWidth, kTargetHeight, target_size_type, kMaxSceneSize,
kPriorFrameBufferSize));
auto runner = absl::make_unique<CalculatorRunner>(config);
AddScene(0, kMaxSceneSize, kInputFrameWidth, kInputFrameHeight,
kKeyFrameWidth, kKeyFrameHeight, runner->MutableInputs());
MP_EXPECT_OK(runner->Run());
CheckCroppedFrames(*runner, kMaxSceneSize, target_width, target_height);
}
// Checks that the calculator keeps original width if the target size type is
// set to KEEP_ORIGINAL_WIDTH.
TEST(SceneCroppingCalculatorTest, KeepsOriginalWidth) {
const auto target_size_type =
SceneCroppingCalculatorOptions::KEEP_ORIGINAL_WIDTH;
const int target_width = kInputFrameWidth;
const double target_aspect_ratio =
static_cast<double>(kTargetWidth) / kTargetHeight;
int target_height = std::round(target_width / target_aspect_ratio);
if (target_height % 2 == 1) target_height--;
const CalculatorGraphConfig::Node config =
ParseTextProtoOrDie<CalculatorGraphConfig::Node>(absl::Substitute(
kConfig, kTargetWidth, kTargetHeight, target_size_type, kMaxSceneSize,
kPriorFrameBufferSize));
auto runner = absl::make_unique<CalculatorRunner>(config);
AddScene(0, kMaxSceneSize, kInputFrameWidth, kInputFrameHeight,
kKeyFrameWidth, kKeyFrameHeight, runner->MutableInputs());
MP_EXPECT_OK(runner->Run());
CheckCroppedFrames(*runner, kMaxSceneSize, target_width, target_height);
}
// Checks that the calculator rejects odd target size.
TEST(SceneCroppingCalculatorTest, RejectsOddTargetSize) {
const CalculatorGraphConfig::Node config =
ParseTextProtoOrDie<CalculatorGraphConfig::Node>(absl::Substitute(
kConfig, kTargetWidth - 1, kTargetHeight, kTargetSizeType,
kMaxSceneSize, kPriorFrameBufferSize));
auto runner = absl::make_unique<CalculatorRunner>(config);
AddScene(0, kMaxSceneSize, kInputFrameWidth, kInputFrameHeight,
kKeyFrameWidth, kKeyFrameHeight, runner->MutableInputs());
const auto status = runner->Run();
EXPECT_FALSE(status.ok());
EXPECT_THAT(status.ToString(), HasSubstr("Target width cannot be odd"));
}
// Checks that the calculator always produces even output frame sizes for even
// target dimensions under all target size types (input frame sizes may be odd).
TEST(SceneCroppingCalculatorTest, ProducesEvenFrameSize) {
// Some commonly used video resolutions (some divided by 10 to make the test
// faster), plus some odd input frame sizes.
const std::vector<std::pair<int, int>> video_sizes = {
{384, 216}, {256, 144}, {192, 108}, {128, 72}, {640, 360},
{426, 240}, {100, 100}, {214, 100}, {240, 100}, {720, 1124},
{90, 160}, {641, 360}, {640, 361}, {101, 101}};
const std::vector<SceneCroppingCalculatorOptions::TargetSizeType>
target_size_types = {SceneCroppingCalculatorOptions::USE_TARGET_DIMENSION,
SceneCroppingCalculatorOptions::KEEP_ORIGINAL_HEIGHT,
SceneCroppingCalculatorOptions::KEEP_ORIGINAL_WIDTH};
// Exhaustive check on each size as input and each size as output for each
// target size type.
for (int i = 0; i < video_sizes.size(); ++i) {
const int frame_width = video_sizes[i].first;
const int frame_height = video_sizes[i].second;
for (int j = 0; j < video_sizes.size(); ++j) {
const int target_width = video_sizes[j].first;
const int target_height = video_sizes[j].second;
if (target_width % 2 == 1 || target_height % 2 == 1) continue;
for (int k = 0; k < target_size_types.size(); ++k) {
const CalculatorGraphConfig::Node config =
ParseTextProtoOrDie<CalculatorGraphConfig::Node>(absl::Substitute(
kConfig, target_width, target_height, target_size_types[k],
kMaxSceneSize, kPriorFrameBufferSize));
auto runner = absl::make_unique<CalculatorRunner>(config);
AddScene(0, 1, frame_width, frame_height, kKeyFrameWidth,
kKeyFrameHeight, runner->MutableInputs());
MP_EXPECT_OK(runner->Run());
const auto& output_frame = runner->Outputs()
.Tag("CROPPED_FRAMES")
.packets[0]
.Get<ImageFrame>();
EXPECT_EQ(output_frame.Width() % 2, 0);
EXPECT_EQ(output_frame.Height() % 2, 0);
if (target_size_types[k] ==
SceneCroppingCalculatorOptions::USE_TARGET_DIMENSION) {
EXPECT_EQ(output_frame.Width(), target_width);
EXPECT_EQ(output_frame.Height(), target_height);
} else if (target_size_types[k] ==
SceneCroppingCalculatorOptions::KEEP_ORIGINAL_HEIGHT) {
// Difference could be 1 if input size is odd.
EXPECT_LE(std::abs(output_frame.Height() - frame_height), 1);
} else if (target_size_types[k] ==
SceneCroppingCalculatorOptions::KEEP_ORIGINAL_WIDTH) {
EXPECT_LE(std::abs(output_frame.Width() - frame_width), 1);
}
}
}
}
}
// Checks that the calculator pads the frames with solid color when possible.
TEST(SceneCroppingCalculatorTest, PadsWithSolidColorFromStaticFeatures) {
const int target_width = 100, target_height = 200;
const int input_width = 100, input_height = 100;
CalculatorGraphConfig::Node config =
ParseTextProtoOrDie<CalculatorGraphConfig::Node>(
absl::Substitute(kNoKeyFrameConfig, target_width, target_height));
auto* options = config.mutable_options()->MutableExtension(
SceneCroppingCalculatorOptions::ext);
options->set_solid_background_frames_padding_fraction(0.6);
auto runner = absl::make_unique<CalculatorRunner>(config);
const int static_features_downsample_rate = 2;
const float fraction_with_solid_background = 0.7;
const int red = 122, green = 167, blue = 250;
const int num_frames_with_solid_background =
std::round(fraction_with_solid_background * kSceneSize /
static_features_downsample_rate);
// Add inputs.
auto* inputs = runner->MutableInputs();
int64 time_ms = 0;
int num_static_features = 0;
for (int i = 0; i < kSceneSize; ++i) {
Timestamp timestamp(time_ms);
auto frame =
MakeImageFrameFromColor(GetRandomColor(), input_width, input_height);
inputs->Tag("VIDEO_FRAMES")
.packets.push_back(Adopt(frame.release()).At(timestamp));
if (i % static_features_downsample_rate == 0) {
auto static_features = absl::make_unique<StaticFeatures>();
if (num_static_features < num_frames_with_solid_background) {
auto* color = static_features->mutable_solid_background();
// Swaps R and B to mimic the BGR ordering of the static features solid
// background color.
color->set_r(blue);
color->set_g(green);
color->set_b(red);
}
inputs->Tag("STATIC_FEATURES")
.packets.push_back(Adopt(static_features.release()).At(timestamp));
num_static_features++;
}
if (i % kDownSampleRate == 0) { // is a key frame
// Target crop size is (50, 100). Adds one required detection with size
// (80, 100) larger than the target crop size to force padding.
auto detections = absl::make_unique<DetectionSet>();
auto* salient_region = detections->add_detections();
salient_region->set_is_required(true);
auto* location = salient_region->mutable_location();
location->set_x(10);
location->set_y(0);
location->set_width(80);
location->set_height(input_height);
inputs->Tag("DETECTION_FEATURES")
.packets.push_back(Adopt(detections.release()).At(timestamp));
}
time_ms += kTimestampDiff;
}
MP_EXPECT_OK(runner->Run());
// Checks that the top and bottom borders indeed have the background color.
const int border_size = 37;
const auto& cropped_frames_outputs =
runner->Outputs().Tag("CROPPED_FRAMES").packets;
EXPECT_EQ(cropped_frames_outputs.size(), kSceneSize);
for (int i = 0; i < kSceneSize; ++i) {
const auto& cropped_frame = cropped_frames_outputs[i].Get<ImageFrame>();
cv::Mat mat = formats::MatView(&cropped_frame);
for (int x = 0; x < target_width; ++x) {
for (int y = 0; y < border_size; ++y) {
EXPECT_EQ(mat.at<cv::Vec3b>(y, x)[0], red);
EXPECT_EQ(mat.at<cv::Vec3b>(y, x)[1], green);
EXPECT_EQ(mat.at<cv::Vec3b>(y, x)[2], blue);
}
for (int y2 = 0; y2 < border_size; ++y2) {
const int y = target_height - 1 - y2;
EXPECT_EQ(mat.at<cv::Vec3b>(y, x)[0], red);
EXPECT_EQ(mat.at<cv::Vec3b>(y, x)[1], green);
EXPECT_EQ(mat.at<cv::Vec3b>(y, x)[2], blue);
}
}
}
}
// Checks that the calculator removes static borders from frames.
TEST(SceneCroppingCalculatorTest, RemovesStaticBorders) {
const int target_width = 50, target_height = 100;
const int input_width = 100, input_height = 100;
const int top_border_size = 20, bottom_border_size = 20;
const cv::Rect top_border_rect(0, 0, input_width, top_border_size);
const cv::Rect bottom_border_rect(0, input_height - bottom_border_size,
input_width, bottom_border_size);
const cv::Scalar frame_color = cv::Scalar(255, 255, 255);
const cv::Scalar border_color = cv::Scalar(0, 0, 0);
const auto config = ParseTextProtoOrDie<CalculatorGraphConfig::Node>(
absl::Substitute(kNoKeyFrameConfig, target_width, target_height));
auto runner = absl::make_unique<CalculatorRunner>(config);
// Add inputs.
auto* inputs = runner->MutableInputs();
const auto timestamp = Timestamp(0);
// Make frame with borders.
auto frame = MakeImageFrameFromColor(frame_color, input_width, input_height);
auto mat = formats::MatView(frame.get());
mat(top_border_rect) = border_color;
mat(bottom_border_rect) = border_color;
inputs->Tag("VIDEO_FRAMES")
.packets.push_back(Adopt(frame.release()).At(timestamp));
// Set borders in static features.
auto static_features = absl::make_unique<StaticFeatures>();
auto* top_part = static_features->add_border();
top_part->set_relative_position(Border::TOP);
top_part->mutable_border_position()->set_height(top_border_size);
auto* bottom_part = static_features->add_border();
bottom_part->set_relative_position(Border::BOTTOM);
bottom_part->mutable_border_position()->set_height(bottom_border_size);
inputs->Tag("STATIC_FEATURES")
.packets.push_back(Adopt(static_features.release()).At(timestamp));
// Add empty detections to ensure no padding is used.
auto detections = absl::make_unique<DetectionSet>();
inputs->Tag("DETECTION_FEATURES")
.packets.push_back(Adopt(detections.release()).At(timestamp));
MP_EXPECT_OK(runner->Run());
// Checks that the top and bottom borders are removed. Each output frame should
// be a solid color equal to the frame color.
const auto& cropped_frames_outputs =
runner->Outputs().Tag("CROPPED_FRAMES").packets;
EXPECT_EQ(cropped_frames_outputs.size(), 1);
const auto& cropped_frame = cropped_frames_outputs[0].Get<ImageFrame>();
const auto cropped_mat = formats::MatView(&cropped_frame);
for (int x = 0; x < target_width; ++x) {
for (int y = 0; y < target_height; ++y) {
EXPECT_EQ(cropped_mat.at<cv::Vec3b>(y, x)[0], frame_color[0]);
EXPECT_EQ(cropped_mat.at<cv::Vec3b>(y, x)[1], frame_color[1]);
EXPECT_EQ(cropped_mat.at<cv::Vec3b>(y, x)[2], frame_color[2]);
}
}
}
} // namespace
} // namespace autoflip
} // namespace mediapipe

View File

@ -0,0 +1,190 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "mediapipe/examples/desktop/autoflip/calculators/shot_boundary_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/image_frame_opencv.h"
#include "mediapipe/framework/port/opencv_imgproc_inc.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/framework/timestamp.h"
using mediapipe::ImageFrame;
using mediapipe::PacketTypeSet;
// IO labels.
constexpr char kVideoInputTag[] = "VIDEO";
constexpr char kShotChangeTag[] = "IS_SHOT_CHANGE";
// Histogram settings.
const int kSaturationBins = 8;
const int kHistogramChannels[] = {0, 1, 2};
const int kHistogramBinNum[] = {kSaturationBins, kSaturationBins,
kSaturationBins};
const float kRange[] = {0, 256};
const float* kHistogramRange[] = {kRange, kRange, kRange};
namespace mediapipe {
namespace autoflip {
// This calculator detects shot (or scene) changes within a video. It works by
// computing a color histogram for each frame and comparing it with the
// histogram of the previous frame. Settings that control the shot-change logic
// are provided in the options proto.
//
// Example:
// node {
// calculator: "ShotBoundaryCalculator"
// input_stream: "VIDEO:camera_frames"
// output_stream: "IS_SHOT_CHANGE:is_shot"
// }
class ShotBoundaryCalculator : public mediapipe::CalculatorBase {
public:
ShotBoundaryCalculator() {}
ShotBoundaryCalculator(const ShotBoundaryCalculator&) = delete;
ShotBoundaryCalculator& operator=(const ShotBoundaryCalculator&) = delete;
static ::mediapipe::Status GetContract(mediapipe::CalculatorContract* cc);
mediapipe::Status Open(mediapipe::CalculatorContext* cc) override;
mediapipe::Status Process(mediapipe::CalculatorContext* cc) override;
private:
// Computes the histogram of an image.
void ComputeHistogram(const cv::Mat& image, cv::Mat* image_histogram);
// Transmits the shot-change signal to downstream calculators.
void Transmit(mediapipe::CalculatorContext* cc, bool is_shot_change);
// Calculator options.
ShotBoundaryCalculatorOptions options_;
// Last time a shot was detected.
Timestamp last_shot_timestamp_;
// Whether the calculator has received its first frame yet.
bool init_;
// Histogram from the last frame.
cv::Mat last_histogram_;
// History of histogram motion.
std::deque<double> motion_history_;
};
REGISTER_CALCULATOR(ShotBoundaryCalculator);
void ShotBoundaryCalculator::ComputeHistogram(const cv::Mat& image,
cv::Mat* image_histogram) {
cv::Mat equalized_image;
cv::cvtColor(image.clone(), equalized_image, CV_RGB2GRAY);
double min, max;
cv::minMaxLoc(equalized_image, &min, &max);
if (options_.equalize_histogram()) {
cv::equalizeHist(equalized_image, equalized_image);
}
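// Compute the histogram over the original (non-equalized) color image.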
cv::calcHist(&image, 1, kHistogramChannels, cv::Mat(), *image_histogram, 2,
kHistogramBinNum, kHistogramRange, true, false);
}
mediapipe::Status ShotBoundaryCalculator::Open(
mediapipe::CalculatorContext* cc) {
options_ = cc->Options<ShotBoundaryCalculatorOptions>();
last_shot_timestamp_ = Timestamp(0);
init_ = false;
return ::mediapipe::OkStatus();
}
void ShotBoundaryCalculator::Transmit(mediapipe::CalculatorContext* cc,
bool is_shot_change) {
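// Suppress shot changes that occur within min_shot_span seconds of the
// previously reported shot change.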
if ((cc->InputTimestamp() - last_shot_timestamp_).Seconds() <
options_.min_shot_span()) {
is_shot_change = false;
}
if (is_shot_change) {
LOG(INFO) << "Shot change at: " << cc->InputTimestamp().Seconds()
<< " seconds.";
cc->Outputs()
.Tag(kShotChangeTag)
.AddPacket(Adopt(std::make_unique<bool>(true).release())
.At(cc->InputTimestamp()));
} else if (!options_.output_only_on_change()) {
cc->Outputs()
.Tag(kShotChangeTag)
.AddPacket(Adopt(std::make_unique<bool>(false).release())
.At(cc->InputTimestamp()));
}
}
::mediapipe::Status ShotBoundaryCalculator::Process(
mediapipe::CalculatorContext* cc) {
// Get the input frame and make a mutable copy.
cv::Mat frame_org = mediapipe::formats::MatView(
&cc->Inputs().Tag(kVideoInputTag).Get<ImageFrame>());
cv::Mat frame = frame_org.clone();
// Extract histogram from the current frame.
cv::Mat current_histogram;
ComputeHistogram(frame, &current_histogram);
if (!init_) {
last_histogram_ = current_histogram;
init_ = true;
Transmit(cc, false);
return ::mediapipe::OkStatus();
}
double current_motion_estimate =
1 - cv::compareHist(current_histogram, last_histogram_, CV_COMP_CORREL);
last_histogram_ = current_histogram;
motion_history_.push_front(current_motion_estimate);
if (motion_history_.size() != options_.window_size()) {
Transmit(cc, false);
return ::mediapipe::OkStatus();
}
// The shot detection algorithm mixes an adaptive threshold (controlled by
// shot_measure) with hard thresholds. In saturation it relies on the hard
// thresholds to handle cases such as black startups and shot cuts during high
// motion; in the operating region it uses the adaptive threshold to trade off
// motion against cut boundaries.
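// For example, with the default options a frame-to-frame histogram correlation
// of 0.75 gives a motion estimate of 0.25, which exceeds the default min_motion
// of 0.2 and by itself marks a shot change (subject to min_shot_span).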
double current_max =
*std::max_element(motion_history_.begin(), motion_history_.end());
double shot_measure = current_motion_estimate / current_max;
if ((shot_measure > options_.min_shot_measure() &&
current_motion_estimate > options_.min_motion_with_shot_measure()) ||
current_motion_estimate > options_.min_motion()) {
Transmit(cc, true);
last_shot_timestamp_ = cc->InputTimestamp();
} else {
Transmit(cc, false);
}
// Store histogram for next frame.
last_histogram_ = current_histogram;
motion_history_.pop_back();
return ::mediapipe::OkStatus();
}
::mediapipe::Status ShotBoundaryCalculator::GetContract(
mediapipe::CalculatorContract* cc) {
cc->Inputs().Tag(kVideoInputTag).Set<ImageFrame>();
cc->Outputs().Tag(kShotChangeTag).Set<bool>();
return ::mediapipe::OkStatus();
}
} // namespace autoflip
} // namespace mediapipe

View File

@ -0,0 +1,48 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe.autoflip;
import "mediapipe/framework/calculator.proto";
message ShotBoundaryCalculatorOptions {
extend mediapipe.CalculatorOptions {
optional ShotBoundaryCalculatorOptions ext = 281194049;
}
// Parameters for the shot detection algorithm. A frame is reported as a shot
// boundary when its motion exceeds min_motion, or when both the
// min_shot_measure and min_motion_with_shot_measure constraints are satisfied.
//
// Minimum motion to be considered as a shot boundary frame.
optional double min_motion = 1 [default = 0.2];
// Minimum shot duration (in seconds).
optional double min_shot_span = 2 [default = 2];
// The window size (in frames) used to compute the shot measure (see the
// min_shot_measure field).
optional int32 window_size = 3 [default = 7];
// Minimum shot measure to be considered as a shot boundary frame.
// Must also satisfy the min_motion_with_shot_measure constraint.
// The shot measure is defined as the ratio of the motion of the
// current frame to the maximum motion of the frames in the window (defined
// as window_size).
optional double min_shot_measure = 4 [default = 10];
// Minimum motion to be considered as a shot boundary frame.
// Must also satisfy the min_shot_measure constraint.
optional double min_motion_with_shot_measure = 5 [default = 0.05];
// Only emit output packets when a shot change is detected.
optional bool output_only_on_change = 6 [default = true];
// Perform histogram equalization before computing the frame histogram.
optional bool equalize_histogram = 7 [default = false];
}

View File

@ -0,0 +1,163 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/string_view.h"
#include "mediapipe/examples/desktop/autoflip/calculators/shot_boundary_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_runner.h"
#include "mediapipe/framework/deps/file_path.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/image_frame_opencv.h"
#include "mediapipe/framework/port/gmock.h"
#include "mediapipe/framework/port/gtest.h"
#include "mediapipe/framework/port/opencv_core_inc.h"
#include "mediapipe/framework/port/opencv_imgcodecs_inc.h"
#include "mediapipe/framework/port/opencv_imgproc_inc.h"
#include "mediapipe/framework/port/parse_text_proto.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/framework/port/status_matchers.h"
using mediapipe::Adopt;
using mediapipe::CalculatorGraphConfig;
using mediapipe::CalculatorRunner;
using mediapipe::ImageFormat;
using mediapipe::ImageFrame;
using mediapipe::PacketTypeSet;
using mediapipe::ParseTextProtoOrDie;
using mediapipe::Timestamp;
namespace mediapipe {
namespace autoflip {
namespace {
const char kConfig[] = R"(
calculator: "ShotBoundaryCalculator"
input_stream: "VIDEO:camera_frames"
output_stream: "IS_SHOT_CHANGE:is_shot"
)";
const int kTestFrameWidth = 640;
const int kTestFrameHeight = 480;
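// Adds |number_of_frames| frames to the runner, each a shifted crop of a test
// image; frames listed in |skip_frames| are left black to force a shot change.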
void AddFrames(const int number_of_frames, const std::set<int>& skip_frames,
CalculatorRunner* runner) {
cv::Mat image =
cv::imread(file::JoinPath("./",
"/mediapipe/examples/desktop/"
"autoflip/calculators/testdata/dino.jpg"));
for (int i = 0; i < number_of_frames; i++) {
auto input_frame = ::absl::make_unique<ImageFrame>(
ImageFormat::SRGB, kTestFrameWidth, kTestFrameHeight);
cv::Mat input_mat = mediapipe::formats::MatView(input_frame.get());
input_mat.setTo(cv::Scalar(0, 0, 0));
cv::Mat sub_image =
image(cv::Rect(i, i, kTestFrameWidth, kTestFrameHeight));
cv::Mat frame_area =
input_mat(cv::Rect(0, 0, sub_image.cols, sub_image.rows));
if (skip_frames.count(i) < 1) {
sub_image.copyTo(frame_area);
}
runner->MutableInputs()->Tag("VIDEO").packets.push_back(
Adopt(input_frame.release()).At(Timestamp(i * 1000000)));
}
}
void CheckOutput(const int number_of_frames, const std::set<int>& shot_frames,
const std::vector<Packet>& output_packets) {
ASSERT_EQ(number_of_frames, output_packets.size());
for (int i = 0; i < number_of_frames; i++) {
if (shot_frames.count(i) < 1) {
EXPECT_FALSE(output_packets[i].Get<bool>());
} else {
EXPECT_TRUE(output_packets[i].Get<bool>());
}
}
}
TEST(ShotBoundaryCalculatorTest, NoShotChange) {
CalculatorGraphConfig::Node node =
ParseTextProtoOrDie<CalculatorGraphConfig::Node>(kConfig);
node.mutable_options()
->MutableExtension(ShotBoundaryCalculatorOptions::ext)
->set_output_only_on_change(false);
auto runner = ::absl::make_unique<CalculatorRunner>(node);
AddFrames(10, {}, runner.get());
MP_ASSERT_OK(runner->Run());
CheckOutput(10, {}, runner->Outputs().Tag("IS_SHOT_CHANGE").packets);
}
TEST(ShotBoundaryCalculatorTest, ShotChangeSingle) {
CalculatorGraphConfig::Node node =
ParseTextProtoOrDie<CalculatorGraphConfig::Node>(kConfig);
node.mutable_options()
->MutableExtension(ShotBoundaryCalculatorOptions::ext)
->set_output_only_on_change(false);
auto runner = ::absl::make_unique<CalculatorRunner>(node);
AddFrames(20, {10}, runner.get());
MP_ASSERT_OK(runner->Run());
CheckOutput(20, {10}, runner->Outputs().Tag("IS_SHOT_CHANGE").packets);
}
TEST(ShotBoundaryCalculatorTest, ShotChangeDouble) {
CalculatorGraphConfig::Node node =
ParseTextProtoOrDie<CalculatorGraphConfig::Node>(kConfig);
node.mutable_options()
->MutableExtension(ShotBoundaryCalculatorOptions::ext)
->set_output_only_on_change(false);
auto runner = ::absl::make_unique<CalculatorRunner>(node);
AddFrames(20, {14, 17}, runner.get());
MP_ASSERT_OK(runner->Run());
CheckOutput(20, {14, 17}, runner->Outputs().Tag("IS_SHOT_CHANGE").packets);
}
TEST(ShotBoundaryCalculatorTest, ShotChangeFiltered) {
CalculatorGraphConfig::Node node =
ParseTextProtoOrDie<CalculatorGraphConfig::Node>(kConfig);
node.mutable_options()
->MutableExtension(ShotBoundaryCalculatorOptions::ext)
->set_min_shot_span(5);
node.mutable_options()
->MutableExtension(ShotBoundaryCalculatorOptions::ext)
->set_output_only_on_change(false);
auto runner = ::absl::make_unique<CalculatorRunner>(node);
AddFrames(24, {16, 19}, runner.get());
MP_ASSERT_OK(runner->Run());
CheckOutput(24, {16}, runner->Outputs().Tag("IS_SHOT_CHANGE").packets);
}
TEST(ShotBoundaryCalculatorTest, ShotChangeSingleOnOnChange) {
CalculatorGraphConfig::Node node =
ParseTextProtoOrDie<CalculatorGraphConfig::Node>(kConfig);
node.mutable_options()
->MutableExtension(ShotBoundaryCalculatorOptions::ext)
->set_output_only_on_change(true);
auto runner = ::absl::make_unique<CalculatorRunner>(node);
AddFrames(20, {15}, runner.get());
MP_ASSERT_OK(runner->Run());
auto output_packets = runner->Outputs().Tag("IS_SHOT_CHANGE").packets;
ASSERT_EQ(output_packets.size(), 1);
ASSERT_EQ(output_packets[0].Get<bool>(), true);
ASSERT_EQ(output_packets[0].Timestamp().Value(), 15000000);
}
} // namespace
} // namespace autoflip
} // namespace mediapipe

View File

@ -0,0 +1,231 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "mediapipe/examples/desktop/autoflip/autoflip_messages.pb.h"
#include "mediapipe/examples/desktop/autoflip/calculators/signal_fusing_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
using mediapipe::Packet;
using mediapipe::PacketTypeSet;
using mediapipe::autoflip::DetectionSet;
using mediapipe::autoflip::SalientRegion;
using mediapipe::autoflip::SignalType;
namespace mediapipe {
namespace autoflip {
struct InputSignal {
SalientRegion signal;
int source;
};
struct Frame {
std::vector<InputSignal> input_detections;
mediapipe::Timestamp time;
};
// This calculator takes one scene change signal and an arbitrary number of
// detection signals and outputs a single list of detections. The scores for
// the detections can be re-normalized using the options proto. Additionally,
// if a detection has a consistent tracking id during a scene, its score is
// averaged over the whole scene.
//
// Example:
// node {
// calculator: "SignalFusingCalculator"
// input_stream: "scene_change"
// input_stream: "detection_faces"
// input_stream: "detection_custom_text"
// output_stream: "salient_region"
// options:{
// [mediapipe.autoflip.SignalFusingCalculatorOptions.ext]:{
// signal_settings{
// type: {standard: FACE}
// min_score: 0.5
// max_score: 0.6
// }
// signal_settings{
// type: {custom: "custom_text"}
// min_score: 0.9
// max_score: 1.0
// }
// }
// }
// }
class SignalFusingCalculator : public mediapipe::CalculatorBase {
public:
SignalFusingCalculator() {}
SignalFusingCalculator(const SignalFusingCalculator&) = delete;
SignalFusingCalculator& operator=(const SignalFusingCalculator&) = delete;
static ::mediapipe::Status GetContract(mediapipe::CalculatorContract* cc);
mediapipe::Status Open(mediapipe::CalculatorContext* cc) override;
mediapipe::Status Process(mediapipe::CalculatorContext* cc) override;
mediapipe::Status Close(mediapipe::CalculatorContext* cc) override;
private:
mediapipe::Status ProcessScene(mediapipe::CalculatorContext* cc);
SignalFusingCalculatorOptions options_;
std::map<std::string, SignalSettings> settings_by_type_;
std::vector<Frame> scene_frames_;
};
REGISTER_CALCULATOR(SignalFusingCalculator);
namespace {
std::string CreateSettingsKey(const SignalType& signal_type) {
if (signal_type.has_standard()) {
return "standard_" + std::to_string(signal_type.standard());
} else {
return "custom_" + signal_type.custom();
}
}
std::string CreateKey(const InputSignal& detection) {
std::string id_source = std::to_string(detection.source);
std::string id_signal = std::to_string(detection.signal.tracking_id());
std::string id = id_source + ":" + id_signal;
return id;
}
} // namespace
mediapipe::Status SignalFusingCalculator::Open(
mediapipe::CalculatorContext* cc) {
options_ = cc->Options<SignalFusingCalculatorOptions>();
for (const auto& setting : options_.signal_settings()) {
settings_by_type_[CreateSettingsKey(setting.type())] = setting;
}
return ::mediapipe::OkStatus();
}
mediapipe::Status SignalFusingCalculator::Close(
mediapipe::CalculatorContext* cc) {
if (!scene_frames_.empty()) {
MP_RETURN_IF_ERROR(ProcessScene(cc));
scene_frames_.clear();
}
return ::mediapipe::OkStatus();
}
mediapipe::Status SignalFusingCalculator::ProcessScene(
mediapipe::CalculatorContext* cc) {
std::map<std::string, int> detection_count;
std::map<std::string, float> multiframe_score;
// Create a unified score for all items with temporal ids.
for (const Frame& frame : scene_frames_) {
for (const auto& detection : frame.input_detections) {
if (detection.signal.has_tracking_id()) {
// Create a key for each (source, tracking id) pair.
if (detection_count.find(CreateKey(detection)) ==
detection_count.end()) {
multiframe_score[CreateKey(detection)] = 0.0;
detection_count[CreateKey(detection)] = 0;
}
multiframe_score[CreateKey(detection)] += detection.signal.score();
detection_count[CreateKey(detection)]++;
}
}
}
// Average scores.
for (auto iterator = multiframe_score.begin();
iterator != multiframe_score.end(); iterator++) {
multiframe_score[iterator->first] =
iterator->second / detection_count[iterator->first];
}
// Process detections.
for (const Frame& frame : scene_frames_) {
std::unique_ptr<DetectionSet> processed_detections(new DetectionSet());
for (auto detection : frame.input_detections) {
float score = detection.signal.score();
if (detection.signal.has_tracking_id()) {
std::string id_source = std::to_string(detection.source);
std::string id_signal = std::to_string(detection.signal.tracking_id());
std::string id = id_source + ":" + id_signal;
score = multiframe_score[id];
}
// Normalize within range.
float min_value = 0.0;
float max_value = 1.0;
auto settings_it = settings_by_type_.find(
CreateSettingsKey(detection.signal.signal_type()));
if (settings_it != settings_by_type_.end()) {
min_value = settings_it->second.min_score();
max_value = settings_it->second.max_score();
detection.signal.set_is_required(settings_it->second.is_required());
}
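// Map the (possibly scene-averaged) score into [min_value, max_value];
// e.g. a score of 0.7 with a [0.9, 1.0] range becomes 0.97.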
float final_score = score * (max_value - min_value) + min_value;
detection.signal.set_score(final_score);
*processed_detections->add_detections() = detection.signal;
}
cc->Outputs().Index(0).Add(processed_detections.release(), frame.time);
}
return ::mediapipe::OkStatus();
}
mediapipe::Status SignalFusingCalculator::Process(
mediapipe::CalculatorContext* cc) {
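// Input index 0 carries the shot-boundary signal; when a boundary arrives or
// the buffer grows beyond max_scene_size, the buffered scene is scored and
// flushed.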
bool is_boundary = false;
if (!cc->Inputs().Index(0).Value().IsEmpty()) {
is_boundary = cc->Inputs().Index(0).Get<bool>();
}
if (is_boundary || scene_frames_.size() > options_.max_scene_size()) {
MP_RETURN_IF_ERROR(ProcessScene(cc));
scene_frames_.clear();
}
Frame frame;
for (int i = 1; i < cc->Inputs().NumEntries(); ++i) {
const Packet& packet = cc->Inputs().Index(i).Value();
if (packet.IsEmpty()) {
continue;
}
const auto& detection_set = packet.Get<autoflip::DetectionSet>();
for (const auto& detection : detection_set.detections()) {
InputSignal input;
input.signal = detection;
input.source = i;
frame.input_detections.push_back(input);
}
}
frame.time = cc->InputTimestamp();
scene_frames_.push_back(frame);
return ::mediapipe::OkStatus();
}
::mediapipe::Status SignalFusingCalculator::GetContract(
mediapipe::CalculatorContract* cc) {
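// The first input stream is the shot-change signal; all remaining input
// streams carry detection sets.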
cc->Inputs().Index(0).Set<bool>();
for (int i = 1; i < cc->Inputs().NumEntries(); ++i) {
cc->Inputs().Index(i).Set<autoflip::DetectionSet>();
}
cc->Outputs().Index(0).Set<autoflip::DetectionSet>();
return ::mediapipe::OkStatus();
}
} // namespace autoflip
} // namespace mediapipe

View File

@ -0,0 +1,54 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe.autoflip;
import "mediapipe/examples/desktop/autoflip/autoflip_messages.proto";
import "mediapipe/framework/calculator.proto";
// Next tag: 3
message SignalFusingCalculatorOptions {
extend mediapipe.CalculatorOptions {
optional SignalFusingCalculatorOptions ext = 280092372;
}
// Settings for each type of signal this calculator can process.
repeated SignalSettings signal_settings = 1;
// Force a flush of the frame buffer after this number of frames.
optional int32 max_scene_size = 2 [default = 600];
}
// Next tag: 5
message SignalSettings {
// The type of signal these settings pertain to.
optional SignalType type = 1;
// Re-normalize an incoming (already normalized) score into this range.
// (Set min_score to 0 and max_score to 1 to leave the score unchanged.)
// Values must be within [0, 1], and min_score must be less than max_score.
//
// Example of score adjustment:
// Incoming OCR score: .7
// Min OCR Score: .9
// Max OCR Score: 1.0
// --Result: .97
optional float min_score = 2 [default = 0];
optional float max_score = 3 [default = 1.0];
// Whether this signal is required within the output cropped video. If it is,
// it will be included in the crop, or the video will be marked as failed to
// convert.
optional bool is_required = 4 [default = false];
}

View File

@ -0,0 +1,438 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/string_view.h"
#include "mediapipe/examples/desktop/autoflip/autoflip_messages.pb.h"
#include "mediapipe/examples/desktop/autoflip/calculators/signal_fusing_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_runner.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/image_frame_opencv.h"
#include "mediapipe/framework/port/gmock.h"
#include "mediapipe/framework/port/gtest.h"
#include "mediapipe/framework/port/parse_text_proto.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/framework/port/status_matchers.h"
using mediapipe::autoflip::DetectionSet;
namespace mediapipe {
namespace autoflip {
namespace {
const char kConfigA[] = R"(
calculator: "SignalFusingCalculator"
input_stream: "scene_change"
input_stream: "detection_set_a"
input_stream: "detection_set_b"
output_stream: "salient_region"
options:{
[mediapipe.autoflip.SignalFusingCalculatorOptions.ext]:{
signal_settings{
type: {standard: FACE_FULL}
min_score: 0.5
max_score: 0.6
}
signal_settings{
type: {standard: TEXT}
min_score: 0.9
max_score: 1.0
}
}
})";
const char kConfigB[] = R"(
calculator: "SignalFusingCalculator"
input_stream: "scene_change"
input_stream: "detection_set_a"
input_stream: "detection_set_b"
input_stream: "detection_set_c"
output_stream: "salient_region"
options:{
[mediapipe.autoflip.SignalFusingCalculatorOptions.ext]:{
signal_settings{
type: {standard: FACE_FULL}
min_score: 0.5
max_score: 0.6
}
signal_settings{
type: {custom: "text"}
min_score: 0.9
max_score: 1.0
}
signal_settings{
type: {standard: LOGO}
min_score: 0.1
max_score: 0.3
}
}
})";
TEST(SignalFusingCalculatorTest, TwoInputNoTracking) {
auto runner = absl::make_unique<CalculatorRunner>(
ParseTextProtoOrDie<CalculatorGraphConfig::Node>(kConfigA));
auto input_border = absl::make_unique<bool>(false);
runner->MutableInputs()->Index(0).packets.push_back(
Adopt(input_border.release()).At(Timestamp(0)));
auto input_face =
absl::make_unique<DetectionSet>(ParseTextProtoOrDie<DetectionSet>(
R"(
detections {
score: 0.5
signal_type: { standard: FACE_FULL }
}
detections {
score: 0.3
signal_type: { standard: FACE_FULL }
}
)"));
runner->MutableInputs()->Index(1).packets.push_back(
Adopt(input_face.release()).At(Timestamp(0)));
auto input_ocr =
absl::make_unique<DetectionSet>(ParseTextProtoOrDie<DetectionSet>(
R"(
detections {
score: 0.3
signal_type: { standard: TEXT }
}
detections {
score: 0.9
signal_type: { standard: TEXT }
}
)"));
runner->MutableInputs()->Index(2).packets.push_back(
Adopt(input_ocr.release()).At(Timestamp(0)));
MP_ASSERT_OK(runner->Run());
const std::vector<Packet>& output_packets =
runner->Outputs().Index(0).packets;
const auto& detection_set = output_packets[0].Get<DetectionSet>();
ASSERT_EQ(detection_set.detections().size(), 4);
EXPECT_FLOAT_EQ(detection_set.detections(0).score(), .55);
EXPECT_FLOAT_EQ(detection_set.detections(1).score(), .53);
EXPECT_FLOAT_EQ(detection_set.detections(2).score(), .93);
EXPECT_FLOAT_EQ(detection_set.detections(3).score(), .99);
}
TEST(SignalFusingCalculatorTest, ThreeInputTracking) {
auto runner = absl::make_unique<CalculatorRunner>(
ParseTextProtoOrDie<CalculatorGraphConfig::Node>(kConfigB));
auto input_border_0 = absl::make_unique<bool>(false);
runner->MutableInputs()->Index(0).packets.push_back(
Adopt(input_border_0.release()).At(Timestamp(0)));
// Time zero.
auto input_face_0 =
absl::make_unique<DetectionSet>(ParseTextProtoOrDie<DetectionSet>(
R"(
detections {
score: 0.2
signal_type: { standard: FACE_FULL }
tracking_id: 0
}
detections {
score: 0.0
signal_type: { standard: FACE_FULL }
tracking_id: 1
}
detections {
score: 0.1
signal_type: { standard: FACE_FULL }
}
)"));
runner->MutableInputs()->Index(1).packets.push_back(
Adopt(input_face_0.release()).At(Timestamp(0)));
auto input_ocr_0 =
absl::make_unique<DetectionSet>(ParseTextProtoOrDie<DetectionSet>(
R"(
detections {
score: 0.2
signal_type: { custom: "text" }
}
)"));
runner->MutableInputs()->Index(2).packets.push_back(
Adopt(input_ocr_0.release()).At(Timestamp(0)));
auto input_agn_0 =
absl::make_unique<DetectionSet>(ParseTextProtoOrDie<DetectionSet>(
R"(
detections {
score: 0.3
signal_type: { standard: LOGO }
tracking_id: 0
}
)"));
runner->MutableInputs()->Index(3).packets.push_back(
Adopt(input_agn_0.release()).At(Timestamp(0)));
// Time one
auto input_border_1 = absl::make_unique<bool>(false);
runner->MutableInputs()->Index(0).packets.push_back(
Adopt(input_border_1.release()).At(Timestamp(1)));
auto input_face_1 =
absl::make_unique<DetectionSet>(ParseTextProtoOrDie<DetectionSet>(
R"(
detections {
score: 0.7
signal_type: { standard: FACE_FULL }
tracking_id: 0
}
detections {
score: 0.9
signal_type: { standard: FACE_FULL }
tracking_id: 1
}
detections {
score: 0.2
signal_type: { standard: FACE_FULL }
}
)"));
runner->MutableInputs()->Index(1).packets.push_back(
Adopt(input_face_1.release()).At(Timestamp(1)));
auto input_ocr_1 =
absl::make_unique<DetectionSet>(ParseTextProtoOrDie<DetectionSet>(
R"(
detections {
score: 0.3
signal_type: { custom: "text" }
}
)"));
runner->MutableInputs()->Index(2).packets.push_back(
Adopt(input_ocr_1.release()).At(Timestamp(1)));
auto input_agn_1 =
absl::make_unique<DetectionSet>(ParseTextProtoOrDie<DetectionSet>(
R"(
detections {
score: 0.3
signal_type: { standard: LOGO }
tracking_id: 0
}
)"));
runner->MutableInputs()->Index(3).packets.push_back(
Adopt(input_agn_1.release()).At(Timestamp(1)));
// Time two
auto input_border_2 = absl::make_unique<bool>(false);
runner->MutableInputs()->Index(0).packets.push_back(
Adopt(input_border_2.release()).At(Timestamp(2)));
auto input_face_2 =
absl::make_unique<DetectionSet>(ParseTextProtoOrDie<DetectionSet>(
R"(
detections {
score: 0.8
signal_type: { standard: FACE_FULL }
tracking_id: 0
}
detections {
score: 0.9
signal_type: { standard: FACE_FULL }
tracking_id: 1
}
detections {
score: 0.3
signal_type: { standard: FACE_FULL }
}
)"));
runner->MutableInputs()->Index(1).packets.push_back(
Adopt(input_face_2.release()).At(Timestamp(2)));
auto input_ocr_2 =
absl::make_unique<DetectionSet>(ParseTextProtoOrDie<DetectionSet>(
R"(
detections {
score: 0.3
signal_type: { custom: "text" }
}
)"));
runner->MutableInputs()->Index(2).packets.push_back(
Adopt(input_ocr_2.release()).At(Timestamp(2)));
auto input_agn_2 =
absl::make_unique<DetectionSet>(ParseTextProtoOrDie<DetectionSet>(
R"(
detections {
score: 0.9
signal_type: { standard: LOGO }
tracking_id: 0
}
)"));
runner->MutableInputs()->Index(3).packets.push_back(
Adopt(input_agn_2.release()).At(Timestamp(2)));
// Time three (new scene)
auto input_border_3 = absl::make_unique<bool>(true);
runner->MutableInputs()->Index(0).packets.push_back(
Adopt(input_border_3.release()).At(Timestamp(3)));
auto input_face_3 =
absl::make_unique<DetectionSet>(ParseTextProtoOrDie<DetectionSet>(
R"(
detections {
score: 0.2
signal_type: { standard: FACE_FULL }
tracking_id: 0
}
detections {
score: 0.3
signal_type: { standard: FACE_FULL }
tracking_id: 1
}
detections {
score: 0.4
signal_type: { standard: FACE_FULL }
}
)"));
runner->MutableInputs()->Index(1).packets.push_back(
Adopt(input_face_3.release()).At(Timestamp(3)));
auto input_ocr_3 =
absl::make_unique<DetectionSet>(ParseTextProtoOrDie<DetectionSet>(
R"(
detections {
score: 0.5
signal_type: { custom: "text" }
}
)"));
runner->MutableInputs()->Index(2).packets.push_back(
Adopt(input_ocr_3.release()).At(Timestamp(3)));
auto input_agn_3 =
absl::make_unique<DetectionSet>(ParseTextProtoOrDie<DetectionSet>(
R"(
detections {
score: 0.6
signal_type: { standard: LOGO }
tracking_id: 0
}
)"));
runner->MutableInputs()->Index(3).packets.push_back(
Adopt(input_agn_3.release()).At(Timestamp(3)));
MP_ASSERT_OK(runner->Run());
// Check time 0
std::vector<Packet> output_packets = runner->Outputs().Index(0).packets;
DetectionSet detection_set = output_packets[0].Get<DetectionSet>();
float face_id_0 = (.2 + .7 + .8) / 3;
face_id_0 = face_id_0 * .1 + .5;
float face_id_1 = (0.0 + .9 + .9) / 3;
face_id_1 = face_id_1 * .1 + .5;
float face_3 = 0.1;
face_3 = face_3 * .1 + .5;
float ocr_1 = 0.2;
ocr_1 = ocr_1 * .1 + .9;
float agn_1 = (.3 + .3 + .9) / 3;
agn_1 = agn_1 * .2 + .1;
ASSERT_EQ(detection_set.detections().size(), 5);
EXPECT_FLOAT_EQ(detection_set.detections(0).score(), face_id_0);
EXPECT_FLOAT_EQ(detection_set.detections(1).score(), face_id_1);
EXPECT_FLOAT_EQ(detection_set.detections(2).score(), face_3);
EXPECT_FLOAT_EQ(detection_set.detections(3).score(), ocr_1);
EXPECT_FLOAT_EQ(detection_set.detections(4).score(), agn_1);
// Check time 1
detection_set = output_packets[1].Get<DetectionSet>();
face_id_0 = (.2 + .7 + .8) / 3;
face_id_0 = face_id_0 * .1 + .5;
face_id_1 = (0.0 + .9 + .9) / 3;
face_id_1 = face_id_1 * .1 + .5;
face_3 = 0.2;
face_3 = face_3 * .1 + .5;
ocr_1 = 0.3;
ocr_1 = ocr_1 * .1 + .9;
agn_1 = (.3 + .3 + .9) / 3;
agn_1 = agn_1 * .2 + .1;
ASSERT_EQ(detection_set.detections().size(), 5);
EXPECT_FLOAT_EQ(detection_set.detections(0).score(), face_id_0);
EXPECT_FLOAT_EQ(detection_set.detections(1).score(), face_id_1);
EXPECT_FLOAT_EQ(detection_set.detections(2).score(), face_3);
EXPECT_FLOAT_EQ(detection_set.detections(3).score(), ocr_1);
EXPECT_FLOAT_EQ(detection_set.detections(4).score(), agn_1);
// Check time 2
detection_set = output_packets[2].Get<DetectionSet>();
face_id_0 = (.2 + .7 + .8) / 3;
face_id_0 = face_id_0 * .1 + .5;
face_id_1 = (0.0 + .9 + .9) / 3;
face_id_1 = face_id_1 * .1 + .5;
face_3 = 0.3;
face_3 = face_3 * .1 + .5;
ocr_1 = 0.3;
ocr_1 = ocr_1 * .1 + .9;
agn_1 = (.3 + .3 + .9) / 3;
agn_1 = agn_1 * .2 + .1;
ASSERT_EQ(detection_set.detections().size(), 5);
EXPECT_FLOAT_EQ(detection_set.detections(0).score(), face_id_0);
EXPECT_FLOAT_EQ(detection_set.detections(1).score(), face_id_1);
EXPECT_FLOAT_EQ(detection_set.detections(2).score(), face_3);
EXPECT_FLOAT_EQ(detection_set.detections(3).score(), ocr_1);
EXPECT_FLOAT_EQ(detection_set.detections(4).score(), agn_1);
// Check time 3 (new scene)
detection_set = output_packets[3].Get<DetectionSet>();
face_id_0 = 0.2;
face_id_0 = face_id_0 * .1 + .5;
face_id_1 = 0.3;
face_id_1 = face_id_1 * .1 + .5;
face_3 = 0.4;
face_3 = face_3 * .1 + .5;
ocr_1 = 0.5;
ocr_1 = ocr_1 * .1 + .9;
agn_1 = .6;
agn_1 = agn_1 * .2 + .1;
ASSERT_EQ(detection_set.detections().size(), 5);
EXPECT_FLOAT_EQ(detection_set.detections(0).score(), face_id_0);
EXPECT_FLOAT_EQ(detection_set.detections(1).score(), face_id_1);
EXPECT_FLOAT_EQ(detection_set.detections(2).score(), face_3);
EXPECT_FLOAT_EQ(detection_set.detections(3).score(), ocr_1);
EXPECT_FLOAT_EQ(detection_set.detections(4).score(), agn_1);
}
} // namespace
} // namespace autoflip
} // namespace mediapipe

View File

@ -0,0 +1,23 @@
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
licenses(["notice"]) # Apache 2.0
filegroup(
name = "test_images",
srcs = [
"dino.jpg",
],
visibility = ["//visibility:public"],
)

Binary file not shown.

View File

@ -0,0 +1,121 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <string>
#include "absl/strings/string_view.h"
#include "absl/strings/substitute.h"
#include "mediapipe/examples/desktop/autoflip/calculators/video_filtering_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/image_frame_opencv.h"
#include "mediapipe/framework/port/status_builder.h"
namespace mediapipe {
namespace autoflip {
namespace {
constexpr char kInputFrameTag[] = "INPUT_FRAMES";
constexpr char kOutputFrameTag[] = "OUTPUT_FRAMES";
} // namespace
// This calculator filters out frames based on criteria specified in the
// options. One use case is to filter based on the aspect ratio. Future work
// can implement more filter types.
//
// Input: Video frames.
// Output: Video frames that pass all filters.
//
// Example config:
// node {
// calculator: "VideoFilteringCalculator"
// input_stream: "INPUT_FRAMES:frames"
// output_stream: "OUTPUT_FRAMES:output_frames"
// options: {
// [mediapipe.autoflip.VideoFilteringCalculatorOptions.ext]: {
// fail_if_any: true
// aspect_ratio_filter {
// target_width: 400
// target_height: 600
// filter_type: UPPER_ASPECT_RATIO_THRESHOLD
// }
// }
// }
// }
class VideoFilteringCalculator : public CalculatorBase {
public:
VideoFilteringCalculator() = default;
~VideoFilteringCalculator() override = default;
static ::mediapipe::Status GetContract(CalculatorContract* cc);
::mediapipe::Status Process(CalculatorContext* cc) override;
};
REGISTER_CALCULATOR(VideoFilteringCalculator);
::mediapipe::Status VideoFilteringCalculator::GetContract(
CalculatorContract* cc) {
cc->Inputs().Tag(kInputFrameTag).Set<ImageFrame>();
cc->Outputs().Tag(kOutputFrameTag).Set<ImageFrame>();
return ::mediapipe::OkStatus();
}
::mediapipe::Status VideoFilteringCalculator::Process(CalculatorContext* cc) {
const auto& options = cc->Options<VideoFilteringCalculatorOptions>();
const Packet& input_packet = cc->Inputs().Tag(kInputFrameTag).Value();
const ImageFrame& frame = input_packet.Get<ImageFrame>();
RET_CHECK(options.has_aspect_ratio_filter());
const auto filter_type = options.aspect_ratio_filter().filter_type();
RET_CHECK_NE(
filter_type,
VideoFilteringCalculatorOptions::AspectRatioFilter::UNKNOWN_FILTER_TYPE);
if (filter_type ==
VideoFilteringCalculatorOptions::AspectRatioFilter::NO_FILTERING) {
cc->Outputs().Tag(kOutputFrameTag).AddPacket(input_packet);
return ::mediapipe::OkStatus();
}
const int target_width = options.aspect_ratio_filter().target_width();
const int target_height = options.aspect_ratio_filter().target_height();
RET_CHECK_GT(target_width, 0);
RET_CHECK_GT(target_height, 0);
bool should_pass = false;
cv::Mat frame_mat = ::mediapipe::formats::MatView(&frame);
const double ratio = static_cast<double>(frame_mat.cols) / frame_mat.rows;
const double target_ratio = static_cast<double>(target_width) / target_height;
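// A frame passes when its aspect ratio falls on the allowed side of the
// target ratio (the threshold is inclusive).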
if (filter_type == VideoFilteringCalculatorOptions::AspectRatioFilter::
UPPER_ASPECT_RATIO_THRESHOLD &&
ratio <= target_ratio) {
should_pass = true;
} else if (filter_type == VideoFilteringCalculatorOptions::AspectRatioFilter::
LOWER_ASPECT_RATIO_THRESHOLD &&
ratio >= target_ratio) {
should_pass = true;
}
if (should_pass) {
cc->Outputs().Tag(kOutputFrameTag).AddPacket(input_packet);
return ::mediapipe::OkStatus();
}
if (options.fail_if_any()) {
return ::mediapipe::UnknownErrorBuilder(MEDIAPIPE_LOC) << absl::Substitute(
"Failing due to aspect ratio. Target aspect ratio: $0. Frame "
"width: $1, height: $2.",
target_ratio, frame.Width(), frame.Height());
}
return ::mediapipe::OkStatus();
}
} // namespace autoflip
} // namespace mediapipe

View File

@ -0,0 +1,56 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe.autoflip;
import "mediapipe/framework/calculator.proto";
message VideoFilteringCalculatorOptions {
extend mediapipe.CalculatorOptions {
optional VideoFilteringCalculatorOptions ext = 278504113;
}
// If true, when an input frame should be filtered out according to the filter
// type and conditions, the calculator returns a failure status. Otherwise, the
// calculator simply drops filtered frames and does not pass them down to
// downstream nodes.
optional bool fail_if_any = 1 [default = false];
message AspectRatioFilter {
// Target width and height, which define the aspect ratio
// (i.e. target_width / target_height) to compare input frames with. The
// actual values of these fields do not matter, only the ratio between them
does. Both values must be positive.
optional int32 target_width = 1 [default = -1];
optional int32 target_height = 2 [default = -1];
enum FilterType {
UNKNOWN_FILTER_TYPE = 0;
// Use this type when the target width and height defines an upper bound
// (inclusive) of the aspect ratio.
UPPER_ASPECT_RATIO_THRESHOLD = 1;
// Use this type when the target width and height defines a lower bound
// (inclusive) of the aspect ratio.
LOWER_ASPECT_RATIO_THRESHOLD = 2;
// Use this type to configure the calculator as a no-op pass-through node.
NO_FILTERING = 3;
}
optional FilterType filter_type = 3;
}
oneof filter {
AspectRatioFilter aspect_ratio_filter = 2;
}
}

View File

@ -0,0 +1,177 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <memory>
#include <string>
#include <vector>
#include "absl/strings/string_view.h"
#include "absl/strings/substitute.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_runner.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/port/gmock.h"
#include "mediapipe/framework/port/gtest.h"
#include "mediapipe/framework/port/parse_text_proto.h"
#include "mediapipe/framework/port/status_builder.h"
#include "mediapipe/framework/port/status_matchers.h"
namespace mediapipe {
namespace autoflip {
namespace {
// Default configuration of the calculator.
CalculatorGraphConfig::Node GetCalculatorNode(
const std::string& fail_if_any, const std::string& extra_options = "") {
return ParseTextProtoOrDie<CalculatorGraphConfig::Node>(
absl::Substitute(R"(
calculator: "VideoFilteringCalculator"
input_stream: "INPUT_FRAMES:frames"
output_stream: "OUTPUT_FRAMES:output_frames"
options: {
[mediapipe.autoflip.VideoFilteringCalculatorOptions.ext]: {
fail_if_any: $0
$1
}
}
)",
fail_if_any, extra_options));
}
TEST(VideoFilterCalculatorTest, UpperBoundNoPass) {
CalculatorGraphConfig::Node config = GetCalculatorNode("false", R"(
aspect_ratio_filter {
target_width: 2
target_height: 1
filter_type: UPPER_ASPECT_RATIO_THRESHOLD
}
)");
auto runner = ::absl::make_unique<CalculatorRunner>(config);
const int kFixedWidth = 1000;
const double kAspectRatio = 5.0 / 1.0;
auto input_frame = ::absl::make_unique<ImageFrame>(
ImageFormat::SRGB, kFixedWidth,
static_cast<int>(kFixedWidth / kAspectRatio), 16);
runner->MutableInputs()
->Tag("INPUT_FRAMES")
.packets.push_back(Adopt(input_frame.release()).At(Timestamp(1000)));
MP_ASSERT_OK(runner->Run());
const auto& output_packet = runner->Outputs().Tag("OUTPUT_FRAMES").packets;
EXPECT_TRUE(output_packet.empty());
}
TEST(VideoFilterCalculatorTest, UpperBoundPass) {
CalculatorGraphConfig::Node config = GetCalculatorNode("false", R"(
aspect_ratio_filter {
target_width: 2
target_height: 1
filter_type: UPPER_ASPECT_RATIO_THRESHOLD
}
)");
auto runner = ::absl::make_unique<CalculatorRunner>(config);
const int kWidth = 1000;
const double kAspectRatio = 1.0 / 5.0;
const int kHeight = static_cast<int>(kWidth / kAspectRatio);
auto input_frame =
::absl::make_unique<ImageFrame>(ImageFormat::SRGB, kWidth, kHeight, 16);
runner->MutableInputs()
->Tag("INPUT_FRAMES")
.packets.push_back(Adopt(input_frame.release()).At(Timestamp(1000)));
MP_ASSERT_OK(runner->Run());
const auto& output_packet = runner->Outputs().Tag("OUTPUT_FRAMES").packets;
EXPECT_EQ(1, output_packet.size());
auto& output_frame = output_packet[0].Get<ImageFrame>();
EXPECT_EQ(kWidth, output_frame.Width());
EXPECT_EQ(kHeight, output_frame.Height());
}
TEST(VideoFilterCalculatorTest, LowerBoundNoPass) {
CalculatorGraphConfig::Node config = GetCalculatorNode("false", R"(
aspect_ratio_filter {
target_width: 2
target_height: 1
filter_type: LOWER_ASPECT_RATIO_THRESHOLD
}
)");
auto runner = ::absl::make_unique<CalculatorRunner>(config);
const int kFixedWidth = 1000;
const double kAspectRatio = 1.0 / 1.0;
auto input_frame = ::absl::make_unique<ImageFrame>(
ImageFormat::SRGB, kFixedWidth,
static_cast<int>(kFixedWidth / kAspectRatio), 16);
runner->MutableInputs()
->Tag("INPUT_FRAMES")
.packets.push_back(Adopt(input_frame.release()).At(Timestamp(1000)));
MP_ASSERT_OK(runner->Run());
const auto& output_packet = runner->Outputs().Tag("OUTPUT_FRAMES").packets;
EXPECT_TRUE(output_packet.empty());
}
TEST(VideoFilterCalculatorTest, LowerBoundPass) {
CalculatorGraphConfig::Node config = GetCalculatorNode("false", R"(
aspect_ratio_filter {
target_width: 2
target_height: 1
filter_type: LOWER_ASPECT_RATIO_THRESHOLD
}
)");
auto runner = ::absl::make_unique<CalculatorRunner>(config);
const int kWidth = 1000;
const double kAspectRatio = 5.0 / 1.0;
const int kHeight = static_cast<int>(kWidth / kAspectRatio);
auto input_frame =
::absl::make_unique<ImageFrame>(ImageFormat::SRGB, kWidth, kHeight, 16);
runner->MutableInputs()
->Tag("INPUT_FRAMES")
.packets.push_back(Adopt(input_frame.release()).At(Timestamp(1000)));
MP_ASSERT_OK(runner->Run());
const auto& output_packet = runner->Outputs().Tag("OUTPUT_FRAMES").packets;
EXPECT_EQ(1, output_packet.size());
auto& output_frame = output_packet[0].Get<ImageFrame>();
EXPECT_EQ(kWidth, output_frame.Width());
EXPECT_EQ(kHeight, output_frame.Height());
}
// Test that an error should be generated when fail_if_any is true.
TEST(VideoFilterCalculatorTest, OutputError) {
CalculatorGraphConfig::Node config = GetCalculatorNode("true", R"(
aspect_ratio_filter {
target_width: 2
target_height: 1
filter_type: LOWER_ASPECT_RATIO_THRESHOLD
}
)");
auto runner = ::absl::make_unique<CalculatorRunner>(config);
const int kFixedWidth = 1000;
const double kAspectRatio = 1.0 / 1.0;
auto input_frame = ::absl::make_unique<ImageFrame>(
ImageFormat::SRGB, kFixedWidth,
static_cast<int>(kFixedWidth / kAspectRatio), 16);
runner->MutableInputs()
->Tag("INPUT_FRAMES")
.packets.push_back(Adopt(input_frame.release()).At(Timestamp(1000)));
::mediapipe::Status status = runner->Run();
EXPECT_EQ(status.code(), ::mediapipe::StatusCode::kUnknown);
EXPECT_THAT(status.ToString(),
::testing::HasSubstr("Failing due to aspect ratio"));
}
} // namespace
} // namespace autoflip
} // namespace mediapipe

View File

@ -0,0 +1,331 @@
load("//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library")
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
licenses(["notice"]) # Apache 2.0
package(default_visibility = ["//mediapipe/examples:__subpackages__"])
proto_library(
name = "cropping_proto",
srcs = ["cropping.proto"],
deps = [
"//mediapipe/examples/desktop/autoflip:autoflip_messages_proto",
],
)
mediapipe_cc_proto_library(
name = "cropping_cc_proto",
srcs = ["cropping.proto"],
cc_deps = ["//mediapipe/examples/desktop/autoflip:autoflip_messages_cc_proto"],
visibility = ["//mediapipe/examples:__subpackages__"],
deps = [":cropping_proto"],
)
proto_library(
name = "focus_point_proto",
srcs = ["focus_point.proto"],
)
mediapipe_cc_proto_library(
name = "focus_point_cc_proto",
srcs = ["focus_point.proto"],
visibility = ["//mediapipe/examples:__subpackages__"],
deps = [":focus_point_proto"],
)
cc_library(
name = "frame_crop_region_computer",
srcs = ["frame_crop_region_computer.cc"],
hdrs = ["frame_crop_region_computer.h"],
deps = [
":cropping_cc_proto",
":utils",
"//mediapipe/examples/desktop/autoflip:autoflip_messages_cc_proto",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
],
)
cc_library(
name = "math_utils",
hdrs = ["math_utils.h"],
)
cc_library(
name = "piecewise_linear_function",
srcs = ["piecewise_linear_function.cc"],
hdrs = ["piecewise_linear_function.h"],
deps = [
"//mediapipe/framework/port:status",
],
)
cc_library(
name = "padding_effect_generator",
srcs = ["padding_effect_generator.cc"],
hdrs = ["padding_effect_generator.h"],
deps = [
"//mediapipe/framework/formats:image_frame",
"//mediapipe/framework/formats:image_frame_opencv",
"//mediapipe/framework/port:commandlineflags",
"//mediapipe/framework/port:opencv_core",
"//mediapipe/framework/port:opencv_imgproc",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
],
)
cc_library(
name = "scene_camera_motion_analyzer",
srcs = ["scene_camera_motion_analyzer.cc"],
hdrs = ["scene_camera_motion_analyzer.h"],
deps = [
":cropping_cc_proto",
":focus_point_cc_proto",
":math_utils",
":piecewise_linear_function",
":utils",
"//mediapipe/examples/desktop/autoflip:autoflip_messages_cc_proto",
"//mediapipe/framework:timestamp",
"//mediapipe/framework/port:integral_types",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"@com_google_absl//absl/memory",
"@com_google_absl//absl/strings",
"@com_google_absl//absl/strings:str_format",
],
)
cc_library(
name = "scene_cropping_viz",
srcs = ["scene_cropping_viz.cc"],
hdrs = ["scene_cropping_viz.h"],
deps = [
":cropping_cc_proto",
":focus_point_cc_proto",
"//mediapipe/examples/desktop/autoflip:autoflip_messages_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/formats:image_format_cc_proto",
"//mediapipe/framework/formats:image_frame",
"//mediapipe/framework/formats:image_frame_opencv",
"//mediapipe/framework/port:opencv_core",
"//mediapipe/framework/port:opencv_imgproc",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"@com_google_absl//absl/memory",
],
)
cc_library(
name = "polynomial_regression_path_solver",
srcs = ["polynomial_regression_path_solver.cc"],
hdrs = ["polynomial_regression_path_solver.h"],
deps = [
":focus_point_cc_proto",
"//mediapipe/framework/port:opencv_core",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"@ceres_solver//:ceres",
],
)
cc_library(
name = "scene_cropper",
srcs = ["scene_cropper.cc"],
hdrs = ["scene_cropper.h"],
deps = [
":cropping_cc_proto",
":focus_point_cc_proto",
":polynomial_regression_path_solver",
":utils",
"//mediapipe/framework/port:opencv_core",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"@com_google_absl//absl/memory",
],
)
cc_library(
name = "utils",
srcs = ["utils.cc"],
hdrs = ["utils.h"],
deps = [
":cropping_cc_proto",
":math_utils",
":piecewise_linear_function",
"//mediapipe/examples/desktop/autoflip:autoflip_messages_cc_proto",
"//mediapipe/framework/port:opencv_core",
"//mediapipe/framework/port:opencv_imgproc",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"@com_google_absl//absl/memory",
],
)
cc_test(
name = "frame_crop_region_computer_test",
srcs = ["frame_crop_region_computer_test.cc"],
deps = [
":cropping_cc_proto",
":frame_crop_region_computer",
"//mediapipe/examples/desktop/autoflip:autoflip_messages_cc_proto",
"//mediapipe/framework/port:gtest_main",
"@com_google_absl//absl/memory",
],
)
cc_test(
name = "piecewise_linear_function_test",
srcs = ["piecewise_linear_function_test.cc"],
deps = [
":piecewise_linear_function",
"//mediapipe/framework/port:gtest_main",
"//mediapipe/framework/port:status",
],
)
cc_test(
name = "scene_camera_motion_analyzer_test",
srcs = ["scene_camera_motion_analyzer_test.cc"],
data = [
"//mediapipe/examples/desktop/autoflip/quality/testdata:camera_motion_tracking_scene_frame_results.csv",
],
deps = [
":focus_point_cc_proto",
":piecewise_linear_function",
":scene_camera_motion_analyzer",
"//mediapipe/examples/desktop/autoflip:autoflip_messages_cc_proto",
"//mediapipe/framework/deps:file_path",
"//mediapipe/framework/port:file_helpers",
"//mediapipe/framework/port:gtest_main",
"//mediapipe/framework/port:status",
"@com_google_absl//absl/strings",
],
)
cc_test(
name = "padding_effect_generator_test",
srcs = ["padding_effect_generator_test.cc"],
data = [
"//mediapipe/examples/desktop/autoflip/quality/testdata:google.jpg",
"//mediapipe/examples/desktop/autoflip/quality/testdata:result_0.3.jpg",
"//mediapipe/examples/desktop/autoflip/quality/testdata:result_0.3_solid_background.jpg",
"//mediapipe/examples/desktop/autoflip/quality/testdata:result_0.6.jpg",
"//mediapipe/examples/desktop/autoflip/quality/testdata:result_0.6_solid_background.jpg",
"//mediapipe/examples/desktop/autoflip/quality/testdata:result_1.6.jpg",
"//mediapipe/examples/desktop/autoflip/quality/testdata:result_1.6_solid_background.jpg",
"//mediapipe/examples/desktop/autoflip/quality/testdata:result_1.jpg",
"//mediapipe/examples/desktop/autoflip/quality/testdata:result_1_solid_background.jpg",
"//mediapipe/examples/desktop/autoflip/quality/testdata:result_2.5.jpg",
"//mediapipe/examples/desktop/autoflip/quality/testdata:result_2.5_solid_background.jpg",
"//mediapipe/examples/desktop/autoflip/quality/testdata:result_3.4.jpg",
"//mediapipe/examples/desktop/autoflip/quality/testdata:result_3.4_solid_background.jpg",
],
deps = [
":padding_effect_generator",
"//mediapipe/framework/deps:file_path",
"//mediapipe/framework/formats:image_frame",
"//mediapipe/framework/formats:image_frame_opencv",
"//mediapipe/framework/port:commandlineflags",
"//mediapipe/framework/port:file_helpers",
"//mediapipe/framework/port:gtest_main",
"//mediapipe/framework/port:opencv_imgcodecs",
"//mediapipe/framework/port:opencv_imgproc",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"@com_google_absl//absl/strings",
],
)
cc_test(
name = "polynomial_regression_path_solver_test",
srcs = ["polynomial_regression_path_solver_test.cc"],
deps = [
":focus_point_cc_proto",
":polynomial_regression_path_solver",
"//mediapipe/framework/port:gtest_main",
"//mediapipe/framework/port:opencv_core",
"//mediapipe/framework/port:status",
],
)
cc_test(
name = "scene_cropper_test",
size = "small",
timeout = "short",
srcs = ["scene_cropper_test.cc"],
deps = [
":focus_point_cc_proto",
":scene_cropper",
"//mediapipe/framework/port:gtest_main",
"//mediapipe/framework/port:opencv_core",
],
)
cc_test(
name = "utils_test",
srcs = ["utils_test.cc"],
deps = [
":cropping_cc_proto",
":utils",
"//mediapipe/examples/desktop/autoflip:autoflip_messages_cc_proto",
"//mediapipe/framework/port:gtest_main",
"//mediapipe/framework/port:integral_types",
"//mediapipe/framework/port:opencv_core",
],
)
proto_library(
name = "visual_scorer_proto",
srcs = ["visual_scorer.proto"],
)
mediapipe_cc_proto_library(
name = "visual_scorer_cc_proto",
srcs = ["visual_scorer.proto"],
visibility = ["//mediapipe/examples:__subpackages__"],
deps = [":visual_scorer_proto"],
)
cc_library(
name = "visual_scorer",
srcs = ["visual_scorer.cc"],
hdrs = ["visual_scorer.h"],
deps = [
":visual_scorer_cc_proto",
"//mediapipe/examples/desktop/autoflip:autoflip_messages_cc_proto",
"//mediapipe/framework/port:opencv_core",
"//mediapipe/framework/port:opencv_imgproc",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
],
)
cc_test(
name = "visual_scorer_test",
srcs = [
"visual_scorer_test.cc",
],
linkstatic = 1,
deps = [
":visual_scorer",
"//mediapipe/framework/port:gtest_main",
"//mediapipe/framework/port:opencv_core",
"//mediapipe/framework/port:opencv_imgproc",
"//mediapipe/framework/port:parse_text_proto",
"//mediapipe/framework/port:status",
],
)

View File

@ -0,0 +1,217 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe.autoflip;
import "mediapipe/examples/desktop/autoflip/autoflip_messages.proto";
// All relevant information for key frames, including timestamp and detected
// features. This object should be generated by calling PackKeyFrameInfo() in
// the util namespace. It is passed in to ComputeFrameCropRegion().
message KeyFrameInfo {
// Frame timestamp (in microseconds).
optional int64 timestamp_ms = 1;
// Detected features.
optional DetectionSet detections = 2;
}
// User-specified key frame crop options (such as target width and height).
message KeyFrameCropOptions {
// Target crop size.
// Note: if you are using the SceneCroppingCalculator, DO NOT set these fields
// manually, as they will then be overwritten inside the calculator.
optional int32 target_width = 1;
optional int32 target_height = 2;
// Option for how region score is aggregated from individual feature scores.
// TODO: consider merging this enum type into the signal fusing
// calculator.
enum ScoreAggregationType {
// Unknown value (should not be used).
UNKNOWN = 0;
// Takes the score of the feature with maximum score.
MAXIMUM = 1;
// Takes the sum of the scores of the required regions.
SUM_REQUIRED = 2;
// Takes the sum of the scores of all the regions that are fully covered.
SUM_ALL = 3;
// Uses a constant score 1.0 for all crop regions.
CONSTANT = 4;
}
optional ScoreAggregationType score_aggregation_type = 3 [default = SUM_ALL];
// Minimum centered coverage fraction (in length, not area) for a non-required
// region to be included in the crop region. Applies to both dimensions.
optional float non_required_region_min_coverage_fraction = 4 [default = 0.5];
}
// Key frame crop result containing the crop region rectangle, along with
// summary information on the cropping, such as whether all required regions
// could fit inside the target size, and what fraction of non-required regions
// are fully covered. This object is returned by ComputeFrameCropRegion() in
// the FrameCropRegionComputer class.
message KeyFrameCropResult {
// Successfully covers all required features. If there are no required
// regions, this field is set to true.
optional bool are_required_regions_covered_in_target_size = 1;
// Fraction of non-required features covered.
optional float fraction_non_required_covered = 2;
// Whether required crop region is empty (no detections).
optional bool required_region_is_empty = 3;
// Whether (full) crop region is empty (no detections).
optional bool region_is_empty = 4;
// Computed required crop region.
optional Rect required_region = 5;
// Computed (full) crop region.
optional Rect region = 6;
// Score of the computed crop region based on the detected features.
optional float region_score = 7;
}
// Compact processed scene key frame info containing timestamp, center position,
// and score. Each key frame has one SceneKeyFrameCompactInfo in
// SceneKeyFrameCropSummary.
message SceneKeyFrameCompactInfo {
// Key frame timestamp (in microseconds).
optional int64 timestamp_ms = 1;
// Key frame crop region center in the horizontal/vertical directions (in
// pixels).
optional float center_x = 2;
optional float center_y = 3;
// Key frame crop region score.
optional float score = 4;
}
// Summary information for the key frame crop results in a scene. Computed by
// AnalyzeSceneKeyFrameCropResults() in the SceneCameraMotionAnalyzer class.
// Used to decide camera motion type and populate salient point frames.
message SceneKeyFrameCropSummary {
// Scene frame size.
optional int32 scene_frame_width = 1;
optional int32 scene_frame_height = 2;
// Number of key frames in the scene.
optional int32 num_key_frames = 3;
// Scene key frame compact infos.
repeated SceneKeyFrameCompactInfo key_frame_compact_infos = 4;
// The minimum/maximum values of key frames' crop centers in the horizontal/
// vertical directions.
optional float key_frame_center_min_x = 5;
optional float key_frame_center_max_x = 6;
optional float key_frame_center_min_y = 7;
optional float key_frame_center_max_y = 8;
// The union of all the key frame required crop regions. When the camera is
// steady, the crop window is set to cover this union.
optional Rect key_frame_required_crop_region_union = 9;
// The minimum/maximum scores of key frames' crop regions.
optional float key_frame_min_score = 10;
optional float key_frame_max_score = 11;
// Size of the scene's crop window, calculated as the maximum of the target
// size and the largest size of the key frames' crop regions in the scene.
optional int32 crop_window_width = 12;
optional int32 crop_window_height = 13;
// Indicator for whether the scene has any frame with any salient region.
optional bool has_salient_region = 14;
// Indicator for whether the scene has any frame with any required salient
// region.
optional bool has_required_salient_region = 15;
// Percentage of key frames that are successfully cropped (i.e. the crop
// covers all required regions inside the target size).
optional float frame_success_rate = 16;
// Amount of motion in the horizontal/vertical direction (i.e. the horizontal/
// vertical range of the key frame crop centers' position as a fraction of
// frame width/height).
optional float horizontal_motion_amount = 17;
optional float vertical_motion_amount = 18;
}
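// Illustrative example for the motion fields above (not part of the original
// proto): if the key frame crop centers range from x = 100 to x = 400 in a
// scene frame that is 1000 pixels wide, horizontal_motion_amount would be
// (400 - 100) / 1000 = 0.3.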
// Scene camera motion determined by the SceneCameraMotionAnalyzer class.
message SceneCameraMotion {
// Camera focuses on a fixed center throughout the scene.
message SteadyMotion {
// Steady look-at center in horizontal/vertical directions (in pixels).
optional float steady_look_at_center_x = 1;
optional float steady_look_at_center_y = 2;
}
// Camera tracks key frame salient region centers.
message TrackingMotion {
// Fields to be added if necessary.
}
// Camera sweeps from one point to another.
message SweepingMotion {
// Starting and ending center positions for camera sweeping in pixels.
optional float sweep_start_center_x = 1;
optional float sweep_start_center_y = 2;
optional float sweep_end_center_x = 3;
optional float sweep_end_center_y = 4;
}
oneof motion_type {
SteadyMotion steady_motion = 1;
TrackingMotion tracking_motion = 2;
SweepingMotion sweeping_motion = 3;
// Other types that we might support later.
}
}
// User-specified options for analyzing scene camera motion from a collection of
// key frame crop regions.
message SceneCameraMotionAnalyzerOptions {
// If there is small motion within the scene, keep the camera steady at the
// center.
optional float motion_stabilization_threshold_percent = 1 [default = .30];
// Snap to the center if there is small motion and the focus is already close
// to the center.
optional float snap_center_max_distance_percent = 2 [default = .08];
// Maximum weight for a constraint. Scales scores accordingly so that the
// maximum score is equal to this weight.
optional float maximum_salient_point_weight = 3 [default = 100.0];
// Normalized bound for SalientPoints in the frame from the border. This is
// uniformly applied to the left, right, top, and bottom. It should be
// strictly less than 0.5. A narrower bound (closer to 0.5) gives better
// constraint enforcement.
optional float salient_point_bound = 4 [default = 0.48];
// Indicator for whether sweeping is allowed. Note that if a scene can be
// seamlessly padded with solid background color, sweeping will be disabled
// regardless of the value of this flag.
optional bool allow_sweeping = 5 [default = true];
// Minimal scene time span in seconds to allow camera sweeping.
optional float minimum_scene_span_sec_for_sweeping = 6 [default = 1.0];
// If success rate in a scene is less than this, then use camera sweeping.
optional float minimum_success_rate_for_sweeping = 7 [default = 0.4];
// If true, sweep entire frame. Otherwise, sweep the crop window.
optional bool sweep_entire_frame = 8 [default = true];
}
// Video cropping summary information for debugging/statistics.
message VideoCroppingSummary {
message SceneCroppingSummary {
// Scene span in seconds.
optional float start_sec = 1;
optional float end_sec = 2;
// Indicator for whether this scene was cut at a real physical scene
// boundary (as opposed to force flush).
optional bool is_end_of_scene = 3;
// Scene camera motion.
optional SceneCameraMotion camera_motion = 4;
// Indicator for whether the scene is padded.
optional bool is_padded = 5;
}
// Cropping summaries for all the scenes in the video.
repeated SceneCroppingSummary scene_summaries = 1;
}

View File

@ -0,0 +1,79 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe.autoflip;
// Focus point location (normalized w.r.t. frame_width and frame_height, i.e.
// specified in the domain [0, 1] x [0, 1]).
// For TYPE_INCLUDE:
// During retargeting and stabilization, focus points introduce constraints
// that try to keep the normalized location within the rectangle
// frame_size - normalized bounds.
// Soft constraints are used for this, so the weight specifies how
// "important" the focus point is (higher is better).
// In particular for each point p the retargeter introduces two pairs of
// constraints of the form:
// x - slack < width - right
// and x + slack > 0 + left, with slack > 0
// where the weight specifies the importance of the slack.
//
// For TYPE_EXCLUDE_*:
// Similar to above, but constraints are introduced to keep
// the point to the left of the left bound OR the right of the right bound.
// In particular:
// x - slack < left OR
// x + slack >= right
// Similar to above, the weight specifies the importance of the slack.
//
// Note: Choosing too high a weight can lead to jerkiness, as the
// stabilization essentially starts tracking the focus point.
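// Illustrative example (not part of the original comment): a TYPE_INCLUDE
// focus point with norm_point_x = 0.5, left = 0.3 and right = 0.3 asks the
// retargeter to keep the point's x coordinate within the normalized band
// [0.3, 0.7] of the cropped frame, up to a slack that is penalized with the
// given weight.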
message FocusPoint {
// Normalized location of the point (within the domain [0, 1] x [0, 1]).
optional float norm_point_x = 1 [default = 0.0];
optional float norm_point_y = 2 [default = 0.0];
enum FocusPointType {
TYPE_INCLUDE = 1;
TYPE_EXCLUDE_LEFT = 2;
TYPE_EXCLUDE_RIGHT = 3;
}
// Focus point type. By default we try to frame the focus point within
// the bounding box specified by left, bottom, right, top. Alternatively, one
// can choose to exclude the point. For details, see discussion above.
optional FocusPointType type = 11 [default = TYPE_INCLUDE];
// Bounds are specified in normalized coordinates [0, 1], FROM the specified
// border. Opposing bounds (e.g. left and right) may not add to values
// larger than 1.
// The default bounds center the focus point within the middle third of the frame.
optional float left = 3 [default = 0.3];
optional float bottom = 4 [default = 0.3];
optional float right = 9 [default = 0.3];
optional float top = 10 [default = 0.3];
optional float weight = 5 [default = 15];
extensions 20000 to max;
}
// Aggregates FocusPoints for a frame.
message FocusPointFrame {
repeated FocusPoint point = 1;
extensions 20000 to max;
}

View File

@ -0,0 +1,259 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/examples/desktop/autoflip/quality/frame_crop_region_computer.h"
#include <cmath>
#include "mediapipe/examples/desktop/autoflip/quality/utils.h"
#include "mediapipe/framework/port/ret_check.h"
namespace mediapipe {
namespace autoflip {
::mediapipe::Status FrameCropRegionComputer::ExpandSegmentUnderConstraint(
const Segment& segment_to_add, const Segment& base_segment,
const int max_length, Segment* combined_segment,
CoverType* cover_type) const {
RET_CHECK(combined_segment != nullptr) << "Combined segment is null.";
RET_CHECK(cover_type != nullptr) << "Cover type is null.";
const LeftPoint segment_to_add_left = segment_to_add.first;
const RightPoint segment_to_add_right = segment_to_add.second;
RET_CHECK(segment_to_add_right >= segment_to_add_left)
<< "Invalid segment to add.";
const LeftPoint base_segment_left = base_segment.first;
const RightPoint base_segment_right = base_segment.second;
RET_CHECK(base_segment_right >= base_segment_left) << "Invalid base segment.";
const int base_length = base_segment_right - base_segment_left;
RET_CHECK(base_length <= max_length)
<< "Base segment length exceeds max length.";
const int segment_to_add_length = segment_to_add_right - segment_to_add_left;
const int max_leftout_amount =
std::ceil((1.0 - options_.non_required_region_min_coverage_fraction()) *
segment_to_add_length / 2);
const LeftPoint min_coverage_segment_to_add_left =
segment_to_add_left + max_leftout_amount;
const LeftPoint min_coverage_segment_to_add_right =
segment_to_add_right - max_leftout_amount;
LeftPoint combined_segment_left =
std::min(segment_to_add_left, base_segment_left);
RightPoint combined_segment_right =
std::max(segment_to_add_right, base_segment_right);
LeftPoint min_coverage_combined_segment_left =
std::min(min_coverage_segment_to_add_left, base_segment_left);
RightPoint min_coverage_combined_segment_right =
std::max(min_coverage_segment_to_add_right, base_segment_right);
if ((combined_segment_right - combined_segment_left) <= max_length) {
*cover_type = FULLY_COVERED;
} else if (min_coverage_combined_segment_right -
min_coverage_combined_segment_left <=
max_length) {
*cover_type = PARTIALLY_COVERED;
combined_segment_left = min_coverage_combined_segment_left;
combined_segment_right = min_coverage_combined_segment_right;
} else {
*cover_type = NOT_COVERED;
combined_segment_left = base_segment_left;
combined_segment_right = base_segment_right;
}
*combined_segment =
std::make_pair(combined_segment_left, combined_segment_right);
return ::mediapipe::OkStatus();
}
::mediapipe::Status FrameCropRegionComputer::ExpandRectUnderConstraints(
const Rect& rect_to_add, const int max_width, const int max_height,
Rect* base_rect, CoverType* cover_type) const {
RET_CHECK(base_rect != nullptr) << "Base rect is null.";
RET_CHECK(cover_type != nullptr) << "Cover type is null.";
RET_CHECK(base_rect->width() <= max_width &&
base_rect->height() <= max_height)
<< "Base rect already exceeds target size.";
const LeftPoint rect_to_add_left = rect_to_add.x();
const RightPoint rect_to_add_right = rect_to_add.x() + rect_to_add.width();
const LeftPoint rect_to_add_top = rect_to_add.y();
const RightPoint rect_to_add_bottom = rect_to_add.y() + rect_to_add.height();
const LeftPoint base_rect_left = base_rect->x();
const RightPoint base_rect_right = base_rect->x() + base_rect->width();
const LeftPoint base_rect_top = base_rect->y();
const RightPoint base_rect_bottom = base_rect->y() + base_rect->height();
Segment horizontal_combined_segment, vertical_combined_segment;
CoverType horizontal_cover_type, vertical_cover_type;
const auto horizontal_status = ExpandSegmentUnderConstraint(
std::make_pair(rect_to_add_left, rect_to_add_right),
std::make_pair(base_rect_left, base_rect_right), max_width,
&horizontal_combined_segment, &horizontal_cover_type);
MP_RETURN_IF_ERROR(horizontal_status);
const auto vertical_status = ExpandSegmentUnderConstraint(
std::make_pair(rect_to_add_top, rect_to_add_bottom),
std::make_pair(base_rect_top, base_rect_bottom), max_height,
&vertical_combined_segment, &vertical_cover_type);
MP_RETURN_IF_ERROR(vertical_status);
if (horizontal_cover_type == NOT_COVERED ||
vertical_cover_type == NOT_COVERED) {
// Gives up if the rect to add is not covered in at least one direction.
*cover_type = NOT_COVERED;
} else {
// Tries to (partially) cover the new rect to be added.
base_rect->set_x(horizontal_combined_segment.first);
base_rect->set_y(vertical_combined_segment.first);
base_rect->set_width(horizontal_combined_segment.second -
horizontal_combined_segment.first);
base_rect->set_height(vertical_combined_segment.second -
vertical_combined_segment.first);
if (horizontal_cover_type == FULLY_COVERED &&
vertical_cover_type == FULLY_COVERED) {
*cover_type = FULLY_COVERED;
} else {
*cover_type = PARTIALLY_COVERED;
}
}
return ::mediapipe::OkStatus();
}
void FrameCropRegionComputer::UpdateCropRegionScore(
const KeyFrameCropOptions::ScoreAggregationType score_aggregation_type,
const float feature_score, const bool is_required,
float* crop_region_score) {
if (feature_score < 0.0) {
LOG(WARNING) << "Ignoring negative score";
return;
}
switch (score_aggregation_type) {
case KeyFrameCropOptions::MAXIMUM: {
*crop_region_score = std::max(feature_score, *crop_region_score);
break;
}
case KeyFrameCropOptions::SUM_REQUIRED: {
if (is_required) {
*crop_region_score += feature_score;
}
break;
}
case KeyFrameCropOptions::SUM_ALL: {
*crop_region_score += feature_score;
break;
}
case KeyFrameCropOptions::CONSTANT: {
*crop_region_score = 1.0;
break;
}
default: {
LOG(WARNING) << "Unknown CropRegionScoreType " << score_aggregation_type;
break;
}
}
}
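// Illustrative example (not from the original source): given a required
// feature with score 0.4 and two fully covered non-required features with
// scores 0.3 and 0.2, the aggregated crop region score is 0.4 for MAXIMUM,
// 0.4 for SUM_REQUIRED, 0.9 for SUM_ALL, and 1.0 for CONSTANT.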
::mediapipe::Status FrameCropRegionComputer::ComputeFrameCropRegion(
const KeyFrameInfo& frame_info, KeyFrameCropResult* crop_result) const {
RET_CHECK(crop_result != nullptr) << "KeyFrameCropResult is null.";
// Sorts required and non-required regions.
std::vector<SalientRegion> required_regions, non_required_regions;
const auto sort_status = SortDetections(
frame_info.detections(), &required_regions, &non_required_regions);
MP_RETURN_IF_ERROR(sort_status);
int target_width = options_.target_width();
int target_height = options_.target_height();
auto* region = crop_result->mutable_region();
RET_CHECK(region != nullptr) << "Crop region is null.";
bool crop_region_is_empty = true;
float crop_region_score = 0.0;
// Gets union of all required regions.
for (int i = 0; i < required_regions.size(); ++i) {
const Rect& required_region = required_regions[i].location();
if (crop_region_is_empty) {
*region = required_region;
crop_region_is_empty = false;
} else {
RectUnion(required_region, region);
}
UpdateCropRegionScore(options_.score_aggregation_type(),
required_regions[i].score(), true,
&crop_region_score);
}
crop_result->set_required_region_is_empty(crop_region_is_empty);
if (!crop_region_is_empty) {
*crop_result->mutable_required_region() = *region;
crop_result->set_are_required_regions_covered_in_target_size(
region->width() <= target_width && region->height() <= target_height);
target_width = std::max(target_width, region->width());
target_height = std::max(target_height, region->height());
} else {
crop_result->set_are_required_regions_covered_in_target_size(true);
}
// Tries to fit non-required regions.
int num_covered = 0;
for (int i = 0; i < non_required_regions.size(); ++i) {
const Rect& non_required_region = non_required_regions[i].location();
CoverType cover_type = NOT_COVERED;
if (crop_region_is_empty) {
// If the crop region is empty, starts from an empty base region at the
// center of this non-required region and tries to expand it to cover the
// region.
region->set_x(non_required_region.x() + non_required_region.width() / 2);
region->set_y(non_required_region.y() + non_required_region.height() / 2);
region->set_width(0);
region->set_height(0);
MP_RETURN_IF_ERROR(ExpandRectUnderConstraints(non_required_region,
target_width, target_height,
region, &cover_type));
if (cover_type != NOT_COVERED) {
crop_region_is_empty = false;
}
} else {
// Otherwise tries to expand the crop region to cover the non-required
// region under target size constraint.
MP_RETURN_IF_ERROR(ExpandRectUnderConstraints(non_required_region,
target_width, target_height,
region, &cover_type));
}
// Updates number of covered non-required regions and score.
if (cover_type == FULLY_COVERED) {
num_covered++;
UpdateCropRegionScore(options_.score_aggregation_type(),
non_required_regions[i].score(), false,
&crop_region_score);
}
}
const float fraction_covered =
non_required_regions.empty()
? 0.0
: static_cast<float>(num_covered) / non_required_regions.size();
crop_result->set_fraction_non_required_covered(fraction_covered);
crop_result->set_region_is_empty(crop_region_is_empty);
crop_result->set_region_score(crop_region_score);
return ::mediapipe::OkStatus();
}
} // namespace autoflip
} // namespace mediapipe

View File

@ -0,0 +1,110 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MEDIAPIPE_EXAMPLES_DESKTOP_AUTOFLIP_QUALITY_FRAME_CROP_REGION_COMPUTER_H_
#define MEDIAPIPE_EXAMPLES_DESKTOP_AUTOFLIP_QUALITY_FRAME_CROP_REGION_COMPUTER_H_
#include <vector>
#include "mediapipe/examples/desktop/autoflip/autoflip_messages.pb.h"
#include "mediapipe/examples/desktop/autoflip/quality/cropping.pb.h"
#include "mediapipe/framework/port/status.h"
namespace mediapipe {
namespace autoflip {
// This class computes per-frame crop regions based on crop frame options.
// It aggregates required regions and then tries to fit in non-required regions
// with best effort. It does not make use of static features.
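// Typical usage (an illustrative sketch, not taken from the original sources;
// key_frame_info is assumed to be a previously populated KeyFrameInfo):
//   KeyFrameCropOptions options;
//   options.set_target_width(500);
//   options.set_target_height(1000);
//   FrameCropRegionComputer computer(options);
//   KeyFrameCropResult crop_result;
//   MP_RETURN_IF_ERROR(
//       computer.ComputeFrameCropRegion(key_frame_info, &crop_result));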
class FrameCropRegionComputer {
public:
FrameCropRegionComputer() = delete;
explicit FrameCropRegionComputer(
const KeyFrameCropOptions& crop_frame_options)
: options_(crop_frame_options) {}
~FrameCropRegionComputer() {}
// Computes the crop region for the key frame using the crop options. The crop
// region covers all the required regions, and attempts to cover the
// non-required regions with best effort. Note: this function does not
// consider static features, and simply tries to fit the detected features
// within the target frame size. The score of the crop region is aggregated
// from individual feature scores given the score aggregation type.
::mediapipe::Status ComputeFrameCropRegion(
const KeyFrameInfo& frame_info, KeyFrameCropResult* crop_result) const;
protected:
// A segment is a 1-d object defined by its left and right point.
using LeftPoint = int;
using RightPoint = int;
using Segment = std::pair<LeftPoint, RightPoint>;
// How much a segment is covered in the combined segment.
enum CoverType {
FULLY_COVERED = 1,
PARTIALLY_COVERED = 2,
NOT_COVERED = 3,
};
// Expands a base segment to cover a segment to be added given maximum length
// constraint. The operation is best-effort. The resulting enlarged segment is
// set in the returned combined segment. Returns a CoverType to indicate the
// coverage of the segment to be added in the combined segment.
// There are 3 cases:
// case 1: the length of the union of the two segments is not larger than
// the maximum length.
// In this case the combined segment is simply the union, and cover
// type is FULLY_COVERED.
// case 2: the union of the two segments exceeds the maximum length, but the
// union of the base segment and required minimum centered fraction
// of the new segment fits in the maximum length.
// In this case the combined segment is this latter union, and cover
// type is PARTIALLY_COVERED.
// case 3: the union of the base segment and required minimum centered
// fraction of the new segment exceeds the maximum length.
// In this case the combined segment is the base segment, and cover
// type is NOT_COVERED.
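// Worked example (illustrative, assuming max_length = 10 and
// non_required_region_min_coverage_fraction = 0.5): with base segment
// [4, 8] and segment to add [0, 16], the full union [0, 16] exceeds the
// maximum length, but the centered half of the new segment is [4, 12], so
// the combined segment becomes [4, 12] with cover type PARTIALLY_COVERED.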
::mediapipe::Status ExpandSegmentUnderConstraint(
const Segment& segment_to_add, const Segment& base_segment,
const int max_length, Segment* combined_segment,
CoverType* cover_type) const;
// Expands a base rectangle to cover a new rectangle to be added under width
// and height constraints. The operation is best-effort. It considers
// horizontal and vertical directions separately, using the
// ExpandSegmentUnderConstraint function for each direction. The cover type is
// FULLY_COVERED if the new rectangle is fully covered in both directions,
// PARTIALLY_COVERED if it is at least partially covered in both directions,
// and NOT_COVERED if it is not covered in either direction.
::mediapipe::Status ExpandRectUnderConstraints(const Rect& rect_to_add,
const int max_width,
const int max_height,
Rect* base_rect,
CoverType* cover_type) const;
// Updates crop region score given current feature score, whether the feature
// is required, and the score aggregation type. Ignores negative scores.
static void UpdateCropRegionScore(
const KeyFrameCropOptions::ScoreAggregationType score_aggregation_type,
const float feature_score, const bool is_required,
float* crop_region_score);
private:
// Crop frame options.
KeyFrameCropOptions options_;
};
} // namespace autoflip
} // namespace mediapipe
#endif // MEDIAPIPE_EXAMPLES_DESKTOP_AUTOFLIP_QUALITY_FRAME_CROP_REGION_COMPUTER_H_

View File

@ -0,0 +1,579 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/examples/desktop/autoflip/quality/frame_crop_region_computer.h"
#include <memory>
#include "absl/memory/memory.h"
#include "mediapipe/examples/desktop/autoflip/autoflip_messages.pb.h"
#include "mediapipe/examples/desktop/autoflip/quality/cropping.pb.h"
#include "mediapipe/framework/port/gmock.h"
#include "mediapipe/framework/port/gtest.h"
#include "mediapipe/framework/port/status_matchers.h"
namespace mediapipe {
namespace autoflip {
using ::testing::HasSubstr;
const int kSegmentMaxLength = 10;
const int kTargetWidth = 500;
const int kTargetHeight = 1000;
// Makes a rectangle given the corner (x, y) and the size (width, height).
Rect MakeRect(const int x, const int y, const int width, const int height) {
Rect rect;
rect.set_x(x);
rect.set_y(y);
rect.set_width(width);
rect.set_height(height);
return rect;
}
// Adds a detection to the key frame info given its location, whether it is
// required, and its score. The score defaults to 1.0.
void AddDetection(const Rect& rect, const bool is_required,
KeyFrameInfo* key_frame_info, const float score = 1.0) {
auto* detection = key_frame_info->mutable_detections()->add_detections();
*(detection->mutable_location()) = rect;
detection->set_score(score);
detection->set_is_required(is_required);
}
// Makes key frame crop options given target width and height.
KeyFrameCropOptions MakeKeyFrameCropOptions(const int target_width,
const int target_height) {
KeyFrameCropOptions options;
options.set_target_width(target_width);
options.set_target_height(target_height);
return options;
}
// Checks whether rectangle a is inside rectangle b.
bool CheckRectIsInside(const Rect& rect_a, const Rect& rect_b) {
return (rect_b.x() <= rect_a.x() && rect_b.y() <= rect_a.y() &&
rect_b.x() + rect_b.width() >= rect_a.x() + rect_a.width() &&
rect_b.y() + rect_b.height() >= rect_a.y() + rect_a.height());
}
// Checks whether two rectangles are equal.
bool CheckRectsEqual(const Rect& rect1, const Rect& rect2) {
return (rect1.x() == rect2.x() && rect1.y() == rect2.y() &&
rect1.width() == rect2.width() && rect1.height() == rect2.height());
}
// Checks whether two rectangles have non-zero overlapping area.
bool CheckRectsOverlap(const Rect& rect1, const Rect& rect2) {
const int x1_left = rect1.x(), x1_right = rect1.x() + rect1.width();
const int y1_top = rect1.y(), y1_bottom = rect1.y() + rect1.height();
const int x2_left = rect2.x(), x2_right = rect2.x() + rect2.width();
const int y2_top = rect2.y(), y2_bottom = rect2.y() + rect2.height();
const int x_left = std::max(x1_left, x2_left);
const int x_right = std::min(x1_right, x2_right);
const int y_top = std::max(y1_top, y2_top);
const int y_bottom = std::min(y1_bottom, y2_bottom);
return (x_right > x_left && y_bottom > y_top);
}
// Checks that all the required regions in the detections in KeyFrameInfo are
// covered in the KeyFrameCropResult.
void CheckRequiredRegionsAreCovered(const KeyFrameInfo& key_frame_info,
const KeyFrameCropResult& result) {
bool has_required = false;
for (int i = 0; i < key_frame_info.detections().detections_size(); ++i) {
const auto& detection = key_frame_info.detections().detections(i);
if (detection.is_required()) {
has_required = true;
EXPECT_TRUE(
CheckRectIsInside(detection.location(), result.required_region()));
}
}
EXPECT_EQ(has_required, !result.required_region_is_empty());
if (has_required) {
EXPECT_FALSE(result.region_is_empty());
EXPECT_TRUE(CheckRectIsInside(result.required_region(), result.region()));
}
}
// Testable class that can access protected types and methods in the class.
class TestableFrameCropRegionComputer : public FrameCropRegionComputer {
public:
explicit TestableFrameCropRegionComputer(const KeyFrameCropOptions& options)
: FrameCropRegionComputer(options) {}
using FrameCropRegionComputer::CoverType;
using FrameCropRegionComputer::ExpandRectUnderConstraints;
using FrameCropRegionComputer::ExpandSegmentUnderConstraint;
using FrameCropRegionComputer::FULLY_COVERED;
using FrameCropRegionComputer::LeftPoint; // int
using FrameCropRegionComputer::NOT_COVERED;
using FrameCropRegionComputer::PARTIALLY_COVERED;
using FrameCropRegionComputer::RightPoint; // int
using FrameCropRegionComputer::Segment; // std::pair<int, int>
using FrameCropRegionComputer::UpdateCropRegionScore;
// Makes a segment from two endpoints.
static Segment MakeSegment(const LeftPoint left, const RightPoint right) {
return std::make_pair(left, right);
}
// Checks that two segments are equal.
static bool CheckSegmentsEqual(const Segment& segment1,
const Segment& segment2) {
return (segment1.first == segment2.first &&
segment1.second == segment2.second);
}
};
using TestClass = TestableFrameCropRegionComputer;
// Returns an instance of the testable class given
// non_required_region_min_coverage_fraction.
std::unique_ptr<TestClass> GetTestableClass(
const float non_required_region_min_coverage_fraction = 0.5) {
KeyFrameCropOptions options;
options.set_non_required_region_min_coverage_fraction(
non_required_region_min_coverage_fraction);
auto test_class = absl::make_unique<TestClass>(options);
return test_class;
}
// Checks that ExpandSegmentUnderConstraint checks output pointers are not null.
TEST(FrameCropRegionComputerTest, ExpandSegmentUnderConstraintCheckNull) {
auto test_class = GetTestableClass();
TestClass::CoverType cover_type;
TestClass::Segment base_segment = TestClass::MakeSegment(10, 15);
TestClass::Segment segment_to_add = TestClass::MakeSegment(5, 8);
TestClass::Segment combined_segment;
// Combined segment is null.
auto status = test_class->ExpandSegmentUnderConstraint(
segment_to_add, base_segment, kSegmentMaxLength, nullptr, &cover_type);
EXPECT_FALSE(status.ok());
EXPECT_THAT(status.ToString(), HasSubstr("Combined segment is null."));
// Cover type is null.
status = test_class->ExpandSegmentUnderConstraint(
segment_to_add, base_segment, kSegmentMaxLength, &combined_segment,
nullptr);
EXPECT_FALSE(status.ok());
EXPECT_THAT(status.ToString(), HasSubstr("Cover type is null."));
}
// Checks that ExpandSegmentUnderConstraint checks input segments are valid.
TEST(FrameCropRegionComputerTest, ExpandSegmentUnderConstraintCheckValid) {
auto test_class = GetTestableClass();
TestClass::CoverType cover_type;
TestClass::Segment combined_segment;
// Invalid base segment.
TestClass::Segment base_segment = TestClass::MakeSegment(15, 10);
TestClass::Segment segment_to_add = TestClass::MakeSegment(5, 8);
auto status = test_class->ExpandSegmentUnderConstraint(
segment_to_add, base_segment, kSegmentMaxLength, &combined_segment,
&cover_type);
EXPECT_FALSE(status.ok());
EXPECT_THAT(status.ToString(), HasSubstr("Invalid base segment."));
// Invalid segment to add.
base_segment = TestClass::MakeSegment(10, 15);
segment_to_add = TestClass::MakeSegment(8, 5);
status = test_class->ExpandSegmentUnderConstraint(
segment_to_add, base_segment, kSegmentMaxLength, &combined_segment,
&cover_type);
EXPECT_FALSE(status.ok());
EXPECT_THAT(status.ToString(), HasSubstr("Invalid segment to add."));
// Base segment exceeds max length.
base_segment = TestClass::MakeSegment(10, 100);
segment_to_add = TestClass::MakeSegment(5, 8);
status = test_class->ExpandSegmentUnderConstraint(
segment_to_add, base_segment, kSegmentMaxLength, &combined_segment,
&cover_type);
EXPECT_FALSE(status.ok());
EXPECT_THAT(status.ToString(),
HasSubstr("Base segment length exceeds max length."));
}
// Checks that ExpandSegmentUnderConstraint handles case 1 properly: the length
// of the union of the two segments is not larger than the maximum length.
TEST(FrameCropRegionComputerTest, ExpandSegmentUnderConstraintCase1) {
auto test_class = GetTestableClass();
TestClass::Segment combined_segment;
TestClass::CoverType cover_type;
TestClass::Segment base_segment = TestClass::MakeSegment(5, 10);
TestClass::Segment segment_to_add = TestClass::MakeSegment(3, 8);
MP_EXPECT_OK(test_class->ExpandSegmentUnderConstraint(
segment_to_add, base_segment, kSegmentMaxLength, &combined_segment,
&cover_type));
EXPECT_EQ(cover_type, TestClass::FULLY_COVERED);
EXPECT_TRUE(TestClass::CheckSegmentsEqual(combined_segment,
TestClass::MakeSegment(3, 10)));
}
// Checks that ExpandSegmentUnderConstraint handles case 2 properly: the union
// of the two segments exceeds the maximum length, but the union of the base
// segment with the minimum coverage fraction of the new segment is within the
// maximum length.
TEST(FrameCropRegionComputerTest, ExpandSegmentUnderConstraintCase2) {
TestClass::Segment combined_segment;
TestClass::CoverType cover_type;
TestClass::Segment base_segment = TestClass::MakeSegment(4, 8);
TestClass::Segment segment_to_add = TestClass::MakeSegment(0, 16);
auto test_class = GetTestableClass();
MP_EXPECT_OK(test_class->ExpandSegmentUnderConstraint(
segment_to_add, base_segment, kSegmentMaxLength, &combined_segment,
&cover_type));
EXPECT_EQ(cover_type, TestClass::PARTIALLY_COVERED);
EXPECT_TRUE(TestClass::CheckSegmentsEqual(combined_segment,
TestClass::MakeSegment(4, 12)));
}
// Checks that ExpandSegmentUnderConstraint handles case 3 properly: the union
// of the base segment with the minimum coverage fraction of the new segment
// exceeds the maximum length.
TEST(FrameCropRegionComputerTest, ExpandSegmentUnderConstraintCase3) {
TestClass::Segment combined_segment;
TestClass::CoverType cover_type;
auto test_class = GetTestableClass();
TestClass::Segment base_segment = TestClass::MakeSegment(6, 14);
TestClass::Segment segment_to_add = TestClass::MakeSegment(0, 4);
MP_EXPECT_OK(test_class->ExpandSegmentUnderConstraint(
segment_to_add, base_segment, kSegmentMaxLength, &combined_segment,
&cover_type));
EXPECT_EQ(cover_type, TestClass::NOT_COVERED);
EXPECT_TRUE(TestClass::CheckSegmentsEqual(combined_segment, base_segment));
}
// Checks that ExpandRectUnderConstraints checks output pointers are not null.
TEST(FrameCropRegionComputerTest, ExpandRectUnderConstraintsChecksNotNull) {
auto test_class = GetTestableClass();
TestClass::CoverType cover_type;
Rect base_rect, rect_to_add;
// Base rect is null.
auto status = test_class->ExpandRectUnderConstraints(
rect_to_add, kTargetWidth, kTargetHeight, nullptr, &cover_type);
EXPECT_FALSE(status.ok());
EXPECT_THAT(status.ToString(), HasSubstr("Base rect is null."));
// Cover type is null.
status = test_class->ExpandRectUnderConstraints(
rect_to_add, kTargetWidth, kTargetHeight, &base_rect, nullptr);
EXPECT_FALSE(status.ok());
EXPECT_THAT(status.ToString(), HasSubstr("Cover type is null."));
}
// Checks that ExpandRectUnderConstraints checks base rect is valid.
TEST(FrameCropRegionComputerTest, ExpandRectUnderConstraintsChecksBaseValid) {
auto test_class = GetTestableClass();
TestClass::CoverType cover_type;
Rect base_rect = MakeRect(0, 0, 2 * kTargetWidth, 2 * kTargetHeight);
Rect rect_to_add;
const auto status = test_class->ExpandRectUnderConstraints(
rect_to_add, kTargetWidth, kTargetHeight, &base_rect, &cover_type);
EXPECT_FALSE(status.ok());
EXPECT_THAT(status.ToString(),
HasSubstr("Base rect already exceeds target size."));
}
// Checks that ExpandRectUnderConstraints properly handles the case where the
// rectangle to be added can be fully covered.
TEST(FrameCropRegionComputerTest, ExpandRectUnderConstraintsFullyCovered) {
auto test_class = GetTestableClass();
TestClass::CoverType cover_type;
Rect base_rect = MakeRect(0, 0, 50, 50);
Rect rect_to_add = MakeRect(30, 30, 30, 30);
MP_EXPECT_OK(test_class->ExpandRectUnderConstraints(
rect_to_add, kTargetWidth, kTargetHeight, &base_rect, &cover_type));
EXPECT_EQ(cover_type, TestClass::FULLY_COVERED);
EXPECT_TRUE(CheckRectsEqual(base_rect, MakeRect(0, 0, 60, 60)));
}
// Checks that ExpandRectUnderConstraints properly handles the case where the
// rectangle to be added can be partially covered.
TEST(FrameCropRegionComputerTest, ExpandRectUnderConstraintsPartiallyCovered) {
auto test_class = GetTestableClass();
TestClass::CoverType cover_type;
// Rectangle to be added can be partially covered in both dimensions.
Rect base_rect = MakeRect(0, 0, 500, 500);
Rect rect_to_add = MakeRect(0, 300, 600, 900);
MP_EXPECT_OK(test_class->ExpandRectUnderConstraints(
rect_to_add, kTargetWidth, kTargetHeight, &base_rect, &cover_type));
EXPECT_EQ(cover_type, TestClass::PARTIALLY_COVERED);
EXPECT_TRUE(CheckRectsEqual(base_rect, MakeRect(0, 0, 500, 975)));
// Rectangle to be added can be fully covered in one dimension and partially
// covered in the other dimension.
base_rect = MakeRect(0, 0, 400, 500);
rect_to_add = MakeRect(100, 300, 400, 900);
MP_EXPECT_OK(test_class->ExpandRectUnderConstraints(
rect_to_add, kTargetWidth, kTargetHeight, &base_rect, &cover_type));
EXPECT_EQ(cover_type, TestClass::PARTIALLY_COVERED);
EXPECT_TRUE(CheckRectsEqual(base_rect, MakeRect(0, 0, 500, 975)));
}
// Checks that ExpandRectUnderConstraints properly handles the case where the
// rectangle to be added cannot be covered.
TEST(FrameCropRegionComputerTest, ExpandRectUnderConstraintsNotCovered) {
TestClass::CoverType cover_type;
auto test_class = GetTestableClass();
Rect base_rect = MakeRect(0, 0, 500, 500);
Rect rect_to_add = MakeRect(550, 300, 100, 900);
MP_EXPECT_OK(test_class->ExpandRectUnderConstraints(
rect_to_add, kTargetWidth, kTargetHeight, &base_rect, &cover_type));
EXPECT_EQ(cover_type, TestClass::NOT_COVERED); // x expansion exceeds max width
EXPECT_TRUE(CheckRectsEqual(base_rect, MakeRect(0, 0, 500, 500)));
}
// Checks that ComputeFrameCropRegion handles the case of empty detections.
TEST(FrameCropRegionComputerTest, HandlesEmptyDetections) {
const auto options = MakeKeyFrameCropOptions(kTargetWidth, kTargetHeight);
FrameCropRegionComputer computer(options);
KeyFrameInfo key_frame_info;
KeyFrameCropResult crop_result;
MP_EXPECT_OK(computer.ComputeFrameCropRegion(key_frame_info, &crop_result));
EXPECT_TRUE(crop_result.region_is_empty());
}
// Checks that ComputeFrameCropRegion covers required regions when their union
// is within target size.
TEST(FrameCropRegionComputerTest, CoversRequiredWithinTargetSize) {
const auto options = MakeKeyFrameCropOptions(kTargetWidth, kTargetHeight);
FrameCropRegionComputer computer(options);
KeyFrameInfo key_frame_info;
AddDetection(MakeRect(100, 100, 100, 200), true, &key_frame_info);
AddDetection(MakeRect(200, 400, 300, 500), true, &key_frame_info);
KeyFrameCropResult crop_result;
MP_EXPECT_OK(computer.ComputeFrameCropRegion(key_frame_info, &crop_result));
CheckRequiredRegionsAreCovered(key_frame_info, crop_result);
EXPECT_TRUE(CheckRectsEqual(MakeRect(100, 100, 400, 800),
crop_result.required_region()));
EXPECT_TRUE(
CheckRectsEqual(crop_result.region(), crop_result.required_region()));
EXPECT_TRUE(crop_result.are_required_regions_covered_in_target_size());
}
// Checks that ComputeFrameCropRegion covers required regions when their union
// exceeds target size.
TEST(FrameCropRegionComputerTest, CoversRequiredExceedingTargetSize) {
const auto options = MakeKeyFrameCropOptions(kTargetWidth, kTargetHeight);
FrameCropRegionComputer computer(options);
KeyFrameInfo key_frame_info;
AddDetection(MakeRect(0, 0, 100, 500), true, &key_frame_info);
AddDetection(MakeRect(200, 400, 500, 500), true, &key_frame_info);
KeyFrameCropResult crop_result;
MP_EXPECT_OK(computer.ComputeFrameCropRegion(key_frame_info, &crop_result));
CheckRequiredRegionsAreCovered(key_frame_info, crop_result);
EXPECT_TRUE(CheckRectsEqual(MakeRect(0, 0, 700, 900), crop_result.region()));
EXPECT_TRUE(
CheckRectsEqual(crop_result.region(), crop_result.required_region()));
EXPECT_FALSE(crop_result.are_required_regions_covered_in_target_size());
}
// Checks that ComputeFrameCropRegion handles the case of only non-required
// regions and the region fits in the target size.
TEST(FrameCropRegionComputerTest,
HandlesOnlyNonRequiredRegionsInsideTargetSize) {
const auto options = MakeKeyFrameCropOptions(kTargetWidth, kTargetHeight);
FrameCropRegionComputer computer(options);
KeyFrameInfo key_frame_info;
AddDetection(MakeRect(300, 600, 100, 100), false, &key_frame_info);
KeyFrameCropResult crop_result;
MP_EXPECT_OK(computer.ComputeFrameCropRegion(key_frame_info, &crop_result));
EXPECT_TRUE(crop_result.required_region_is_empty());
EXPECT_FALSE(crop_result.region_is_empty());
EXPECT_TRUE(
CheckRectsEqual(key_frame_info.detections().detections(0).location(),
crop_result.region()));
}
// Checks that ComputeFrameCropRegion handles the case of only non-required
// regions and the region exceeds the target size.
TEST(FrameCropRegionComputerTest,
HandlesOnlyNonRequiredRegionsExceedingTargetSize) {
const auto options = MakeKeyFrameCropOptions(kTargetWidth, kTargetHeight);
FrameCropRegionComputer computer(options);
KeyFrameInfo key_frame_info;
AddDetection(MakeRect(300, 600, 700, 100), false, &key_frame_info);
KeyFrameCropResult crop_result;
MP_EXPECT_OK(computer.ComputeFrameCropRegion(key_frame_info, &crop_result));
EXPECT_TRUE(crop_result.required_region_is_empty());
EXPECT_FALSE(crop_result.region_is_empty());
EXPECT_TRUE(
CheckRectsEqual(MakeRect(475, 600, 350, 100), crop_result.region()));
EXPECT_EQ(crop_result.fraction_non_required_covered(), 0.0);
EXPECT_TRUE(
CheckRectIsInside(crop_result.region(),
key_frame_info.detections().detections(0).location()));
}
// Checks that ComputeFrameCropRegion covers non-required regions when their
// union fits within target size.
TEST(FrameCropRegionComputerTest, CoversNonRequiredInsideTargetSize) {
const auto options = MakeKeyFrameCropOptions(kTargetWidth, kTargetHeight);
FrameCropRegionComputer computer(options);
KeyFrameInfo key_frame_info;
AddDetection(MakeRect(0, 0, 100, 500), true, &key_frame_info);
AddDetection(MakeRect(300, 600, 100, 100), false, &key_frame_info);
KeyFrameCropResult crop_result;
MP_EXPECT_OK(computer.ComputeFrameCropRegion(key_frame_info, &crop_result));
CheckRequiredRegionsAreCovered(key_frame_info, crop_result);
EXPECT_TRUE(CheckRectsEqual(MakeRect(0, 0, 400, 700), crop_result.region()));
EXPECT_TRUE(crop_result.are_required_regions_covered_in_target_size());
EXPECT_EQ(crop_result.fraction_non_required_covered(), 1.0);
for (int i = 0; i < key_frame_info.detections().detections_size(); ++i) {
EXPECT_TRUE(
CheckRectIsInside(key_frame_info.detections().detections(i).location(),
crop_result.region()));
}
}
// Checks that ComputeFrameCropRegion does not cover non-required regions that
// are outside the target size.
TEST(FrameCropRegionComputerTest, DoesNotCoverNonRequiredExceedingTargetSize) {
const auto options = MakeKeyFrameCropOptions(kTargetWidth, kTargetHeight);
FrameCropRegionComputer computer(options);
KeyFrameInfo key_frame_info;
AddDetection(MakeRect(0, 0, 500, 1000), true, &key_frame_info);
AddDetection(MakeRect(500, 0, 100, 100), false, &key_frame_info);
KeyFrameCropResult crop_result;
MP_EXPECT_OK(computer.ComputeFrameCropRegion(key_frame_info, &crop_result));
CheckRequiredRegionsAreCovered(key_frame_info, crop_result);
EXPECT_TRUE(CheckRectsEqual(MakeRect(0, 0, 500, 1000), crop_result.region()));
EXPECT_TRUE(crop_result.are_required_regions_covered_in_target_size());
EXPECT_EQ(crop_result.fraction_non_required_covered(), 0.0);
EXPECT_FALSE(
CheckRectIsInside(key_frame_info.detections().detections(1).location(),
crop_result.region()));
}
// Checks that ComputeFrameCropRegion partially covers non-required regions that
// can partially fit in the target size.
TEST(FrameCropRegionComputerTest,
PartiallyCoversNonRequiredContainingTargetSize) {
const auto options = MakeKeyFrameCropOptions(kTargetWidth, kTargetHeight);
FrameCropRegionComputer computer(options);
KeyFrameInfo key_frame_info;
AddDetection(MakeRect(100, 0, 350, 1000), true, &key_frame_info);
AddDetection(MakeRect(0, 0, 650, 100), false, &key_frame_info);
KeyFrameCropResult crop_result;
MP_EXPECT_OK(computer.ComputeFrameCropRegion(key_frame_info, &crop_result));
CheckRequiredRegionsAreCovered(key_frame_info, crop_result);
EXPECT_TRUE(
CheckRectsEqual(MakeRect(100, 0, 387, 1000), crop_result.region()));
EXPECT_TRUE(crop_result.are_required_regions_covered_in_target_size());
EXPECT_EQ(crop_result.fraction_non_required_covered(), 0.0);
EXPECT_TRUE(
CheckRectsOverlap(key_frame_info.detections().detections(1).location(),
crop_result.region()));
}
// Checks that ComputeFrameCropRegion covers non-required regions when the
// required regions exceed target size.
TEST(FrameCropRegionComputerTest,
CoversNonRequiredWhenRequiredExceedsTargetSize) {
const auto options = MakeKeyFrameCropOptions(kTargetWidth, kTargetHeight);
FrameCropRegionComputer computer(options);
KeyFrameInfo key_frame_info;
AddDetection(MakeRect(0, 0, 600, 1000), true, &key_frame_info);
AddDetection(MakeRect(450, 0, 100, 100), false, &key_frame_info);
KeyFrameCropResult crop_result;
MP_EXPECT_OK(computer.ComputeFrameCropRegion(key_frame_info, &crop_result));
CheckRequiredRegionsAreCovered(key_frame_info, crop_result);
EXPECT_TRUE(CheckRectsEqual(MakeRect(0, 0, 600, 1000), crop_result.region()));
EXPECT_FALSE(crop_result.are_required_regions_covered_in_target_size());
EXPECT_EQ(crop_result.fraction_non_required_covered(), 1.0);
for (int i = 0; i < key_frame_info.detections().detections_size(); ++i) {
EXPECT_TRUE(
CheckRectIsInside(key_frame_info.detections().detections(i).location(),
crop_result.region()));
}
}
// Checks that ComputeFrameCropRegion does not extend the crop region when
// the non-required region is too far.
TEST(FrameCropRegionComputerTest,
DoesNotExtendRegionWhenNonRequiredRegionIsTooFar) {
const auto options = MakeKeyFrameCropOptions(kTargetWidth, kTargetHeight);
FrameCropRegionComputer computer(options);
KeyFrameInfo key_frame_info;
AddDetection(MakeRect(0, 0, 400, 400), true, &key_frame_info);
AddDetection(MakeRect(600, 0, 100, 100), false, &key_frame_info);
KeyFrameCropResult crop_result;
MP_EXPECT_OK(computer.ComputeFrameCropRegion(key_frame_info, &crop_result));
CheckRequiredRegionsAreCovered(key_frame_info, crop_result);
EXPECT_TRUE(CheckRectsEqual(MakeRect(0, 0, 400, 400), crop_result.region()));
EXPECT_TRUE(crop_result.are_required_regions_covered_in_target_size());
EXPECT_EQ(crop_result.fraction_non_required_covered(), 0.0);
EXPECT_FALSE(
CheckRectsOverlap(key_frame_info.detections().detections(1).location(),
crop_result.region()));
}
// Checks that ComputeFrameCropRegion computes the score correctly when the
// aggregation type is maximum.
TEST(FrameCropRegionComputerTest, ComputesScoreWhenAggregationIsMaximum) {
auto options = MakeKeyFrameCropOptions(kTargetWidth, kTargetHeight);
options.set_score_aggregation_type(KeyFrameCropOptions::MAXIMUM);
FrameCropRegionComputer computer(options);
KeyFrameInfo key_frame_info;
AddDetection(MakeRect(0, 0, 400, 400), true, &key_frame_info, 0.1);
AddDetection(MakeRect(300, 300, 200, 500), true, &key_frame_info, 0.9);
KeyFrameCropResult crop_result;
MP_EXPECT_OK(computer.ComputeFrameCropRegion(key_frame_info, &crop_result));
EXPECT_FLOAT_EQ(crop_result.region_score(), 0.9f);
}
// Checks that ComputeFrameCropRegion computes the score correctly when the
// aggregation type is sum required regions.
TEST(FrameCropRegionComputerTest, ComputesScoreWhenAggregationIsSumRequired) {
auto options = MakeKeyFrameCropOptions(kTargetWidth, kTargetHeight);
options.set_score_aggregation_type(KeyFrameCropOptions::SUM_REQUIRED);
FrameCropRegionComputer computer(options);
KeyFrameInfo key_frame_info;
AddDetection(MakeRect(0, 0, 400, 400), true, &key_frame_info, 0.1);
AddDetection(MakeRect(300, 300, 200, 500), true, &key_frame_info, 0.9);
AddDetection(MakeRect(300, 300, 200, 500), false, &key_frame_info, 0.5);
KeyFrameCropResult crop_result;
MP_EXPECT_OK(computer.ComputeFrameCropRegion(key_frame_info, &crop_result));
EXPECT_FLOAT_EQ(crop_result.region_score(), 1.0f);
}
// Checks that ComputeFrameCropRegion computes the score correctly when the
// aggregation type is sum all covered regions.
TEST(FrameCropRegionComputerTest, ComputesScoreWhenAggregationIsSumAll) {
auto options = MakeKeyFrameCropOptions(kTargetWidth, kTargetHeight);
options.set_score_aggregation_type(KeyFrameCropOptions::SUM_ALL);
FrameCropRegionComputer computer(options);
KeyFrameInfo key_frame_info;
AddDetection(MakeRect(0, 0, 400, 400), true, &key_frame_info, 0.1);
AddDetection(MakeRect(300, 300, 200, 500), true, &key_frame_info, 0.9);
AddDetection(MakeRect(300, 300, 200, 500), false, &key_frame_info, 0.5);
KeyFrameCropResult crop_result;
MP_EXPECT_OK(computer.ComputeFrameCropRegion(key_frame_info, &crop_result));
EXPECT_FLOAT_EQ(crop_result.region_score(), 1.5f);
}
// Checks that ComputeFrameCropRegion computes the score correctly when the
// aggregation type is constant.
TEST(FrameCropRegionComputerTest, ComputesScoreWhenAggregationIsConstant) {
auto options = MakeKeyFrameCropOptions(kTargetWidth, kTargetHeight);
options.set_score_aggregation_type(KeyFrameCropOptions::CONSTANT);
FrameCropRegionComputer computer(options);
KeyFrameInfo key_frame_info;
AddDetection(MakeRect(0, 0, 400, 400), true, &key_frame_info, 0.1);
AddDetection(MakeRect(300, 300, 200, 500), true, &key_frame_info, 0.9);
AddDetection(MakeRect(300, 300, 200, 500), false, &key_frame_info, 0.5);
KeyFrameCropResult crop_result;
MP_EXPECT_OK(computer.ComputeFrameCropRegion(key_frame_info, &crop_result));
EXPECT_FLOAT_EQ(crop_result.region_score(), 1.0f);
}
} // namespace autoflip
} // namespace mediapipe

View File

@ -0,0 +1,40 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MEDIAPIPE_EXAMPLES_DESKTOP_AUTOFLIP_QUALITY_MATH_UTILS_H_
#define MEDIAPIPE_EXAMPLES_DESKTOP_AUTOFLIP_QUALITY_MATH_UTILS_H_
class MathUtil {
public:
// Clamps value to the range [low, high]. Requires low <= high. Returns false
// if this check fails, otherwise returns true. Caller should first check the
// returned boolean.
template <typename T> // T models LessThanComparable.
static bool Clamp(const T& low, const T& high, const T& value, T* result) {
// Prevents errors in ordering the arguments.
if (low > high) {
return false;
}
if (high < value) {
*result = high;
} else if (value < low) {
*result = low;
} else {
*result = value;
}
return true;
}
};
#endif // MEDIAPIPE_EXAMPLES_DESKTOP_AUTOFLIP_QUALITY_MATH_UTILS_H_
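A minimal usage sketch of MathUtil::Clamp follows; it is not part of the diff, the include path is inferred from the header guard, and the values are purely illustrative.

#include <iostream>

#include "mediapipe/examples/desktop/autoflip/quality/math_utils.h"

int main() {
  double clamped = 0.0;
  // Clamp 1.7 into the range [0.0, 1.0]; Clamp only returns false when
  // low > high, so the result is valid here.
  if (MathUtil::Clamp(0.0, 1.0, 1.7, &clamped)) {
    std::cout << clamped << std::endl;  // Prints 1 (clamped to the upper bound).
  }
  return 0;
}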

View File

@ -0,0 +1,177 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/examples/desktop/autoflip/quality/padding_effect_generator.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/image_frame_opencv.h"
#include "mediapipe/framework/port/opencv_core_inc.h"
#include "mediapipe/framework/port/opencv_imgproc_inc.h"
#include "mediapipe/framework/port/ret_check.h"
namespace mediapipe {
namespace autoflip {
PaddingEffectGenerator::PaddingEffectGenerator(const int input_width,
const int input_height,
const double target_aspect_ratio,
bool scale_to_multiple_of_two) {
target_aspect_ratio_ = target_aspect_ratio;
const double input_aspect_ratio =
static_cast<double>(input_width) / static_cast<double>(input_height);
input_width_ = input_width;
input_height_ = input_height;
is_vertical_padding_ = input_aspect_ratio > target_aspect_ratio;
output_width_ = is_vertical_padding_
? std::round(target_aspect_ratio * input_height)
: input_width;
output_height_ = is_vertical_padding_
? input_height
: std::round(input_width / target_aspect_ratio);
if (scale_to_multiple_of_two) {
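    // Integer division truncates, rounding each dimension down to an even
    // value.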
output_width_ = output_width_ / 2 * 2;
output_height_ = output_height_ / 2 * 2;
}
}
::mediapipe::Status PaddingEffectGenerator::Process(
const ImageFrame& input_frame, const float background_contrast,
const int blur_cv_size, const float overlay_opacity,
ImageFrame* output_frame, const cv::Scalar* background_color_in_rgb) {
RET_CHECK_EQ(input_frame.Width(), input_width_);
RET_CHECK_EQ(input_frame.Height(), input_height_);
RET_CHECK(output_frame);
cv::Mat original_image = formats::MatView(&input_frame);
  // This is the canvas that we are going to draw the padding effect onto.
cv::Mat canvas(output_height_, output_width_, original_image.type());
const int effective_input_width =
is_vertical_padding_ ? input_width_ : input_height_;
const int effective_input_height =
is_vertical_padding_ ? input_height_ : input_width_;
const int effective_output_width =
is_vertical_padding_ ? output_width_ : output_height_;
const int effective_output_height =
is_vertical_padding_ ? output_height_ : output_width_;
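  // Horizontal padding is handled by transposing the frame, running the
  // vertical-padding path, and transposing the canvas back at the end.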
if (!is_vertical_padding_) {
original_image = original_image.t();
canvas = canvas.t();
}
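  // Height the foreground will occupy after being scaled to span the full
  // output width while preserving its aspect ratio.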
const int foreground_height =
effective_input_height * effective_output_width / effective_input_width;
int x = -1, y = -1, width = -1, height = -1;
  // The padding operation proceeds in the following steps.
// #1, we prepare the background. If a solid background color is given, we use
// it directly. Otherwise, we first crop a region of size "output_width_ *
// output_height_" off of the original frame to become the background of
// the final frame, and then we blur it and adjust contrast and opacity.
if (background_color_in_rgb != nullptr) {
canvas = *background_color_in_rgb;
} else {
// Copy the original image to the background.
x = 0.5 * (effective_input_width - effective_output_width);
y = 0;
width = effective_output_width;
height = effective_output_height;
cv::Rect crop_window_for_background(x, y, width, height);
original_image(crop_window_for_background).copyTo(canvas);
// Blur.
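    // cv::GaussianBlur requires an odd kernel size, so an even blur_cv_size is
    // bumped up by one.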
const int cv_size =
blur_cv_size % 2 == 1 ? blur_cv_size : (blur_cv_size + 1);
const cv::Size kernel(cv_size, cv_size);
// TODO: the larger the kernel size, the slower the blurring
// operation is. Consider running multiple sequential blurs with smaller
    // sizes to approximate the effect of a large kernel, which could speed up
    // the process.
x = 0;
width = effective_output_width;
const cv::Rect canvas_rect(0, 0, canvas.cols, canvas.rows);
// Blur the top region (above foreground).
y = 0;
height = (effective_output_height - foreground_height) / 2 + cv_size;
const cv::Rect top_blur_region =
cv::Rect(x, y, width, height) & canvas_rect;
if (top_blur_region.area() > 0) {
cv::Mat top_blurred = canvas(top_blur_region);
cv::GaussianBlur(top_blurred, top_blurred, kernel, 0, 0);
}
// Blur the bottom region (below foreground).
y = height + foreground_height - cv_size;
height = effective_output_height - y;
const cv::Rect bottom_blur_region =
cv::Rect(x, y, width, height) & canvas_rect;
if (bottom_blur_region.area() > 0) {
cv::Mat bottom_blurred = canvas(bottom_blur_region);
cv::GaussianBlur(bottom_blurred, bottom_blurred, kernel, 0, 0);
}
const float kEqualThreshold = 0.0001f;
// Background contrast adjustment.
if (std::abs(background_contrast - 1.0f) > kEqualThreshold) {
canvas *= background_contrast;
}
// Alpha blend a translucent black layer.
if (std::abs(overlay_opacity - 0.0f) > kEqualThreshold) {
cv::Mat overlay = cv::Mat::zeros(canvas.size(), canvas.type());
cv::addWeighted(overlay, overlay_opacity, canvas, 1 - overlay_opacity, 0,
canvas);
}
}
  // #2, we take the entire input frame (in effective coordinates) as the crop
  // window; it becomes the foreground in the final frame.
x = 0;
y = 0;
width = effective_input_width;
height = effective_input_height;
cv::Rect crop_window_for_foreground(x, y, width, height);
  // #3, we specify a region in the final frame in which to embed the
  //     foreground obtained in #2. This region has the same aspect ratio as
  //     the foreground but a smaller size, so its height is derived from the
  //     ratio of the sizes:
  //     - embed size: output_width_ * height (to be computed)
  //     - foreground: input_width * input_height
  //
  //     The region spans the full frame width and is centered vertically in
  //     the frame.
x = 0;
y = (effective_output_height - foreground_height) / 2;
width = effective_output_width;
height = foreground_height;
cv::Rect region_to_embed_foreground(x, y, width, height);
cv::Mat dst = canvas(region_to_embed_foreground);
cv::resize(original_image(crop_window_for_foreground), dst, dst.size());
if (!is_vertical_padding_) {
canvas = canvas.t();
}
output_frame->CopyPixelData(input_frame.Format(), canvas.cols, canvas.rows,
canvas.data,
ImageFrame::kDefaultAlignmentBoundary);
return ::mediapipe::OkStatus();
}
} // namespace autoflip
} // namespace mediapipe
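To make the constructor's sizing arithmetic concrete, here is a standalone sketch that mirrors it under assumed dimensions (a 1920x1080 input padded toward a 9:16 target); the numbers are illustrative only and not taken from the diff.

#include <cmath>
#include <iostream>

int main() {
  // Assumed example dimensions: 1920x1080 input, 9:16 target aspect ratio.
  const int input_width = 1920, input_height = 1080;
  const double target_aspect_ratio = 9.0 / 16.0;  // 0.5625
  const double input_aspect_ratio =
      static_cast<double>(input_width) / input_height;  // ~1.78
  // The input is wider than the target, so padding is vertical: keep the
  // height and shrink the width to match the target aspect ratio.
  const bool is_vertical_padding = input_aspect_ratio > target_aspect_ratio;
  int output_width =
      is_vertical_padding
          ? static_cast<int>(std::round(target_aspect_ratio * input_height))
          : input_width;  // round(607.5) = 608
  int output_height =
      is_vertical_padding
          ? input_height  // 1080
          : static_cast<int>(std::round(input_width / target_aspect_ratio));
  // With scale_to_multiple_of_two, integer division truncates each dimension
  // down to an even value (608 and 1080 are already even).
  output_width = output_width / 2 * 2;
  output_height = output_height / 2 * 2;
  std::cout << output_width << "x" << output_height << "\n";  // 608x1080
  return 0;
}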

View File

@ -0,0 +1,70 @@
#ifndef MEDIAPIPE_EXAMPLES_DESKTOP_AUTOFLIP_QUALITY_PADDING_EFFECT_GENERATOR_H_
#define MEDIAPIPE_EXAMPLES_DESKTOP_AUTOFLIP_QUALITY_PADDING_EFFECT_GENERATOR_H_
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/port/opencv_core_inc.h"
#include "mediapipe/framework/port/opencv_imgproc_inc.h"
#include "mediapipe/framework/port/status.h"
namespace mediapipe {
namespace autoflip {
// Generates padding effects given input frames. Depending on where the padded
// contents are added, there are two cases:
// 1) Pad on the top and bottom of the input frame, aka vertical padding, i.e.
// input_aspect_ratio > target_aspect_ratio. In this case, output frames will
// have the same height as input frames, and the width will be adjusted to
// match the target aspect ratio.
// 2) Pad on the left and right of the input frame, aka horizontal padding, i.e.
// input_aspect_ratio < target_aspect_ratio. In this case, output frames will
// have the same width as original frames, and the height will be adjusted to
// match the target aspect ratio.
// If a background color is given, the background of the output frame will be
// filled with this solid color; otherwise, it is a blurred version of the input
// frame.
//
// Note: in both horizontal and vertical padding effects, the output frame size
// will be at most as large as the input frame size, with one dimension the
// same as the input (horizontal padding: width, vertical padding: height). If
// you intend the output frame to be larger, you could add a
// ScaleImageCalculator as an upstream node before this calculator in your
// MediaPipe graph (not as a downstream node, because visual details may be
// lost after applying the padding effect).
class PaddingEffectGenerator {
public:
// Always outputs width and height that are divisible by 2 if
// scale_to_multiple_of_two is set to true.
PaddingEffectGenerator(const int input_width, const int input_height,
const double target_aspect_ratio,
bool scale_to_multiple_of_two = false);
// Apply the padding effect on the input frame.
// - blur_cv_size: The cv::Size() parameter used in creating blurry effects
// for padding backgrounds.
// - background_contrast: Contrast adjustment for padding background. This
// value should be between 0 and 1, and the smaller the value, the darker the
// background.
// - overlay_opacity: In addition to adjusting the contrast, a translucent
// black layer will be alpha blended with the background. This value defines
// the opacity of the black layer.
// - background_color_in_rgb: If not null, uses this solid color as background
// instead of blurring the image, and does not adjust contrast or opacity.
::mediapipe::Status Process(
const ImageFrame& input_frame, const float background_contrast,
const int blur_cv_size, const float overlay_opacity,
ImageFrame* output_frame,
const cv::Scalar* background_color_in_rgb = nullptr);
private:
double target_aspect_ratio_;
int input_width_ = -1;
int input_height_ = -1;
int output_width_ = -1;
int output_height_ = -1;
bool is_vertical_padding_;
};
} // namespace autoflip
} // namespace mediapipe
#endif // MEDIAPIPE_EXAMPLES_DESKTOP_AUTOFLIP_QUALITY_PADDING_EFFECT_GENERATOR_H_
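A hedged usage sketch of the class above; the frame contents and the contrast, blur-size, and opacity values are placeholders chosen for illustration, not defaults from this diff.

#include "mediapipe/examples/desktop/autoflip/quality/padding_effect_generator.h"
#include "mediapipe/framework/formats/image_frame.h"

::mediapipe::Status RunPaddingExample() {
  // Assumed input: a 1920x1080 SRGB frame padded toward a 9:16 output.
  mediapipe::ImageFrame input(mediapipe::ImageFormat::SRGB, 1920, 1080);
  input.SetToZero();  // Placeholder pixel data.
  mediapipe::autoflip::PaddingEffectGenerator generator(
      /*input_width=*/1920, /*input_height=*/1080,
      /*target_aspect_ratio=*/9.0 / 16.0, /*scale_to_multiple_of_two=*/true);
  mediapipe::ImageFrame output;
  // No background color is passed, so the background is a blurred, darkened
  // copy of the input frame.
  return generator.Process(input, /*background_contrast=*/0.6f,
                           /*blur_cv_size=*/101, /*overlay_opacity=*/0.4f,
                           &output);
}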

Some files were not shown because too many files have changed in this diff.