Project import generated by Copybara.
GitOrigin-RevId: d91373b4d4d10abef49cab410caa6aadf0875049
3
.bazelrc
|
|
@ -19,6 +19,9 @@ build --incompatible_depset_is_not_iterable=false
|
|||
# Sets the default Apple platform to macOS.
|
||||
build --apple_platform_type=macos
|
||||
|
||||
# Allow debugging with XCODE
|
||||
build --apple_generate_dsym
|
||||
|
||||
# Android configs.
|
||||
build:android --crosstool_top=//external:android/crosstool
|
||||
build:android --host_crosstool_top=@bazel_tools//tools/cpp:toolchain
|
||||
|
|
|
|||
|
|
@ -35,6 +35,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
|||
libopencv-highgui-dev \
|
||||
libopencv-imgproc-dev \
|
||||
libopencv-video-dev \
|
||||
libopencv-calib3d-dev \
|
||||
libopencv-features2d-dev \
|
||||
software-properties-common && \
|
||||
add-apt-repository -y ppa:openjdk-r/ppa && \
|
||||
apt-get update && apt-get install -y openjdk-8-jdk && \
|
||||
|
|
|
|||
|
|
@ -14,12 +14,13 @@
|
|||
* [Face Detection](mediapipe/docs/face_detection_mobile_gpu.md)
|
||||
* [Hair Segmentation](mediapipe/docs/hair_segmentation_mobile_gpu.md)
|
||||
* [Object Detection](mediapipe/docs/object_detection_mobile_gpu.md)
|
||||
* [Object Detection and Tracking](mediapipe/docs/object_tracking_mobile_gpu.md)
|
||||
|
||||

|
||||

|
||||

|
||||

|
||||

|
||||

|
||||
|
||||
## Installation
|
||||
Follow these [instructions](mediapipe/docs/install.md).
|
||||
|
|
|
|||
22
WORKSPACE
|
|
@ -116,6 +116,13 @@ http_archive(
|
|||
"https://mirror.bazel.build/github.com/tensorflow/tensorflow/archive/%s.tar.gz" % _TENSORFLOW_GIT_COMMIT,
|
||||
"https://github.com/tensorflow/tensorflow/archive/%s.tar.gz" % _TENSORFLOW_GIT_COMMIT,
|
||||
],
|
||||
# Patch https://github.com/tensorflow/tensorflow/commit/e3a7bdbebb99352351a19e2e403136166aa52934
|
||||
patches = [
|
||||
"@//third_party:org_tensorflow_e3a7bdbebb99352351a19e2e403136166aa52934.diff"
|
||||
],
|
||||
patch_args = [
|
||||
"-p1",
|
||||
],
|
||||
strip_prefix = "tensorflow-%s" % _TENSORFLOW_GIT_COMMIT,
|
||||
sha256 = _TENSORFLOW_SHA256,
|
||||
)
|
||||
|
|
@ -123,8 +130,22 @@ http_archive(
|
|||
load("@org_tensorflow//tensorflow:workspace.bzl", "tf_workspace")
|
||||
tf_workspace(tf_repo_name = "org_tensorflow")
|
||||
|
||||
http_archive(
|
||||
name = "ceres_solver",
|
||||
url = "https://github.com/ceres-solver/ceres-solver/archive/1.14.0.zip",
|
||||
patches = [
|
||||
"@//third_party:ceres_solver_9bf9588988236279e1262f75d7f4d85711dfa172.diff"
|
||||
],
|
||||
patch_args = [
|
||||
"-p1",
|
||||
],
|
||||
strip_prefix = "ceres-solver-1.14.0",
|
||||
sha256 = "5ba6d0db4e784621fda44a50c58bb23b0892684692f0c623e2063f9c19f192f1"
|
||||
)
|
||||
|
||||
# Please run
|
||||
# $ sudo apt-get install libopencv-core-dev libopencv-highgui-dev \
|
||||
# libopencv-calib3d-dev libopencv-features2d-dev \
|
||||
# libopencv-imgproc-dev libopencv-video-dev
|
||||
new_local_repository(
|
||||
name = "linux_opencv",
|
||||
|
|
@ -292,3 +313,4 @@ http_archive(
|
|||
strip_prefix = "google-toolbox-for-mac-2.2.1",
|
||||
build_file = "@//third_party:google_toolbox_for_mac.BUILD",
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@
|
|||
"mediapipe/examples/ios/facedetectiongpu/BUILD",
|
||||
"mediapipe/examples/ios/handdetectiongpu/BUILD",
|
||||
"mediapipe/examples/ios/handtrackinggpu/BUILD",
|
||||
"mediapipe/examples/ios/multihandtrackinggpu/BUILD",
|
||||
"mediapipe/examples/ios/objectdetectioncpu/BUILD",
|
||||
"mediapipe/examples/ios/objectdetectiongpu/BUILD"
|
||||
],
|
||||
|
|
@ -18,6 +19,7 @@
|
|||
"//mediapipe/examples/ios/facedetectiongpu:FaceDetectionGpuApp",
|
||||
"//mediapipe/examples/ios/handdetectiongpu:HandDetectionGpuApp",
|
||||
"//mediapipe/examples/ios/handtrackinggpu:HandTrackingGpuApp",
|
||||
"//mediapipe/examples/ios/multihandtrackinggpu:MultiHandTrackingGpuApp",
|
||||
"//mediapipe/examples/ios/objectdetectioncpu:ObjectDetectionCpuApp",
|
||||
"//mediapipe/examples/ios/objectdetectiongpu:ObjectDetectionGpuApp",
|
||||
"//mediapipe/objc:mediapipe_framework_ios"
|
||||
|
|
@ -84,6 +86,8 @@
|
|||
"mediapipe/examples/ios/handdetectiongpu/Base.lproj",
|
||||
"mediapipe/examples/ios/handtrackinggpu",
|
||||
"mediapipe/examples/ios/handtrackinggpu/Base.lproj",
|
||||
"mediapipe/examples/ios/multihandtrackinggpu",
|
||||
"mediapipe/examples/ios/multihandtrackinggpu/Base.lproj",
|
||||
"mediapipe/examples/ios/objectdetectioncpu",
|
||||
"mediapipe/examples/ios/objectdetectioncpu/Base.lproj",
|
||||
"mediapipe/examples/ios/objectdetectiongpu",
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@
|
|||
"mediapipe/examples/ios/facedetectiongpu",
|
||||
"mediapipe/examples/ios/handdetectiongpu",
|
||||
"mediapipe/examples/ios/handtrackinggpu",
|
||||
"mediapipe/examples/ios/multihandtrackinggpu",
|
||||
"mediapipe/examples/ios/objectdetectioncpu",
|
||||
"mediapipe/examples/ios/objectdetectiongpu"
|
||||
],
|
||||
|
|
|
|||
|
|
@ -165,6 +165,7 @@ cc_library(
|
|||
deps = [
|
||||
"//mediapipe/framework:calculator_framework",
|
||||
"//mediapipe/framework/port:logging",
|
||||
"//mediapipe/framework/port:status",
|
||||
],
|
||||
alwayslink = 1,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -13,11 +13,12 @@
|
|||
// limitations under the License.
|
||||
|
||||
#include "mediapipe/framework/calculator_framework.h"
|
||||
#include "mediapipe/framework/port/canonical_errors.h"
|
||||
#include "mediapipe/framework/port/logging.h"
|
||||
|
||||
namespace mediapipe {
|
||||
|
||||
// Attach the header from one stream to another stream.
|
||||
// Attach the header from a stream or side input to another stream.
|
||||
//
|
||||
// The header stream (tag HEADER) must not have any packets in it.
|
||||
//
|
||||
|
|
@ -25,17 +26,53 @@ namespace mediapipe {
|
|||
// calculator to not need a header or to accept a separate stream with
|
||||
// a header, that would be more future proof.
|
||||
//
|
||||
// Example usage 1:
|
||||
// node {
|
||||
// calculator: "AddHeaderCalculator"
|
||||
// input_stream: "DATA:audio"
|
||||
// input_stream: "HEADER:audio_header"
|
||||
// output_stream: "audio_with_header"
|
||||
// }
|
||||
//
|
||||
// Example usage 2:
|
||||
// node {
|
||||
// calculator: "AddHeaderCalculator"
|
||||
// input_stream: "DATA:audio"
|
||||
// input_side_packet: "HEADER:audio_header"
|
||||
// output_stream: "audio_with_header"
|
||||
// }
|
||||
//
|
||||
class AddHeaderCalculator : public CalculatorBase {
|
||||
public:
|
||||
static ::mediapipe::Status GetContract(CalculatorContract* cc) {
|
||||
cc->Inputs().Tag("HEADER").SetNone();
|
||||
bool has_side_input = false;
|
||||
bool has_header_stream = false;
|
||||
if (cc->InputSidePackets().HasTag("HEADER")) {
|
||||
cc->InputSidePackets().Tag("HEADER").SetAny();
|
||||
has_side_input = true;
|
||||
}
|
||||
if (cc->Inputs().HasTag("HEADER")) {
|
||||
cc->Inputs().Tag("HEADER").SetNone();
|
||||
has_header_stream = true;
|
||||
}
|
||||
if (has_side_input == has_header_stream) {
|
||||
return mediapipe::InvalidArgumentError(
|
||||
"Header must be provided via exactly one of side input and input "
|
||||
"stream");
|
||||
}
|
||||
cc->Inputs().Tag("DATA").SetAny();
|
||||
cc->Outputs().Index(0).SetSameAs(&cc->Inputs().Tag("DATA"));
|
||||
return ::mediapipe::OkStatus();
|
||||
}
|
||||
|
||||
::mediapipe::Status Open(CalculatorContext* cc) override {
|
||||
const Packet& header = cc->Inputs().Tag("HEADER").Header();
|
||||
Packet header;
|
||||
if (cc->InputSidePackets().HasTag("HEADER")) {
|
||||
header = cc->InputSidePackets().Tag("HEADER");
|
||||
}
|
||||
if (cc->Inputs().HasTag("HEADER")) {
|
||||
header = cc->Inputs().Tag("HEADER").Header();
|
||||
}
|
||||
if (!header.IsEmpty()) {
|
||||
cc->Outputs().Index(0).SetHeader(header);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -14,8 +14,10 @@
|
|||
|
||||
#include "mediapipe/framework/calculator_framework.h"
|
||||
#include "mediapipe/framework/calculator_runner.h"
|
||||
#include "mediapipe/framework/port/canonical_errors.h"
|
||||
#include "mediapipe/framework/port/gmock.h"
|
||||
#include "mediapipe/framework/port/gtest.h"
|
||||
#include "mediapipe/framework/port/status.h"
|
||||
#include "mediapipe/framework/port/status_matchers.h"
|
||||
#include "mediapipe/framework/timestamp.h"
|
||||
#include "mediapipe/framework/tool/validate_type.h"
|
||||
|
|
@ -24,7 +26,7 @@ namespace mediapipe {
|
|||
|
||||
class AddHeaderCalculatorTest : public ::testing::Test {};
|
||||
|
||||
TEST_F(AddHeaderCalculatorTest, Works) {
|
||||
TEST_F(AddHeaderCalculatorTest, HeaderStream) {
|
||||
CalculatorGraphConfig::Node node;
|
||||
node.set_calculator("AddHeaderCalculator");
|
||||
node.add_input_stream("HEADER:header_stream");
|
||||
|
|
@ -96,4 +98,62 @@ TEST_F(AddHeaderCalculatorTest, NoPacketsOnHeaderStream) {
|
|||
ASSERT_FALSE(runner.Run().ok());
|
||||
}
|
||||
|
||||
TEST_F(AddHeaderCalculatorTest, InputSidePacket) {
|
||||
CalculatorGraphConfig::Node node;
|
||||
node.set_calculator("AddHeaderCalculator");
|
||||
node.add_input_stream("DATA:data_stream");
|
||||
node.add_output_stream("merged_stream");
|
||||
node.add_input_side_packet("HEADER:header");
|
||||
|
||||
CalculatorRunner runner(node);
|
||||
|
||||
// Set header and add 5 packets.
|
||||
runner.MutableSidePackets()->Tag("HEADER") =
|
||||
Adopt(new std::string("my_header"));
|
||||
for (int i = 0; i < 5; ++i) {
|
||||
Packet packet = Adopt(new int(i)).At(Timestamp(i * 1000));
|
||||
runner.MutableInputs()->Tag("DATA").packets.push_back(packet);
|
||||
}
|
||||
|
||||
// Run calculator.
|
||||
MP_ASSERT_OK(runner.Run());
|
||||
|
||||
ASSERT_EQ(1, runner.Outputs().NumEntries());
|
||||
|
||||
// Test output.
|
||||
EXPECT_EQ(std::string("my_header"),
|
||||
runner.Outputs().Index(0).header.Get<std::string>());
|
||||
const std::vector<Packet>& output_packets = runner.Outputs().Index(0).packets;
|
||||
ASSERT_EQ(5, output_packets.size());
|
||||
for (int i = 0; i < 5; ++i) {
|
||||
const int val = output_packets[i].Get<int>();
|
||||
EXPECT_EQ(i, val);
|
||||
EXPECT_EQ(Timestamp(i * 1000), output_packets[i].Timestamp());
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(AddHeaderCalculatorTest, UsingBothSideInputAndStream) {
|
||||
CalculatorGraphConfig::Node node;
|
||||
node.set_calculator("AddHeaderCalculator");
|
||||
node.add_input_stream("HEADER:header_stream");
|
||||
node.add_input_stream("DATA:data_stream");
|
||||
node.add_output_stream("merged_stream");
|
||||
node.add_input_side_packet("HEADER:header");
|
||||
|
||||
CalculatorRunner runner(node);
|
||||
|
||||
// Set both headers and add 5 packets.
|
||||
runner.MutableSidePackets()->Tag("HEADER") =
|
||||
Adopt(new std::string("my_header"));
|
||||
runner.MutableSidePackets()->Tag("HEADER") =
|
||||
Adopt(new std::string("my_header"));
|
||||
for (int i = 0; i < 5; ++i) {
|
||||
Packet packet = Adopt(new int(i)).At(Timestamp(i * 1000));
|
||||
runner.MutableInputs()->Tag("DATA").packets.push_back(packet);
|
||||
}
|
||||
|
||||
// Run should fail because header can only be provided one way.
|
||||
EXPECT_EQ(runner.Run().code(), ::mediapipe::InvalidArgumentError("").code());
|
||||
}
|
||||
|
||||
} // namespace mediapipe
|
||||
|
|
|
|||
|
|
@ -330,22 +330,27 @@ void PacketResamplerCalculator::UpdateNextOutputTimestampWithJitter() {
|
|||
return ::mediapipe::OkStatus();
|
||||
}
|
||||
|
||||
LOG_IF(WARNING, frame_time_usec_ <
|
||||
(cc->InputTimestamp() - last_packet_.Timestamp()).Value())
|
||||
<< "Adding jitter is meaningless when upsampling.";
|
||||
|
||||
const int64 curr_diff =
|
||||
(next_output_timestamp_ - cc->InputTimestamp()).Value();
|
||||
const int64 last_diff =
|
||||
(next_output_timestamp_ - last_packet_.Timestamp()).Value();
|
||||
if (curr_diff * last_diff > 0) {
|
||||
return ::mediapipe::OkStatus();
|
||||
if (frame_time_usec_ <
|
||||
(cc->InputTimestamp() - last_packet_.Timestamp()).Value()) {
|
||||
LOG_FIRST_N(WARNING, 2)
|
||||
<< "Adding jitter is not very useful when upsampling.";
|
||||
}
|
||||
|
||||
while (true) {
|
||||
const int64 last_diff =
|
||||
(next_output_timestamp_ - last_packet_.Timestamp()).Value();
|
||||
RET_CHECK_GT(last_diff, 0.0);
|
||||
const int64 curr_diff =
|
||||
(next_output_timestamp_ - cc->InputTimestamp()).Value();
|
||||
if (curr_diff > 0.0) {
|
||||
break;
|
||||
}
|
||||
OutputWithinLimits(cc, (std::abs(curr_diff) > last_diff
|
||||
? last_packet_
|
||||
: cc->Inputs().Get(input_data_id_).Value())
|
||||
.At(next_output_timestamp_));
|
||||
UpdateNextOutputTimestampWithJitter();
|
||||
}
|
||||
OutputWithinLimits(cc, (std::abs(curr_diff) > std::abs(last_diff)
|
||||
? last_packet_
|
||||
: cc->Inputs().Get(input_data_id_).Value())
|
||||
.At(next_output_timestamp_));
|
||||
UpdateNextOutputTimestampWithJitter();
|
||||
return ::mediapipe::OkStatus();
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -501,8 +501,11 @@ void ImageCroppingCalculator::GetOutputDimensions(CalculatorContext* cc,
|
|||
row_max = std::max(row_max, transformed_points_[i * 2 + 1]);
|
||||
}
|
||||
|
||||
*dst_width = std::round((col_max - col_min) * src_width);
|
||||
*dst_height = std::round((row_max - row_min) * src_height);
|
||||
int width = static_cast<int>(std::round((col_max - col_min) * src_width));
|
||||
int height = static_cast<int>(std::round((row_max - row_min) * src_height));
|
||||
// Minimum output dimension 1x1 prevents creation of textures with 0x0.
|
||||
*dst_width = std::max(1, width);
|
||||
*dst_height = std::max(1, height);
|
||||
}
|
||||
|
||||
} // namespace mediapipe
|
||||
|
|
|
|||
|
|
@ -983,3 +983,31 @@ cc_test(
|
|||
"//mediapipe/framework/port:parse_text_proto",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "detections_to_timed_box_list_calculator",
|
||||
srcs = ["detections_to_timed_box_list_calculator.cc"],
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
"//mediapipe/framework:calculator_framework",
|
||||
"//mediapipe/framework/formats:detection_cc_proto",
|
||||
"//mediapipe/framework/formats:location_data_cc_proto",
|
||||
"//mediapipe/framework/port:ret_check",
|
||||
"//mediapipe/framework/port:status",
|
||||
"//mediapipe/util/tracking:box_tracker",
|
||||
],
|
||||
alwayslink = 1,
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "detection_unique_id_calculator",
|
||||
srcs = ["detection_unique_id_calculator.cc"],
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
"//mediapipe/framework:calculator_framework",
|
||||
"//mediapipe/framework/formats:detection_cc_proto",
|
||||
"//mediapipe/framework/port:ret_check",
|
||||
"//mediapipe/framework/port:status",
|
||||
],
|
||||
alwayslink = 1,
|
||||
)
|
||||
|
|
|
|||
110
mediapipe/calculators/util/detection_unique_id_calculator.cc
Normal file
|
|
@ -0,0 +1,110 @@
|
|||
// Copyright 2019 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "mediapipe/framework/calculator_framework.h"
|
||||
#include "mediapipe/framework/formats/detection.pb.h"
|
||||
#include "mediapipe/framework/port/status.h"
|
||||
|
||||
namespace mediapipe {
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr char kDetectionsTag[] = "DETECTIONS";
|
||||
constexpr char kDetectionListTag[] = "DETECTION_LIST";
|
||||
|
||||
// Each detection processed by DetectionUniqueIDCalculator will be assigned an
|
||||
// unique id that starts from 1. If a detection already has an ID other than 0,
|
||||
// the ID will be overwritten.
|
||||
static int64 detection_id = 0;
|
||||
|
||||
inline int GetNextDetectionId() { return ++detection_id; }
|
||||
|
||||
} // namespace
|
||||
|
||||
// Assign a unique id to detections.
|
||||
// Note that the calculator will consume the input vector of Detection or
|
||||
// DetectionList. So the input stream can not be connected to other calculators.
|
||||
//
|
||||
// Example config:
|
||||
// node {
|
||||
// calculator: "DetectionUniqueIdCalculator"
|
||||
// input_stream: "DETECTIONS:detections"
|
||||
// output_stream: "DETECTIONS:output_detections"
|
||||
// }
|
||||
class DetectionUniqueIdCalculator : public CalculatorBase {
|
||||
public:
|
||||
static ::mediapipe::Status GetContract(CalculatorContract* cc) {
|
||||
RET_CHECK(cc->Inputs().HasTag(kDetectionListTag) ||
|
||||
cc->Inputs().HasTag(kDetectionsTag))
|
||||
<< "None of the input streams are provided.";
|
||||
|
||||
if (cc->Inputs().HasTag(kDetectionListTag)) {
|
||||
RET_CHECK(cc->Outputs().HasTag(kDetectionListTag));
|
||||
cc->Inputs().Tag(kDetectionListTag).Set<DetectionList>();
|
||||
cc->Outputs().Tag(kDetectionListTag).Set<DetectionList>();
|
||||
}
|
||||
if (cc->Inputs().HasTag(kDetectionsTag)) {
|
||||
RET_CHECK(cc->Outputs().HasTag(kDetectionsTag));
|
||||
cc->Inputs().Tag(kDetectionsTag).Set<std::vector<Detection>>();
|
||||
cc->Outputs().Tag(kDetectionsTag).Set<std::vector<Detection>>();
|
||||
}
|
||||
|
||||
return ::mediapipe::OkStatus();
|
||||
}
|
||||
|
||||
::mediapipe::Status Open(CalculatorContext* cc) override {
|
||||
cc->SetOffset(::mediapipe::TimestampDiff(0));
|
||||
return ::mediapipe::OkStatus();
|
||||
}
|
||||
::mediapipe::Status Process(CalculatorContext* cc) override;
|
||||
};
|
||||
REGISTER_CALCULATOR(DetectionUniqueIdCalculator);
|
||||
|
||||
::mediapipe::Status DetectionUniqueIdCalculator::Process(
|
||||
CalculatorContext* cc) {
|
||||
if (cc->Inputs().HasTag(kDetectionListTag) &&
|
||||
!cc->Inputs().Tag(kDetectionListTag).IsEmpty()) {
|
||||
auto result =
|
||||
cc->Inputs().Tag(kDetectionListTag).Value().Consume<DetectionList>();
|
||||
if (result.ok()) {
|
||||
auto detection_list = std::move(result).ValueOrDie();
|
||||
for (Detection& detection : *detection_list->mutable_detection()) {
|
||||
detection.set_detection_id(GetNextDetectionId());
|
||||
}
|
||||
cc->Outputs()
|
||||
.Tag(kDetectionListTag)
|
||||
.Add(detection_list.release(), cc->InputTimestamp());
|
||||
}
|
||||
}
|
||||
|
||||
if (cc->Inputs().HasTag(kDetectionsTag) &&
|
||||
!cc->Inputs().Tag(kDetectionsTag).IsEmpty()) {
|
||||
auto result = cc->Inputs()
|
||||
.Tag(kDetectionsTag)
|
||||
.Value()
|
||||
.Consume<std::vector<Detection>>();
|
||||
if (result.ok()) {
|
||||
auto detections = std::move(result).ValueOrDie();
|
||||
for (Detection& detection : *detections) {
|
||||
detection.set_detection_id(GetNextDetectionId());
|
||||
}
|
||||
cc->Outputs()
|
||||
.Tag(kDetectionsTag)
|
||||
.Add(detections.release(), cc->InputTimestamp());
|
||||
}
|
||||
}
|
||||
return ::mediapipe::OkStatus();
|
||||
}
|
||||
|
||||
} // namespace mediapipe
|
||||
|
|
@ -39,6 +39,8 @@ constexpr char kKeypointLabel[] = "KEYPOINT";
|
|||
// The ratio of detection label font height to the height of detection bounding
|
||||
// box.
|
||||
constexpr double kLabelToBoundingBoxRatio = 0.1;
|
||||
// Perserve 2 decimal digits.
|
||||
constexpr float kNumScoreDecimalDigitsMultipler = 100;
|
||||
|
||||
} // namespace
|
||||
|
||||
|
|
@ -235,18 +237,26 @@ void DetectionsToRenderDataCalculator::AddLabels(
|
|||
std::string label_str = detection.label().empty()
|
||||
? absl::StrCat(detection.label_id(i))
|
||||
: detection.label(i);
|
||||
const float rounded_score =
|
||||
std::round(detection.score(i) * kNumScoreDecimalDigitsMultipler) /
|
||||
kNumScoreDecimalDigitsMultipler;
|
||||
std::string label_and_score =
|
||||
absl::StrCat(label_str, options.text_delimiter(), detection.score(i),
|
||||
absl::StrCat(label_str, options.text_delimiter(), rounded_score,
|
||||
options.text_delimiter());
|
||||
label_and_scores.push_back(label_and_score);
|
||||
}
|
||||
std::vector<std::string> labels;
|
||||
if (options.render_detection_id()) {
|
||||
const std::string detection_id_str =
|
||||
absl::StrCat("Id: ", detection.detection_id());
|
||||
labels.push_back(detection_id_str);
|
||||
}
|
||||
if (options.one_label_per_line()) {
|
||||
labels.swap(label_and_scores);
|
||||
labels.insert(labels.end(), label_and_scores.begin(),
|
||||
label_and_scores.end());
|
||||
} else {
|
||||
labels.push_back(absl::StrJoin(label_and_scores, ""));
|
||||
}
|
||||
|
||||
// Add the render annotations for "label(_id),score".
|
||||
for (int i = 0; i < labels.size(); ++i) {
|
||||
auto label = labels.at(i);
|
||||
|
|
|
|||
|
|
@ -53,4 +53,7 @@ message DetectionsToRenderDataCalculatorOptions {
|
|||
// instances of this calculator are present in the graph, this value
|
||||
// should be unique among them.
|
||||
optional string scene_class = 7 [default = "DETECTION"];
|
||||
|
||||
// If true, renders the detection id in the first line before the labels.
|
||||
optional bool render_detection_id = 8 [default = false];
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,110 @@
|
|||
// Copyright 2019 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "mediapipe/framework/calculator_framework.h"
|
||||
#include "mediapipe/framework/formats/detection.pb.h"
|
||||
#include "mediapipe/framework/formats/location_data.pb.h"
|
||||
#include "mediapipe/framework/port/status.h"
|
||||
#include "mediapipe/util/tracking/box_tracker.h"
|
||||
|
||||
namespace mediapipe {
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr char kDetectionsTag[] = "DETECTIONS";
|
||||
constexpr char kDetectionListTag[] = "DETECTION_LIST";
|
||||
constexpr char kBoxesTag[] = "BOXES";
|
||||
|
||||
} // namespace
|
||||
|
||||
// A calculator that converts Detection proto to TimedBoxList proto for
|
||||
// tracking.
|
||||
//
|
||||
// Please note that only Location Data formats of RELATIVE_BOUNDING_BOX are
|
||||
// supported.
|
||||
//
|
||||
// Example config:
|
||||
// node {
|
||||
// calculator: "DetectionsToTimedBoxListCalculator"
|
||||
// input_stream: "DETECTIONS:detections"
|
||||
// output_stream: "BOXES:boxes"
|
||||
// }
|
||||
class DetectionsToTimedBoxListCalculator : public CalculatorBase {
|
||||
public:
|
||||
static ::mediapipe::Status GetContract(CalculatorContract* cc) {
|
||||
RET_CHECK(cc->Inputs().HasTag(kDetectionListTag) ||
|
||||
cc->Inputs().HasTag(kDetectionsTag))
|
||||
<< "None of the input streams are provided.";
|
||||
if (cc->Inputs().HasTag(kDetectionListTag)) {
|
||||
cc->Inputs().Tag(kDetectionListTag).Set<DetectionList>();
|
||||
}
|
||||
if (cc->Inputs().HasTag(kDetectionsTag)) {
|
||||
cc->Inputs().Tag(kDetectionsTag).Set<std::vector<Detection>>();
|
||||
}
|
||||
cc->Outputs().Tag(kBoxesTag).Set<TimedBoxProtoList>();
|
||||
return ::mediapipe::OkStatus();
|
||||
}
|
||||
|
||||
::mediapipe::Status Open(CalculatorContext* cc) override {
|
||||
cc->SetOffset(TimestampDiff(0));
|
||||
return ::mediapipe::OkStatus();
|
||||
}
|
||||
::mediapipe::Status Process(CalculatorContext* cc) override;
|
||||
|
||||
private:
|
||||
void ConvertDetectionToTimedBox(const Detection& detection,
|
||||
TimedBoxProto* box, CalculatorContext* cc);
|
||||
};
|
||||
REGISTER_CALCULATOR(DetectionsToTimedBoxListCalculator);
|
||||
|
||||
::mediapipe::Status DetectionsToTimedBoxListCalculator::Process(
|
||||
CalculatorContext* cc) {
|
||||
auto output_timed_box_list = absl::make_unique<TimedBoxProtoList>();
|
||||
|
||||
if (cc->Inputs().HasTag(kDetectionListTag)) {
|
||||
const auto& detection_list =
|
||||
cc->Inputs().Tag(kDetectionListTag).Get<DetectionList>();
|
||||
for (const auto& detection : detection_list.detection()) {
|
||||
TimedBoxProto* box = output_timed_box_list->add_box();
|
||||
ConvertDetectionToTimedBox(detection, box, cc);
|
||||
}
|
||||
}
|
||||
if (cc->Inputs().HasTag(kDetectionsTag)) {
|
||||
const auto& detections =
|
||||
cc->Inputs().Tag(kDetectionsTag).Get<std::vector<Detection>>();
|
||||
for (const auto& detection : detections) {
|
||||
TimedBoxProto* box = output_timed_box_list->add_box();
|
||||
ConvertDetectionToTimedBox(detection, box, cc);
|
||||
}
|
||||
}
|
||||
|
||||
cc->Outputs().Tag(kBoxesTag).Add(output_timed_box_list.release(),
|
||||
cc->InputTimestamp());
|
||||
return ::mediapipe::OkStatus();
|
||||
}
|
||||
|
||||
void DetectionsToTimedBoxListCalculator::ConvertDetectionToTimedBox(
|
||||
const Detection& detection, TimedBoxProto* box, CalculatorContext* cc) {
|
||||
const auto& relative_bounding_box =
|
||||
detection.location_data().relative_bounding_box();
|
||||
box->set_left(relative_bounding_box.xmin());
|
||||
box->set_right(relative_bounding_box.xmin() + relative_bounding_box.width());
|
||||
box->set_top(relative_bounding_box.ymin());
|
||||
box->set_bottom(relative_bounding_box.ymin() +
|
||||
relative_bounding_box.height());
|
||||
box->set_id(detection.detection_id());
|
||||
box->set_time_msec(cc->InputTimestamp().Microseconds() / 1000);
|
||||
}
|
||||
|
||||
} // namespace mediapipe
|
||||
|
|
@ -37,6 +37,67 @@ proto_library(
|
|||
deps = ["//mediapipe/framework:calculator_proto"],
|
||||
)
|
||||
|
||||
proto_library(
|
||||
name = "motion_analysis_calculator_proto",
|
||||
srcs = ["motion_analysis_calculator.proto"],
|
||||
deps = [
|
||||
"//mediapipe/framework:calculator_proto",
|
||||
"//mediapipe/util/tracking:motion_analysis_proto",
|
||||
],
|
||||
)
|
||||
|
||||
proto_library(
|
||||
name = "flow_packager_calculator_proto",
|
||||
srcs = ["flow_packager_calculator.proto"],
|
||||
deps = [
|
||||
"//mediapipe/framework:calculator_proto",
|
||||
"//mediapipe/util/tracking:flow_packager_proto",
|
||||
],
|
||||
)
|
||||
|
||||
proto_library(
|
||||
name = "box_tracker_calculator_proto",
|
||||
srcs = ["box_tracker_calculator.proto"],
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
"//mediapipe/framework:calculator_proto",
|
||||
"//mediapipe/util/tracking:box_tracker_proto",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_cc_proto_library(
|
||||
name = "motion_analysis_calculator_cc_proto",
|
||||
srcs = ["motion_analysis_calculator.proto"],
|
||||
cc_deps = [
|
||||
"//mediapipe/framework:calculator_cc_proto",
|
||||
"//mediapipe/util/tracking:motion_analysis_cc_proto",
|
||||
],
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [":motion_analysis_calculator_proto"],
|
||||
)
|
||||
|
||||
mediapipe_cc_proto_library(
|
||||
name = "flow_packager_calculator_cc_proto",
|
||||
srcs = ["flow_packager_calculator.proto"],
|
||||
cc_deps = [
|
||||
"//mediapipe/framework:calculator_cc_proto",
|
||||
"//mediapipe/util/tracking:flow_packager_cc_proto",
|
||||
],
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [":flow_packager_calculator_proto"],
|
||||
)
|
||||
|
||||
mediapipe_cc_proto_library(
|
||||
name = "box_tracker_calculator_cc_proto",
|
||||
srcs = ["box_tracker_calculator.proto"],
|
||||
cc_deps = [
|
||||
"//mediapipe/framework:calculator_cc_proto",
|
||||
"//mediapipe/util/tracking:box_tracker_cc_proto",
|
||||
],
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [":box_tracker_calculator_proto"],
|
||||
)
|
||||
|
||||
mediapipe_cc_proto_library(
|
||||
name = "flow_to_image_calculator_cc_proto",
|
||||
srcs = ["flow_to_image_calculator.proto"],
|
||||
|
|
@ -131,6 +192,94 @@ cc_library(
|
|||
alwayslink = 1,
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "motion_analysis_calculator",
|
||||
srcs = ["motion_analysis_calculator.cc"],
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
":motion_analysis_calculator_cc_proto",
|
||||
"//mediapipe/framework:calculator_framework",
|
||||
"//mediapipe/framework/formats:image_frame",
|
||||
"//mediapipe/framework/formats:image_frame_opencv",
|
||||
"//mediapipe/framework/formats:video_stream_header",
|
||||
"//mediapipe/framework/port:integral_types",
|
||||
"//mediapipe/framework/port:logging",
|
||||
"//mediapipe/framework/port:ret_check",
|
||||
"//mediapipe/framework/port:status",
|
||||
"//mediapipe/util/tracking:camera_motion",
|
||||
"//mediapipe/util/tracking:camera_motion_cc_proto",
|
||||
"//mediapipe/util/tracking:frame_selection_cc_proto",
|
||||
"//mediapipe/util/tracking:motion_analysis",
|
||||
"//mediapipe/util/tracking:motion_estimation",
|
||||
"//mediapipe/util/tracking:motion_models",
|
||||
"//mediapipe/util/tracking:region_flow_cc_proto",
|
||||
"@com_google_absl//absl/strings",
|
||||
],
|
||||
alwayslink = 1,
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "flow_packager_calculator",
|
||||
srcs = ["flow_packager_calculator.cc"],
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
":flow_packager_calculator_cc_proto",
|
||||
"//mediapipe/framework:calculator_framework",
|
||||
"//mediapipe/framework/port:integral_types",
|
||||
"//mediapipe/framework/port:logging",
|
||||
"//mediapipe/util/tracking:camera_motion_cc_proto",
|
||||
"//mediapipe/util/tracking:flow_packager",
|
||||
"//mediapipe/util/tracking:region_flow_cc_proto",
|
||||
"@com_google_absl//absl/strings",
|
||||
"@com_google_absl//absl/strings:str_format",
|
||||
],
|
||||
alwayslink = 1,
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "box_tracker_calculator",
|
||||
srcs = ["box_tracker_calculator.cc"],
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
":box_tracker_calculator_cc_proto",
|
||||
"//mediapipe/framework:calculator_framework",
|
||||
"//mediapipe/framework/formats:image_frame",
|
||||
"//mediapipe/framework/formats:image_frame_opencv",
|
||||
"//mediapipe/framework/formats:video_stream_header", # fixdeps: keep -- required for exobazel build.
|
||||
"//mediapipe/framework/port:integral_types",
|
||||
"//mediapipe/framework/port:logging",
|
||||
"//mediapipe/framework/port:parse_text_proto",
|
||||
"//mediapipe/framework/port:ret_check",
|
||||
"//mediapipe/framework/port:status",
|
||||
"//mediapipe/framework/tool:options_util",
|
||||
"//mediapipe/util/tracking",
|
||||
"//mediapipe/util/tracking:box_tracker",
|
||||
"//mediapipe/util/tracking:tracking_visualization_utilities",
|
||||
"@com_google_absl//absl/strings",
|
||||
],
|
||||
alwayslink = 1,
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "tracked_detection_manager_calculator",
|
||||
srcs = ["tracked_detection_manager_calculator.cc"],
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
"//mediapipe/framework:calculator_framework",
|
||||
"//mediapipe/framework/formats:detection_cc_proto",
|
||||
"//mediapipe/framework/formats:location_data_cc_proto",
|
||||
"//mediapipe/framework/formats:rect_cc_proto",
|
||||
"//mediapipe/framework/port:status",
|
||||
"//mediapipe/util/tracking",
|
||||
"//mediapipe/util/tracking:box_tracker",
|
||||
"//mediapipe/util/tracking:tracked_detection",
|
||||
"//mediapipe/util/tracking:tracked_detection_manager",
|
||||
"//mediapipe/util/tracking:tracking_visualization_utilities",
|
||||
"@com_google_absl//absl/container:node_hash_map",
|
||||
],
|
||||
alwayslink = 1,
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name = "test_videos",
|
||||
srcs = [
|
||||
|
|
@ -201,3 +350,64 @@ cc_test(
|
|||
"//mediapipe/framework/port:parse_text_proto",
|
||||
],
|
||||
)
|
||||
|
||||
MEDIAPIPE_DEPS = [
|
||||
"//mediapipe/calculators/video:box_tracker_calculator",
|
||||
"//mediapipe/calculators/video:flow_packager_calculator",
|
||||
"//mediapipe/calculators/video:motion_analysis_calculator",
|
||||
"//mediapipe/framework/stream_handler:fixed_size_input_stream_handler",
|
||||
"//mediapipe/framework/stream_handler:sync_set_input_stream_handler",
|
||||
]
|
||||
|
||||
mediapipe_binary_graph(
|
||||
name = "parallel_tracker_binarypb",
|
||||
graph = "testdata/parallel_tracker_graph.pbtxt",
|
||||
output_name = "testdata/parallel_tracker.binarypb",
|
||||
visibility = ["//visibility:public"],
|
||||
deps = MEDIAPIPE_DEPS,
|
||||
)
|
||||
|
||||
mediapipe_binary_graph(
|
||||
name = "tracker_binarypb",
|
||||
graph = "testdata/tracker_graph.pbtxt",
|
||||
output_name = "testdata/tracker.binarypb",
|
||||
visibility = ["//visibility:public"],
|
||||
deps = MEDIAPIPE_DEPS,
|
||||
)
|
||||
|
||||
cc_test(
|
||||
name = "tracking_graph_test",
|
||||
size = "small",
|
||||
srcs = ["tracking_graph_test.cc"],
|
||||
copts = ["-DPARALLEL_INVOKER_ACTIVE"] + select({
|
||||
"//mediapipe:apple": [],
|
||||
"//mediapipe:android": [],
|
||||
"//conditions:default": [],
|
||||
}),
|
||||
data = [
|
||||
":testdata/lenna.png",
|
||||
":testdata/parallel_tracker.binarypb",
|
||||
":testdata/tracker.binarypb",
|
||||
],
|
||||
deps = [
|
||||
":box_tracker_calculator",
|
||||
":box_tracker_calculator_cc_proto",
|
||||
":flow_packager_calculator",
|
||||
":motion_analysis_calculator",
|
||||
"//mediapipe/framework:calculator_cc_proto",
|
||||
"//mediapipe/framework:calculator_framework",
|
||||
"//mediapipe/framework:packet",
|
||||
"//mediapipe/framework/deps:file_path",
|
||||
"//mediapipe/framework/formats:image_frame",
|
||||
"//mediapipe/framework/port:advanced_proto",
|
||||
"//mediapipe/framework/port:core_proto",
|
||||
"//mediapipe/framework/port:file_helpers",
|
||||
"//mediapipe/framework/port:gtest_main",
|
||||
"//mediapipe/framework/port:opencv_highgui",
|
||||
"//mediapipe/framework/port:status",
|
||||
"//mediapipe/framework/stream_handler:fixed_size_input_stream_handler",
|
||||
"//mediapipe/framework/stream_handler:sync_set_input_stream_handler",
|
||||
"//mediapipe/util/tracking:box_tracker_cc_proto",
|
||||
"//mediapipe/util/tracking:tracking_cc_proto",
|
||||
],
|
||||
)
|
||||
|
|
|
|||
1271
mediapipe/calculators/video/box_tracker_calculator.cc
Normal file
55
mediapipe/calculators/video/box_tracker_calculator.proto
Normal file
|
|
@ -0,0 +1,55 @@
|
|||
// Copyright 2019 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
syntax = "proto2";
|
||||
|
||||
package mediapipe;
|
||||
|
||||
import "mediapipe/framework/calculator.proto";
|
||||
import "mediapipe/util/tracking/box_tracker.proto";
|
||||
|
||||
message BoxTrackerCalculatorOptions {
|
||||
extend CalculatorOptions {
|
||||
optional BoxTrackerCalculatorOptions ext = 268767860;
|
||||
}
|
||||
|
||||
optional BoxTrackerOptions tracker_options = 1;
|
||||
|
||||
// Initial position to be tracked. Can also be supplied as side packet or
|
||||
// as input stream.
|
||||
optional TimedBoxProtoList initial_position = 2;
|
||||
|
||||
// If set and VIZ stream is present, renders tracking data into the
|
||||
// visualization.
|
||||
optional bool visualize_tracking_data = 3 [default = false];
|
||||
|
||||
// If set and VIZ stream is present, renders the box state
|
||||
// into the visualization.
|
||||
optional bool visualize_state = 4 [default = false];
|
||||
|
||||
// If set and VIZ stream is present, renders the internal box state
|
||||
// into the visualization.
|
||||
optional bool visualize_internal_state = 5 [default = false];
|
||||
|
||||
// Size of the track data cache during streaming mode. This allows to buffer
|
||||
// track_data's for fast forward tracking, i.e. any TimedBox received
|
||||
// via input stream START_POS can be tracked towards the current track head
|
||||
// (i.e. last received TrackingData). Measured in number of frames.
|
||||
optional int32 streaming_track_data_cache_size = 6 [default = 0];
|
||||
|
||||
// Add a transition period of N frames to smooth the jump from original
|
||||
// tracking to reset start pos with motion compensation. The transition will
|
||||
// be a linear decay of original tracking result. 0 means no transition.
|
||||
optional int32 start_pos_transition_frames = 7 [default = 0];
|
||||
}
|
||||
281
mediapipe/calculators/video/flow_packager_calculator.cc
Normal file
|
|
@ -0,0 +1,281 @@
|
|||
// Copyright 2019 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#include <fstream>
|
||||
#include <memory>
|
||||
|
||||
#include "absl/strings/str_format.h"
|
||||
#include "absl/strings/string_view.h"
|
||||
#include "mediapipe/calculators/video/flow_packager_calculator.pb.h"
|
||||
#include "mediapipe/framework/calculator_framework.h"
|
||||
#include "mediapipe/framework/port/integral_types.h"
|
||||
#include "mediapipe/framework/port/logging.h"
|
||||
#include "mediapipe/util/tracking/camera_motion.pb.h"
|
||||
#include "mediapipe/util/tracking/flow_packager.h"
|
||||
#include "mediapipe/util/tracking/region_flow.pb.h"
|
||||
|
||||
namespace mediapipe {
|
||||
|
||||
using mediapipe::CameraMotion;
|
||||
using mediapipe::FlowPackager;
|
||||
using mediapipe::RegionFlowFeatureList;
|
||||
using mediapipe::TrackingData;
|
||||
using mediapipe::TrackingDataChunk;
|
||||
|
||||
// A calculator that packages input CameraMotion and RegionFlowFeatureList
|
||||
// into a TrackingData and optionally writes TrackingDataChunks to file.
|
||||
//
|
||||
// Input stream:
|
||||
// FLOW: Input region flow (proto RegionFlowFeatureList).
|
||||
// CAMERA: Input camera stream (proto CameraMotion, optional).
|
||||
//
|
||||
// Input side packets:
|
||||
// CACHE_DIR: Optional caching directory tracking files are written to.
|
||||
//
|
||||
// Output streams.
|
||||
// TRACKING: Output tracking data (proto TrackingData, per frame
|
||||
// optional).
|
||||
// TRACKING_CHUNK: Output tracking chunks (proto TrackingDataChunk,
|
||||
// per chunk, optional), output at the first timestamp
|
||||
// of each chunk.
|
||||
// COMPLETE: Optional output packet sent on PreStream to
|
||||
// to signal downstream calculators that all data has been
|
||||
// processed and calculator is closed. Can be used to indicate
|
||||
// that all data as been written to CACHE_DIR.
|
||||
class FlowPackagerCalculator : public CalculatorBase {
|
||||
public:
|
||||
~FlowPackagerCalculator() override = default;
|
||||
|
||||
static ::mediapipe::Status GetContract(CalculatorContract* cc);
|
||||
|
||||
::mediapipe::Status Open(CalculatorContext* cc) override;
|
||||
::mediapipe::Status Process(CalculatorContext* cc) override;
|
||||
::mediapipe::Status Close(CalculatorContext* cc) override;
|
||||
|
||||
// Writes passed chunk to disk.
|
||||
void WriteChunk(const TrackingDataChunk& chunk) const;
|
||||
|
||||
// Initializes next chunk for tracking beginning from last frame of
|
||||
// current chunk (Chunking is design with one frame overlap).
|
||||
void PrepareCurrentForNextChunk(TrackingDataChunk* chunk);
|
||||
|
||||
private:
|
||||
FlowPackagerCalculatorOptions options_;
|
||||
|
||||
// Caching options.
|
||||
bool use_caching_ = false;
|
||||
bool build_chunk_ = false;
|
||||
std::string cache_dir_;
|
||||
int chunk_idx_ = -1;
|
||||
TrackingDataChunk tracking_chunk_;
|
||||
|
||||
int frame_idx_ = 0;
|
||||
|
||||
Timestamp prev_timestamp_;
|
||||
std::unique_ptr<FlowPackager> flow_packager_;
|
||||
};
|
||||
|
||||
REGISTER_CALCULATOR(FlowPackagerCalculator);
|
||||
|
||||
::mediapipe::Status FlowPackagerCalculator::GetContract(
|
||||
CalculatorContract* cc) {
|
||||
if (!cc->Inputs().HasTag("FLOW")) {
|
||||
return tool::StatusFail("No input flow was specified.");
|
||||
}
|
||||
|
||||
cc->Inputs().Tag("FLOW").Set<RegionFlowFeatureList>();
|
||||
|
||||
if (cc->Inputs().HasTag("CAMERA")) {
|
||||
cc->Inputs().Tag("CAMERA").Set<CameraMotion>();
|
||||
}
|
||||
if (cc->Outputs().HasTag("TRACKING")) {
|
||||
cc->Outputs().Tag("TRACKING").Set<TrackingData>();
|
||||
}
|
||||
if (cc->Outputs().HasTag("TRACKING_CHUNK")) {
|
||||
cc->Outputs().Tag("TRACKING_CHUNK").Set<TrackingDataChunk>();
|
||||
}
|
||||
if (cc->Outputs().HasTag("COMPLETE")) {
|
||||
cc->Outputs().Tag("COMPLETE").Set<bool>();
|
||||
}
|
||||
|
||||
if (cc->InputSidePackets().HasTag("CACHE_DIR")) {
|
||||
cc->InputSidePackets().Tag("CACHE_DIR").Set<std::string>();
|
||||
}
|
||||
|
||||
return ::mediapipe::OkStatus();
|
||||
}
|
||||
|
||||
::mediapipe::Status FlowPackagerCalculator::Open(CalculatorContext* cc) {
|
||||
options_ = cc->Options<FlowPackagerCalculatorOptions>();
|
||||
|
||||
flow_packager_.reset(new FlowPackager(options_.flow_packager_options()));
|
||||
|
||||
use_caching_ = cc->InputSidePackets().HasTag("CACHE_DIR");
|
||||
build_chunk_ = use_caching_ || cc->Outputs().HasTag("TRACKING_CHUNK");
|
||||
if (use_caching_) {
|
||||
cache_dir_ = cc->InputSidePackets().Tag("CACHE_DIR").Get<std::string>();
|
||||
}
|
||||
|
||||
return ::mediapipe::OkStatus();
|
||||
}
|
||||
|
||||
::mediapipe::Status FlowPackagerCalculator::Process(CalculatorContext* cc) {
|
||||
InputStream* flow_stream = &(cc->Inputs().Tag("FLOW"));
|
||||
const RegionFlowFeatureList& flow = flow_stream->Get<RegionFlowFeatureList>();
|
||||
|
||||
const Timestamp timestamp = flow_stream->Value().Timestamp();
|
||||
|
||||
const CameraMotion* camera_motion = nullptr;
|
||||
if (cc->Inputs().HasTag("CAMERA")) {
|
||||
InputStream* camera_stream = &(cc->Inputs().Tag("CAMERA"));
|
||||
camera_motion = &camera_stream->Get<CameraMotion>();
|
||||
}
|
||||
|
||||
std::unique_ptr<TrackingData> tracking_data(new TrackingData());
|
||||
|
||||
flow_packager_->PackFlow(flow, camera_motion, tracking_data.get());
|
||||
|
||||
if (build_chunk_) {
|
||||
if (chunk_idx_ < 0) { // Lazy init, determine first start.
|
||||
chunk_idx_ =
|
||||
timestamp.Value() / 1000 / options_.caching_chunk_size_msec();
|
||||
tracking_chunk_.set_first_chunk(true);
|
||||
}
|
||||
CHECK_GE(chunk_idx_, 0);
|
||||
|
||||
TrackingDataChunk::Item* item = tracking_chunk_.add_item();
|
||||
item->set_frame_idx(frame_idx_);
|
||||
item->set_timestamp_usec(timestamp.Value());
|
||||
if (frame_idx_ > 0) {
|
||||
item->set_prev_timestamp_usec(prev_timestamp_.Value());
|
||||
}
|
||||
if (cc->Outputs().HasTag("TRACKING")) {
|
||||
// Need to copy as output is requested.
|
||||
*item->mutable_tracking_data() = *tracking_data;
|
||||
} else {
|
||||
item->mutable_tracking_data()->Swap(tracking_data.get());
|
||||
}
|
||||
|
||||
const int next_chunk_msec =
|
||||
options_.caching_chunk_size_msec() * (chunk_idx_ + 1);
|
||||
|
||||
if (timestamp.Value() / 1000 >= next_chunk_msec) {
|
||||
if (cc->Outputs().HasTag("TRACKING_CHUNK")) {
|
||||
cc->Outputs()
|
||||
.Tag("TRACKING_CHUNK")
|
||||
.Add(new TrackingDataChunk(tracking_chunk_),
|
||||
Timestamp(tracking_chunk_.item(0).timestamp_usec()));
|
||||
}
|
||||
if (use_caching_) {
|
||||
WriteChunk(tracking_chunk_);
|
||||
}
|
||||
PrepareCurrentForNextChunk(&tracking_chunk_);
|
||||
}
|
||||
}
|
||||
|
||||
if (cc->Outputs().HasTag("TRACKING")) {
|
||||
cc->Outputs()
|
||||
.Tag("TRACKING")
|
||||
.Add(tracking_data.release(), flow_stream->Value().Timestamp());
|
||||
}
|
||||
|
||||
prev_timestamp_ = timestamp;
|
||||
++frame_idx_;
|
||||
return ::mediapipe::OkStatus();
|
||||
}
|
||||
|
||||
::mediapipe::Status FlowPackagerCalculator::Close(CalculatorContext* cc) {
|
||||
if (frame_idx_ > 0) {
|
||||
tracking_chunk_.set_last_chunk(true);
|
||||
if (cc->Outputs().HasTag("TRACKING_CHUNK")) {
|
||||
cc->Outputs()
|
||||
.Tag("TRACKING_CHUNK")
|
||||
.Add(new TrackingDataChunk(tracking_chunk_),
|
||||
Timestamp(tracking_chunk_.item(0).timestamp_usec()));
|
||||
}
|
||||
|
||||
if (use_caching_) {
|
||||
WriteChunk(tracking_chunk_);
|
||||
}
|
||||
}
|
||||
|
||||
if (cc->Outputs().HasTag("COMPLETE")) {
|
||||
cc->Outputs().Tag("COMPLETE").Add(new bool(true), Timestamp::PreStream());
|
||||
}
|
||||
|
||||
return ::mediapipe::OkStatus();
|
||||
}
|
||||
|
||||
void FlowPackagerCalculator::WriteChunk(const TrackingDataChunk& chunk) const {
|
||||
if (chunk.item_size() == 0) {
|
||||
LOG(ERROR) << "Write chunk called with empty tracking data."
|
||||
<< "This can only occur if the spacing between frames "
|
||||
<< "is larger than the requested chunk size. Try increasing "
|
||||
<< "the chunk size";
|
||||
return;
|
||||
}
|
||||
|
||||
auto format_runtime =
|
||||
absl::ParsedFormat<'d'>::New(options_.cache_file_format());
|
||||
|
||||
std::string chunk_file;
|
||||
if (format_runtime) {
|
||||
chunk_file =
|
||||
cache_dir_ + "/" + absl::StrFormat(*format_runtime, chunk_idx_);
|
||||
} else {
|
||||
LOG(ERROR) << "chache_file_format wrong. fall back to chunk_%04d.";
|
||||
chunk_file = cache_dir_ + "/" + absl::StrFormat("chunk_%04d", chunk_idx_);
|
||||
}
|
||||
|
||||
std::string data;
|
||||
chunk.SerializeToString(&data);
|
||||
|
||||
const char* temp_filename = tempnam(cache_dir_.c_str(), nullptr);
|
||||
std::ofstream out_file(temp_filename);
|
||||
if (!out_file) {
|
||||
LOG(ERROR) << "Could not open " << temp_filename;
|
||||
} else {
|
||||
out_file.write(data.data(), data.size());
|
||||
}
|
||||
|
||||
if (rename(temp_filename, chunk_file.c_str()) != 0) {
|
||||
LOG(ERROR) << "Failed to rename to " << chunk_file;
|
||||
}
|
||||
|
||||
LOG(INFO) << "Wrote chunk : " << chunk_file;
|
||||
}
|
||||
|
||||
void FlowPackagerCalculator::PrepareCurrentForNextChunk(
|
||||
TrackingDataChunk* chunk) {
|
||||
CHECK(chunk);
|
||||
if (chunk->item_size() == 0) {
|
||||
LOG(ERROR) << "Called with empty chunk. Unexpected.";
|
||||
return;
|
||||
}
|
||||
|
||||
chunk->set_first_chunk(false);
|
||||
|
||||
// Buffer last item for next chunk.
|
||||
TrackingDataChunk::Item last_item;
|
||||
last_item.Swap(chunk->mutable_item(chunk->item_size() - 1));
|
||||
|
||||
chunk->Clear();
|
||||
chunk->add_item()->Swap(&last_item);
|
||||
|
||||
++chunk_idx_;
|
||||
}
|
||||
|
||||
} // namespace mediapipe
|
||||
36
mediapipe/calculators/video/flow_packager_calculator.proto
Normal file
|
|
@ -0,0 +1,36 @@
|
|||
// Copyright 2019 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
syntax = "proto2";
|
||||
|
||||
package mediapipe;
|
||||
|
||||
import "mediapipe/framework/calculator.proto";
|
||||
import "mediapipe/util/tracking/flow_packager.proto";
|
||||
|
||||
message FlowPackagerCalculatorOptions {
|
||||
extend CalculatorOptions {
|
||||
optional FlowPackagerCalculatorOptions ext = 271236147;
|
||||
}
|
||||
|
||||
optional mediapipe.FlowPackagerOptions flow_packager_options = 1;
|
||||
|
||||
// Chunk size for caching files that are written to the externally specified
|
||||
// caching directory. Specified in msec.
|
||||
// Note that each chunk always contains at its end the first frame of the
|
||||
// next chunk (to enable forward tracking across chunk boundaries).
|
||||
optional int32 caching_chunk_size_msec = 2 [default = 2500];
|
||||
|
||||
optional string cache_file_format = 3 [default = "chunk_%04d"];
|
||||
}
|
||||
988
mediapipe/calculators/video/motion_analysis_calculator.cc
Normal file
|
|
@ -0,0 +1,988 @@
|
|||
// Copyright 2019 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <cmath>
|
||||
#include <fstream>
|
||||
#include <memory>
|
||||
|
||||
#include "absl/strings/numbers.h"
|
||||
#include "absl/strings/str_split.h"
|
||||
#include "absl/strings/string_view.h"
|
||||
#include "mediapipe/calculators/video/motion_analysis_calculator.pb.h"
|
||||
#include "mediapipe/framework/calculator_framework.h"
|
||||
#include "mediapipe/framework/formats/image_frame.h"
|
||||
#include "mediapipe/framework/formats/image_frame_opencv.h"
|
||||
#include "mediapipe/framework/formats/video_stream_header.h"
|
||||
#include "mediapipe/framework/port/integral_types.h"
|
||||
#include "mediapipe/framework/port/logging.h"
|
||||
#include "mediapipe/framework/port/ret_check.h"
|
||||
#include "mediapipe/framework/port/status.h"
|
||||
#include "mediapipe/util/tracking/camera_motion.h"
|
||||
#include "mediapipe/util/tracking/camera_motion.pb.h"
|
||||
#include "mediapipe/util/tracking/frame_selection.pb.h"
|
||||
#include "mediapipe/util/tracking/motion_analysis.h"
|
||||
#include "mediapipe/util/tracking/motion_estimation.h"
|
||||
#include "mediapipe/util/tracking/motion_models.h"
|
||||
#include "mediapipe/util/tracking/region_flow.pb.h"
|
||||
|
||||
namespace mediapipe {
|
||||
|
||||
using mediapipe::AffineAdapter;
|
||||
using mediapipe::CameraMotion;
|
||||
using mediapipe::FrameSelectionResult;
|
||||
using mediapipe::Homography;
|
||||
using mediapipe::HomographyAdapter;
|
||||
using mediapipe::LinearSimilarityModel;
|
||||
using mediapipe::MixtureHomography;
|
||||
using mediapipe::MixtureRowWeights;
|
||||
using mediapipe::MotionAnalysis;
|
||||
using mediapipe::ProjectViaFit;
|
||||
using mediapipe::RegionFlowComputationOptions;
|
||||
using mediapipe::RegionFlowFeatureList;
|
||||
using mediapipe::SalientPointFrame;
|
||||
using mediapipe::TranslationModel;
|
||||
|
||||
const char kOptionsTag[] = "OPTIONS";
|
||||
|
||||
// A calculator that performs motion analysis on an incoming video stream.
|
||||
//
|
||||
// Input streams: (at least one of them is required).
|
||||
// VIDEO: The input video stream (ImageFrame, sRGB, sRGBA or GRAY8).
|
||||
// SELECTION: Optional input stream to perform analysis only on selected
|
||||
// frames. If present needs to contain camera motion
|
||||
// and features.
|
||||
//
|
||||
// Input side packets:
|
||||
// CSV_FILE: Read motion models as homographies from CSV file. Expected
|
||||
// to be defined in the frame domain (un-normalized).
|
||||
// Should store 9 floats per row.
|
||||
// Specify number of homographies per frames via option
|
||||
// meta_models_per_frame. For values > 1, MixtureHomographies
|
||||
// are created, for value == 1, a single Homography is used.
|
||||
// DOWNSAMPLE: Optionally specify downsampling factor via input side packet
|
||||
// overriding value in the graph settings.
|
||||
// Output streams (all are optional).
|
||||
// FLOW: Sparse feature tracks in form of proto RegionFlowFeatureList.
|
||||
// CAMERA: Camera motion as proto CameraMotion describing the per frame-
|
||||
// pair motion. Has VideoHeader from input video.
|
||||
// SALIENCY: Foreground saliency (objects moving different from the
|
||||
// background) as proto SalientPointFrame.
|
||||
// VIZ: Visualization stream as ImageFrame, sRGB, visualizing
|
||||
// features and saliency (set via
|
||||
// analysis_options().visualization_options())
|
||||
// DENSE_FG: Dense foreground stream, describing per-pixel foreground-
|
||||
// ness as confidence between 0 (background) and 255
|
||||
// (foreground). Output is ImageFrame (GRAY8).
|
||||
// VIDEO_OUT: Optional output stream when SELECTION is used. Output is input
|
||||
// VIDEO at the selected frames. Required VIDEO to be present.
|
||||
// GRAY_VIDEO_OUT: Optional output stream for downsampled, grayscale video.
|
||||
// Requires VIDEO to be present and SELECTION to not be used.
|
||||
class MotionAnalysisCalculator : public CalculatorBase {
|
||||
// TODO: Activate once leakr approval is ready.
|
||||
// typedef com::google::android::libraries::micro::proto::Data HomographyData;
|
||||
|
||||
public:
|
||||
~MotionAnalysisCalculator() override = default;
|
||||
|
||||
static ::mediapipe::Status GetContract(CalculatorContract* cc);
|
||||
|
||||
::mediapipe::Status Open(CalculatorContext* cc) override;
|
||||
::mediapipe::Status Process(CalculatorContext* cc) override;
|
||||
::mediapipe::Status Close(CalculatorContext* cc) override;
|
||||
|
||||
private:
|
||||
// Outputs results to Outputs() if MotionAnalysis buffered sufficient results.
|
||||
// Otherwise no-op. Set flush to true to force output of all buffered data.
|
||||
void OutputMotionAnalyzedFrames(bool flush, CalculatorContext* cc);
|
||||
|
||||
// Lazy init function to be called on Process.
|
||||
::mediapipe::Status InitOnProcess(InputStream* video_stream,
|
||||
InputStream* selection_stream);
|
||||
|
||||
// Parses CSV file contents to homographies.
|
||||
bool ParseModelCSV(const std::string& contents,
|
||||
std::deque<Homography>* homographies);
|
||||
|
||||
// Turns list of 9-tuple floating values into set of homographies.
|
||||
bool HomographiesFromValues(const std::vector<float>& homog_values,
|
||||
std::deque<Homography>* homographies);
|
||||
|
||||
// Appends CameraMotions and features from homographies.
|
||||
// Set append_identity to true to add an identity transform to the beginning
|
||||
// of the each list *in addition* to the motions derived from homographies.
|
||||
void AppendCameraMotionsFromHomographies(
|
||||
const std::deque<Homography>& homographies, bool append_identity,
|
||||
std::deque<CameraMotion>* camera_motions,
|
||||
std::deque<RegionFlowFeatureList>* features);
|
||||
|
||||
// Helper function to subtract current metadata motion from features. Used
|
||||
// for hybrid estimation case.
|
||||
void SubtractMetaMotion(const CameraMotion& meta_motion,
|
||||
RegionFlowFeatureList* features);
|
||||
|
||||
// Inverse of above function to add back meta motion and replace
|
||||
// feature location with originals after estimation.
|
||||
void AddMetaMotion(const CameraMotion& meta_motion,
|
||||
const RegionFlowFeatureList& meta_features,
|
||||
RegionFlowFeatureList* features, CameraMotion* motion);
|
||||
|
||||
MotionAnalysisCalculatorOptions options_;
|
||||
int frame_width_ = -1;
|
||||
int frame_height_ = -1;
|
||||
int frame_idx_ = 0;
|
||||
|
||||
// Buffers incoming video frame packets (if visualization output is requested)
|
||||
std::vector<Packet> packet_buffer_;
|
||||
|
||||
// Buffers incoming timestamps until MotionAnalysis is ready to output via
|
||||
// above OutputMotionAnalyzedFrames.
|
||||
std::vector<Timestamp> timestamp_buffer_;
|
||||
|
||||
// Input indicators for each stream.
|
||||
bool selection_input_ = false;
|
||||
bool video_input_ = false;
|
||||
|
||||
// Output indicators for each stream.
|
||||
bool region_flow_feature_output_ = false;
|
||||
bool camera_motion_output_ = false;
|
||||
bool saliency_output_ = false;
|
||||
bool visualize_output_ = false;
|
||||
bool dense_foreground_output_ = false;
|
||||
bool video_output_ = false;
|
||||
bool grayscale_output_ = false;
|
||||
bool csv_file_input_ = false;
|
||||
|
||||
// Inidicates if saliency should be computed.
|
||||
bool with_saliency_ = false;
|
||||
|
||||
// Set if hybrid meta analysis - see proto for details.
|
||||
bool hybrid_meta_analysis_ = false;
|
||||
|
||||
// Concatenated motions for each selected frame. Used in case
|
||||
// hybrid estimation is requested to fallback to valid models.
|
||||
std::deque<CameraMotion> selected_motions_;
|
||||
|
||||
// Normalized homographies from CSV file or metadata.
|
||||
std::deque<Homography> meta_homographies_;
|
||||
std::deque<CameraMotion> meta_motions_;
|
||||
std::deque<RegionFlowFeatureList> meta_features_;
|
||||
|
||||
// Offset into above meta_motions_ and features_ when using
|
||||
// hybrid meta analysis.
|
||||
int hybrid_meta_offset_ = 0;
|
||||
|
||||
std::unique_ptr<MotionAnalysis> motion_analysis_;
|
||||
|
||||
std::unique_ptr<MixtureRowWeights> row_weights_;
|
||||
};
|
||||
|
||||
REGISTER_CALCULATOR(MotionAnalysisCalculator);
|
||||
|
||||
::mediapipe::Status MotionAnalysisCalculator::GetContract(
|
||||
CalculatorContract* cc) {
|
||||
if (cc->Inputs().HasTag("VIDEO")) {
|
||||
cc->Inputs().Tag("VIDEO").Set<ImageFrame>();
|
||||
}
|
||||
|
||||
// Optional input stream from frame selection calculator.
|
||||
if (cc->Inputs().HasTag("SELECTION")) {
|
||||
cc->Inputs().Tag("SELECTION").Set<FrameSelectionResult>();
|
||||
}
|
||||
|
||||
RET_CHECK(cc->Inputs().HasTag("VIDEO") || cc->Inputs().HasTag("SELECTION"))
|
||||
<< "Either VIDEO, SELECTION must be specified.";
|
||||
|
||||
if (cc->Outputs().HasTag("FLOW")) {
|
||||
cc->Outputs().Tag("FLOW").Set<RegionFlowFeatureList>();
|
||||
}
|
||||
|
||||
if (cc->Outputs().HasTag("CAMERA")) {
|
||||
cc->Outputs().Tag("CAMERA").Set<CameraMotion>();
|
||||
}
|
||||
|
||||
if (cc->Outputs().HasTag("SALIENCY")) {
|
||||
cc->Outputs().Tag("SALIENCY").Set<SalientPointFrame>();
|
||||
}
|
||||
|
||||
if (cc->Outputs().HasTag("VIZ")) {
|
||||
cc->Outputs().Tag("VIZ").Set<ImageFrame>();
|
||||
}
|
||||
|
||||
if (cc->Outputs().HasTag("DENSE_FG")) {
|
||||
cc->Outputs().Tag("DENSE_FG").Set<ImageFrame>();
|
||||
}
|
||||
|
||||
if (cc->Outputs().HasTag("VIDEO_OUT")) {
|
||||
cc->Outputs().Tag("VIDEO_OUT").Set<ImageFrame>();
|
||||
}
|
||||
|
||||
if (cc->Outputs().HasTag("GRAY_VIDEO_OUT")) {
|
||||
// We only output grayscale video if we're actually performing full region-
|
||||
// flow analysis on the video.
|
||||
RET_CHECK(cc->Inputs().HasTag("VIDEO") &&
|
||||
!cc->Inputs().HasTag("SELECTION"));
|
||||
cc->Outputs().Tag("GRAY_VIDEO_OUT").Set<ImageFrame>();
|
||||
}
|
||||
|
||||
if (cc->InputSidePackets().HasTag("CSV_FILE")) {
|
||||
cc->InputSidePackets().Tag("CSV_FILE").Set<std::string>();
|
||||
}
|
||||
if (cc->InputSidePackets().HasTag("DOWNSAMPLE")) {
|
||||
cc->InputSidePackets().Tag("DOWNSAMPLE").Set<float>();
|
||||
}
|
||||
|
||||
if (cc->InputSidePackets().HasTag(kOptionsTag)) {
|
||||
cc->InputSidePackets().Tag(kOptionsTag).Set<CalculatorOptions>();
|
||||
}
|
||||
|
||||
return ::mediapipe::OkStatus();
|
||||
}
|
||||
|
||||
::mediapipe::Status MotionAnalysisCalculator::Open(CalculatorContext* cc) {
|
||||
options_ =
|
||||
tool::RetrieveOptions(cc->Options<MotionAnalysisCalculatorOptions>(),
|
||||
cc->InputSidePackets(), kOptionsTag);
|
||||
|
||||
video_input_ = cc->Inputs().HasTag("VIDEO");
|
||||
selection_input_ = cc->Inputs().HasTag("SELECTION");
|
||||
region_flow_feature_output_ = cc->Outputs().HasTag("FLOW");
|
||||
camera_motion_output_ = cc->Outputs().HasTag("CAMERA");
|
||||
saliency_output_ = cc->Outputs().HasTag("SALIENCY");
|
||||
visualize_output_ = cc->Outputs().HasTag("VIZ");
|
||||
dense_foreground_output_ = cc->Outputs().HasTag("DENSE_FG");
|
||||
video_output_ = cc->Outputs().HasTag("VIDEO_OUT");
|
||||
grayscale_output_ = cc->Outputs().HasTag("GRAY_VIDEO_OUT");
|
||||
csv_file_input_ = cc->InputSidePackets().HasTag("CSV_FILE");
|
||||
hybrid_meta_analysis_ = options_.meta_analysis() ==
|
||||
MotionAnalysisCalculatorOptions::META_ANALYSIS_HYBRID;
|
||||
|
||||
if (video_output_) {
|
||||
RET_CHECK(selection_input_) << "VIDEO_OUT requires SELECTION input";
|
||||
}
|
||||
|
||||
if (selection_input_) {
|
||||
switch (options_.selection_analysis()) {
|
||||
case MotionAnalysisCalculatorOptions::NO_ANALYSIS_USE_SELECTION:
|
||||
RET_CHECK(!visualize_output_)
|
||||
<< "Visualization not supported for NO_ANALYSIS_USE_SELECTION";
|
||||
RET_CHECK(!dense_foreground_output_)
|
||||
<< "Dense foreground not supported for NO_ANALYSIS_USE_SELECTION";
|
||||
RET_CHECK(!saliency_output_)
|
||||
<< "Saliency output not supported for NO_ANALYSIS_USE_SELECTION";
|
||||
break;
|
||||
|
||||
case MotionAnalysisCalculatorOptions::ANALYSIS_RECOMPUTE:
|
||||
case MotionAnalysisCalculatorOptions::ANALYSIS_WITH_SEED:
|
||||
RET_CHECK(video_input_) << "Need video input for feature tracking.";
|
||||
break;
|
||||
|
||||
case MotionAnalysisCalculatorOptions::ANALYSIS_FROM_FEATURES:
|
||||
// Nothing to add here.
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (visualize_output_ || dense_foreground_output_ || video_output_) {
|
||||
RET_CHECK(video_input_) << "Video input required.";
|
||||
}
|
||||
|
||||
if (csv_file_input_) {
|
||||
RET_CHECK(!selection_input_)
|
||||
<< "Can not use selection input with csv input.";
|
||||
if (!hybrid_meta_analysis_) {
|
||||
RET_CHECK(!saliency_output_ && !visualize_output_ &&
|
||||
!dense_foreground_output_ && !grayscale_output_)
|
||||
<< "CSV file and meta input only supports flow and camera motion "
|
||||
<< "output when using metadata only.";
|
||||
}
|
||||
}
|
||||
|
||||
if (csv_file_input_) {
|
||||
// Read from file and parse.
|
||||
const std::string filename =
|
||||
cc->InputSidePackets().Tag("CSV_FILE").Get<std::string>();
|
||||
|
||||
std::string file_contents;
|
||||
std::ifstream input_file(filename, std::ios::in);
|
||||
input_file.seekg(0, std::ios::end);
|
||||
const int file_length = input_file.tellg();
|
||||
file_contents.resize(file_length);
|
||||
input_file.seekg(0, std::ios::beg);
|
||||
input_file.read(&file_contents[0], file_length);
|
||||
input_file.close();
|
||||
|
||||
RET_CHECK(ParseModelCSV(file_contents, &meta_homographies_))
|
||||
<< "Could not parse CSV file";
|
||||
}
|
||||
|
||||
// Get video header from video or selection input if present.
|
||||
const VideoHeader* video_header = nullptr;
|
||||
if (video_input_ && !cc->Inputs().Tag("VIDEO").Header().IsEmpty()) {
|
||||
video_header = &(cc->Inputs().Tag("VIDEO").Header().Get<VideoHeader>());
|
||||
} else if (selection_input_ &&
|
||||
!cc->Inputs().Tag("SELECTION").Header().IsEmpty()) {
|
||||
video_header = &(cc->Inputs().Tag("SELECTION").Header().Get<VideoHeader>());
|
||||
} else {
|
||||
LOG(WARNING) << "No input video header found. Downstream calculators "
|
||||
"expecting video headers are likely to fail.";
|
||||
}
|
||||
|
||||
with_saliency_ = options_.analysis_options().compute_motion_saliency();
|
||||
// Force computation of saliency if requested as output.
|
||||
if (cc->Outputs().HasTag("SALIENCY")) {
|
||||
with_saliency_ = true;
|
||||
if (!options_.analysis_options().compute_motion_saliency()) {
|
||||
LOG(WARNING) << "Enable saliency computation. Set "
|
||||
<< "compute_motion_saliency to true to silence this "
|
||||
<< "warning.";
|
||||
options_.mutable_analysis_options()->set_compute_motion_saliency(true);
|
||||
}
|
||||
}
|
||||
|
||||
if (options_.bypass_mode()) {
|
||||
cc->SetOffset(TimestampDiff(0));
|
||||
}
|
||||
|
||||
if (cc->InputSidePackets().HasTag("DOWNSAMPLE")) {
|
||||
options_.mutable_analysis_options()
|
||||
->mutable_flow_options()
|
||||
->set_downsample_factor(
|
||||
cc->InputSidePackets().Tag("DOWNSAMPLE").Get<float>());
|
||||
}
|
||||
|
||||
// If no video header is provided, just return and initialize on the first
|
||||
// Process() call.
|
||||
if (video_header == nullptr) {
|
||||
return ::mediapipe::OkStatus();
|
||||
}
|
||||
|
||||
////////////// EARLY RETURN; ONLY HEADER OUTPUT SHOULD GO HERE ///////////////
|
||||
|
||||
if (visualize_output_) {
|
||||
cc->Outputs().Tag("VIZ").SetHeader(Adopt(new VideoHeader(*video_header)));
|
||||
}
|
||||
|
||||
if (video_output_) {
|
||||
cc->Outputs()
|
||||
.Tag("VIDEO_OUT")
|
||||
.SetHeader(Adopt(new VideoHeader(*video_header)));
|
||||
}
|
||||
|
||||
if (cc->Outputs().HasTag("DENSE_FG")) {
|
||||
std::unique_ptr<VideoHeader> foreground_header(
|
||||
new VideoHeader(*video_header));
|
||||
foreground_header->format = ImageFormat::GRAY8;
|
||||
cc->Outputs().Tag("DENSE_FG").SetHeader(Adopt(foreground_header.release()));
|
||||
}
|
||||
|
||||
if (cc->Outputs().HasTag("CAMERA")) {
|
||||
cc->Outputs().Tag("CAMERA").SetHeader(
|
||||
Adopt(new VideoHeader(*video_header)));
|
||||
}
|
||||
|
||||
if (cc->Outputs().HasTag("SALIENCY")) {
|
||||
cc->Outputs()
|
||||
.Tag("SALIENCY")
|
||||
.SetHeader(Adopt(new VideoHeader(*video_header)));
|
||||
}
|
||||
|
||||
return ::mediapipe::OkStatus();
|
||||
}
|
||||
|
||||
// Per-frame processing. Depending on configuration, either forwards
// metadata-derived motions directly (CSV input, non-hybrid), forwards
// frame-selection results, or feeds frames/features into MotionAnalysis and
// emits any results that have become available.
::mediapipe::Status MotionAnalysisCalculator::Process(CalculatorContext* cc) {
  // Bypass mode: skip all processing and emit no packets.
  if (options_.bypass_mode()) {
    return ::mediapipe::OkStatus();
  }

  InputStream* video_stream =
      video_input_ ? &(cc->Inputs().Tag("VIDEO")) : nullptr;
  InputStream* selection_stream =
      selection_input_ ? &(cc->Inputs().Tag("SELECTION")) : nullptr;

  // Checked on Open.
  CHECK(video_stream || selection_stream);

  // Lazy init: frame dimensions were unknown at Open() (no video header), so
  // derive them from the first packet.
  if (frame_width_ < 0 || frame_height_ < 0) {
    MP_RETURN_IF_ERROR(InitOnProcess(video_stream, selection_stream));
  }

  const Timestamp timestamp = cc->InputTimestamp();

  // Pure metadata mode: pop pre-parsed motions/features from the CSV-derived
  // queues and emit them with the current input timestamp. No visual analysis.
  if ((csv_file_input_) && !hybrid_meta_analysis_) {
    if (camera_motion_output_) {
      RET_CHECK(!meta_motions_.empty()) << "Insufficient metadata.";

      CameraMotion output_motion = meta_motions_.front();
      meta_motions_.pop_front();
      output_motion.set_timestamp_usec(timestamp.Value());
      cc->Outputs().Tag("CAMERA").Add(new CameraMotion(output_motion),
                                      timestamp);
    }

    if (region_flow_feature_output_) {
      RET_CHECK(!meta_features_.empty()) << "Insufficient frames in CSV file";
      RegionFlowFeatureList output_features = meta_features_.front();
      meta_features_.pop_front();

      output_features.set_timestamp_usec(timestamp.Value());
      cc->Outputs().Tag("FLOW").Add(new RegionFlowFeatureList(output_features),
                                    timestamp);
    }

    ++frame_idx_;
    return ::mediapipe::OkStatus();
  }

  if (motion_analysis_ == nullptr) {
    // We do not need MotionAnalysis when using just metadata.
    motion_analysis_.reset(new MotionAnalysis(options_.analysis_options(),
                                              frame_width_, frame_height_));
  }

  std::unique_ptr<FrameSelectionResult> frame_selection_result;
  // Always use frame if selection is not activated.
  bool use_frame = !selection_input_;
  if (selection_input_) {
    CHECK(selection_stream);

    // Fill in timestamps we process.
    if (!selection_stream->Value().IsEmpty()) {
      ASSIGN_OR_RETURN(
          frame_selection_result,
          selection_stream->Value().ConsumeOrCopy<FrameSelectionResult>());
      use_frame = true;

      // Make sure both features and camera motion are present.
      RET_CHECK(frame_selection_result->has_camera_motion() &&
                frame_selection_result->has_features())
          << "Frame selection input error at: " << timestamp
          << " both camera motion and features need to be "
             "present in FrameSelectionResult. "
          << frame_selection_result->has_camera_motion() << " , "
          << frame_selection_result->has_features();
    }
  }

  // NO_ANALYSIS_USE_SELECTION: pass the selection's precomputed camera motion
  // and features straight through; nothing is recomputed.
  if (selection_input_ && use_frame &&
      options_.selection_analysis() ==
          MotionAnalysisCalculatorOptions::NO_ANALYSIS_USE_SELECTION) {
    // Output concatenated results, nothing to compute here.
    if (camera_motion_output_) {
      cc->Outputs().Tag("CAMERA").Add(
          frame_selection_result->release_camera_motion(), timestamp);
    }
    if (region_flow_feature_output_) {
      cc->Outputs().Tag("FLOW").Add(frame_selection_result->release_features(),
                                    timestamp);
    }

    if (video_output_) {
      cc->Outputs().Tag("VIDEO_OUT").AddPacket(video_stream->Value());
    }

    return ::mediapipe::OkStatus();
  }

  if (use_frame) {
    if (!selection_input_) {
      const cv::Mat input_view =
          formats::MatView(&video_stream->Get<ImageFrame>());
      if (hybrid_meta_analysis_) {
        // Seed with meta homography: metadata motion is subtracted from the
        // tracked features (via SubtractMetaMotion) so analysis only fits the
        // visual residual. Consumed entries are accounted for via
        // hybrid_meta_offset_ and popped later in OutputMotionAnalyzedFrames.
        RET_CHECK(hybrid_meta_offset_ < meta_motions_.size())
            << "Not enough metadata received for hybrid meta analysis";
        Homography initial_transform =
            meta_motions_[hybrid_meta_offset_].homography();
        std::function<void(RegionFlowFeatureList*)> subtract_helper = std::bind(
            &MotionAnalysisCalculator::SubtractMetaMotion, this,
            meta_motions_[hybrid_meta_offset_], std::placeholders::_1);

        // Keep original features before modification around.
        motion_analysis_->AddFrameGeneric(
            input_view, timestamp.Value(), initial_transform, nullptr, nullptr,
            &subtract_helper, &meta_features_[hybrid_meta_offset_]);
        ++hybrid_meta_offset_;
      } else {
        motion_analysis_->AddFrame(input_view, timestamp.Value());
      }
    } else {
      // Selection input present: remember the selection's motion so it can be
      // compared/merged with the recomputed one when results are emitted.
      selected_motions_.push_back(frame_selection_result->camera_motion());
      switch (options_.selection_analysis()) {
        case MotionAnalysisCalculatorOptions::NO_ANALYSIS_USE_SELECTION:
          // Fully handled by the early-return branch above.
          return ::mediapipe::UnknownErrorBuilder(MEDIAPIPE_LOC)
                 << "Should not reach this point!";

        case MotionAnalysisCalculatorOptions::ANALYSIS_FROM_FEATURES:
          // Re-estimate motion from supplied features; no tracking.
          motion_analysis_->AddFeatures(frame_selection_result->features());
          break;

        case MotionAnalysisCalculatorOptions::ANALYSIS_RECOMPUTE: {
          // Full re-tracking and re-estimation from pixels.
          const cv::Mat input_view =
              formats::MatView(&video_stream->Get<ImageFrame>());
          motion_analysis_->AddFrame(input_view, timestamp.Value());
          break;
        }

        case MotionAnalysisCalculatorOptions::ANALYSIS_WITH_SEED: {
          // Re-track, seeding the initial transform from the selection's
          // camera motion.
          Homography homography;
          CameraMotionToHomography(frame_selection_result->camera_motion(),
                                   &homography);
          const cv::Mat input_view =
              formats::MatView(&video_stream->Get<ImageFrame>());
          motion_analysis_->AddFrameGeneric(input_view, timestamp.Value(),
                                            homography, &homography);
          break;
        }
      }
    }

    timestamp_buffer_.push_back(timestamp);
    ++frame_idx_;

    // Periodic progress logging.
    VLOG_EVERY_N(0, 100) << "Analyzed frame " << frame_idx_;

    // Buffer input frames only if visualization is requested.
    if (visualize_output_ || video_output_) {
      packet_buffer_.push_back(video_stream->Value());
    }

    // If requested, output grayscale thumbnails
    if (grayscale_output_) {
      cv::Mat grayscale_mat = motion_analysis_->GetGrayscaleFrameFromResults();
      std::unique_ptr<ImageFrame> grayscale_image(new ImageFrame(
          ImageFormat::GRAY8, grayscale_mat.cols, grayscale_mat.rows));
      cv::Mat image_frame_mat = formats::MatView(grayscale_image.get());
      grayscale_mat.copyTo(image_frame_mat);

      cc->Outputs()
          .Tag("GRAY_VIDEO_OUT")
          .Add(grayscale_image.release(), timestamp);
    }

    // Output other results, if we have any yet. MotionAnalysis buffers
    // internally, so results may lag the current frame.
    OutputMotionAnalyzedFrames(false, cc);
  }

  return ::mediapipe::OkStatus();
}
|
||||
|
||||
// Flushes all remaining buffered analysis results and reports leftover
// (unconsumed) CSV metadata as an error.
::mediapipe::Status MotionAnalysisCalculator::Close(CalculatorContext* cc) {
  // Guard against empty videos: the analyzer is created lazily, so it may
  // never have been instantiated.
  if (motion_analysis_ != nullptr) {
    OutputMotionAnalyzedFrames(true, cc);
  }

  // With CSV metadata input, every parsed motion should have been consumed.
  if (csv_file_input_ && !meta_motions_.empty()) {
    LOG(ERROR) << "More motions than frames. Unexpected! Remainder: "
               << meta_motions_.size();
  }

  return ::mediapipe::OkStatus();
}
|
||||
|
||||
// Drains completed results from |motion_analysis_| and emits them on every
// requested output stream (VIZ, DENSE_FG, FLOW, CAMERA, VIDEO_OUT, SALIENCY).
// When |flush| is true (called from Close()), forces the analyzer to emit
// results for all frames still buffered internally.
void MotionAnalysisCalculator::OutputMotionAnalyzedFrames(
    bool flush, CalculatorContext* cc) {
  std::vector<std::unique_ptr<RegionFlowFeatureList>> features;
  std::vector<std::unique_ptr<CameraMotion>> camera_motions;
  std::vector<std::unique_ptr<SalientPointFrame>> saliency;

  const int buffer_size = timestamp_buffer_.size();
  // Saliency is only requested from the analyzer when with_saliency_ is set.
  const int num_results = motion_analysis_->GetResults(
      flush, &features, &camera_motions, with_saliency_ ? &saliency : nullptr);

  // The analyzer can never return more results than frames we queued.
  CHECK_LE(num_results, buffer_size);

  if (num_results == 0) {
    return;
  }

  for (int k = 0; k < num_results; ++k) {
    // Region flow features and camera motion for this frame.
    auto& feature_list = features[k];
    auto& camera_motion = camera_motions[k];
    const Timestamp timestamp = timestamp_buffer_[k];

    // Hybrid selection: prefer the selection-supplied motion when it is more
    // stable than the recomputed one.
    if (selection_input_ && options_.hybrid_selection_camera()) {
      if (camera_motion->type() > selected_motions_.front().type()) {
        // Composited type is more stable.
        camera_motion->Swap(&selected_motions_.front());
      }
      selected_motions_.pop_front();
    }

    // Hybrid meta: re-compose the metadata motion with the visually
    // estimated residual and restore original feature locations.
    if (hybrid_meta_analysis_) {
      AddMetaMotion(meta_motions_.front(), meta_features_.front(),
                    feature_list.get(), camera_motion.get());
      meta_motions_.pop_front();
      meta_features_.pop_front();
    }

    // Video frame for visualization.
    std::unique_ptr<ImageFrame> visualization_frame;
    cv::Mat visualization;
    if (visualize_output_) {
      // Initialize visualization frame with original frame.
      // NOTE(review): second argument (16) is presumably the row alignment
      // for the copy — confirm against ImageFrame::CopyFrom.
      visualization_frame.reset(new ImageFrame());
      visualization_frame->CopyFrom(packet_buffer_[k].Get<ImageFrame>(), 16);
      visualization = formats::MatView(visualization_frame.get());

      motion_analysis_->RenderResults(
          *feature_list, *camera_motion,
          with_saliency_ ? saliency[k].get() : nullptr, &visualization);

      cc->Outputs().Tag("VIZ").Add(visualization_frame.release(), timestamp);
    }

    // Output dense foreground mask.
    if (dense_foreground_output_) {
      std::unique_ptr<ImageFrame> foreground_frame(
          new ImageFrame(ImageFormat::GRAY8, frame_width_, frame_height_));
      cv::Mat foreground = formats::MatView(foreground_frame.get());
      motion_analysis_->ComputeDenseForeground(*feature_list, *camera_motion,
                                               &foreground);
      cc->Outputs().Tag("DENSE_FG").Add(foreground_frame.release(), timestamp);
    }

    // Output flow features if requested.
    if (region_flow_feature_output_) {
      cc->Outputs().Tag("FLOW").Add(feature_list.release(), timestamp);
    }

    // Output camera motion.
    if (camera_motion_output_) {
      cc->Outputs().Tag("CAMERA").Add(camera_motion.release(), timestamp);
    }

    if (video_output_) {
      cc->Outputs().Tag("VIDEO_OUT").AddPacket(packet_buffer_[k]);
    }

    // Output saliency.
    if (saliency_output_) {
      cc->Outputs().Tag("SALIENCY").Add(saliency[k].release(), timestamp);
    }
  }

  // The emitted frames consumed their corresponding metadata entries above;
  // shift the offset of the next unconsumed entry accordingly.
  if (hybrid_meta_analysis_) {
    hybrid_meta_offset_ -= num_results;
    CHECK_GE(hybrid_meta_offset_, 0);
  }

  // Drop emitted timestamps (and buffered frame packets) from the queues.
  timestamp_buffer_.erase(timestamp_buffer_.begin(),
                          timestamp_buffer_.begin() + num_results);

  if (visualize_output_ || video_output_) {
    packet_buffer_.erase(packet_buffer_.begin(),
                         packet_buffer_.begin() + num_results);
  }
}
|
||||
|
||||
// Lazily initializes frame dimensions and derived state on the first
// Process() call (used when no VideoHeader was available at Open()).
// Dimensions come from the VIDEO stream if present, otherwise from the
// CameraMotion embedded in the SELECTION stream. Also converts any
// CSV-parsed homographies into CameraMotions now that dimensions are known.
::mediapipe::Status MotionAnalysisCalculator::InitOnProcess(
    InputStream* video_stream, InputStream* selection_stream) {
  if (video_stream) {
    frame_width_ = video_stream->Get<ImageFrame>().Width();
    frame_height_ = video_stream->Get<ImageFrame>().Height();

    // Ensure image options are set correctly.
    auto* region_options =
        options_.mutable_analysis_options()->mutable_flow_options();

    // Use two possible formats to account for different channel orders
    // (e.g. RGB vs BGR); either is accepted without overriding.
    RegionFlowComputationOptions::ImageFormat image_format;
    RegionFlowComputationOptions::ImageFormat image_format2;
    switch (video_stream->Get<ImageFrame>().Format()) {
      case ImageFormat::GRAY8:
        image_format = image_format2 =
            RegionFlowComputationOptions::FORMAT_GRAYSCALE;
        break;

      case ImageFormat::SRGB:
        image_format = RegionFlowComputationOptions::FORMAT_RGB;
        image_format2 = RegionFlowComputationOptions::FORMAT_BGR;
        break;

      case ImageFormat::SRGBA:
        image_format = RegionFlowComputationOptions::FORMAT_RGBA;
        image_format2 = RegionFlowComputationOptions::FORMAT_BGRA;
        break;

      default:
        RET_CHECK(false) << "Unsupported image format.";
    }
    if (region_options->image_format() != image_format &&
        region_options->image_format() != image_format2) {
      LOG(WARNING) << "Requested image format in RegionFlowComputation "
                   << "does not match video stream format. Overriding.";
      region_options->set_image_format(image_format);
    }

    // Account for downsampling mode INPUT_SIZE. In this case we are handed
    // already downsampled frames but the resulting CameraMotion should
    // be computed on higher resolution as specified by the downsample scale.
    if (region_options->downsample_mode() ==
        RegionFlowComputationOptions::DOWNSAMPLE_TO_INPUT_SIZE) {
      const float scale = region_options->downsample_factor();
      frame_width_ = static_cast<int>(std::round(frame_width_ * scale));
      frame_height_ = static_cast<int>(std::round(frame_height_ * scale));
    }
  } else if (selection_stream) {
    // No video: recover dimensions from the selection's camera motion.
    const auto& camera_motion =
        selection_stream->Get<FrameSelectionResult>().camera_motion();
    frame_width_ = camera_motion.frame_width();
    frame_height_ = camera_motion.frame_height();
  } else {
    LOG(FATAL) << "Either VIDEO or SELECTION stream need to be specified.";
  }

  // Filled by CSV file parsing. Convert to CameraMotions (prepending an
  // identity motion for the first frame) now that dimensions are known.
  if (!meta_homographies_.empty()) {
    CHECK(csv_file_input_);
    AppendCameraMotionsFromHomographies(meta_homographies_,
                                        true,  // append identity.
                                        &meta_motions_, &meta_features_);
    meta_homographies_.clear();
  }

  // Filter weights before using for hybrid mode.
  if (hybrid_meta_analysis_) {
    auto* motion_options =
        options_.mutable_analysis_options()->mutable_motion_options();
    motion_options->set_filter_initialized_irls_weights(true);
  }

  return ::mediapipe::OkStatus();
}
|
||||
|
||||
bool MotionAnalysisCalculator::ParseModelCSV(
|
||||
const std::string& contents, std::deque<Homography>* homographies) {
|
||||
std::vector<absl::string_view> values =
|
||||
absl::StrSplit(contents, absl::ByAnyChar(",\n"));
|
||||
|
||||
// Trim off any empty lines.
|
||||
while (values.back().empty()) {
|
||||
values.pop_back();
|
||||
}
|
||||
|
||||
// Convert to float.
|
||||
std::vector<float> homog_values;
|
||||
homog_values.reserve(values.size());
|
||||
|
||||
for (const auto& value : values) {
|
||||
double value_64f;
|
||||
if (!absl::SimpleAtod(value, &value_64f)) {
|
||||
LOG(ERROR) << "Not a double, expected!";
|
||||
return false;
|
||||
}
|
||||
|
||||
homog_values.push_back(value_64f);
|
||||
}
|
||||
|
||||
return HomographiesFromValues(homog_values, homographies);
|
||||
}
|
||||
|
||||
bool MotionAnalysisCalculator::HomographiesFromValues(
|
||||
const std::vector<float>& homog_values,
|
||||
std::deque<Homography>* homographies) {
|
||||
CHECK(homographies);
|
||||
|
||||
// Obvious constants are obvious :D
|
||||
constexpr int kHomographyValues = 9;
|
||||
if (homog_values.size() % kHomographyValues != 0) {
|
||||
LOG(ERROR) << "Contents not a multiple of " << kHomographyValues;
|
||||
return false;
|
||||
}
|
||||
|
||||
for (int k = 0; k < homog_values.size(); k += kHomographyValues) {
|
||||
std::vector<double> h_vals(kHomographyValues);
|
||||
for (int l = 0; l < kHomographyValues; ++l) {
|
||||
h_vals[l] = homog_values[k + l];
|
||||
}
|
||||
|
||||
// Normalize last entry to 1.
|
||||
if (h_vals[kHomographyValues - 1] == 0) {
|
||||
LOG(ERROR) << "Degenerate homography, last entry is zero";
|
||||
return false;
|
||||
}
|
||||
|
||||
const double scale = 1.0f / h_vals[kHomographyValues - 1];
|
||||
for (int l = 0; l < kHomographyValues; ++l) {
|
||||
h_vals[l] *= scale;
|
||||
}
|
||||
|
||||
Homography h = HomographyAdapter::FromDoublePointer(h_vals.data(), false);
|
||||
homographies->push_back(h);
|
||||
}
|
||||
|
||||
if (homographies->size() % options_.meta_models_per_frame() != 0) {
|
||||
LOG(ERROR) << "Total homographies not a multiple of specified models "
|
||||
<< "per frame.";
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Removes the metadata-supplied motion from tracked features so subsequent
// estimation only needs to fit the visual residual (hybrid meta analysis).
// Feature locations are replaced by their transformed positions; features
// whose residual flow exceeds a motion-scaled threshold are marked as
// outliers by zeroing their IRLS weight.
void MotionAnalysisCalculator::SubtractMetaMotion(
    const CameraMotion& meta_motion, RegionFlowFeatureList* features) {
  if (meta_motion.mixture_homography().model_size() > 0) {
    // Mixture homographies require per-row weights; these are created in
    // AppendCameraMotionsFromHomographies before this is ever called.
    CHECK(row_weights_ != nullptr);
    RegionFlowFeatureListViaTransform(meta_motion.mixture_homography(),
                                      features, -1.0f,
                                      1.0f,  // subtract transformed.
                                      true,  // replace feature loc.
                                      row_weights_.get());
  } else {
    RegionFlowFeatureListViaTransform(meta_motion.homography(), features, -1.0f,
                                      1.0f,  // subtract transformed.
                                      true);  // replace feature loc.
  }

  // Clamp transformed features to domain and handle outliers.
  const float domain_diam =
      hypot(features->frame_width(), features->frame_height());
  const float motion_mag = meta_motion.average_magnitude();
  // Same irls fraction as used by MODEL_MIXTURE_HOMOGRAPHY scaling in
  // MotionEstimation.
  const float irls_fraction = options_.analysis_options()
                                  .motion_options()
                                  .irls_mixture_fraction_scale() *
                              options_.analysis_options()
                                  .motion_options()
                                  .irls_motion_magnitude_fraction();
  // Scale the outlier threshold with motion magnitude, never below 1x.
  float err_scale = std::max(1.0f, motion_mag * irls_fraction);

  // Maximum allowed residual as a fraction of the frame diagonal.
  const float max_err =
      options_.meta_outlier_domain_ratio() * domain_diam * err_scale;
  const float max_err_sq = max_err * max_err;

  for (auto& feature : *features->mutable_feature()) {
    // Clamp locations to [0, dim - 1] in each axis.
    feature.set_x(
        std::max(0.0f, std::min(features->frame_width() - 1.0f, feature.x())));
    feature.set_y(
        std::max(0.0f, std::min(features->frame_height() - 1.0f, feature.y())));
    // Label anything with large residual motion an outlier.
    if (FeatureFlow(feature).Norm2() > max_err_sq) {
      feature.set_irls_weight(0.0f);
    }
  }
}
|
||||
|
||||
void MotionAnalysisCalculator::AddMetaMotion(
|
||||
const CameraMotion& meta_motion, const RegionFlowFeatureList& meta_features,
|
||||
RegionFlowFeatureList* features, CameraMotion* motion) {
|
||||
// Restore old feature location.
|
||||
CHECK_EQ(meta_features.feature_size(), features->feature_size());
|
||||
for (int k = 0; k < meta_features.feature_size(); ++k) {
|
||||
auto feature = features->mutable_feature(k);
|
||||
const auto& meta_feature = meta_features.feature(k);
|
||||
feature->set_x(meta_feature.x());
|
||||
feature->set_y(meta_feature.y());
|
||||
feature->set_dx(meta_feature.dx());
|
||||
feature->set_dy(meta_feature.dy());
|
||||
}
|
||||
|
||||
// Composite camera motion.
|
||||
*motion = ComposeCameraMotion(*motion, meta_motion);
|
||||
// Restore type from metadata, i.e. do not declare motions as invalid.
|
||||
motion->set_type(meta_motion.type());
|
||||
motion->set_match_frame(-1);
|
||||
}
|
||||
|
||||
// Converts parsed metadata homographies into CameraMotions plus empty
// per-frame feature lists. If |append_identity| is set, an identity motion
// is prepended for the first frame. When meta_models_per_frame > 1,
// consecutive homographies form a per-frame MixtureHomography
// (rolling-shutter style model) with a single fitted homography projected
// out for downstream consumers.
void MotionAnalysisCalculator::AppendCameraMotionsFromHomographies(
    const std::deque<Homography>& homographies, bool append_identity,
    std::deque<CameraMotion>* camera_motions,
    std::deque<RegionFlowFeatureList>* features) {
  CHECK(camera_motions);
  CHECK(features);

  // Template motion: identity models at the current frame dimensions.
  CameraMotion identity;
  identity.set_frame_width(frame_width_);
  identity.set_frame_height(frame_height_);

  *identity.mutable_translation() = TranslationModel();
  *identity.mutable_linear_similarity() = LinearSimilarityModel();
  *identity.mutable_homography() = Homography();
  identity.set_type(CameraMotion::VALID);
  identity.set_match_frame(0);

  // Template feature list: empty long-track list, no match frame.
  RegionFlowFeatureList empty_list;
  empty_list.set_long_tracks(true);
  empty_list.set_match_frame(-1);
  empty_list.set_frame_width(frame_width_);
  empty_list.set_frame_height(frame_height_);

  if (append_identity) {
    camera_motions->push_back(identity);
    features->push_back(empty_list);
  }

  const int models_per_frame = options_.meta_models_per_frame();
  CHECK_GT(models_per_frame, 0) << "At least one model per frame is needed";
  CHECK_EQ(0, homographies.size() % models_per_frame);
  const int num_frames = homographies.size() / models_per_frame;

  // Heuristic sigma, similar to what we use for rolling shutter removal.
  const float mixture_sigma = 1.0f / models_per_frame;

  // Row weights shared with SubtractMetaMotion; created once.
  if (row_weights_ == nullptr) {
    row_weights_.reset(new MixtureRowWeights(frame_height_,
                                             frame_height_ / 10,  // 10% margin
                                             mixture_sigma * frame_height_,
                                             1.0f, models_per_frame));
  }

  for (int f = 0; f < num_frames; ++f) {
    MixtureHomography mix_homog;
    const int model_start = f * models_per_frame;

    // NOTE(review): homographies are inverted here via ModelInvert —
    // presumably metadata stores the opposite motion direction from what the
    // analysis pipeline expects; confirm against the metadata producer.
    for (int k = 0; k < models_per_frame; ++k) {
      const Homography& homog = homographies[model_start + k];
      *mix_homog.add_model() = ModelInvert(homog);
    }

    CameraMotion c = identity;
    c.set_match_frame(-1);

    if (mix_homog.model_size() > 1) {
      // Multiple models: store the mixture (with full inlier coverage) and
      // fit a single homography across the frame for downstream use.
      *c.mutable_mixture_homography() = mix_homog;
      c.set_mixture_row_sigma(mixture_sigma);

      for (int k = 0; k < models_per_frame; ++k) {
        c.add_mixture_inlier_coverage(1.0f);
      }
      *c.add_mixture_homography_spectrum() = mix_homog;
      c.set_rolling_shutter_motion_index(0);

      *c.mutable_homography() = ProjectViaFit<Homography>(
          mix_homog, frame_width_, frame_height_, row_weights_.get());
    } else {
      // Guaranteed to exist because we check models_per_frame > 0 above.
      *c.mutable_homography() = mix_homog.model(0);
    }

    // Project remaining motions down.
    *c.mutable_linear_similarity() = ProjectViaFit<LinearSimilarityModel>(
        c.homography(), frame_width_, frame_height_);
    *c.mutable_translation() = ProjectViaFit<TranslationModel>(
        c.homography(), frame_width_, frame_height_);

    // Magnitude of the fitted translation component.
    c.set_average_magnitude(
        std::hypot(c.translation().dx(), c.translation().dy()));

    camera_motions->push_back(c);
    features->push_back(empty_list);
  }
}
|
||||
|
||||
} // namespace mediapipe
|
||||
111
mediapipe/calculators/video/motion_analysis_calculator.proto
Normal file
|
|
@ -0,0 +1,111 @@
|
|||
// Copyright 2019 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
syntax = "proto2";
|
||||
|
||||
package mediapipe;
|
||||
|
||||
import "mediapipe/framework/calculator.proto";
|
||||
import "mediapipe/util/tracking/motion_analysis.proto";
|
||||
|
||||
// Options for MotionAnalysisCalculator, which runs camera-motion / region-flow
// analysis over a video stream, optionally combined with frame-selection
// results or metadata (CSV / META) supplied motions.
// Next tag: 10
message MotionAnalysisCalculatorOptions {
  extend CalculatorOptions {
    optional MotionAnalysisCalculatorOptions ext = 270698255;
  }

  // Configuration forwarded to the underlying mediapipe::MotionAnalysis
  // (flow, motion estimation and saliency options).
  optional mediapipe.MotionAnalysisOptions analysis_options = 1;

  // Determines how optional input SELECTION (if present) is used to compute
  // the final camera motion.
  enum SelectionAnalysis {
    // Recompute camera motion for selected frame neighbors.
    ANALYSIS_RECOMPUTE = 1;

    // Use composited camera motion and region flow from SELECTION input. No
    // tracking or re-computation is performed.
    // Note that in this case only CAMERA, FLOW and VIDEO_OUT tags are
    // supported as output.
    NO_ANALYSIS_USE_SELECTION = 2;

    // Recompute camera motion for selected frame neighbors using
    // features supplied by SELECTION input. No feature tracking is performed.
    ANALYSIS_FROM_FEATURES = 3;

    // Recomputes camera motion for selected frame neighbors but seeds
    // initial transform with camera motion from SELECTION input.
    ANALYSIS_WITH_SEED = 4;
  }

  // Strategy applied when SELECTION input is connected.
  optional SelectionAnalysis selection_analysis = 4
      [default = ANALYSIS_WITH_SEED];

  // If activated when SELECTION input is activated, will replace the computed
  // camera motion (for any of the ANALYSIS_* case above) with the one supplied
  // by the frame selection, in case the frame selection one is more stable.
  // For example, if recomputed camera motion is unstable but the one from
  // the selection result is stable, will use the stable result instead.
  optional bool hybrid_selection_camera = 5 [default = false];

  // Determines how optional input META is used to compute the final camera
  // motion.
  enum MetaAnalysis {
    // Uses metadata supplied motions as is.
    META_ANALYSIS_USE_META = 1;

    // Seeds visual tracking from metadata motions - estimates visual residual
    // motion and combines with metadata.
    META_ANALYSIS_HYBRID = 2;
  }

  // Strategy applied when metadata (META / CSV) input is connected.
  optional MetaAnalysis meta_analysis = 8 [default = META_ANALYSIS_USE_META];

  // Determines number of homography models per frame stored in the CSV file
  // or the homography metadata in META.
  // For values > 1, MixtureHomographies are created.
  optional int32 meta_models_per_frame = 6 [default = 1];

  // Used for META_ANALYSIS_HYBRID. Rejects features whose flow deviates
  // more than domain_ratio * image diagonal size from the ground truth
  // metadata motion.
  optional float meta_outlier_domain_ratio = 9 [default = 0.0015];

  // If true, the MotionAnalysisCalculator will skip all processing and emit no
  // packets on any output. This is useful for quickly creating different
  // versions of a MediaPipe graph without changing its structure, assuming that
  // downstream calculators can handle missing input packets.
  // TODO: Remove this hack. See b/36485206 for more details.
  optional bool bypass_mode = 7 [default = false];
}
|
||||
|
||||
// Taken from
// java/com/google/android/libraries/microvideo/proto/microvideo.proto to
// satisfy leakr requirements
// TODO: Remove and use above proto.
//
// Per-frame homography metadata sampled on device, parsed by
// MotionAnalysisCalculator's metadata input path.
message HomographyData {
  // For each frame, there are 12 homography matrices stored. Each matrix is
  // 3x3 (9 elements). This field will contain 12 x 3 x 3 float values. The
  // first row of the first homography matrix will be followed by the second row
  // of the first homography matrix, followed by third row of first homography
  // matrix, followed by the first row of the second homography matrix, etc.
  repeated float motion_homography_data = 1 [packed = true];

  // Vector containing histogram counts for individual patches in the frame.
  repeated uint32 histogram_count_data = 2 [packed = true];

  // The width of the frame at the time metadata was sampled.
  optional int32 frame_width = 3;

  // The height of the frame at the time metadata was sampled.
  optional int32 frame_height = 4;
}
|
||||
BIN
mediapipe/calculators/video/testdata/lenna.png
vendored
Normal file
|
After Width: | Height: | Size: 247 KiB |
134
mediapipe/calculators/video/testdata/parallel_tracker_graph.pbtxt
vendored
Normal file
|
|
@ -0,0 +1,134 @@
|
|||
# Test graph: runs motion analysis over CPU frames, packages the resulting
# region flow and camera motion into tracking data, and feeds two
# BoxTrackerCalculator nodes in parallel — one driven by START_POS box
# requests, one by random-access (RA_TRACK) track requests.
input_stream: "image_cpu_frames"
input_stream: "start_pos"
input_stream: "ra_track"

num_threads: 4

# Computes per-frame camera motion and region flow from the input video.
node: {
  calculator: "MotionAnalysisCalculator"
  input_stream: "VIDEO:image_cpu_frames"
  output_stream: "CAMERA:camera_motion"
  output_stream: "FLOW:region_flow"

  options: {
    [mediapipe.MotionAnalysisCalculatorOptions.ext]: {
      analysis_options: {
        analysis_policy: ANALYSIS_POLICY_CAMERA_MOBILE

        flow_options: {
          # Maybe move down to 50
          fast_estimation_min_block_size: 100
          top_inlier_sets: 1
          frac_inlier_error_threshold: 3e-3
          downsample_mode: DOWNSAMPLE_NONE
          verification_distance: 5.0
          verify_long_feature_acceleration: true
          verify_long_feature_trigger_ratio: 0.1
          tracking_options: {
            max_features: 500
            adaptive_extraction_levels: 2
            min_eig_val_settings: {
              adaptive_lowest_quality_level: 2e-4
            }
            klt_tracker_implementation: KLT_OPENCV
          }
        }

        motion_options: {
          label_empty_frames_as_valid: false
        }
      }
    }
  }
}

# Packages flow + camera motion into per-frame TrackingData.
node: {
  calculator: "FlowPackagerCalculator"
  input_stream: "FLOW:region_flow"
  input_stream: "CAMERA:camera_motion"
  output_stream: "TRACKING:tracking_data"

  options: {
    [mediapipe.FlowPackagerCalculatorOptions.ext]: {
      flow_packager_options: {
        binary_tracking_data_support: false
      }
    }
  }
}

# Box tracker driven by START_POS requests.
node: {
  calculator: "BoxTrackerCalculator"
  input_stream: "TRACKING:tracking_data"
  input_stream: "START_POS:start_pos"
  output_stream: "BOXES:boxes"
  input_side_packet: "OPTIONS:calculator_options"

  # Process TRACKING and START_POS as independent sync sets (no timestamp
  # alignment between the two streams).
  input_stream_handler: {
    input_stream_handler: "SyncSetInputStreamHandler"
    options: {
      [mediapipe.SyncSetInputStreamHandlerOptions.ext]: {
        sync_set: {
          tag_index: "TRACKING"
        }
        sync_set: {
          tag_index: "START_POS"
        }
      }
    }
  }

  options: {
    [mediapipe.BoxTrackerCalculatorOptions.ext]: {
      tracker_options: {
        track_step_options: {
          track_object_and_camera: true
          tracking_degrees: TRACKING_DEGREE_OBJECT_PERSPECTIVE
          object_similarity_min_contd_inliers: 6
          inlier_spring_force: 0.0
          static_motion_temporal_ratio: 3e-2
        }
      }
      visualize_tracking_data: false
      streaming_track_data_cache_size: 100
    }
  }
}

# Second box tracker, driven by random-access RA_TRACK requests; same tracker
# configuration as above.
node: {
  calculator: "BoxTrackerCalculator"
  input_stream: "TRACKING:tracking_data"
  input_stream: "RA_TRACK:ra_track"
  output_stream: "RA_BOXES:ra_boxes"
  input_side_packet: "OPTIONS:calculator_options"

  # TRACKING and RA_TRACK processed as independent sync sets.
  input_stream_handler: {
    input_stream_handler: "SyncSetInputStreamHandler"
    options: {
      [mediapipe.SyncSetInputStreamHandlerOptions.ext]: {
        sync_set: {
          tag_index: "TRACKING"
        }
        sync_set: {
          tag_index: "RA_TRACK"
        }
      }
    }
  }

  options: {
    [mediapipe.BoxTrackerCalculatorOptions.ext]: {
      tracker_options: {
        track_step_options: {
          track_object_and_camera: true
          tracking_degrees: TRACKING_DEGREE_OBJECT_PERSPECTIVE
          object_similarity_min_contd_inliers: 6
          inlier_spring_force: 0.0
          static_motion_temporal_ratio: 3e-2
        }
      }
      visualize_tracking_data: false
      streaming_track_data_cache_size: 100
    }
  }
}
|
||||
122
mediapipe/calculators/video/testdata/tracker_graph.pbtxt
vendored
Normal file
|
|
@ -0,0 +1,122 @@
|
|||
input_stream: "image_cpu_frames"
|
||||
input_stream: "start_pos"
|
||||
input_stream: "cancel_object_id"
|
||||
input_stream: "ra_track"
|
||||
input_stream: "restart_pos"
|
||||
input_stream: "track_time"
|
||||
|
||||
num_threads: 4
|
||||
|
||||
node: {
|
||||
calculator: "MotionAnalysisCalculator"
|
||||
options: {
|
||||
[mediapipe.MotionAnalysisCalculatorOptions.ext]: {
|
||||
analysis_options: {
|
||||
analysis_policy: ANALYSIS_POLICY_CAMERA_MOBILE
|
||||
|
||||
flow_options: {
|
||||
# Maybe move down to 50
|
||||
fast_estimation_min_block_size: 100
|
||||
top_inlier_sets: 1
|
||||
frac_inlier_error_threshold: 3e-3
|
||||
# For mobile application, downsample before input into graph
|
||||
# and use DOWNSAMPLE_TO_INPUT_SIZE and specify
|
||||
# downsampling_factor option or DOWNSAMPLE input_side_packet
|
||||
downsample_mode: DOWNSAMPLE_TO_INPUT_SIZE
|
||||
verification_distance: 5.0
|
||||
verify_long_feature_acceleration: true
|
||||
verify_long_feature_trigger_ratio: 0.1
|
||||
tracking_options: {
|
||||
max_features: 500
|
||||
corner_extraction_method: EXTRACTION_FAST
|
||||
adaptive_extraction_levels: 2
|
||||
min_eig_val_settings: {
|
||||
adaptive_lowest_quality_level: 2e-4
|
||||
}
|
||||
klt_tracker_implementation: KLT_OPENCV
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Drops packets if calculator cannot keep up with the input rate.
|
||||
input_stream_handler: {
|
||||
input_stream_handler: "FixedSizeInputStreamHandler"
|
||||
}
|
||||
|
||||
input_stream: "VIDEO:image_cpu_frames"
|
||||
input_side_packet: "DOWNSAMPLE:analysis_downsample_factor"
|
||||
input_side_packet: "OPTIONS:calculator_options"
|
||||
output_stream: "CAMERA:camera_motion"
|
||||
output_stream: "FLOW:region_flow"
|
||||
}
|
||||
|
||||
node: {
|
||||
calculator: "FlowPackagerCalculator"
|
||||
|
||||
input_stream: "FLOW:region_flow"
|
||||
input_stream: "CAMERA:camera_motion"
|
||||
output_stream: "TRACKING:tracking_data"
|
||||
options: {
|
||||
[mediapipe.FlowPackagerCalculatorOptions.ext]: {
|
||||
flow_packager_options: {
|
||||
binary_tracking_data_support: false
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
node: {
|
||||
calculator: "BoxTrackerCalculator"
|
||||
|
||||
input_side_packet: "OPTIONS:calculator_options"
|
||||
input_stream: "TRACKING:tracking_data"
|
||||
input_stream: "TRACK_TIME:track_time"
|
||||
input_stream: "START_POS:start_pos"
|
||||
input_stream: "RESTART_POS:restart_pos"
|
||||
input_stream: "CANCEL_OBJECT_ID:cancel_object_id"
|
||||
input_stream: "RA_TRACK:ra_track"
|
||||
output_stream: "BOXES:boxes"
|
||||
output_stream: "RA_BOXES:ra_boxes"
|
||||
|
||||
input_stream_handler: {
|
||||
input_stream_handler: "SyncSetInputStreamHandler"
|
||||
options: {
|
||||
[mediapipe.SyncSetInputStreamHandlerOptions.ext]: {
|
||||
sync_set: {
|
||||
tag_index: "TRACKING"
|
||||
tag_index: "TRACK_TIME"
|
||||
}
|
||||
sync_set: {
|
||||
tag_index: "START_POS"
|
||||
}
|
||||
sync_set: {
|
||||
tag_index: "RESTART_POS"
|
||||
}
|
||||
sync_set: {
|
||||
tag_index: "CANCEL_OBJECT_ID"
|
||||
}
|
||||
sync_set: {
|
||||
tag_index: "RA_TRACK"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
options: {
|
||||
[mediapipe.BoxTrackerCalculatorOptions.ext]: {
|
||||
tracker_options: {
|
||||
track_step_options: {
|
||||
track_object_and_camera: true
|
||||
tracking_degrees: TRACKING_DEGREE_OBJECT_ROTATION_SCALE
|
||||
inlier_spring_force: 0.0
|
||||
static_motion_temporal_ratio: 3e-2
|
||||
object_similarity_min_contd_inliers: 10
|
||||
}
|
||||
}
|
||||
visualize_tracking_data: false
|
||||
streaming_track_data_cache_size: 100
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,319 @@
|
|||
// Copyright 2019 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/container/node_hash_map.h"
|
||||
#include "mediapipe/framework/calculator_framework.h"
|
||||
#include "mediapipe/framework/formats/detection.pb.h"
|
||||
#include "mediapipe/framework/formats/location_data.pb.h"
|
||||
#include "mediapipe/framework/formats/rect.pb.h"
|
||||
#include "mediapipe/framework/port/status.h"
|
||||
#include "mediapipe/util/tracking/box_tracker.h"
|
||||
#include "mediapipe/util/tracking/tracked_detection.h"
|
||||
#include "mediapipe/util/tracking/tracked_detection_manager.h"
|
||||
#include "mediapipe/util/tracking/tracking.h"
|
||||
|
||||
namespace mediapipe {
|
||||
namespace {
|
||||
|
||||
constexpr int kDetectionUpdateTimeOutMS = 5000;
|
||||
constexpr char kDetectionsTag[] = "DETECTIONS";
|
||||
constexpr char kDetectionBoxesTag[] = "DETECTION_BOXES";
|
||||
constexpr char kDetectionListTag[] = "DETECTION_LIST";
|
||||
constexpr char kTrackingBoxesTag[] = "TRACKING_BOXES";
|
||||
constexpr char kCancelObjectIdTag[] = "CANCEL_OBJECT_ID";
|
||||
|
||||
// Move |src| to the back of |dst|.
|
||||
void MoveIds(std::vector<int>* dst, std::vector<int> src) {
|
||||
dst->insert(dst->end(), std::make_move_iterator(src.begin()),
|
||||
std::make_move_iterator(src.end()));
|
||||
}
|
||||
|
||||
int64 GetInputTimestampMs(::mediapipe::CalculatorContext* cc) {
|
||||
return cc->InputTimestamp().Microseconds() / 1000; // 1 ms = 1000 us.
|
||||
}
|
||||
|
||||
// Converts a Mediapipe Detection Proto to a TrackedDetection class.
|
||||
std::unique_ptr<TrackedDetection> GetTrackedDetectionFromDetection(
|
||||
const Detection& detection, int64 timestamp) {
|
||||
std::unique_ptr<TrackedDetection> tracked_detection =
|
||||
absl::make_unique<TrackedDetection>(detection.detection_id(), timestamp);
|
||||
const float top = detection.location_data().relative_bounding_box().ymin();
|
||||
const float bottom =
|
||||
detection.location_data().relative_bounding_box().ymin() +
|
||||
detection.location_data().relative_bounding_box().height();
|
||||
const float left = detection.location_data().relative_bounding_box().xmin();
|
||||
const float right = detection.location_data().relative_bounding_box().xmin() +
|
||||
detection.location_data().relative_bounding_box().width();
|
||||
NormalizedRect bounding_box;
|
||||
bounding_box.set_x_center((left + right) / 2.f);
|
||||
bounding_box.set_y_center((top + bottom) / 2.f);
|
||||
bounding_box.set_height(bottom - top);
|
||||
bounding_box.set_width(right - left);
|
||||
tracked_detection->set_bounding_box(bounding_box);
|
||||
|
||||
for (int i = 0; i < detection.label_size(); ++i) {
|
||||
tracked_detection->AddLabel(detection.label(i), detection.score(i));
|
||||
}
|
||||
return tracked_detection;
|
||||
}
|
||||
|
||||
// Converts a TrackedDetection class to a Mediapipe Detection Proto.
|
||||
Detection GetAxisAlignedDetectionFromTrackedDetection(
|
||||
const TrackedDetection& tracked_detection) {
|
||||
Detection detection;
|
||||
LocationData* location_data = detection.mutable_location_data();
|
||||
|
||||
auto corners = tracked_detection.GetCorners();
|
||||
|
||||
float x_min = std::numeric_limits<float>::max();
|
||||
float x_max = std::numeric_limits<float>::min();
|
||||
float y_min = std::numeric_limits<float>::max();
|
||||
float y_max = std::numeric_limits<float>::min();
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
x_min = std::min(x_min, corners[i].x());
|
||||
x_max = std::max(x_max, corners[i].x());
|
||||
y_min = std::min(y_min, corners[i].y());
|
||||
y_max = std::max(y_max, corners[i].y());
|
||||
}
|
||||
location_data->set_format(LocationData::RELATIVE_BOUNDING_BOX);
|
||||
LocationData::RelativeBoundingBox* relative_bbox =
|
||||
location_data->mutable_relative_bounding_box();
|
||||
relative_bbox->set_xmin(x_min);
|
||||
relative_bbox->set_ymin(y_min);
|
||||
relative_bbox->set_width(x_max - x_min);
|
||||
relative_bbox->set_height(y_max - y_min);
|
||||
|
||||
// Use previous id which is the id the object when it's first detected.
|
||||
if (tracked_detection.previous_id() > 0) {
|
||||
detection.set_detection_id(tracked_detection.previous_id());
|
||||
} else {
|
||||
detection.set_detection_id(tracked_detection.unique_id());
|
||||
}
|
||||
for (const auto& label_and_score : tracked_detection.label_to_score_map()) {
|
||||
detection.add_label(label_and_score.first);
|
||||
detection.add_score(label_and_score.second);
|
||||
}
|
||||
return detection;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// TrackedDetectionManagerCalculator accepts detections and tracking results at
|
||||
// different frame rate for real time tracking of targets.
|
||||
// Input:
|
||||
// DETECTIONS: A vector<Detection> of newly detected targets.
|
||||
// TRACKING_BOXES: A TimedBoxProtoList which contains a list of tracked boxes
|
||||
// from previous detections.
|
||||
//
|
||||
// Output:
|
||||
// CANCEL_OBJECT_ID: Ids of targets that are missing/lost such that it should
|
||||
// be removed from tracking.
|
||||
// DETECTIONS: List of detections that are being tracked.
|
||||
// DETECTION_BOXES: List of bounding boxes of detections that are being
|
||||
// tracked.
|
||||
//
|
||||
// Usage example:
|
||||
// node {
|
||||
// calculator: "TrackedDetectionManagerCalculator"
|
||||
// input_stream: "DETECTIONS:detections"
|
||||
// input_stream: "TRACKING_BOXES:boxes"
|
||||
// output_stream: "CANCEL_OBJECT_ID:cancel_object_id"
|
||||
// output_stream: "DETECTIONS:output_detections"
|
||||
// }
|
||||
class TrackedDetectionManagerCalculator : public CalculatorBase {
|
||||
public:
|
||||
static ::mediapipe::Status GetContract(CalculatorContract* cc);
|
||||
|
||||
::mediapipe::Status Process(CalculatorContext* cc) override;
|
||||
|
||||
private:
|
||||
// Adds new list of detections to |waiting_for_update_detections_|.
|
||||
void AddDetectionList(const DetectionList& detection_list,
|
||||
CalculatorContext* cc);
|
||||
void AddDetections(const std::vector<Detection>& detections,
|
||||
CalculatorContext* cc);
|
||||
|
||||
// Manages existing and new detections.
|
||||
TrackedDetectionManager tracked_detection_manager_;
|
||||
|
||||
// Set of detections that are not up to date yet. These detections will be
|
||||
// added to the detection manager until they got updated from the box tracker.
|
||||
absl::node_hash_map<int, std::unique_ptr<TrackedDetection>>
|
||||
waiting_for_update_detections_;
|
||||
};
|
||||
REGISTER_CALCULATOR(TrackedDetectionManagerCalculator);
|
||||
|
||||
::mediapipe::Status TrackedDetectionManagerCalculator::GetContract(
|
||||
CalculatorContract* cc) {
|
||||
if (cc->Inputs().HasTag(kDetectionsTag)) {
|
||||
cc->Inputs().Tag(kDetectionsTag).Set<std::vector<Detection>>();
|
||||
}
|
||||
if (cc->Inputs().HasTag(kDetectionListTag)) {
|
||||
cc->Inputs().Tag(kDetectionListTag).Set<DetectionList>();
|
||||
}
|
||||
if (cc->Inputs().HasTag(kTrackingBoxesTag)) {
|
||||
cc->Inputs().Tag(kTrackingBoxesTag).Set<TimedBoxProtoList>();
|
||||
}
|
||||
|
||||
if (cc->Outputs().HasTag(kCancelObjectIdTag)) {
|
||||
cc->Outputs().Tag(kCancelObjectIdTag).Set<int>();
|
||||
}
|
||||
if (cc->Outputs().HasTag(kDetectionsTag)) {
|
||||
cc->Outputs().Tag(kDetectionsTag).Set<std::vector<Detection>>();
|
||||
}
|
||||
if (cc->Outputs().HasTag(kDetectionBoxesTag)) {
|
||||
cc->Outputs().Tag(kDetectionBoxesTag).Set<std::vector<NormalizedRect>>();
|
||||
}
|
||||
|
||||
return ::mediapipe::OkStatus();
|
||||
}
|
||||
|
||||
::mediapipe::Status TrackedDetectionManagerCalculator::Process(
|
||||
CalculatorContext* cc) {
|
||||
if (cc->Inputs().HasTag("TRACKING_BOXES")) {
|
||||
if (!cc->Inputs().Tag("TRACKING_BOXES").IsEmpty()) {
|
||||
const TimedBoxProtoList& tracked_boxes =
|
||||
cc->Inputs().Tag("TRACKING_BOXES").Get<TimedBoxProtoList>();
|
||||
|
||||
// Collect all detections that are removed.
|
||||
auto removed_detection_ids = absl::make_unique<std::vector<int>>();
|
||||
for (const TimedBoxProto& tracked_box : tracked_boxes.box()) {
|
||||
NormalizedRect bounding_box;
|
||||
bounding_box.set_x_center((tracked_box.left() + tracked_box.right()) /
|
||||
2.f);
|
||||
bounding_box.set_y_center((tracked_box.bottom() + tracked_box.top()) /
|
||||
2.f);
|
||||
bounding_box.set_height(tracked_box.bottom() - tracked_box.top());
|
||||
bounding_box.set_width(tracked_box.right() - tracked_box.left());
|
||||
bounding_box.set_rotation(tracked_box.rotation());
|
||||
// First check if this box updates a detection that's waiting for
|
||||
// update from the tracker.
|
||||
auto waiting_for_update_detectoin_ptr =
|
||||
waiting_for_update_detections_.find(tracked_box.id());
|
||||
if (waiting_for_update_detectoin_ptr !=
|
||||
waiting_for_update_detections_.end()) {
|
||||
// Add the detection and remove duplicated detections.
|
||||
auto removed_ids = tracked_detection_manager_.AddDetection(
|
||||
std::move(waiting_for_update_detectoin_ptr->second));
|
||||
MoveIds(removed_detection_ids.get(), std::move(removed_ids));
|
||||
|
||||
waiting_for_update_detections_.erase(
|
||||
waiting_for_update_detectoin_ptr);
|
||||
}
|
||||
auto removed_ids = tracked_detection_manager_.UpdateDetectionLocation(
|
||||
tracked_box.id(), bounding_box, tracked_box.time_msec());
|
||||
MoveIds(removed_detection_ids.get(), std::move(removed_ids));
|
||||
}
|
||||
// TODO: Should be handled automatically in detection manager.
|
||||
auto removed_ids = tracked_detection_manager_.RemoveObsoleteDetections(
|
||||
GetInputTimestampMs(cc) - kDetectionUpdateTimeOutMS);
|
||||
MoveIds(removed_detection_ids.get(), std::move(removed_ids));
|
||||
|
||||
// TODO: Should be handled automatically in detection manager.
|
||||
removed_ids = tracked_detection_manager_.RemoveOutOfViewDetections();
|
||||
MoveIds(removed_detection_ids.get(), std::move(removed_ids));
|
||||
|
||||
if (!removed_detection_ids->empty() &&
|
||||
cc->Outputs().HasTag(kCancelObjectIdTag)) {
|
||||
auto timestamp = cc->InputTimestamp();
|
||||
for (int box_id : *removed_detection_ids) {
|
||||
// The timestamp is incremented (by 1 us) because currently the box
|
||||
// tracker calculator only accepts one cancel object ID for any given
|
||||
// timestamp.
|
||||
cc->Outputs()
|
||||
.Tag(kCancelObjectIdTag)
|
||||
.AddPacket(mediapipe::MakePacket<int>(box_id).At(timestamp++));
|
||||
}
|
||||
}
|
||||
|
||||
// Output detections and corresponding bounding boxes.
|
||||
const auto& all_detections =
|
||||
tracked_detection_manager_.GetAllTrackedDetections();
|
||||
auto output_detections = absl::make_unique<std::vector<Detection>>();
|
||||
auto output_boxes = absl::make_unique<std::vector<NormalizedRect>>();
|
||||
|
||||
for (const auto& detection_ptr : all_detections) {
|
||||
const auto& detection = *detection_ptr.second;
|
||||
// Only output detections that are synced.
|
||||
if (detection.last_updated_timestamp() <
|
||||
cc->InputTimestamp().Microseconds() / 1000) {
|
||||
continue;
|
||||
}
|
||||
output_detections->emplace_back(
|
||||
GetAxisAlignedDetectionFromTrackedDetection(detection));
|
||||
output_boxes->emplace_back(detection.bounding_box());
|
||||
}
|
||||
if (cc->Outputs().HasTag(kDetectionsTag)) {
|
||||
cc->Outputs()
|
||||
.Tag(kDetectionsTag)
|
||||
.Add(output_detections.release(), cc->InputTimestamp());
|
||||
}
|
||||
|
||||
if (cc->Outputs().HasTag(kDetectionBoxesTag)) {
|
||||
cc->Outputs()
|
||||
.Tag(kDetectionBoxesTag)
|
||||
.Add(output_boxes.release(), cc->InputTimestamp());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (cc->Inputs().HasTag(kDetectionsTag) &&
|
||||
!cc->Inputs().Tag(kDetectionsTag).IsEmpty()) {
|
||||
const auto detections =
|
||||
cc->Inputs().Tag(kDetectionsTag).Get<std::vector<Detection>>();
|
||||
AddDetections(detections, cc);
|
||||
}
|
||||
|
||||
if (cc->Inputs().HasTag(kDetectionListTag) &&
|
||||
!cc->Inputs().Tag(kDetectionListTag).IsEmpty()) {
|
||||
const auto detection_list =
|
||||
cc->Inputs().Tag(kDetectionListTag).Get<DetectionList>();
|
||||
AddDetectionList(detection_list, cc);
|
||||
}
|
||||
|
||||
return ::mediapipe::OkStatus();
|
||||
}
|
||||
|
||||
void TrackedDetectionManagerCalculator::AddDetectionList(
|
||||
const DetectionList& detection_list, CalculatorContext* cc) {
|
||||
for (const auto& detection : detection_list.detection()) {
|
||||
// Convert from microseconds to milliseconds.
|
||||
std::unique_ptr<TrackedDetection> new_detection =
|
||||
GetTrackedDetectionFromDetection(
|
||||
detection, cc->InputTimestamp().Microseconds() / 1000);
|
||||
|
||||
const int id = new_detection->unique_id();
|
||||
waiting_for_update_detections_[id] = std::move(new_detection);
|
||||
}
|
||||
}
|
||||
|
||||
void TrackedDetectionManagerCalculator::AddDetections(
|
||||
const std::vector<Detection>& detections, CalculatorContext* cc) {
|
||||
for (const auto& detection : detections) {
|
||||
// Convert from microseconds to milliseconds.
|
||||
std::unique_ptr<TrackedDetection> new_detection =
|
||||
GetTrackedDetectionFromDetection(
|
||||
detection, cc->InputTimestamp().Microseconds() / 1000);
|
||||
|
||||
const int id = new_detection->unique_id();
|
||||
waiting_for_update_detections_[id] = std::move(new_detection);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace mediapipe
|
||||
709
mediapipe/calculators/video/tracking_graph_test.cc
Normal file
|
|
@ -0,0 +1,709 @@
|
|||
// Copyright 2019 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <fstream>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "mediapipe/calculators/video/box_tracker_calculator.pb.h"
|
||||
#include "mediapipe/framework/calculator.pb.h"
|
||||
#include "mediapipe/framework/calculator_framework.h"
|
||||
#include "mediapipe/framework/deps/file_path.h"
|
||||
#include "mediapipe/framework/formats/image_frame.h"
|
||||
#include "mediapipe/framework/packet.h"
|
||||
#include "mediapipe/framework/port/advanced_proto_inc.h"
|
||||
#include "mediapipe/framework/port/file_helpers.h"
|
||||
#include "mediapipe/framework/port/gmock.h"
|
||||
#include "mediapipe/framework/port/gtest.h"
|
||||
#include "mediapipe/framework/port/opencv_highgui_inc.h"
|
||||
#include "mediapipe/framework/port/proto_ns.h"
|
||||
#include "mediapipe/framework/port/status.h"
|
||||
#include "mediapipe/framework/port/status_matchers.h"
|
||||
#include "mediapipe/util/tracking/box_tracker.pb.h"
|
||||
#include "mediapipe/util/tracking/tracking.pb.h"
|
||||
|
||||
#ifdef __APPLE__
|
||||
#include <CoreFoundation/CoreFoundation.h>
|
||||
#endif // defined(__APPLE__)
|
||||
|
||||
namespace mediapipe {
|
||||
namespace {
|
||||
using ::testing::FloatNear;
|
||||
using ::testing::Test;
|
||||
|
||||
std::string GetTestDir() {
|
||||
#ifdef __APPLE__
|
||||
char path[1024];
|
||||
CFURLRef bundle_url = CFBundleCopyBundleURL(CFBundleGetMainBundle());
|
||||
CFURLGetFileSystemRepresentation(
|
||||
bundle_url, true, reinterpret_cast<UInt8*>(path), sizeof(path));
|
||||
CFRelease(bundle_url);
|
||||
return ::mediapipe::file::JoinPath(path, "testdata");
|
||||
#elif defined(__ANDROID__)
|
||||
char path[1024];
|
||||
getcwd(path, sizeof(path));
|
||||
return ::mediapipe::file::JoinPath(path,
|
||||
"mediapipe/calculators/video/testdata");
|
||||
#else
|
||||
return ::mediapipe::file::JoinPath(
|
||||
"./",
|
||||
// This should match the path of the output files
|
||||
// of the genrule() that generates test model files.
|
||||
"mediapipe/calculators/video/testdata");
|
||||
#endif // defined(__APPLE__)
|
||||
}
|
||||
|
||||
bool LoadBinaryTestGraph(const std::string& graph_path,
|
||||
CalculatorGraphConfig* config) {
|
||||
std::ifstream ifs;
|
||||
ifs.open(graph_path.c_str());
|
||||
proto_ns::io::IstreamInputStream in_stream(&ifs);
|
||||
bool success = config->ParseFromZeroCopyStream(&in_stream);
|
||||
ifs.close();
|
||||
if (!success) {
|
||||
LOG(ERROR) << "could not parse test graph: " << graph_path;
|
||||
}
|
||||
return success;
|
||||
}
|
||||
|
||||
class TrackingGraphTest : public Test {
|
||||
protected:
|
||||
TrackingGraphTest() {}
|
||||
|
||||
void SetUp() override {
|
||||
test_dir_ = GetTestDir();
|
||||
const auto graph_path = file::JoinPath(test_dir_, "tracker.binarypb");
|
||||
ASSERT_TRUE(LoadBinaryTestGraph(graph_path, &config_));
|
||||
|
||||
original_image_ = cv::imread(file::JoinPath(test_dir_, "lenna.png"));
|
||||
CreateInputFramesFromOriginalImage(kNumImages, kTranslationStep,
|
||||
&input_frames_packets_);
|
||||
|
||||
const auto& first_input_img = input_frames_packets_[0].Get<ImageFrame>();
|
||||
const int img_width = first_input_img.Width();
|
||||
const int img_height = first_input_img.Height();
|
||||
translation_step_x_ = kTranslationStep / static_cast<float>(img_width);
|
||||
translation_step_y_ = kTranslationStep / static_cast<float>(img_height);
|
||||
|
||||
// Creat new configure and packet dump vector to store output.
|
||||
mediapipe::CalculatorGraphConfig config_copy = config_;
|
||||
mediapipe::tool::AddVectorSink("boxes", &config_copy, &output_packets_);
|
||||
mediapipe::tool::AddVectorSink("ra_boxes", &config_copy,
|
||||
&random_access_results_packets_);
|
||||
|
||||
// Initialize graph
|
||||
MP_ASSERT_OK(graph_.Initialize(config_copy));
|
||||
|
||||
const auto parallel_graph_path =
|
||||
file::JoinPath(test_dir_, "parallel_tracker.binarypb");
|
||||
CalculatorGraphConfig parallel_config;
|
||||
ASSERT_TRUE(LoadBinaryTestGraph(parallel_graph_path, ¶llel_config));
|
||||
mediapipe::tool::AddVectorSink("boxes", ¶llel_config, &output_packets_);
|
||||
mediapipe::tool::AddVectorSink("ra_boxes", ¶llel_config,
|
||||
&random_access_results_packets_);
|
||||
MP_ASSERT_OK(parallel_graph_.Initialize(parallel_config));
|
||||
}
|
||||
|
||||
void CreateInputFramesFromOriginalImage(
|
||||
int num_images, int translation_step,
|
||||
std::vector<Packet>* input_frames_packets);
|
||||
|
||||
void TearDown() override {
|
||||
output_packets_.clear();
|
||||
random_access_results_packets_.clear();
|
||||
}
|
||||
|
||||
std::unique_ptr<TimedBoxProtoList> MakeBoxList(
|
||||
const Timestamp& timestamp, const std::vector<bool>& is_quad_tracking,
|
||||
const std::vector<bool>& is_pnp_tracking,
|
||||
const std::vector<bool>& reacquisition) const;
|
||||
|
||||
void RunGraphWithSidePacketsAndInputs(
|
||||
const std::map<std::string, mediapipe::Packet>& side_packets,
|
||||
const mediapipe::Packet& start_pos_packet);
|
||||
|
||||
// Utility functions used to judge if a given quad or box is near to the
|
||||
// groundtruth location at a given frame.
|
||||
// Examine box.reacquisition() field equals to `reacquisition`.
|
||||
// `frame` can be float number to account for inter-frame interpolation.
|
||||
void ExpectBoxAtFrame(const TimedBoxProto& box, float frame,
|
||||
bool reacquisition);
|
||||
|
||||
// Examine box.aspect_ratio() field equals to `aspect_ratio` if asepct_ratio
|
||||
// is positive.
|
||||
void ExpectQuadAtFrame(const TimedBoxProto& box, float frame,
|
||||
float aspect_ratio, bool reacquisition);
|
||||
|
||||
// Utility function to judge if two quad are near to each other.
|
||||
void ExpectQuadNear(const TimedBoxProto& box1, const TimedBoxProto& box2);
|
||||
|
||||
std::unique_ptr<TimedBoxProtoList> CreateRandomAccessTrackingBoxList(
|
||||
const std::vector<Timestamp>& start_timestamps,
|
||||
const std::vector<Timestamp>& end_timestamps) const;
|
||||
|
||||
CalculatorGraph graph_;
|
||||
CalculatorGraph parallel_graph_;
|
||||
CalculatorGraphConfig config_;
|
||||
std::string test_dir_;
|
||||
cv::Mat original_image_;
|
||||
std::vector<Packet> input_frames_packets_;
|
||||
std::vector<mediapipe::Packet> output_packets_;
|
||||
std::vector<mediapipe::Packet> random_access_results_packets_;
|
||||
float translation_step_x_; // normalized translation step in x direction
|
||||
float translation_step_y_; // normalized translation step in y direction
|
||||
static constexpr float kInitialBoxHalfWidthNormalized = 0.25f;
|
||||
static constexpr float kInitialBoxHalfHeightNormalized = 0.25f;
|
||||
static constexpr float kImageAspectRatio = 1.0f; // for lenna.png
|
||||
static constexpr float kInitialBoxLeft =
|
||||
0.5f - kInitialBoxHalfWidthNormalized;
|
||||
static constexpr float kInitialBoxRight =
|
||||
0.5f + kInitialBoxHalfWidthNormalized;
|
||||
static constexpr float kInitialBoxTop =
|
||||
0.5f - kInitialBoxHalfHeightNormalized;
|
||||
static constexpr float kInitialBoxBottom =
|
||||
0.5f + kInitialBoxHalfHeightNormalized;
|
||||
static constexpr int kFrameIntervalUs = 30000;
|
||||
static constexpr int kNumImages = 8;
|
||||
// Each image is shifted to the right and bottom by kTranslationStep
|
||||
// pixels compared with the previous image.
|
||||
static constexpr int kTranslationStep = 10;
|
||||
static constexpr float kEqualityTolerance = 3e-4f;
|
||||
};
|
||||
|
||||
void TrackingGraphTest::ExpectBoxAtFrame(const TimedBoxProto& box, float frame,
|
||||
bool reacquisition) {
|
||||
EXPECT_EQ(box.reacquisition(), reacquisition);
|
||||
EXPECT_TRUE(box.has_rotation());
|
||||
EXPECT_THAT(box.rotation(), FloatNear(0, kEqualityTolerance));
|
||||
EXPECT_THAT(box.left(),
|
||||
FloatNear(kInitialBoxLeft - frame * translation_step_x_,
|
||||
kEqualityTolerance));
|
||||
EXPECT_THAT(box.top(), FloatNear(kInitialBoxTop - frame * translation_step_y_,
|
||||
kEqualityTolerance));
|
||||
EXPECT_THAT(box.bottom(),
|
||||
FloatNear(kInitialBoxBottom - frame * translation_step_y_,
|
||||
kEqualityTolerance));
|
||||
EXPECT_THAT(box.right(),
|
||||
FloatNear(kInitialBoxRight - frame * translation_step_x_,
|
||||
kEqualityTolerance));
|
||||
}
|
||||
|
||||
void TrackingGraphTest::ExpectQuadAtFrame(const TimedBoxProto& box, float frame,
|
||||
float aspect_ratio,
|
||||
bool reacquisition) {
|
||||
EXPECT_TRUE(box.has_quad()) << "quad must exist!";
|
||||
if (aspect_ratio > 0) {
|
||||
EXPECT_TRUE(box.has_aspect_ratio());
|
||||
EXPECT_NEAR(box.aspect_ratio(), aspect_ratio, kEqualityTolerance);
|
||||
}
|
||||
|
||||
EXPECT_EQ(box.reacquisition(), reacquisition);
|
||||
|
||||
const auto& quad = box.quad();
|
||||
EXPECT_EQ(8, quad.vertices_size())
|
||||
<< "quad has only " << box.quad().vertices_size() << " vertices";
|
||||
EXPECT_THAT(quad.vertices(0),
|
||||
FloatNear(kInitialBoxLeft - frame * translation_step_x_,
|
||||
kEqualityTolerance));
|
||||
EXPECT_THAT(quad.vertices(1),
|
||||
FloatNear(kInitialBoxTop - frame * translation_step_y_,
|
||||
kEqualityTolerance));
|
||||
EXPECT_THAT(quad.vertices(3),
|
||||
FloatNear(kInitialBoxBottom - frame * translation_step_y_,
|
||||
kEqualityTolerance));
|
||||
EXPECT_THAT(quad.vertices(4),
|
||||
FloatNear(kInitialBoxRight - frame * translation_step_x_,
|
||||
kEqualityTolerance));
|
||||
}
|
||||
|
||||
void TrackingGraphTest::ExpectQuadNear(const TimedBoxProto& box1,
|
||||
const TimedBoxProto& box2) {
|
||||
EXPECT_TRUE(box1.has_quad());
|
||||
EXPECT_TRUE(box2.has_quad());
|
||||
EXPECT_EQ(8, box1.quad().vertices_size())
|
||||
<< "quad has only " << box1.quad().vertices_size() << " vertices";
|
||||
EXPECT_EQ(8, box2.quad().vertices_size())
|
||||
<< "quad has only " << box2.quad().vertices_size() << " vertices";
|
||||
for (int j = 0; j < box1.quad().vertices_size(); ++j) {
|
||||
EXPECT_NEAR(box1.quad().vertices(j), box2.quad().vertices(j),
|
||||
kEqualityTolerance);
|
||||
}
|
||||
}
|
||||
|
||||
std::unique_ptr<TimedBoxProtoList> TrackingGraphTest::MakeBoxList(
|
||||
const Timestamp& timestamp, const std::vector<bool>& is_quad_tracking,
|
||||
const std::vector<bool>& is_pnp_tracking,
|
||||
const std::vector<bool>& reacquisition) const {
|
||||
auto box_list = absl::make_unique<TimedBoxProtoList>();
|
||||
int box_id = 0;
|
||||
for (int j = 0; j < is_quad_tracking.size(); ++j) {
|
||||
TimedBoxProto* box = box_list->add_box();
|
||||
if (is_quad_tracking[j]) {
|
||||
box->mutable_quad()->add_vertices(kInitialBoxLeft);
|
||||
box->mutable_quad()->add_vertices(kInitialBoxTop);
|
||||
box->mutable_quad()->add_vertices(kInitialBoxLeft);
|
||||
box->mutable_quad()->add_vertices(kInitialBoxBottom);
|
||||
box->mutable_quad()->add_vertices(kInitialBoxRight);
|
||||
box->mutable_quad()->add_vertices(kInitialBoxBottom);
|
||||
box->mutable_quad()->add_vertices(kInitialBoxRight);
|
||||
box->mutable_quad()->add_vertices(kInitialBoxTop);
|
||||
|
||||
if (is_pnp_tracking[j]) {
|
||||
box->set_aspect_ratio(kImageAspectRatio);
|
||||
}
|
||||
} else {
|
||||
box->set_left(kInitialBoxLeft);
|
||||
box->set_right(kInitialBoxRight);
|
||||
box->set_top(kInitialBoxTop);
|
||||
box->set_bottom(kInitialBoxBottom);
|
||||
}
|
||||
box->set_id(box_id++);
|
||||
box->set_time_msec(timestamp.Value() / 1000);
|
||||
box->set_reacquisition(reacquisition[j]);
|
||||
}
|
||||
|
||||
return box_list;
|
||||
}
|
||||
|
||||
void TrackingGraphTest::CreateInputFramesFromOriginalImage(
|
||||
int num_images, int translation_step,
|
||||
std::vector<Packet>* input_frames_packets) {
|
||||
const int crop_width = original_image_.cols - num_images * translation_step;
|
||||
const int crop_height = original_image_.rows - num_images * translation_step;
|
||||
for (int i = 0; i < num_images; ++i) {
|
||||
cv::Rect roi(i * translation_step, i * translation_step, crop_width,
|
||||
crop_height);
|
||||
cv::Mat cropped_img = cv::Mat(original_image_, roi);
|
||||
auto cropped_image_frame = absl::make_unique<ImageFrame>(
|
||||
ImageFormat::SRGB, crop_width, crop_height, cropped_img.step[0],
|
||||
cropped_img.data, ImageFrame::PixelDataDeleter::kNone);
|
||||
Timestamp curr_timestamp = Timestamp(i * kFrameIntervalUs);
|
||||
Packet image_packet =
|
||||
Adopt(cropped_image_frame.release()).At(curr_timestamp);
|
||||
input_frames_packets->push_back(image_packet);
|
||||
}
|
||||
}
|
||||
|
||||
void TrackingGraphTest::RunGraphWithSidePacketsAndInputs(
|
||||
const std::map<std::string, mediapipe::Packet>& side_packets,
|
||||
const mediapipe::Packet& start_pos_packet) {
|
||||
// Start running the graph
|
||||
MP_EXPECT_OK(graph_.StartRun(side_packets));
|
||||
|
||||
MP_EXPECT_OK(graph_.AddPacketToInputStream("start_pos", start_pos_packet));
|
||||
|
||||
for (auto frame_packet : input_frames_packets_) {
|
||||
MP_EXPECT_OK(
|
||||
graph_.AddPacketToInputStream("image_cpu_frames", frame_packet));
|
||||
MP_EXPECT_OK(graph_.WaitUntilIdle());
|
||||
}
|
||||
|
||||
MP_EXPECT_OK(graph_.CloseAllInputStreams());
|
||||
MP_EXPECT_OK(graph_.WaitUntilDone());
|
||||
}
|
||||
|
||||
std::unique_ptr<TimedBoxProtoList>
|
||||
TrackingGraphTest::CreateRandomAccessTrackingBoxList(
|
||||
const std::vector<Timestamp>& start_timestamps,
|
||||
const std::vector<Timestamp>& end_timestamps) const {
|
||||
CHECK_EQ(start_timestamps.size(), end_timestamps.size());
|
||||
auto ra_boxes = absl::make_unique<TimedBoxProtoList>();
|
||||
for (int i = 0; i < start_timestamps.size(); ++i) {
|
||||
auto start_box_list =
|
||||
MakeBoxList(start_timestamps[i], std::vector<bool>{true},
|
||||
std::vector<bool>{true}, std::vector<bool>{false});
|
||||
auto end_box_list =
|
||||
MakeBoxList(end_timestamps[i], std::vector<bool>{true},
|
||||
std::vector<bool>{true}, std::vector<bool>{false});
|
||||
*(ra_boxes->add_box()) = (*start_box_list).box(0);
|
||||
*(ra_boxes->add_box()) = (*end_box_list).box(0);
|
||||
}
|
||||
return ra_boxes;
|
||||
}
|
||||
|
||||
TEST_F(TrackingGraphTest, BasicBoxTrackingSanityCheck) {
|
||||
// Create input side packets.
|
||||
std::map<std::string, mediapipe::Packet> side_packets;
|
||||
side_packets.insert(std::make_pair("analysis_downsample_factor",
|
||||
mediapipe::MakePacket<float>(1.0f)));
|
||||
side_packets.insert(std::make_pair(
|
||||
"calculator_options",
|
||||
mediapipe::MakePacket<CalculatorOptions>(CalculatorOptions())));
|
||||
|
||||
// Run the graph with input side packets, start_pos, and input image frames.
|
||||
Timestamp start_box_time = input_frames_packets_[0].Timestamp();
|
||||
// is_quad_tracking is used to indicate whether to track quad for each
|
||||
// individual box.
|
||||
std::vector<bool> is_quad_tracking{false};
|
||||
// is_pnp_tracking is used to indicate whether to use perspective transform to
|
||||
// track quad.
|
||||
std::vector<bool> is_pnp_tracking{false};
|
||||
// is_reacquisition is used to indicate whether to enable reacquisition for
|
||||
// the box.
|
||||
std::vector<bool> is_reacquisition{false};
|
||||
auto start_box_list = MakeBoxList(start_box_time, is_quad_tracking,
|
||||
is_pnp_tracking, is_reacquisition);
|
||||
Packet start_pos_packet = Adopt(start_box_list.release()).At(start_box_time);
|
||||
RunGraphWithSidePacketsAndInputs(side_packets, start_pos_packet);
|
||||
|
||||
EXPECT_EQ(input_frames_packets_.size(), output_packets_.size());
|
||||
|
||||
for (int i = 0; i < output_packets_.size(); ++i) {
|
||||
const TimedBoxProtoList& boxes =
|
||||
output_packets_[i].Get<TimedBoxProtoList>();
|
||||
EXPECT_EQ(is_quad_tracking.size(), boxes.box_size());
|
||||
ExpectBoxAtFrame(boxes.box(0), i, false);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(TrackingGraphTest, BasicQuadTrackingSanityCheck) {
|
||||
// Create input side packets.
|
||||
std::map<std::string, mediapipe::Packet> side_packets;
|
||||
side_packets.insert(std::make_pair("analysis_downsample_factor",
|
||||
mediapipe::MakePacket<float>(1.0f)));
|
||||
CalculatorOptions calculator_options;
|
||||
calculator_options.MutableExtension(BoxTrackerCalculatorOptions::ext)
|
||||
->mutable_tracker_options()
|
||||
->mutable_track_step_options()
|
||||
->set_tracking_degrees(
|
||||
TrackStepOptions::TRACKING_DEGREE_OBJECT_PERSPECTIVE);
|
||||
side_packets.insert(std::make_pair(
|
||||
"calculator_options",
|
||||
mediapipe::MakePacket<CalculatorOptions>(calculator_options)));
|
||||
|
||||
Timestamp start_box_time = input_frames_packets_[0].Timestamp();
|
||||
// Box id 0 use quad tracking with 8DoF homography transform.
|
||||
// Box id 1 use quad tracking with 6DoF perspective transform.
|
||||
// Box id 2 use box tracking with 4DoF similarity transform.
|
||||
std::vector<bool> is_quad_tracking{true, true, false};
|
||||
std::vector<bool> is_pnp_tracking{false, true, false};
|
||||
std::vector<bool> is_reacquisition{true, false, true};
|
||||
auto start_box_list = MakeBoxList(start_box_time, is_quad_tracking,
|
||||
is_pnp_tracking, is_reacquisition);
|
||||
Packet start_pos_packet = Adopt(start_box_list.release()).At(start_box_time);
|
||||
RunGraphWithSidePacketsAndInputs(side_packets, start_pos_packet);
|
||||
|
||||
EXPECT_EQ(input_frames_packets_.size(), output_packets_.size());
|
||||
for (int i = 0; i < output_packets_.size(); ++i) {
|
||||
const TimedBoxProtoList& boxes =
|
||||
output_packets_[i].Get<TimedBoxProtoList>();
|
||||
EXPECT_EQ(is_quad_tracking.size(), boxes.box_size());
|
||||
for (int j = 0; j < boxes.box_size(); ++j) {
|
||||
const TimedBoxProto& box = boxes.box(j);
|
||||
if (is_quad_tracking[box.id()]) {
|
||||
ExpectQuadAtFrame(box, i,
|
||||
is_pnp_tracking[box.id()] ? kImageAspectRatio : -1.0f,
|
||||
is_reacquisition[box.id()]);
|
||||
} else {
|
||||
ExpectBoxAtFrame(box, i, is_reacquisition[box.id()]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(TrackingGraphTest, TestRandomAccessTrackingResults) {
|
||||
// Create input side packets.
|
||||
std::map<std::string, mediapipe::Packet> side_packets;
|
||||
side_packets.insert(std::make_pair("analysis_downsample_factor",
|
||||
mediapipe::MakePacket<float>(1.0f)));
|
||||
CalculatorOptions calculator_options;
|
||||
calculator_options.MutableExtension(BoxTrackerCalculatorOptions::ext)
|
||||
->mutable_tracker_options()
|
||||
->mutable_track_step_options()
|
||||
->set_tracking_degrees(
|
||||
TrackStepOptions::TRACKING_DEGREE_OBJECT_PERSPECTIVE);
|
||||
side_packets.insert(std::make_pair(
|
||||
"calculator_options",
|
||||
mediapipe::MakePacket<CalculatorOptions>(calculator_options)));
|
||||
|
||||
ASSERT_GT(input_frames_packets_.size(), 2); // at least 3 frames
|
||||
ASSERT_TRUE(input_frames_packets_[2].Timestamp() -
|
||||
input_frames_packets_[1].Timestamp() >
|
||||
TimestampDiff(1000));
|
||||
|
||||
constexpr int start_frame = 0;
|
||||
Timestamp start_box_time = input_frames_packets_[start_frame].Timestamp();
|
||||
auto start_box_list =
|
||||
MakeBoxList(start_box_time, std::vector<bool>{true},
|
||||
std::vector<bool>{true}, std::vector<bool>{false});
|
||||
constexpr int end_frame = 2;
|
||||
Timestamp end_box_time = input_frames_packets_[end_frame].Timestamp();
|
||||
|
||||
// Also test reverse random access tracking.
|
||||
// This offset of 1ms is simulating the case where the start query timestamp
|
||||
// to be not any existing frame timestamp. In reality, it's highly encouraged
|
||||
// to have the start query timestamp be aligned with frame timestamp.
|
||||
constexpr int reverse_start_frame = 1;
|
||||
Timestamp reverse_start_box_time =
|
||||
input_frames_packets_[reverse_start_frame].Timestamp() + 1000;
|
||||
|
||||
auto ra_boxes = CreateRandomAccessTrackingBoxList(
|
||||
{start_box_time, reverse_start_box_time}, {end_box_time, start_box_time});
|
||||
|
||||
Packet ra_packet = Adopt(ra_boxes.release()).At(start_box_time);
|
||||
Packet start_packet = Adopt(start_box_list.release()).At(start_box_time);
|
||||
|
||||
// Start running the ordinary graph, verify random access produce same result
|
||||
// as normal tracking.
|
||||
MP_EXPECT_OK(graph_.StartRun(side_packets));
|
||||
MP_EXPECT_OK(graph_.AddPacketToInputStream("start_pos", start_packet));
|
||||
for (auto frame_packet : input_frames_packets_) {
|
||||
MP_EXPECT_OK(
|
||||
graph_.AddPacketToInputStream("image_cpu_frames", frame_packet));
|
||||
Packet track_time_packet = Adopt(new int(0)).At(frame_packet.Timestamp());
|
||||
MP_EXPECT_OK(
|
||||
graph_.AddPacketToInputStream("track_time", track_time_packet));
|
||||
MP_EXPECT_OK(graph_.WaitUntilIdle());
|
||||
}
|
||||
MP_EXPECT_OK(graph_.AddPacketToInputStream("ra_track", ra_packet));
|
||||
MP_EXPECT_OK(graph_.CloseAllInputStreams());
|
||||
MP_EXPECT_OK(graph_.WaitUntilDone());
|
||||
|
||||
EXPECT_EQ(input_frames_packets_.size(), output_packets_.size());
|
||||
const TimedBoxProtoList tracking_result =
|
||||
output_packets_[end_frame].Get<TimedBoxProtoList>();
|
||||
EXPECT_EQ(1, tracking_result.box_size());
|
||||
|
||||
// Should have 1 random access packet.
|
||||
EXPECT_EQ(1, random_access_results_packets_.size());
|
||||
const TimedBoxProtoList& ra_result =
|
||||
random_access_results_packets_[0].Get<TimedBoxProtoList>();
|
||||
// Two box tracking results. One for comparison with normal tracking. The
|
||||
// other for reverse random access tracking.
|
||||
EXPECT_EQ(2, ra_result.box_size());
|
||||
|
||||
// Check if randan access tracking has same result with normal tracking.
|
||||
ExpectQuadNear(tracking_result.box(0), ra_result.box(0));
|
||||
ExpectQuadAtFrame(ra_result.box(0), end_frame - start_frame,
|
||||
kImageAspectRatio, false);
|
||||
ExpectQuadAtFrame(ra_result.box(1), start_frame - reverse_start_frame - 1,
|
||||
kImageAspectRatio, false);
|
||||
|
||||
// Clear output and ra result packet vector before test parallel graph.
|
||||
TearDown();
|
||||
|
||||
// Start running the parallel graph, verify random access produce same result
|
||||
// as normal tracking.
|
||||
MP_EXPECT_OK(parallel_graph_.StartRun(side_packets));
|
||||
MP_EXPECT_OK(
|
||||
parallel_graph_.AddPacketToInputStream("start_pos", start_packet));
|
||||
for (auto frame_packet : input_frames_packets_) {
|
||||
MP_EXPECT_OK(parallel_graph_.AddPacketToInputStream("image_cpu_frames",
|
||||
frame_packet));
|
||||
MP_EXPECT_OK(parallel_graph_.WaitUntilIdle());
|
||||
}
|
||||
MP_EXPECT_OK(parallel_graph_.AddPacketToInputStream("ra_track", ra_packet));
|
||||
MP_EXPECT_OK(parallel_graph_.CloseAllInputStreams());
|
||||
MP_EXPECT_OK(parallel_graph_.WaitUntilDone());
|
||||
|
||||
EXPECT_EQ(input_frames_packets_.size(), output_packets_.size());
|
||||
const TimedBoxProtoList parallel_tracking_result =
|
||||
output_packets_[end_frame].Get<TimedBoxProtoList>();
|
||||
EXPECT_EQ(1, parallel_tracking_result.box_size());
|
||||
|
||||
// should have only 1 random access
|
||||
EXPECT_EQ(1, random_access_results_packets_.size());
|
||||
const TimedBoxProtoList& parallel_ra_result =
|
||||
random_access_results_packets_[0].Get<TimedBoxProtoList>();
|
||||
EXPECT_EQ(2, parallel_ra_result.box_size());
|
||||
|
||||
// Check if randan access tracking has same result with normal tracking.
|
||||
ExpectQuadNear(parallel_tracking_result.box(0), parallel_ra_result.box(0));
|
||||
ExpectQuadAtFrame(parallel_ra_result.box(0), end_frame - start_frame,
|
||||
kImageAspectRatio, false);
|
||||
ExpectQuadAtFrame(parallel_ra_result.box(1),
|
||||
start_frame - reverse_start_frame - 1, kImageAspectRatio,
|
||||
false);
|
||||
}
|
||||
|
||||
// Tests what happens when random access request timestamps are
|
||||
// outside of cache.
|
||||
TEST_F(TrackingGraphTest, TestRandomAccessTrackingTimestamps) {
|
||||
// Create input side packets.
|
||||
std::map<std::string, mediapipe::Packet> side_packets;
|
||||
side_packets.insert(std::make_pair("analysis_downsample_factor",
|
||||
mediapipe::MakePacket<float>(1.0f)));
|
||||
CalculatorOptions calculator_options;
|
||||
calculator_options.MutableExtension(BoxTrackerCalculatorOptions::ext)
|
||||
->mutable_tracker_options()
|
||||
->mutable_track_step_options()
|
||||
->set_tracking_degrees(
|
||||
TrackStepOptions::TRACKING_DEGREE_OBJECT_PERSPECTIVE);
|
||||
// We intentionally don't cache all frames, to see what happens when
|
||||
// random access tracking request time falls outside cache range.
|
||||
calculator_options.MutableExtension(BoxTrackerCalculatorOptions::ext)
|
||||
->set_streaming_track_data_cache_size(input_frames_packets_.size() - 1);
|
||||
side_packets.insert(std::make_pair(
|
||||
"calculator_options",
|
||||
mediapipe::MakePacket<CalculatorOptions>(calculator_options)));
|
||||
|
||||
// Set up random access boxes
|
||||
const int num_frames = input_frames_packets_.size();
|
||||
const int64 usec_in_sec = 1000000;
|
||||
std::vector<Timestamp> start_timestamps{
|
||||
input_frames_packets_[0].Timestamp() - usec_in_sec, // forward
|
||||
input_frames_packets_[0].Timestamp(), // forward
|
||||
input_frames_packets_[1].Timestamp(), // forward
|
||||
input_frames_packets_[num_frames - 1].Timestamp() + usec_in_sec, // fwd
|
||||
input_frames_packets_[0].Timestamp(), // backward
|
||||
input_frames_packets_[num_frames - 1].Timestamp(), // backward
|
||||
input_frames_packets_[num_frames - 1].Timestamp(), // backward
|
||||
input_frames_packets_[num_frames - 1].Timestamp() + usec_in_sec // back
|
||||
};
|
||||
std::vector<Timestamp> end_timestamps{
|
||||
input_frames_packets_[num_frames - 1].Timestamp(),
|
||||
input_frames_packets_[num_frames - 1].Timestamp(),
|
||||
input_frames_packets_[num_frames - 1].Timestamp() + usec_in_sec,
|
||||
input_frames_packets_[num_frames - 1].Timestamp() + 2 * usec_in_sec,
|
||||
input_frames_packets_[0].Timestamp() - usec_in_sec,
|
||||
input_frames_packets_[0].Timestamp(),
|
||||
input_frames_packets_[0].Timestamp() - usec_in_sec,
|
||||
input_frames_packets_[1].Timestamp()};
|
||||
auto ra_boxes =
|
||||
CreateRandomAccessTrackingBoxList(start_timestamps, end_timestamps);
|
||||
Packet ra_packet =
|
||||
Adopt(ra_boxes.release()).At(input_frames_packets_[0].Timestamp());
|
||||
|
||||
// Run the graph and check if the outside-cache request have no results.
|
||||
// Start running the parallel graph, verify random access produce same result
|
||||
// as normal tracking.
|
||||
MP_EXPECT_OK(parallel_graph_.StartRun(side_packets));
|
||||
for (auto frame_packet : input_frames_packets_) {
|
||||
MP_EXPECT_OK(parallel_graph_.AddPacketToInputStream("image_cpu_frames",
|
||||
frame_packet));
|
||||
MP_EXPECT_OK(parallel_graph_.WaitUntilIdle());
|
||||
}
|
||||
MP_EXPECT_OK(parallel_graph_.AddPacketToInputStream("ra_track", ra_packet));
|
||||
MP_EXPECT_OK(parallel_graph_.CloseAllInputStreams());
|
||||
MP_EXPECT_OK(parallel_graph_.WaitUntilDone());
|
||||
|
||||
// should have 1 random access packet with 0 result boxes
|
||||
EXPECT_EQ(1, random_access_results_packets_.size());
|
||||
const auto& ra_returned_boxes =
|
||||
random_access_results_packets_[0].Get<TimedBoxProtoList>();
|
||||
const int num_returned_ra_boxes = ra_returned_boxes.box_size();
|
||||
EXPECT_EQ(0, num_returned_ra_boxes);
|
||||
}
|
||||
|
||||
TEST_F(TrackingGraphTest, TestTransitionFramesForReacquisition) {
|
||||
// Create input side packets.
|
||||
std::map<std::string, mediapipe::Packet> side_packets;
|
||||
side_packets.insert(std::make_pair("analysis_downsample_factor",
|
||||
mediapipe::MakePacket<float>(1.0f)));
|
||||
CalculatorOptions calculator_options;
|
||||
calculator_options.MutableExtension(BoxTrackerCalculatorOptions::ext)
|
||||
->mutable_tracker_options()
|
||||
->mutable_track_step_options()
|
||||
->set_tracking_degrees(
|
||||
TrackStepOptions::TRACKING_DEGREE_OBJECT_PERSPECTIVE);
|
||||
constexpr int kTransitionFrames = 3;
|
||||
calculator_options.MutableExtension(BoxTrackerCalculatorOptions::ext)
|
||||
->set_start_pos_transition_frames(kTransitionFrames);
|
||||
|
||||
side_packets.insert(std::make_pair(
|
||||
"calculator_options",
|
||||
mediapipe::MakePacket<CalculatorOptions>(calculator_options)));
|
||||
|
||||
Timestamp start_box_time = input_frames_packets_[0].Timestamp();
|
||||
// Box id 0 use quad tracking with 8DoF homography transform.
|
||||
// Box id 1 use quad tracking with 6DoF perspective transform.
|
||||
// Box id 2 use box tracking with 4DoF similarity transform.
|
||||
std::vector<bool> is_quad_tracking{true, true, false};
|
||||
std::vector<bool> is_pnp_tracking{false, true, false};
|
||||
std::vector<bool> is_reacquisition{true, true, true};
|
||||
auto start_box_list = MakeBoxList(start_box_time, is_quad_tracking,
|
||||
is_pnp_tracking, is_reacquisition);
|
||||
Packet start_pos_packet = Adopt(start_box_list.release()).At(start_box_time);
|
||||
|
||||
// Setting box pos restart from initial position (frame 0's position).
|
||||
constexpr int kRestartFrame = 3;
|
||||
Timestamp restart_box_time = input_frames_packets_[kRestartFrame].Timestamp();
|
||||
auto restart_box_list = MakeBoxList(restart_box_time, is_quad_tracking,
|
||||
is_pnp_tracking, is_reacquisition);
|
||||
Packet restart_pos_packet =
|
||||
Adopt(restart_box_list.release()).At(restart_box_time);
|
||||
MP_EXPECT_OK(graph_.StartRun(side_packets));
|
||||
MP_EXPECT_OK(graph_.AddPacketToInputStream("start_pos", start_pos_packet));
|
||||
|
||||
for (int j = 0; j < input_frames_packets_.size(); ++j) {
|
||||
// Add TRACK_TIME stream queries in between 2 frames.
|
||||
if (j > 0) {
|
||||
Timestamp track_time = Timestamp((j - 0.5f) * kFrameIntervalUs);
|
||||
LOG(INFO) << track_time.Value();
|
||||
Packet track_time_packet = Adopt(new Timestamp).At(track_time);
|
||||
MP_EXPECT_OK(
|
||||
graph_.AddPacketToInputStream("track_time", track_time_packet));
|
||||
}
|
||||
|
||||
MP_EXPECT_OK(graph_.AddPacketToInputStream("image_cpu_frames",
|
||||
input_frames_packets_[j]));
|
||||
Packet track_time_packet =
|
||||
Adopt(new int(0)).At(input_frames_packets_[j].Timestamp());
|
||||
MP_EXPECT_OK(
|
||||
graph_.AddPacketToInputStream("track_time", track_time_packet));
|
||||
MP_EXPECT_OK(graph_.WaitUntilIdle());
|
||||
|
||||
if (j == kRestartFrame) {
|
||||
MP_EXPECT_OK(
|
||||
graph_.AddPacketToInputStream("restart_pos", restart_pos_packet));
|
||||
}
|
||||
}
|
||||
|
||||
MP_EXPECT_OK(graph_.CloseAllInputStreams());
|
||||
MP_EXPECT_OK(graph_.WaitUntilDone());
|
||||
|
||||
EXPECT_EQ(input_frames_packets_.size() * 2 - 1, output_packets_.size());
|
||||
for (int i = 0; i < output_packets_.size(); ++i) {
|
||||
const TimedBoxProtoList& boxes =
|
||||
output_packets_[i].Get<TimedBoxProtoList>();
|
||||
EXPECT_EQ(is_quad_tracking.size(), boxes.box_size());
|
||||
float frame_id = i / 2.0f;
|
||||
float expected_frame_id;
|
||||
if (frame_id <= kRestartFrame) {
|
||||
// before transition
|
||||
expected_frame_id = frame_id;
|
||||
} else {
|
||||
float transition_frames = frame_id - kRestartFrame;
|
||||
if (transition_frames <= kTransitionFrames) {
|
||||
// transitioning.
|
||||
expected_frame_id =
|
||||
kRestartFrame -
|
||||
transition_frames / kTransitionFrames * kRestartFrame +
|
||||
transition_frames;
|
||||
} else {
|
||||
// after transition.
|
||||
expected_frame_id = transition_frames;
|
||||
}
|
||||
}
|
||||
|
||||
for (int j = 0; j < boxes.box_size(); ++j) {
|
||||
const TimedBoxProto& box = boxes.box(j);
|
||||
if (is_quad_tracking[box.id()]) {
|
||||
ExpectQuadAtFrame(box, expected_frame_id,
|
||||
is_pnp_tracking[box.id()] ? kImageAspectRatio : -1.0f,
|
||||
is_reacquisition[box.id()]);
|
||||
} else {
|
||||
ExpectBoxAtFrame(box, expected_frame_id, is_reacquisition[box.id()]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: Add test for reacquisition.
|
||||
|
||||
} // namespace
|
||||
} // namespace mediapipe
|
||||
|
|
@ -39,6 +39,11 @@ pipeline.
|
|||
the same TFLite model in a CPU-based pipeline. This example highlights how
|
||||
graphs can be easily adapted to run on CPU v.s. GPU.
|
||||
|
||||
### Object Detection and Tracking with GPU
|
||||
|
||||
[Object Detection and Tracking with GPU](./object_tracking_mobile_gpu.md) illustrates how to
|
||||
use MediaPipe for object detection and tracking.
|
||||
|
||||
### Face Detection with GPU
|
||||
|
||||
[Face Detection with GPU](./face_detection_mobile_gpu.md) illustrates how to use
|
||||
|
|
|
|||
BIN
mediapipe/docs/images/mobile/box_tracking_subgraph.png
Normal file
|
After Width: | Height: | Size: 37 KiB |
BIN
mediapipe/docs/images/mobile/object_detection_gpu_subgraph.png
Normal file
|
After Width: | Height: | Size: 38 KiB |
BIN
mediapipe/docs/images/mobile/object_tracking_android_gpu.gif
Normal file
|
After Width: | Height: | Size: 8.6 MiB |
|
After Width: | Height: | Size: 1.3 MiB |
BIN
mediapipe/docs/images/mobile/object_tracking_mobile_gpu.png
Normal file
|
After Width: | Height: | Size: 31 KiB |
|
After Width: | Height: | Size: 20 KiB |
BIN
mediapipe/docs/images/mobile/object_tracking_subgraph.png
Normal file
|
After Width: | Height: | Size: 45 KiB |
|
|
@ -63,6 +63,7 @@ To build and run iOS apps:
|
|||
|
||||
```bash
|
||||
$ sudo apt-get install libopencv-core-dev libopencv-highgui-dev \
|
||||
libopencv-calib3d-dev libopencv-features2d-dev \
|
||||
libopencv-imgproc-dev libopencv-video-dev
|
||||
```
|
||||
|
||||
|
|
@ -411,6 +412,7 @@ To build and run iOS apps:
|
|||
|
||||
```bash
|
||||
username@DESKTOP-TMVLBJ1:~/mediapipe$ sudo apt-get install libopencv-core-dev libopencv-highgui-dev \
|
||||
libopencv-calib3d-dev libopencv-features2d-dev \
|
||||
libopencv-imgproc-dev libopencv-video-dev
|
||||
```
|
||||
|
||||
|
|
|
|||
475
mediapipe/docs/object_tracking_mobile_gpu.md
Normal file
|
|
@ -0,0 +1,475 @@
|
|||
# Object Detection and Tracking
|
||||
|
||||
This doc focuses on the
|
||||
[below example graph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/tracking/object_detection_tracking_mobile_gpu.pbtxt)
|
||||
that performs object detection and tracking.
|
||||
Note that object detection is using TensorFlow Lite on GPU while tracking is using CPU.
|
||||
|
||||
For overall context on hand detection and hand tracking, please read this
|
||||
[Google Developer Blog](https://mediapipe.page.link/objecttrackingblog).
|
||||
|
||||

|
||||
|
||||
## Android
|
||||
|
||||
[Source](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objecttrackinggpu)
|
||||
|
||||
To build and install the app:
|
||||
|
||||
```bash
|
||||
bazel build -c opt --config=android_arm64 mediapipe/examples/android/src/java/com/google/mediapipe/apps/objecttrackinggpu
|
||||
```
|
||||
|
||||
Once the app is built, install it on Android device with:
|
||||
|
||||
```bash
|
||||
adb install bazel-bin/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objecttrackinggpu/objecttrackinggpu.apk
|
||||
```
|
||||
|
||||
## Graph
|
||||
The object detection and tracking [main graph](#main-graph) internally utilizes a
|
||||
[object detection subgraph](#object-detection-subgraph), a
|
||||
[object tracking subgraph](#object-tracking-subgraph) and a
|
||||
[renderer subgraph](#renderer-subgraph).
|
||||
|
||||
|
||||
The subgraphs show up in the main graph visualization as nodes colored in
|
||||
purple, and the subgraph itself can also be visualized just like a regular
|
||||
graph. For more information on how to visualize a graph that includes subgraphs,
|
||||
see the Visualizing Subgraphs section in the
|
||||
[visualizer documentation](./visualizer.md).
|
||||
|
||||
### Main Graph
|
||||
|
||||
|
||||

|
||||
|
||||
[Source pbtxt file](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/object_detection/object_detection_mobile_gpu.pbtxt)
|
||||
|
||||
```bash
|
||||
# MediaPipe graph that performs object detection and tracking.
|
||||
# Used in the examples in
|
||||
# mediapipie/examples/android/src/java/com/mediapipe/apps/objecttrackinggpu
|
||||
|
||||
# Images on GPU coming into and out of the graph.
|
||||
input_stream: "input_video"
|
||||
output_stream: "output_video"
|
||||
|
||||
# Resamples the images by specific frame rate. This calculator is used to
|
||||
# control the frequecy of subsequent calculators/subgraphs, e.g. less power
|
||||
# consumption for expensive process.
|
||||
node {
|
||||
calculator: "PacketResamplerCalculator"
|
||||
input_stream: "DATA:input_video"
|
||||
output_stream: "DATA:throttled_input_video"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.PacketResamplerCalculatorOptions] {
|
||||
frame_rate: 0.5
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Subgraph that detections objects (see object_detection_gpu.pbtxt).
|
||||
node {
|
||||
calculator: "ObjectDetectionSubgraph"
|
||||
input_stream: "IMAGE:throttled_input_video"
|
||||
output_stream: "DETECTIONS:output_detections"
|
||||
}
|
||||
|
||||
# Subgraph that tracks objects (see object_tracking.pbtxt).
|
||||
node {
|
||||
calculator: "ObjectTrackingSubgraph"
|
||||
input_stream: "VIDEO:input_video"
|
||||
input_stream: "DETECTIONS:output_detections"
|
||||
output_stream: "DETECTIONS:tracked_detections"
|
||||
}
|
||||
|
||||
# Subgraph that renders annotations and overlays them on top of the input
|
||||
# images (see renderer_gpu.pbtxt).
|
||||
node {
|
||||
calculator: "RendererSubgraph"
|
||||
input_stream: "IMAGE:input_video"
|
||||
input_stream: "DETECTIONS:tracked_detections"
|
||||
output_stream: "IMAGE:output_video"
|
||||
}
|
||||
```
|
||||
|
||||
### Object Detection Subgraph
|
||||
|
||||

|
||||
|
||||
[Source pbtxt file](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/tracking/subgraphs/object_detection_gpu.pbtxt)
|
||||
|
||||
```bash
|
||||
# MediaPipe object detection subgraph.
|
||||
|
||||
type: "ObjectDetectionSubgraph"
|
||||
|
||||
input_stream: "IMAGE:input_video"
|
||||
output_stream: "DETECTIONS:output_detections"
|
||||
|
||||
# Transforms the input image on GPU to a 320x320 image. To scale the image, by
|
||||
# default it uses the STRETCH scale mode that maps the entire input image to the
|
||||
# entire transformed image. As a result, image aspect ratio may be changed and
|
||||
# objects in the image may be deformed (stretched or squeezed), but the object
|
||||
# detection model used in this graph is agnostic to that deformation.
|
||||
node: {
|
||||
calculator: "ImageTransformationCalculator"
|
||||
input_stream: "IMAGE_GPU:input_video"
|
||||
output_stream: "IMAGE_GPU:transformed_input_video"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
|
||||
output_width: 320
|
||||
output_height: 320
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Converts the transformed input image on GPU into an image tensor stored as a
|
||||
# TfLiteTensor.
|
||||
node {
|
||||
calculator: "TfLiteConverterCalculator"
|
||||
input_stream: "IMAGE_GPU:transformed_input_video"
|
||||
output_stream: "TENSORS_GPU:image_tensor"
|
||||
}
|
||||
|
||||
# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a
|
||||
# vector of tensors representing, for instance, detection boxes/keypoints and
|
||||
# scores.
|
||||
node {
|
||||
calculator: "TfLiteInferenceCalculator"
|
||||
input_stream: "TENSORS_GPU:image_tensor"
|
||||
output_stream: "TENSORS_GPU:detection_tensors"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] {
|
||||
model_path: "mediapipe/models/ssdlite_object_detection.tflite"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Generates a single side packet containing a vector of SSD anchors based on
|
||||
# the specification in the options.
|
||||
node {
|
||||
calculator: "SsdAnchorsCalculator"
|
||||
output_side_packet: "anchors"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.SsdAnchorsCalculatorOptions] {
|
||||
num_layers: 6
|
||||
min_scale: 0.2
|
||||
max_scale: 0.95
|
||||
input_size_height: 320
|
||||
input_size_width: 320
|
||||
anchor_offset_x: 0.5
|
||||
anchor_offset_y: 0.5
|
||||
strides: 16
|
||||
strides: 32
|
||||
strides: 64
|
||||
strides: 128
|
||||
strides: 256
|
||||
strides: 512
|
||||
aspect_ratios: 1.0
|
||||
aspect_ratios: 2.0
|
||||
aspect_ratios: 0.5
|
||||
aspect_ratios: 3.0
|
||||
aspect_ratios: 0.3333
|
||||
reduce_boxes_in_lowest_layer: true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Decodes the detection tensors generated by the TensorFlow Lite model, based on
|
||||
# the SSD anchors and the specification in the options, into a vector of
|
||||
# detections. Each detection describes a detected object.
|
||||
node {
|
||||
calculator: "TfLiteTensorsToDetectionsCalculator"
|
||||
input_stream: "TENSORS_GPU:detection_tensors"
|
||||
input_side_packet: "ANCHORS:anchors"
|
||||
output_stream: "DETECTIONS:detections"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.TfLiteTensorsToDetectionsCalculatorOptions] {
|
||||
num_classes: 91
|
||||
num_boxes: 2034
|
||||
num_coords: 4
|
||||
ignore_classes: 0
|
||||
sigmoid_score: true
|
||||
apply_exponential_on_box_size: true
|
||||
x_scale: 10.0
|
||||
y_scale: 10.0
|
||||
h_scale: 5.0
|
||||
w_scale: 5.0
|
||||
min_score_thresh: 0.6
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Performs non-max suppression to remove excessive detections.
|
||||
node {
|
||||
calculator: "NonMaxSuppressionCalculator"
|
||||
input_stream: "detections"
|
||||
output_stream: "filtered_detections"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.NonMaxSuppressionCalculatorOptions] {
|
||||
min_suppression_threshold: 0.4
|
||||
max_num_detections: 3
|
||||
overlap_type: INTERSECTION_OVER_UNION
|
||||
return_empty_detections: true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Maps detection label IDs to the corresponding label text. The label map is
|
||||
# provided in the label_map_path option.
|
||||
node {
|
||||
calculator: "DetectionLabelIdToTextCalculator"
|
||||
input_stream: "filtered_detections"
|
||||
output_stream: "output_detections"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.DetectionLabelIdToTextCalculatorOptions] {
|
||||
label_map_path: "mediapipe/models/ssdlite_object_detection_labelmap.txt"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Object Tracking Subgraph
|
||||
|
||||
Object tracking subgraph uses a box tracking subgraph which is a generic
|
||||
tracking library that can be used for other use cases.
|
||||
|
||||

|
||||
|
||||
[Source pbtxt file](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/tracking/subgraphs/object_tracking.pbtxt)
|
||||
|
||||
```bash
|
||||
# MediaPipe object tracking subgraph.
|
||||
|
||||
type: "ObjectTrackingSubgraph"
|
||||
|
||||
input_stream: "VIDEO:input_video"
|
||||
input_stream: "DETECTIONS:new_detections"
|
||||
output_stream: "DETECTIONS:tracked_detections"
|
||||
|
||||
# Assigns an unique id for each new detection.
|
||||
node {
|
||||
calculator: "DetectionUniqueIdCalculator"
|
||||
input_stream: "DETECTIONS:new_detections"
|
||||
output_stream: "DETECTIONS:detections_with_id"
|
||||
}
|
||||
|
||||
# Converts detections to TimedBox protos which are used as initial location
|
||||
# for tracking.
|
||||
node {
|
||||
calculator: "DetectionsToTimedBoxListCalculator"
|
||||
input_stream: "DETECTIONS:detections_with_id"
|
||||
output_stream: "BOXES:start_pos"
|
||||
}
|
||||
|
||||
# Subgraph that tracks boxes (see box_tracking.pbtxt).
|
||||
node {
|
||||
calculator: "BoxTrackingSubgraph"
|
||||
input_stream: "VIDEO:input_video"
|
||||
input_stream: "BOXES:start_pos"
|
||||
input_stream: "CANCEL_ID:cancel_object_id"
|
||||
output_stream: "BOXES:boxes"
|
||||
}
|
||||
|
||||
# Managers new detected objects and objects that are being tracked.
|
||||
# It associates the duplicated detections and updates the locations of
|
||||
# detections from tracking.
|
||||
node: {
|
||||
calculator: "TrackedDetectionManagerCalculator"
|
||||
input_stream: "DETECTIONS:detections_with_id"
|
||||
input_stream: "TRACKING_BOXES:boxes"
|
||||
output_stream: "DETECTIONS:tracked_detections"
|
||||
output_stream: "CANCEL_OBJECT_ID:cancel_object_id"
|
||||
|
||||
input_stream_handler {
|
||||
input_stream_handler: "SyncSetInputStreamHandler"
|
||||
options {
|
||||
[mediapipe.SyncSetInputStreamHandlerOptions.ext] {
|
||||
sync_set {
|
||||
tag_index: "TRACKING_BOXES"
|
||||
}
|
||||
sync_set {
|
||||
tag_index: "DETECTIONS"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
```
|
||||
|
||||
### Box Tracking Subgraph
|
||||
|
||||

|
||||
|
||||
[Source pbtxt file](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/tracking/subgraphs/box_tracking.pbtxt)
|
||||
|
||||
```bash
|
||||
# MediaPipe box tracking subgraph.
|
||||
|
||||
type: "BoxTrackingSubgraph"
|
||||
|
||||
input_stream: "VIDEO:input_video"
|
||||
input_stream: "BOXES:start_pos"
|
||||
input_stream: "CANCEL_ID:cancel_object_id"
|
||||
output_stream: "BOXES:boxes"
|
||||
|
||||
node: {
|
||||
calculator: "ImageTransformationCalculator"
|
||||
input_stream: "IMAGE_GPU:input_video"
|
||||
output_stream: "IMAGE_GPU:downscaled_input_video"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
|
||||
output_width: 240
|
||||
output_height: 320
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Converts GPU buffer to ImageFrame for processing tracking.
|
||||
node: {
|
||||
calculator: "GpuBufferToImageFrameCalculator"
|
||||
input_stream: "downscaled_input_video"
|
||||
output_stream: "downscaled_input_video_cpu"
|
||||
}
|
||||
|
||||
# Performs motion analysis on an incoming video stream.
|
||||
node: {
|
||||
calculator: "MotionAnalysisCalculator"
|
||||
input_stream: "VIDEO:downscaled_input_video_cpu"
|
||||
output_stream: "CAMERA:camera_motion"
|
||||
output_stream: "FLOW:region_flow"
|
||||
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.MotionAnalysisCalculatorOptions]: {
|
||||
analysis_options {
|
||||
analysis_policy: ANALYSIS_POLICY_CAMERA_MOBILE
|
||||
flow_options {
|
||||
fast_estimation_min_block_size: 100
|
||||
top_inlier_sets: 1
|
||||
frac_inlier_error_threshold: 3e-3
|
||||
downsample_mode: DOWNSAMPLE_TO_INPUT_SIZE
|
||||
verification_distance: 5.0
|
||||
verify_long_feature_acceleration: true
|
||||
verify_long_feature_trigger_ratio: 0.1
|
||||
tracking_options {
|
||||
max_features: 500
|
||||
adaptive_extraction_levels: 2
|
||||
min_eig_val_settings {
|
||||
adaptive_lowest_quality_level: 2e-4
|
||||
}
|
||||
klt_tracker_implementation: KLT_OPENCV
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Reads optical flow fields defined in
|
||||
# mediapipe/framework/formats/motion/optical_flow_field.h,
|
||||
# returns a VideoFrame with 2 channels (v_x and v_y), each channel is quantized
|
||||
# to 0-255.
|
||||
node: {
|
||||
calculator: "FlowPackagerCalculator"
|
||||
input_stream: "FLOW:region_flow"
|
||||
input_stream: "CAMERA:camera_motion"
|
||||
output_stream: "TRACKING:tracking_data"
|
||||
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.FlowPackagerCalculatorOptions]: {
|
||||
flow_packager_options: {
|
||||
binary_tracking_data_support: false
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Tracks box positions over time.
|
||||
node: {
|
||||
calculator: "BoxTrackerCalculator"
|
||||
input_stream: "TRACKING:tracking_data"
|
||||
input_stream: "TRACK_TIME:input_video"
|
||||
input_stream: "START_POS:start_pos"
|
||||
input_stream: "CANCEL_OBJECT_ID:cancel_object_id"
|
||||
input_stream_info: {
|
||||
tag_index: "CANCEL_OBJECT_ID"
|
||||
back_edge: true
|
||||
}
|
||||
output_stream: "BOXES:boxes"
|
||||
|
||||
input_stream_handler {
|
||||
input_stream_handler: "SyncSetInputStreamHandler"
|
||||
options {
|
||||
[mediapipe.SyncSetInputStreamHandlerOptions.ext] {
|
||||
sync_set {
|
||||
tag_index: "TRACKING"
|
||||
tag_index: "TRACK_TIME"
|
||||
}
|
||||
sync_set {
|
||||
tag_index: "START_POS"
|
||||
}
|
||||
sync_set {
|
||||
tag_index: "CANCEL_OBJECT_ID"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.BoxTrackerCalculatorOptions]: {
|
||||
tracker_options: {
|
||||
track_step_options {
|
||||
track_object_and_camera: true
|
||||
tracking_degrees: TRACKING_DEGREE_OBJECT_SCALE
|
||||
inlier_spring_force: 0.0
|
||||
static_motion_temporal_ratio: 3e-2
|
||||
}
|
||||
}
|
||||
visualize_tracking_data: false
|
||||
streaming_track_data_cache_size: 100
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Renderer Subgraph
|
||||
|
||||

|
||||
|
||||
[Source pbtxt file](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/tracking/subgraphs/renderer_gpu.pbtxt)
|
||||
|
||||
```bash
|
||||
# MediaPipe object tracking rendering subgraph.
|
||||
|
||||
type: "RendererSubgraph"
|
||||
|
||||
input_stream: "IMAGE:input_image"
|
||||
input_stream: "DETECTIONS:detections"
|
||||
output_stream: "IMAGE:output_image"
|
||||
|
||||
# Converts the detections to drawing primitives for annotation overlay.
|
||||
node {
|
||||
calculator: "DetectionsToRenderDataCalculator"
|
||||
input_stream: "DETECTIONS:detections"
|
||||
output_stream: "RENDER_DATA:detections_render_data"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
|
||||
thickness: 4.0
|
||||
color { r: 255 g: 0 b: 0 }
|
||||
render_detection_id: true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Draws annotations and overlays them on top of the input images.
|
||||
node {
|
||||
calculator: "AnnotationOverlayCalculator"
|
||||
input_stream: "INPUT_FRAME_GPU:input_image"
|
||||
input_stream: "detections_render_data"
|
||||
output_stream: "OUTPUT_FRAME_GPU:output_image"
|
||||
}
|
||||
|
||||
```
|
||||
|
|
@ -0,0 +1,33 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
|
||||
package="com.google.mediapipe.apps.objecttrackinggpu">
|
||||
|
||||
<uses-sdk
|
||||
android:minSdkVersion="21"
|
||||
android:targetSdkVersion="27" />
|
||||
|
||||
<!-- For using the camera -->
|
||||
<uses-permission android:name="android.permission.CAMERA" />
|
||||
<uses-feature android:name="android.hardware.camera" />
|
||||
<uses-feature android:name="android.hardware.camera.autofocus" />
|
||||
<!-- For MediaPipe -->
|
||||
<uses-feature android:glEsVersion="0x00020000" android:required="true" />
|
||||
|
||||
|
||||
<application
|
||||
android:allowBackup="true"
|
||||
android:label="@string/app_name"
|
||||
android:supportsRtl="true"
|
||||
android:theme="@style/AppTheme">
|
||||
<activity
|
||||
android:name=".MainActivity"
|
||||
android:exported="true"
|
||||
android:screenOrientation="portrait">
|
||||
<intent-filter>
|
||||
<action android:name="android.intent.action.MAIN" />
|
||||
<category android:name="android.intent.category.LAUNCHER" />
|
||||
</intent-filter>
|
||||
</activity>
|
||||
</application>
|
||||
|
||||
</manifest>
|
||||
|
|
@ -0,0 +1,81 @@
|
|||
# Copyright 2019 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
licenses(["notice"]) # Apache 2.0
|
||||
|
||||
package(default_visibility = ["//visibility:private"])
|
||||
|
||||
cc_binary(
|
||||
name = "libmediapipe_jni.so",
|
||||
linkshared = 1,
|
||||
linkstatic = 1,
|
||||
deps = [
|
||||
"//mediapipe/graphs/tracking:mobile_calculators",
|
||||
"//mediapipe/java/com/google/mediapipe/framework/jni:mediapipe_framework_jni",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "mediapipe_jni_lib",
|
||||
srcs = [":libmediapipe_jni.so"],
|
||||
alwayslink = 1,
|
||||
)
|
||||
|
||||
# Maps the binary graph to an alias (e.g., the app name) for convenience so that the alias can be
|
||||
# easily incorporated into the app via, for example,
|
||||
# MainActivity.BINARY_GRAPH_NAME = "appname.binarypb".
|
||||
genrule(
|
||||
name = "binary_graph",
|
||||
srcs = ["//mediapipe/graphs/tracking:mobile_gpu_binary_graph"],
|
||||
outs = ["objecttrackinggpu.binarypb"],
|
||||
cmd = "cp $< $@",
|
||||
)
|
||||
|
||||
android_library(
|
||||
name = "mediapipe_lib",
|
||||
srcs = glob(["*.java"]),
|
||||
assets = [
|
||||
":binary_graph",
|
||||
"//mediapipe/models:ssdlite_object_detection.tflite",
|
||||
"//mediapipe/models:ssdlite_object_detection_labelmap.txt",
|
||||
],
|
||||
assets_dir = "",
|
||||
manifest = "AndroidManifest.xml",
|
||||
resource_files = glob(["res/**"]),
|
||||
deps = [
|
||||
":mediapipe_jni_lib",
|
||||
"//mediapipe/java/com/google/mediapipe/components:android_camerax_helper",
|
||||
"//mediapipe/java/com/google/mediapipe/components:android_components",
|
||||
"//mediapipe/java/com/google/mediapipe/framework:android_framework",
|
||||
"//mediapipe/java/com/google/mediapipe/glutil",
|
||||
"//third_party:androidx_appcompat",
|
||||
"//third_party:androidx_constraint_layout",
|
||||
"//third_party:androidx_legacy_support_v4",
|
||||
"//third_party:androidx_recyclerview",
|
||||
"//third_party:opencv",
|
||||
"@androidx_concurrent_futures//jar",
|
||||
"@androidx_lifecycle//jar",
|
||||
"@com_google_guava_android//jar",
|
||||
],
|
||||
)
|
||||
|
||||
android_binary(
|
||||
name = "objecttrackinggpu",
|
||||
manifest = "AndroidManifest.xml",
|
||||
manifest_values = {"applicationId": "com.google.mediapipe.apps.objecttrackinggpu"},
|
||||
multidex = "native",
|
||||
deps = [
|
||||
":mediapipe_lib",
|
||||
],
|
||||
)
|
||||
|
|
@ -0,0 +1,167 @@
|
|||
// Copyright 2019 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package com.google.mediapipe.apps.objecttrackinggpu;
|
||||
|
||||
import android.graphics.SurfaceTexture;
|
||||
import android.os.Bundle;
|
||||
import androidx.appcompat.app.AppCompatActivity;
|
||||
import android.util.Size;
|
||||
import android.view.SurfaceHolder;
|
||||
import android.view.SurfaceView;
|
||||
import android.view.View;
|
||||
import android.view.ViewGroup;
|
||||
import com.google.mediapipe.components.CameraHelper;
|
||||
import com.google.mediapipe.components.CameraXPreviewHelper;
|
||||
import com.google.mediapipe.components.ExternalTextureConverter;
|
||||
import com.google.mediapipe.components.FrameProcessor;
|
||||
import com.google.mediapipe.components.PermissionHelper;
|
||||
import com.google.mediapipe.framework.AndroidAssetUtil;
|
||||
import com.google.mediapipe.glutil.EglManager;
|
||||
|
||||
/** Main activity of MediaPipe example apps. */
|
||||
public class MainActivity extends AppCompatActivity {
|
||||
private static final String TAG = "MainActivity";
|
||||
|
||||
private static final String BINARY_GRAPH_NAME = "objecttrackinggpu.binarypb";
|
||||
private static final String INPUT_VIDEO_STREAM_NAME = "input_video";
|
||||
private static final String OUTPUT_VIDEO_STREAM_NAME = "output_video";
|
||||
private static final CameraHelper.CameraFacing CAMERA_FACING = CameraHelper.CameraFacing.BACK;
|
||||
|
||||
// Flips the camera-preview frames vertically before sending them into FrameProcessor to be
|
||||
// processed in a MediaPipe graph, and flips the processed frames back when they are displayed.
|
||||
// This is needed because OpenGL represents images assuming the image origin is at the bottom-left
|
||||
// corner, whereas MediaPipe in general assumes the image origin is at top-left.
|
||||
private static final boolean FLIP_FRAMES_VERTICALLY = true;
|
||||
|
||||
static {
|
||||
// Load all native libraries needed by the app.
|
||||
System.loadLibrary("mediapipe_jni");
|
||||
System.loadLibrary("opencv_java3");
|
||||
}
|
||||
|
||||
// {@link SurfaceTexture} where the camera-preview frames can be accessed.
|
||||
private SurfaceTexture previewFrameTexture;
|
||||
// {@link SurfaceView} that displays the camera-preview frames processed by a MediaPipe graph.
|
||||
private SurfaceView previewDisplayView;
|
||||
|
||||
// Creates and manages an {@link EGLContext}.
|
||||
private EglManager eglManager;
|
||||
// Sends camera-preview frames into a MediaPipe graph for processing, and displays the processed
|
||||
// frames onto a {@link Surface}.
|
||||
private FrameProcessor processor;
|
||||
// Converts the GL_TEXTURE_EXTERNAL_OES texture from Android camera into a regular texture to be
|
||||
// consumed by {@link FrameProcessor} and the underlying MediaPipe graph.
|
||||
private ExternalTextureConverter converter;
|
||||
|
||||
// Handles camera access via the {@link CameraX} Jetpack support library.
|
||||
private CameraXPreviewHelper cameraHelper;
|
||||
|
||||
@Override
|
||||
protected void onCreate(Bundle savedInstanceState) {
|
||||
super.onCreate(savedInstanceState);
|
||||
setContentView(R.layout.activity_main);
|
||||
|
||||
previewDisplayView = new SurfaceView(this);
|
||||
setupPreviewDisplayView();
|
||||
|
||||
// Initialize asset manager so that MediaPipe native libraries can access the app assets, e.g.,
|
||||
// binary graphs.
|
||||
AndroidAssetUtil.initializeNativeAssetManager(this);
|
||||
|
||||
eglManager = new EglManager(null);
|
||||
processor =
|
||||
new FrameProcessor(
|
||||
this,
|
||||
eglManager.getNativeContext(),
|
||||
BINARY_GRAPH_NAME,
|
||||
INPUT_VIDEO_STREAM_NAME,
|
||||
OUTPUT_VIDEO_STREAM_NAME);
|
||||
processor.getVideoSurfaceOutput().setFlipY(FLIP_FRAMES_VERTICALLY);
|
||||
|
||||
PermissionHelper.checkAndRequestCameraPermissions(this);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void onResume() {
|
||||
super.onResume();
|
||||
converter = new ExternalTextureConverter(eglManager.getContext());
|
||||
converter.setFlipY(FLIP_FRAMES_VERTICALLY);
|
||||
converter.setConsumer(processor);
|
||||
if (PermissionHelper.cameraPermissionsGranted(this)) {
|
||||
startCamera();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void onPause() {
|
||||
super.onPause();
|
||||
converter.close();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onRequestPermissionsResult(
|
||||
int requestCode, String[] permissions, int[] grantResults) {
|
||||
super.onRequestPermissionsResult(requestCode, permissions, grantResults);
|
||||
PermissionHelper.onRequestPermissionsResult(requestCode, permissions, grantResults);
|
||||
}
|
||||
|
||||
private void setupPreviewDisplayView() {
|
||||
previewDisplayView.setVisibility(View.GONE);
|
||||
ViewGroup viewGroup = findViewById(R.id.preview_display_layout);
|
||||
viewGroup.addView(previewDisplayView);
|
||||
|
||||
previewDisplayView
|
||||
.getHolder()
|
||||
.addCallback(
|
||||
new SurfaceHolder.Callback() {
|
||||
@Override
|
||||
public void surfaceCreated(SurfaceHolder holder) {
|
||||
processor.getVideoSurfaceOutput().setSurface(holder.getSurface());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void surfaceChanged(SurfaceHolder holder, int format, int width, int height) {
|
||||
// (Re-)Compute the ideal size of the camera-preview display (the area that the
|
||||
// camera-preview frames get rendered onto, potentially with scaling and rotation)
|
||||
// based on the size of the SurfaceView that contains the display.
|
||||
Size viewSize = new Size(width, height);
|
||||
Size displaySize = cameraHelper.computeDisplaySizeFromViewSize(viewSize);
|
||||
|
||||
// Connect the converter to the camera-preview frames as its input (via
|
||||
// previewFrameTexture), and configure the output width and height as the computed
|
||||
// display size.
|
||||
converter.setSurfaceTextureAndAttachToGLContext(
|
||||
previewFrameTexture, displaySize.getWidth(), displaySize.getHeight());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void surfaceDestroyed(SurfaceHolder holder) {
|
||||
processor.getVideoSurfaceOutput().setSurface(null);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
private void startCamera() {
|
||||
cameraHelper = new CameraXPreviewHelper();
|
||||
cameraHelper.setOnCameraStartedListener(
|
||||
surfaceTexture -> {
|
||||
previewFrameTexture = surfaceTexture;
|
||||
// Make the display view visible to start showing the preview. This triggers the
|
||||
// SurfaceHolder.Callback added to (the holder of) previewDisplayView.
|
||||
previewDisplayView.setVisibility(View.VISIBLE);
|
||||
});
|
||||
cameraHelper.startCamera(this, CAMERA_FACING, /*surfaceTexture=*/ null);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,20 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<androidx.constraintlayout.widget.ConstraintLayout xmlns:android="http://schemas.android.com/apk/res/android"
|
||||
xmlns:app="http://schemas.android.com/apk/res-auto"
|
||||
xmlns:tools="http://schemas.android.com/tools"
|
||||
android:layout_width="match_parent"
|
||||
android:layout_height="match_parent">
|
||||
|
||||
<FrameLayout
|
||||
android:id="@+id/preview_display_layout"
|
||||
android:layout_width="fill_parent"
|
||||
android:layout_height="fill_parent"
|
||||
android:layout_weight="1">
|
||||
<TextView
|
||||
android:id="@+id/no_camera_access_view"
|
||||
android:layout_height="fill_parent"
|
||||
android:layout_width="fill_parent"
|
||||
android:gravity="center"
|
||||
android:text="@string/no_camera_access" />
|
||||
</FrameLayout>
|
||||
</androidx.constraintlayout.widget.ConstraintLayout>
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<resources>
|
||||
<color name="colorPrimary">#008577</color>
|
||||
<color name="colorPrimaryDark">#00574B</color>
|
||||
<color name="colorAccent">#D81B60</color>
|
||||
</resources>
|
||||
|
|
@ -0,0 +1,4 @@
|
|||
<resources>
|
||||
<string name="app_name" translatable="false">Object Tracking GPU</string>
|
||||
<string name="no_camera_access" translatable="false">Please grant camera permissions.</string>
|
||||
</resources>
|
||||
|
|
@ -0,0 +1,11 @@
|
|||
<resources>
|
||||
|
||||
<!-- Base application theme. -->
|
||||
<style name="AppTheme" parent="Theme.AppCompat.Light.DarkActionBar">
|
||||
<!-- Customize your theme here. -->
|
||||
<item name="colorPrimary">@color/colorPrimary</item>
|
||||
<item name="colorPrimaryDark">@color/colorPrimaryDark</item>
|
||||
<item name="colorAccent">@color/colorAccent</item>
|
||||
</style>
|
||||
|
||||
</resources>
|
||||
|
|
@ -93,15 +93,15 @@ FILEPATTERN = "kinetics_700_%s_25fps_rgb_flow"
|
|||
SPLITS = {
|
||||
"train": {
|
||||
"shards": 1000,
|
||||
"examples": 541490
|
||||
"examples": 541279
|
||||
},
|
||||
"validate": {
|
||||
"shards": 100,
|
||||
"examples": 34715
|
||||
"examples": 34688
|
||||
},
|
||||
"test": {
|
||||
"shards": 100,
|
||||
"examples": 69321
|
||||
"examples": 69278
|
||||
},
|
||||
"custom": {
|
||||
"csv": None, # Add a CSV for your own data here.
|
||||
|
|
|
|||
|
|
@ -541,5 +541,90 @@ TEST_F(CalculatorGraphEventLoopTest, WaitToAddPacketToInputStream) {
|
|||
ASSERT_EQ(kNumInputPackets, output_packets_.size());
|
||||
}
|
||||
|
||||
// Captures log messages during testing.
|
||||
class TextMessageLogSink : public LogSink {
|
||||
public:
|
||||
std::vector<std::string> messages;
|
||||
void Send(const LogEntry& entry) {
|
||||
messages.push_back(std::string(entry.text_message()));
|
||||
}
|
||||
};
|
||||
|
||||
// Verifies that CalculatorGraph::UnthrottleSources does not run repeatedly
|
||||
// in a "busy-loop" while the graph is throttled due to a graph-output stream.
|
||||
TEST_F(CalculatorGraphEventLoopTest, UnthrottleSources) {
|
||||
CalculatorGraphConfig graph_config;
|
||||
ASSERT_TRUE(proto_ns::TextFormat::ParseFromString(
|
||||
R"(
|
||||
node {
|
||||
calculator: "PassThroughCalculator"
|
||||
input_stream: "input_numbers"
|
||||
output_stream: "output_numbers"
|
||||
}
|
||||
input_stream: "input_numbers"
|
||||
output_stream: "output_numbers"
|
||||
num_threads: 2
|
||||
max_queue_size: 5
|
||||
)",
|
||||
&graph_config));
|
||||
constexpr int kQueueSize = 5;
|
||||
|
||||
// Initialize and start the mediapipe graph.
|
||||
CalculatorGraph graph;
|
||||
MP_ASSERT_OK(graph.Initialize(graph_config));
|
||||
graph.SetGraphInputStreamAddMode(
|
||||
CalculatorGraph::GraphInputStreamAddMode::ADD_IF_NOT_FULL);
|
||||
auto poller_status = graph.AddOutputStreamPoller("output_numbers");
|
||||
MP_ASSERT_OK(poller_status.status());
|
||||
mediapipe::OutputStreamPoller& poller = poller_status.ValueOrDie();
|
||||
poller.SetMaxQueueSize(kQueueSize);
|
||||
MP_ASSERT_OK(graph.StartRun({}));
|
||||
|
||||
// Lambda that adds a packet to the calculator graph.
|
||||
auto add_packet = [&graph](std::string s, int i) {
|
||||
return graph.AddPacketToInputStream(s, MakePacket<int>(i).At(Timestamp(i)));
|
||||
};
|
||||
|
||||
// Start capturing VLOG messages from the mediapipe::Scheduler.
|
||||
TextMessageLogSink log_listener;
|
||||
mediapipe::AddLogSink(&log_listener);
|
||||
SetVLOGLevel("scheduler", 3);
|
||||
|
||||
// Add just enough packets to fill the output stream queue.
|
||||
std::vector<Packet> out_packets;
|
||||
for (int i = 0; i < kQueueSize; ++i) {
|
||||
MP_EXPECT_OK(add_packet("input_numbers", i));
|
||||
MP_EXPECT_OK(graph.WaitUntilIdle());
|
||||
}
|
||||
|
||||
// The graph is throttled due to the full output stream.
|
||||
EXPECT_FALSE(add_packet("input_numbers", kQueueSize).ok());
|
||||
|
||||
// CalculatorGraph::UnthrottleSources should be called just one time.
|
||||
absl::SleepFor(absl::Milliseconds(100));
|
||||
|
||||
// Read all packets from the output stream queue and close the graph.
|
||||
for (int i = 0; i < kQueueSize; ++i) {
|
||||
Packet packet;
|
||||
EXPECT_TRUE(poller.Next(&packet));
|
||||
out_packets.push_back(packet);
|
||||
}
|
||||
MP_EXPECT_OK(graph.CloseAllInputStreams());
|
||||
MP_EXPECT_OK(graph.WaitUntilDone());
|
||||
EXPECT_EQ(kQueueSize, out_packets.size());
|
||||
|
||||
// Stop capturing VLOG messages.
|
||||
SetVLOGLevel("scheduler", 0);
|
||||
mediapipe::RemoveLogSink(&log_listener);
|
||||
|
||||
// Count and validate the calls to UnthrottleSources.
|
||||
int loop_count = 0;
|
||||
for (auto& message : log_listener.messages) {
|
||||
loop_count += (message == "HandleIdle: unthrottling") ? 1 : 0;
|
||||
}
|
||||
EXPECT_GE(loop_count, 1);
|
||||
EXPECT_LE(loop_count, 2);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
} // namespace mediapipe
|
||||
|
|
|
|||
|
|
@ -241,7 +241,9 @@ cc_test(
|
|||
proto_library(
|
||||
name = "rect_proto",
|
||||
srcs = ["rect.proto"],
|
||||
visibility = ["//visibility:public"],
|
||||
visibility = [
|
||||
"//visibility:public",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_cc_proto_library(
|
||||
|
|
@ -253,10 +255,21 @@ mediapipe_cc_proto_library(
|
|||
deps = [":rect_proto"],
|
||||
)
|
||||
|
||||
java_lite_proto_library(
|
||||
name = "rect_java_proto_lite",
|
||||
strict_deps = 0,
|
||||
visibility = [
|
||||
"//mediapipe:__subpackages__",
|
||||
],
|
||||
deps = [":landmark_proto"],
|
||||
)
|
||||
|
||||
proto_library(
|
||||
name = "landmark_proto",
|
||||
srcs = ["landmark.proto"],
|
||||
visibility = ["//visibility:public"],
|
||||
visibility = [
|
||||
"//visibility:public",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_cc_proto_library(
|
||||
|
|
@ -269,7 +282,9 @@ mediapipe_cc_proto_library(
|
|||
java_lite_proto_library(
|
||||
name = "landmark_java_proto_lite",
|
||||
strict_deps = 0,
|
||||
visibility = ["//mediapipe:__subpackages__"],
|
||||
visibility = [
|
||||
"//mediapipe:__subpackages__",
|
||||
],
|
||||
deps = [":landmark_proto"],
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -176,6 +176,7 @@ cc_library(
|
|||
deps = [
|
||||
"//mediapipe/framework:port",
|
||||
"@com_github_glog_glog//:glog",
|
||||
"@com_google_absl//absl/time",
|
||||
],
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -15,6 +15,50 @@
|
|||
#ifndef MEDIAPIPE_PORT_LOGGING_H_
|
||||
#define MEDIAPIPE_PORT_LOGGING_H_
|
||||
|
||||
#include "absl/time/time.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace mediapipe {
|
||||
using LogSeverity = google::LogSeverity;
|
||||
const auto SetVLOGLevel = google::SetVLOGLevel;
|
||||
class LogEntry {
|
||||
public:
|
||||
LogEntry(LogSeverity severity, const struct ::tm* tm_time,
|
||||
absl::string_view message)
|
||||
: severity_(severity),
|
||||
timestamp_(absl::FromTM(*tm_time, absl::LocalTimeZone())),
|
||||
text_message_(message) {}
|
||||
LogSeverity log_severity() const { return severity_; }
|
||||
absl::Time timestamp() const { return timestamp_; }
|
||||
absl::string_view text_message() const { return text_message_; }
|
||||
|
||||
private:
|
||||
LogSeverity severity_;
|
||||
absl::Time timestamp_;
|
||||
absl::string_view text_message_;
|
||||
};
|
||||
class LogSink : public google::LogSink {
|
||||
public:
|
||||
virtual ~LogSink() = default;
|
||||
virtual void Send(const LogEntry& entry) = 0;
|
||||
virtual void WaitTillSent() {}
|
||||
|
||||
private:
|
||||
virtual void send(LogSeverity severity, const char* full_filename,
|
||||
const char* base_filename, int line,
|
||||
const struct ::tm* tm_time, const char* message,
|
||||
size_t message_len) {
|
||||
LogEntry log_entry(severity, tm_time,
|
||||
absl::string_view(message, message_len));
|
||||
Send(log_entry);
|
||||
}
|
||||
};
|
||||
inline void AddLogSink(LogSink* destination) {
|
||||
google::AddLogSink(destination);
|
||||
}
|
||||
inline void RemoveLogSink(LogSink* destination) {
|
||||
google::RemoveLogSink(destination);
|
||||
}
|
||||
} // namespace mediapipe
|
||||
|
||||
#endif // MEDIAPIPE_PORT_LOGGING_H_
|
||||
|
|
|
|||
|
|
@ -0,0 +1,176 @@
|
|||
# MediaPipe graph that performs LSTM based object detection on desktop with
|
||||
# TensorFlow Lite on CPU.
|
||||
|
||||
# max_queue_size limits the number of packets enqueued on any input stream
|
||||
# by throttling inputs to the graph. This makes the graph only process one
|
||||
# frame per time.
|
||||
max_queue_size: 1
|
||||
|
||||
# Decodes an input video file into images and a video header.
|
||||
node {
|
||||
calculator: "OpenCvVideoDecoderCalculator"
|
||||
input_side_packet: "INPUT_FILE_PATH:input_video_path"
|
||||
output_stream: "VIDEO:input_video"
|
||||
output_stream: "VIDEO_PRESTREAM:input_video_header"
|
||||
}
|
||||
|
||||
# Transforms the input image on CPU to a 256x256 image. To scale the image, by
|
||||
# default it uses the STRETCH scale mode that maps the entire input image to the
|
||||
# entire transformed image. As a result, image aspect ratio may be changed and
|
||||
# objects in the image may be deformed (stretched or squeezed), but the object
|
||||
# detection model used in this graph is agnostic to that deformation.
|
||||
node: {
|
||||
calculator: "ImageTransformationCalculator"
|
||||
input_stream: "IMAGE:input_video"
|
||||
output_stream: "IMAGE:transformed_input_video"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
|
||||
output_width: 256
|
||||
output_height: 256
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Converts the transformed input image on CPU into an image tensor as a
|
||||
# TfLiteTensor. The zero_center option is set to true to normalize the
|
||||
# pixel values to [-1.f, 1.f] as opposed to [0.f, 1.f].
|
||||
node {
|
||||
calculator: "TfLiteConverterCalculator"
|
||||
input_stream: "IMAGE:transformed_input_video"
|
||||
output_stream: "TENSORS:image_tensor"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.TfLiteConverterCalculatorOptions] {
|
||||
zero_center: true
|
||||
use_quantized_tensors: true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Joins the input image_tensor and previous lstm state tensors into a single
|
||||
# output vector of tensors.
|
||||
node {
|
||||
calculator: "ConcatenateTfLiteTensorVectorCalculator"
|
||||
input_stream: "image_tensor"
|
||||
input_stream: "prev_lstm_tensors"
|
||||
output_stream: "concatenated_tensors"
|
||||
}
|
||||
|
||||
# Runs a TensorFlow Lite model on CPU that takes an image tensor and state
|
||||
# tensors, then outputs a vector of tensors representing, for instance,
|
||||
# detection boxes/keypoints and scores.
|
||||
node {
|
||||
calculator: "TfLiteInferenceCalculator"
|
||||
input_stream: "TENSORS:concatenated_tensors"
|
||||
output_stream: "TENSORS:inference_tensors"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] {
|
||||
model_path: "mediapipe/models/lstdlite_object_detection.tflite"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Splits up the output tensors into two streams [detection_boxes,
|
||||
# detection_classes, detection_scores, num_detections], and [lstm_c, lstm_h].
|
||||
node {
|
||||
calculator: "SplitTfLiteTensorVectorCalculator"
|
||||
input_stream: "inference_tensors"
|
||||
output_stream: "detection_tensors"
|
||||
output_stream: "lstm_tensors"
|
||||
options {
|
||||
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
|
||||
ranges: { begin: 0 end: 4 }
|
||||
ranges: { begin: 4 end: 6 }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Waits for the LSTM state output from the TfLite inferencing to feed back in as
|
||||
# an input tensor for the next frame.
|
||||
node {
|
||||
calculator: "PreviousLoopbackCalculator"
|
||||
input_stream: "MAIN:transformed_input_video"
|
||||
input_stream: "LOOP:lstm_tensors"
|
||||
input_stream_info: { tag_index: "LOOP" back_edge: true }
|
||||
output_stream: "PREV_LOOP:prev_lstm_tensors"
|
||||
}
|
||||
|
||||
# Decodes the detection tensors generated by the TensorFlow Lite model, based on
|
||||
# the specification in the options, into a vector of detections. Each detection
|
||||
# describes a detected object.
|
||||
node {
|
||||
calculator: "TfLiteTensorsToDetectionsCalculator"
|
||||
input_stream: "TENSORS:detection_tensors"
|
||||
output_stream: "DETECTIONS:detections"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.TfLiteTensorsToDetectionsCalculatorOptions] {
|
||||
num_classes: 7
|
||||
num_boxes: 40
|
||||
num_coords: 4
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Performs non-max suppression to remove excessive detections.
|
||||
node {
|
||||
calculator: "NonMaxSuppressionCalculator"
|
||||
input_stream: "detections"
|
||||
output_stream: "filtered_detections"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.NonMaxSuppressionCalculatorOptions] {
|
||||
min_suppression_threshold: 0.4
|
||||
min_score_threshold: 0.2
|
||||
max_num_detections: 5
|
||||
overlap_type: INTERSECTION_OVER_UNION
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Maps detection label IDs to the corresponding label text. The label map is
|
||||
# provided in the label_map_path option.
|
||||
node {
|
||||
calculator: "DetectionLabelIdToTextCalculator"
|
||||
input_stream: "filtered_detections"
|
||||
output_stream: "output_detections"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.DetectionLabelIdToTextCalculatorOptions] {
|
||||
label_map_path: "mediapipe/models/lstdlite_object_detection_labelmap.txt"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Converts the detections to drawing primitives for annotation overlay.
|
||||
node {
|
||||
calculator: "DetectionsToRenderDataCalculator"
|
||||
input_stream: "DETECTIONS:output_detections"
|
||||
output_stream: "RENDER_DATA:render_data"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
|
||||
thickness: 2.0
|
||||
color { r: 255 g: 255 b: 255 }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Draws annotations and overlays them on top of the original image coming into
|
||||
# the graph.
|
||||
node {
|
||||
calculator: "AnnotationOverlayCalculator"
|
||||
input_stream: "INPUT_FRAME:input_video"
|
||||
input_stream: "render_data"
|
||||
output_stream: "OUTPUT_FRAME:output_video"
|
||||
}
|
||||
|
||||
# Encodes the annotated images into a video file, adopting properties specified
|
||||
# in the input video header, e.g., video framerate.
|
||||
node {
|
||||
calculator: "OpenCvVideoEncoderCalculator"
|
||||
input_stream: "VIDEO:output_video"
|
||||
input_stream: "VIDEO_PRESTREAM:input_video_header"
|
||||
input_side_packet: "OUTPUT_FILE_PATH:output_video_path"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.OpenCvVideoEncoderCalculatorOptions]: {
|
||||
codec: "avc1"
|
||||
video_format: "mp4"
|
||||
}
|
||||
}
|
||||
}
|
||||
39
mediapipe/graphs/tracking/BUILD
Normal file
|
|
@ -0,0 +1,39 @@
|
|||
# Copyright 2019 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
load(
|
||||
"//mediapipe/framework/tool:mediapipe_graph.bzl",
|
||||
"mediapipe_binary_graph",
|
||||
)
|
||||
|
||||
licenses(["notice"]) # Apache 2.0
|
||||
|
||||
package(default_visibility = ["//visibility:public"])
|
||||
|
||||
cc_library(
|
||||
name = "mobile_calculators",
|
||||
deps = [
|
||||
"//mediapipe/calculators/core:packet_resampler_calculator",
|
||||
"//mediapipe/graphs/tracking/subgraphs:object_detection_gpu",
|
||||
"//mediapipe/graphs/tracking/subgraphs:object_tracking",
|
||||
"//mediapipe/graphs/tracking/subgraphs:renderer_gpu",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_binary_graph(
|
||||
name = "mobile_gpu_binary_graph",
|
||||
graph = "object_detection_tracking_mobile_gpu.pbtxt",
|
||||
output_name = "mobile_gpu.binarypb",
|
||||
deps = [":mobile_calculators"],
|
||||
)
|
||||
|
|
@ -0,0 +1,46 @@
|
|||
# MediaPipe graph that performs object detection and tracking with TensorFlow
|
||||
# Lite on GPU.
|
||||
# Used in the examples in
|
||||
# mediapipie/examples/android/src/java/com/mediapipe/apps/objecttrackinggpu
|
||||
|
||||
# Images on GPU coming into and out of the graph.
|
||||
input_stream: "input_video"
|
||||
output_stream: "output_video"
|
||||
|
||||
# Resamples the images by specific frame rate. This calculator is used to
|
||||
# control the frequecy of subsequent calculators/subgraphs, e.g. less power
|
||||
# consumption for expensive process.
|
||||
node {
|
||||
calculator: "PacketResamplerCalculator"
|
||||
input_stream: "DATA:input_video"
|
||||
output_stream: "DATA:throttled_input_video"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.PacketResamplerCalculatorOptions] {
|
||||
frame_rate: 0.5
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Subgraph that detections objects (see object_detection_gpu.pbtxt).
|
||||
node {
|
||||
calculator: "ObjectDetectionSubgraph"
|
||||
input_stream: "IMAGE:throttled_input_video"
|
||||
output_stream: "DETECTIONS:output_detections"
|
||||
}
|
||||
|
||||
# Subgraph that tracks objects (see object_tracking.pbtxt).
|
||||
node {
|
||||
calculator: "ObjectTrackingSubgraph"
|
||||
input_stream: "VIDEO:input_video"
|
||||
input_stream: "DETECTIONS:output_detections"
|
||||
output_stream: "DETECTIONS:tracked_detections"
|
||||
}
|
||||
|
||||
# Subgraph that renders annotations and overlays them on top of the input
|
||||
# images (see renderer_gpu.pbtxt).
|
||||
node {
|
||||
calculator: "RendererSubgraph"
|
||||
input_stream: "IMAGE:input_video"
|
||||
input_stream: "DETECTIONS:tracked_detections"
|
||||
output_stream: "IMAGE:output_video"
|
||||
}
|
||||
76
mediapipe/graphs/tracking/subgraphs/BUILD
Normal file
|
|
@ -0,0 +1,76 @@
|
|||
# Copyright 2019 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
load(
|
||||
"//mediapipe/framework/tool:mediapipe_graph.bzl",
|
||||
"mediapipe_simple_subgraph",
|
||||
)
|
||||
|
||||
licenses(["notice"]) # Apache 2.0
|
||||
|
||||
package(default_visibility = ["//visibility:public"])
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "box_tracking",
|
||||
graph = "box_tracking.pbtxt",
|
||||
register_as = "BoxTrackingSubgraph",
|
||||
deps = [
|
||||
"//mediapipe/calculators/image:image_transformation_calculator",
|
||||
"//mediapipe/calculators/video:box_tracker_calculator",
|
||||
"//mediapipe/calculators/video:flow_packager_calculator",
|
||||
"//mediapipe/calculators/video:motion_analysis_calculator",
|
||||
"//mediapipe/framework/stream_handler:immediate_input_stream_handler",
|
||||
"//mediapipe/framework/stream_handler:sync_set_input_stream_handler",
|
||||
"//mediapipe/gpu:gpu_buffer_to_image_frame_calculator",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "object_tracking",
|
||||
graph = "object_tracking.pbtxt",
|
||||
register_as = "ObjectTrackingSubgraph",
|
||||
deps = [
|
||||
"//mediapipe/calculators/util:detection_unique_id_calculator",
|
||||
"//mediapipe/calculators/util:detections_to_timed_box_list_calculator",
|
||||
"//mediapipe/calculators/video:tracked_detection_manager_calculator",
|
||||
"//mediapipe/framework/stream_handler:sync_set_input_stream_handler",
|
||||
"//mediapipe/graphs/tracking/subgraphs:box_tracking",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "object_detection_gpu",
|
||||
graph = "object_detection_gpu.pbtxt",
|
||||
register_as = "ObjectDetectionSubgraph",
|
||||
deps = [
|
||||
"//mediapipe/calculators/image:image_transformation_calculator",
|
||||
"//mediapipe/calculators/tflite:ssd_anchors_calculator",
|
||||
"//mediapipe/calculators/tflite:tflite_converter_calculator",
|
||||
"//mediapipe/calculators/tflite:tflite_inference_calculator",
|
||||
"//mediapipe/calculators/tflite:tflite_tensors_to_detections_calculator",
|
||||
"//mediapipe/calculators/util:detection_label_id_to_text_calculator",
|
||||
"//mediapipe/calculators/util:non_max_suppression_calculator",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "renderer_gpu",
|
||||
graph = "renderer_gpu.pbtxt",
|
||||
register_as = "RendererSubgraph",
|
||||
deps = [
|
||||
"//mediapipe/calculators/util:annotation_overlay_calculator",
|
||||
"//mediapipe/calculators/util:detections_to_render_data_calculator",
|
||||
"//mediapipe/calculators/util:rect_to_render_data_calculator",
|
||||
],
|
||||
)
|
||||
126
mediapipe/graphs/tracking/subgraphs/box_tracking.pbtxt
Normal file
|
|
@ -0,0 +1,126 @@
|
|||
# MediaPipe box tracking subgraph.
|
||||
|
||||
type: "BoxTrackingSubgraph"
|
||||
|
||||
input_stream: "VIDEO:input_video"
|
||||
input_stream: "BOXES:start_pos"
|
||||
input_stream: "CANCEL_ID:cancel_object_id"
|
||||
output_stream: "BOXES:boxes"
|
||||
|
||||
node: {
|
||||
calculator: "ImageTransformationCalculator"
|
||||
input_stream: "IMAGE_GPU:input_video"
|
||||
output_stream: "IMAGE_GPU:downscaled_input_video"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
|
||||
output_width: 240
|
||||
output_height: 320
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Converts GPU buffer to ImageFrame for processing tracking.
|
||||
node: {
|
||||
calculator: "GpuBufferToImageFrameCalculator"
|
||||
input_stream: "downscaled_input_video"
|
||||
output_stream: "downscaled_input_video_cpu"
|
||||
}
|
||||
|
||||
# Performs motion analysis on an incoming video stream.
|
||||
node: {
|
||||
calculator: "MotionAnalysisCalculator"
|
||||
input_stream: "VIDEO:downscaled_input_video_cpu"
|
||||
output_stream: "CAMERA:camera_motion"
|
||||
output_stream: "FLOW:region_flow"
|
||||
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.MotionAnalysisCalculatorOptions]: {
|
||||
analysis_options {
|
||||
analysis_policy: ANALYSIS_POLICY_CAMERA_MOBILE
|
||||
flow_options {
|
||||
fast_estimation_min_block_size: 100
|
||||
top_inlier_sets: 1
|
||||
frac_inlier_error_threshold: 3e-3
|
||||
downsample_mode: DOWNSAMPLE_TO_INPUT_SIZE
|
||||
verification_distance: 5.0
|
||||
verify_long_feature_acceleration: true
|
||||
verify_long_feature_trigger_ratio: 0.1
|
||||
tracking_options {
|
||||
max_features: 500
|
||||
adaptive_extraction_levels: 2
|
||||
min_eig_val_settings {
|
||||
adaptive_lowest_quality_level: 2e-4
|
||||
}
|
||||
klt_tracker_implementation: KLT_OPENCV
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Reads optical flow fields defined in
|
||||
# mediapipe/framework/formats/motion/optical_flow_field.h,
|
||||
# returns a VideoFrame with 2 channels (v_x and v_y), each channel is quantized
|
||||
# to 0-255.
|
||||
node: {
|
||||
calculator: "FlowPackagerCalculator"
|
||||
input_stream: "FLOW:region_flow"
|
||||
input_stream: "CAMERA:camera_motion"
|
||||
output_stream: "TRACKING:tracking_data"
|
||||
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.FlowPackagerCalculatorOptions]: {
|
||||
flow_packager_options: {
|
||||
binary_tracking_data_support: false
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Tracks box positions over time.
|
||||
node: {
|
||||
calculator: "BoxTrackerCalculator"
|
||||
input_stream: "TRACKING:tracking_data"
|
||||
input_stream: "TRACK_TIME:input_video"
|
||||
input_stream: "START_POS:start_pos"
|
||||
input_stream: "CANCEL_OBJECT_ID:cancel_object_id"
|
||||
input_stream_info: {
|
||||
tag_index: "CANCEL_OBJECT_ID"
|
||||
back_edge: true
|
||||
}
|
||||
output_stream: "BOXES:boxes"
|
||||
|
||||
input_stream_handler {
|
||||
input_stream_handler: "SyncSetInputStreamHandler"
|
||||
options {
|
||||
[mediapipe.SyncSetInputStreamHandlerOptions.ext] {
|
||||
sync_set {
|
||||
tag_index: "TRACKING"
|
||||
tag_index: "TRACK_TIME"
|
||||
}
|
||||
sync_set {
|
||||
tag_index: "START_POS"
|
||||
}
|
||||
sync_set {
|
||||
tag_index: "CANCEL_OBJECT_ID"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.BoxTrackerCalculatorOptions]: {
|
||||
tracker_options: {
|
||||
track_step_options {
|
||||
track_object_and_camera: true
|
||||
tracking_degrees: TRACKING_DEGREE_OBJECT_SCALE
|
||||
inlier_spring_force: 0.0
|
||||
static_motion_temporal_ratio: 3e-2
|
||||
}
|
||||
}
|
||||
visualize_tracking_data: false
|
||||
streaming_track_data_cache_size: 100
|
||||
}
|
||||
}
|
||||
}
|
||||
128
mediapipe/graphs/tracking/subgraphs/object_detection_gpu.pbtxt
Normal file
|
|
@ -0,0 +1,128 @@
|
|||
# MediaPipe object detection subgraph.
|
||||
|
||||
type: "ObjectDetectionSubgraph"
|
||||
|
||||
input_stream: "IMAGE:input_video"
|
||||
output_stream: "DETECTIONS:output_detections"
|
||||
|
||||
# Transforms the input image on GPU to a 320x320 image. To scale the image, by
|
||||
# default it uses the STRETCH scale mode that maps the entire input image to the
|
||||
# entire transformed image. As a result, image aspect ratio may be changed and
|
||||
# objects in the image may be deformed (stretched or squeezed), but the object
|
||||
# detection model used in this graph is agnostic to that deformation.
|
||||
node: {
|
||||
calculator: "ImageTransformationCalculator"
|
||||
input_stream: "IMAGE_GPU:input_video"
|
||||
output_stream: "IMAGE_GPU:transformed_input_video"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
|
||||
output_width: 320
|
||||
output_height: 320
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Converts the transformed input image on GPU into an image tensor stored as a
|
||||
# TfLiteTensor.
|
||||
node {
|
||||
calculator: "TfLiteConverterCalculator"
|
||||
input_stream: "IMAGE_GPU:transformed_input_video"
|
||||
output_stream: "TENSORS_GPU:image_tensor"
|
||||
}
|
||||
|
||||
# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a
|
||||
# vector of tensors representing, for instance, detection boxes/keypoints and
|
||||
# scores.
|
||||
node {
|
||||
calculator: "TfLiteInferenceCalculator"
|
||||
input_stream: "TENSORS_GPU:image_tensor"
|
||||
output_stream: "TENSORS_GPU:detection_tensors"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] {
|
||||
model_path: "mediapipe/models/ssdlite_object_detection.tflite"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Generates a single side packet containing a vector of SSD anchors based on
|
||||
# the specification in the options.
|
||||
node {
|
||||
calculator: "SsdAnchorsCalculator"
|
||||
output_side_packet: "anchors"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.SsdAnchorsCalculatorOptions] {
|
||||
num_layers: 6
|
||||
min_scale: 0.2
|
||||
max_scale: 0.95
|
||||
input_size_height: 320
|
||||
input_size_width: 320
|
||||
anchor_offset_x: 0.5
|
||||
anchor_offset_y: 0.5
|
||||
strides: 16
|
||||
strides: 32
|
||||
strides: 64
|
||||
strides: 128
|
||||
strides: 256
|
||||
strides: 512
|
||||
aspect_ratios: 1.0
|
||||
aspect_ratios: 2.0
|
||||
aspect_ratios: 0.5
|
||||
aspect_ratios: 3.0
|
||||
aspect_ratios: 0.3333
|
||||
reduce_boxes_in_lowest_layer: true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Decodes the detection tensors generated by the TensorFlow Lite model, based on
|
||||
# the SSD anchors and the specification in the options, into a vector of
|
||||
# detections. Each detection describes a detected object.
|
||||
node {
|
||||
calculator: "TfLiteTensorsToDetectionsCalculator"
|
||||
input_stream: "TENSORS_GPU:detection_tensors"
|
||||
input_side_packet: "ANCHORS:anchors"
|
||||
output_stream: "DETECTIONS:detections"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.TfLiteTensorsToDetectionsCalculatorOptions] {
|
||||
num_classes: 91
|
||||
num_boxes: 2034
|
||||
num_coords: 4
|
||||
ignore_classes: 0
|
||||
sigmoid_score: true
|
||||
apply_exponential_on_box_size: true
|
||||
x_scale: 10.0
|
||||
y_scale: 10.0
|
||||
h_scale: 5.0
|
||||
w_scale: 5.0
|
||||
min_score_thresh: 0.6
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Performs non-max suppression to remove excessive detections.
|
||||
node {
|
||||
calculator: "NonMaxSuppressionCalculator"
|
||||
input_stream: "detections"
|
||||
output_stream: "filtered_detections"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.NonMaxSuppressionCalculatorOptions] {
|
||||
min_suppression_threshold: 0.4
|
||||
max_num_detections: 3
|
||||
overlap_type: INTERSECTION_OVER_UNION
|
||||
return_empty_detections: true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Maps detection label IDs to the corresponding label text. The label map is
|
||||
# provided in the label_map_path option.
|
||||
node {
|
||||
calculator: "DetectionLabelIdToTextCalculator"
|
||||
input_stream: "filtered_detections"
|
||||
output_stream: "output_detections"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.DetectionLabelIdToTextCalculatorOptions] {
|
||||
label_map_path: "mediapipe/models/ssdlite_object_detection_labelmap.txt"
|
||||
}
|
||||
}
|
||||
}
|
||||
56
mediapipe/graphs/tracking/subgraphs/object_tracking.pbtxt
Normal file
|
|
@ -0,0 +1,56 @@
|
|||
# MediaPipe object tracking subgraph.
|
||||
|
||||
type: "ObjectTrackingSubgraph"
|
||||
|
||||
input_stream: "VIDEO:input_video"
|
||||
input_stream: "DETECTIONS:new_detections"
|
||||
output_stream: "DETECTIONS:tracked_detections"
|
||||
|
||||
# Assigns an unique id for each new detection.
|
||||
node {
|
||||
calculator: "DetectionUniqueIdCalculator"
|
||||
input_stream: "DETECTIONS:new_detections"
|
||||
output_stream: "DETECTIONS:detections_with_id"
|
||||
}
|
||||
|
||||
# Converts detections to TimedBox protos which are used as initial location
|
||||
# for tracking.
|
||||
node {
|
||||
calculator: "DetectionsToTimedBoxListCalculator"
|
||||
input_stream: "DETECTIONS:detections_with_id"
|
||||
output_stream: "BOXES:start_pos"
|
||||
}
|
||||
|
||||
# Subgraph that tracks boxes (see box_tracking.pbtxt).
|
||||
node {
|
||||
calculator: "BoxTrackingSubgraph"
|
||||
input_stream: "VIDEO:input_video"
|
||||
input_stream: "BOXES:start_pos"
|
||||
input_stream: "CANCEL_ID:cancel_object_id"
|
||||
output_stream: "BOXES:boxes"
|
||||
}
|
||||
|
||||
# Managers new detected objects and objects that are being tracked.
|
||||
# It associates the duplicated detections and updates the locations of
|
||||
# detections from tracking.
|
||||
node: {
|
||||
calculator: "TrackedDetectionManagerCalculator"
|
||||
input_stream: "DETECTIONS:detections_with_id"
|
||||
input_stream: "TRACKING_BOXES:boxes"
|
||||
output_stream: "DETECTIONS:tracked_detections"
|
||||
output_stream: "CANCEL_OBJECT_ID:cancel_object_id"
|
||||
|
||||
input_stream_handler {
|
||||
input_stream_handler: "SyncSetInputStreamHandler"
|
||||
options {
|
||||
[mediapipe.SyncSetInputStreamHandlerOptions.ext] {
|
||||
sync_set {
|
||||
tag_index: "TRACKING_BOXES"
|
||||
}
|
||||
sync_set {
|
||||
tag_index: "DETECTIONS"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
29
mediapipe/graphs/tracking/subgraphs/renderer_gpu.pbtxt
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
# MediaPipe object tracking rendering subgraph.
|
||||
|
||||
type: "RendererSubgraph"
|
||||
|
||||
input_stream: "IMAGE:input_image"
|
||||
input_stream: "DETECTIONS:detections"
|
||||
output_stream: "IMAGE:output_image"
|
||||
|
||||
# Converts the detections to drawing primitives for annotation overlay.
|
||||
node {
|
||||
calculator: "DetectionsToRenderDataCalculator"
|
||||
input_stream: "DETECTIONS:detections"
|
||||
output_stream: "RENDER_DATA:detections_render_data"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
|
||||
thickness: 4.0
|
||||
color { r: 255 g: 0 b: 0 }
|
||||
render_detection_id: true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Draws annotations and overlays them on top of the input images.
|
||||
node {
|
||||
calculator: "AnnotationOverlayCalculator"
|
||||
input_stream: "INPUT_FRAME_GPU:input_image"
|
||||
input_stream: "detections_render_data"
|
||||
output_stream: "OUTPUT_FRAME_GPU:output_image"
|
||||
}
|
||||
|
|
@ -50,4 +50,16 @@ public abstract class MediaPipeRunner extends Graph {
|
|||
* Stops the running graph and releases the resource. Call this in Activity onDestroy callback.
|
||||
*/
|
||||
public abstract void release();
|
||||
/**
|
||||
* Stops the running graph and releases the resource. Call this in Activity onDestroy callback.
|
||||
*
|
||||
* <p>Like {@link #release()} but with a timeout. The default behavior is to call
|
||||
* {@link #release()}. The implementation can override this method if it cares about timing out
|
||||
* releasing resources.
|
||||
*
|
||||
* @param timeoutMillis the time it takes to force timeout from releasing mff context.
|
||||
*/
|
||||
public void release(long timeoutMillis) {
|
||||
release();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
742
mediapipe/util/tracking/BUILD
Normal file
|
|
@ -0,0 +1,742 @@
|
|||
# Copyright 2019 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
load("//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library")
|
||||
|
||||
licenses(["notice"]) # Apache 2.0
|
||||
|
||||
package(default_visibility = ["//visibility:public"])
|
||||
|
||||
proto_library(
|
||||
name = "tone_models_proto",
|
||||
srcs = ["tone_models.proto"],
|
||||
)
|
||||
|
||||
proto_library(
|
||||
name = "tone_estimation_proto",
|
||||
srcs = ["tone_estimation.proto"],
|
||||
deps = [":tone_models_proto"],
|
||||
)
|
||||
|
||||
proto_library(
|
||||
name = "region_flow_computation_proto",
|
||||
srcs = ["region_flow_computation.proto"],
|
||||
deps = [
|
||||
":tone_estimation_proto",
|
||||
],
|
||||
)
|
||||
|
||||
proto_library(
|
||||
name = "motion_saliency_proto",
|
||||
srcs = ["motion_saliency.proto"],
|
||||
)
|
||||
|
||||
proto_library(
|
||||
name = "motion_estimation_proto",
|
||||
srcs = ["motion_estimation.proto"],
|
||||
)
|
||||
|
||||
proto_library(
|
||||
name = "motion_analysis_proto",
|
||||
srcs = ["motion_analysis.proto"],
|
||||
deps = [
|
||||
":motion_estimation_proto",
|
||||
":motion_saliency_proto",
|
||||
":region_flow_computation_proto",
|
||||
],
|
||||
)
|
||||
|
||||
proto_library(
|
||||
name = "region_flow_proto",
|
||||
srcs = ["region_flow.proto"],
|
||||
)
|
||||
|
||||
proto_library(
|
||||
name = "motion_models_proto",
|
||||
srcs = ["motion_models.proto"],
|
||||
)
|
||||
|
||||
proto_library(
|
||||
name = "camera_motion_proto",
|
||||
srcs = ["camera_motion.proto"],
|
||||
deps = [":motion_models_proto"],
|
||||
)
|
||||
|
||||
proto_library(
|
||||
name = "push_pull_filtering_proto",
|
||||
srcs = ["push_pull_filtering.proto"],
|
||||
)
|
||||
|
||||
proto_library(
|
||||
name = "frame_selection_solution_evaluator_proto",
|
||||
srcs = ["frame_selection_solution_evaluator.proto"],
|
||||
)
|
||||
|
||||
proto_library(
|
||||
name = "frame_selection_proto",
|
||||
srcs = ["frame_selection.proto"],
|
||||
deps = [
|
||||
":camera_motion_proto",
|
||||
":frame_selection_solution_evaluator_proto",
|
||||
":region_flow_proto",
|
||||
],
|
||||
)
|
||||
|
||||
proto_library(
|
||||
name = "flow_packager_proto",
|
||||
srcs = ["flow_packager.proto"],
|
||||
deps = [
|
||||
":motion_models_proto",
|
||||
":region_flow_proto",
|
||||
],
|
||||
)
|
||||
|
||||
proto_library(
|
||||
name = "tracking_proto",
|
||||
srcs = ["tracking.proto"],
|
||||
deps = [
|
||||
":motion_models_proto",
|
||||
],
|
||||
)
|
||||
|
||||
proto_library(
|
||||
name = "box_tracker_proto",
|
||||
srcs = ["box_tracker.proto"],
|
||||
deps = [":tracking_proto"],
|
||||
)
|
||||
|
||||
mediapipe_cc_proto_library(
|
||||
name = "tone_models_cc_proto",
|
||||
srcs = ["tone_models.proto"],
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [":tone_models_proto"],
|
||||
)
|
||||
|
||||
mediapipe_cc_proto_library(
|
||||
name = "tone_estimation_cc_proto",
|
||||
srcs = ["tone_estimation.proto"],
|
||||
cc_deps = [":tone_models_cc_proto"],
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [":tone_estimation_proto"],
|
||||
)
|
||||
|
||||
mediapipe_cc_proto_library(
|
||||
name = "region_flow_computation_cc_proto",
|
||||
srcs = ["region_flow_computation.proto"],
|
||||
cc_deps = [
|
||||
":tone_estimation_cc_proto",
|
||||
":tone_models_cc_proto",
|
||||
],
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [":region_flow_computation_proto"],
|
||||
)
|
||||
|
||||
mediapipe_cc_proto_library(
|
||||
name = "motion_saliency_cc_proto",
|
||||
srcs = ["motion_saliency.proto"],
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [":motion_saliency_proto"],
|
||||
)
|
||||
|
||||
mediapipe_cc_proto_library(
|
||||
name = "motion_estimation_cc_proto",
|
||||
srcs = ["motion_estimation.proto"],
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [":motion_estimation_proto"],
|
||||
)
|
||||
|
||||
mediapipe_cc_proto_library(
|
||||
name = "motion_analysis_cc_proto",
|
||||
srcs = ["motion_analysis.proto"],
|
||||
cc_deps = [
|
||||
":motion_estimation_cc_proto",
|
||||
":motion_saliency_cc_proto",
|
||||
":region_flow_computation_cc_proto",
|
||||
],
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [":motion_analysis_proto"],
|
||||
)
|
||||
|
||||
mediapipe_cc_proto_library(
|
||||
name = "region_flow_cc_proto",
|
||||
srcs = ["region_flow.proto"],
|
||||
cc_deps = [":motion_models_cc_proto"],
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [":region_flow_proto"],
|
||||
)
|
||||
|
||||
mediapipe_cc_proto_library(
|
||||
name = "motion_models_cc_proto",
|
||||
srcs = ["motion_models.proto"],
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [":motion_models_proto"],
|
||||
)
|
||||
|
||||
mediapipe_cc_proto_library(
|
||||
name = "camera_motion_cc_proto",
|
||||
srcs = ["camera_motion.proto"],
|
||||
cc_deps = [":motion_models_cc_proto"],
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [":camera_motion_proto"],
|
||||
)
|
||||
|
||||
mediapipe_cc_proto_library(
|
||||
name = "push_pull_filtering_cc_proto",
|
||||
srcs = ["push_pull_filtering.proto"],
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [":push_pull_filtering_proto"],
|
||||
)
|
||||
|
||||
mediapipe_cc_proto_library(
|
||||
name = "frame_selection_solution_evaluator_cc_proto",
|
||||
srcs = ["frame_selection_solution_evaluator.proto"],
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [":frame_selection_solution_evaluator_proto"],
|
||||
)
|
||||
|
||||
mediapipe_cc_proto_library(
|
||||
name = "frame_selection_cc_proto",
|
||||
srcs = ["frame_selection.proto"],
|
||||
cc_deps = [
|
||||
":camera_motion_cc_proto",
|
||||
":frame_selection_solution_evaluator_cc_proto",
|
||||
":region_flow_cc_proto",
|
||||
],
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [":frame_selection_proto"],
|
||||
)
|
||||
|
||||
mediapipe_cc_proto_library(
|
||||
name = "flow_packager_cc_proto",
|
||||
srcs = ["flow_packager.proto"],
|
||||
cc_deps = [
|
||||
":motion_models_cc_proto",
|
||||
":region_flow_cc_proto",
|
||||
],
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [":flow_packager_proto"],
|
||||
)
|
||||
|
||||
mediapipe_cc_proto_library(
|
||||
name = "tracking_cc_proto",
|
||||
srcs = ["tracking.proto"],
|
||||
cc_deps = [":motion_models_cc_proto"],
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [":tracking_proto"],
|
||||
)
|
||||
|
||||
mediapipe_cc_proto_library(
|
||||
name = "box_tracker_cc_proto",
|
||||
srcs = ["box_tracker.proto"],
|
||||
cc_deps = [":tracking_cc_proto"],
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [":box_tracker_proto"],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "motion_models",
|
||||
srcs = ["motion_models.cc"],
|
||||
hdrs = ["motion_models.h"],
|
||||
deps = [
|
||||
":camera_motion_cc_proto",
|
||||
":motion_models_cc_proto",
|
||||
":region_flow_cc_proto",
|
||||
"//mediapipe/framework/port:logging",
|
||||
"//mediapipe/framework/port:singleton",
|
||||
"//mediapipe/framework/port:vector",
|
||||
"@com_google_absl//absl/container:node_hash_map",
|
||||
"@com_google_absl//absl/strings:str_format",
|
||||
"@eigen_archive//:eigen",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "motion_models_cv",
|
||||
srcs = ["motion_models_cv.cc"],
|
||||
hdrs = ["motion_models_cv.h"],
|
||||
deps = [
|
||||
":camera_motion_cc_proto",
|
||||
":motion_models",
|
||||
":motion_models_cc_proto",
|
||||
"//mediapipe/framework/port:opencv_core",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "region_flow",
|
||||
srcs = ["region_flow.cc"],
|
||||
hdrs = ["region_flow.h"],
|
||||
deps = [
|
||||
":measure_time",
|
||||
":motion_models",
|
||||
":parallel_invoker",
|
||||
":region_flow_cc_proto",
|
||||
"//mediapipe/framework/port:integral_types",
|
||||
"//mediapipe/framework/port:logging",
|
||||
"//mediapipe/framework/port:vector",
|
||||
"@com_google_absl//absl/strings",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "camera_motion",
|
||||
srcs = ["camera_motion.cc"],
|
||||
hdrs = ["camera_motion.h"],
|
||||
deps = [
|
||||
":camera_motion_cc_proto",
|
||||
":motion_models",
|
||||
":region_flow",
|
||||
":region_flow_cc_proto",
|
||||
"@com_google_absl//absl/strings:str_format",
|
||||
],
|
||||
)
|
||||
|
||||
PARALLEL_COPTS = ["-DPARALLEL_INVOKER_ACTIVE"] + select({
|
||||
"//mediapipe:apple": [],
|
||||
"//mediapipe:android": [],
|
||||
"//conditions:default": [],
|
||||
})
|
||||
|
||||
PARALLEL_LINKOPTS = []
|
||||
|
||||
cc_library(
|
||||
name = "measure_time",
|
||||
srcs = ["measure_time.cc"],
|
||||
hdrs = ["measure_time.h"],
|
||||
deps = [
|
||||
"//mediapipe/framework/port:integral_types",
|
||||
"//mediapipe/framework/port:logging",
|
||||
"@com_google_absl//absl/strings",
|
||||
"@com_google_absl//absl/synchronization",
|
||||
"@com_google_absl//absl/time",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "parallel_invoker",
|
||||
srcs = ["parallel_invoker.cc"],
|
||||
hdrs = ["parallel_invoker.h"],
|
||||
copts = PARALLEL_COPTS,
|
||||
linkopts = PARALLEL_LINKOPTS,
|
||||
deps = [
|
||||
":parallel_invoker_forbid_mixed_active",
|
||||
"//mediapipe/framework/port:logging",
|
||||
"//mediapipe/framework/port:threadpool",
|
||||
"@com_google_absl//absl/synchronization",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "parallel_invoker_forbid_mixed_active",
|
||||
srcs = ["parallel_invoker_forbid_mixed.cc"],
|
||||
copts = PARALLEL_COPTS,
|
||||
alwayslink = 1, # Forces all symbols to be included.
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "image_util",
|
||||
srcs = ["image_util.cc"],
|
||||
hdrs = ["image_util.h"],
|
||||
deps = [
|
||||
":motion_models",
|
||||
":motion_models_cc_proto",
|
||||
":region_flow",
|
||||
":region_flow_cc_proto",
|
||||
"//mediapipe/framework/port:logging",
|
||||
"//mediapipe/framework/port:opencv_core",
|
||||
"//mediapipe/framework/port:opencv_imgproc",
|
||||
"//mediapipe/framework/port:vector",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "streaming_buffer",
|
||||
srcs = ["streaming_buffer.cc"],
|
||||
hdrs = ["streaming_buffer.h"],
|
||||
deps = [
|
||||
"//mediapipe/framework/port:logging",
|
||||
"@com_google_absl//absl/container:node_hash_map",
|
||||
"@com_google_absl//absl/strings",
|
||||
"@com_google_absl//absl/types:any",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "motion_estimation",
|
||||
srcs = ["motion_estimation.cc"],
|
||||
hdrs = ["motion_estimation.h"],
|
||||
copts = PARALLEL_COPTS,
|
||||
linkopts = PARALLEL_LINKOPTS,
|
||||
deps = [
|
||||
":camera_motion",
|
||||
":camera_motion_cc_proto",
|
||||
":measure_time",
|
||||
":motion_estimation_cc_proto",
|
||||
":motion_models",
|
||||
":motion_models_cc_proto",
|
||||
":parallel_invoker",
|
||||
":region_flow",
|
||||
":region_flow_cc_proto",
|
||||
"//mediapipe/framework/port:integral_types",
|
||||
"//mediapipe/framework/port:logging",
|
||||
"//mediapipe/framework/port:vector",
|
||||
"@com_google_absl//absl/container:node_hash_map",
|
||||
"@com_google_absl//absl/strings",
|
||||
"@eigen_archive//:eigen",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "motion_saliency",
|
||||
srcs = ["motion_saliency.cc"],
|
||||
hdrs = ["motion_saliency.h"],
|
||||
deps = [
|
||||
":camera_motion",
|
||||
":measure_time",
|
||||
":motion_saliency_cc_proto",
|
||||
":region_flow",
|
||||
":region_flow_cc_proto",
|
||||
"//mediapipe/framework/port:logging",
|
||||
"//mediapipe/framework/port:vector",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "push_pull_filtering",
|
||||
hdrs = ["push_pull_filtering.h"],
|
||||
deps = [
|
||||
":image_util",
|
||||
":push_pull_filtering_cc_proto",
|
||||
"//mediapipe/framework/port:integral_types",
|
||||
"//mediapipe/framework/port:logging",
|
||||
"//mediapipe/framework/port:opencv_core",
|
||||
"//mediapipe/framework/port:opencv_highgui",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "tone_models",
|
||||
srcs = ["tone_models.cc"],
|
||||
hdrs = ["tone_models.h"],
|
||||
deps = [
|
||||
":tone_models_cc_proto",
|
||||
"//mediapipe/framework/port:integral_types",
|
||||
"//mediapipe/framework/port:logging",
|
||||
"//mediapipe/framework/port:opencv_core",
|
||||
"//mediapipe/framework/port:vector",
|
||||
"@com_google_absl//absl/strings:str_format",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "tone_estimation",
|
||||
srcs = ["tone_estimation.cc"],
|
||||
hdrs = ["tone_estimation.h"],
|
||||
deps = [
|
||||
":motion_models_cc_proto",
|
||||
":region_flow",
|
||||
":region_flow_cc_proto",
|
||||
":tone_estimation_cc_proto",
|
||||
":tone_models",
|
||||
":tone_models_cc_proto",
|
||||
"//mediapipe/framework/port:integral_types",
|
||||
"//mediapipe/framework/port:logging",
|
||||
"//mediapipe/framework/port:opencv_core",
|
||||
"//mediapipe/framework/port:opencv_imgproc",
|
||||
"//mediapipe/framework/port:vector",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "region_flow_computation",
|
||||
srcs = ["region_flow_computation.cc"],
|
||||
hdrs = ["region_flow_computation.h"],
|
||||
copts = PARALLEL_COPTS,
|
||||
linkopts = PARALLEL_LINKOPTS,
|
||||
deps = [
|
||||
":camera_motion_cc_proto",
|
||||
":image_util",
|
||||
":measure_time",
|
||||
":motion_estimation",
|
||||
":motion_estimation_cc_proto",
|
||||
":motion_models",
|
||||
":motion_models_cc_proto",
|
||||
":parallel_invoker",
|
||||
":region_flow",
|
||||
":region_flow_cc_proto",
|
||||
":region_flow_computation_cc_proto",
|
||||
":tone_estimation",
|
||||
":tone_estimation_cc_proto",
|
||||
":tone_models",
|
||||
":tone_models_cc_proto",
|
||||
"//mediapipe/framework/port:integral_types",
|
||||
"//mediapipe/framework/port:logging",
|
||||
"//mediapipe/framework/port:opencv_core",
|
||||
"//mediapipe/framework/port:opencv_features2d",
|
||||
"//mediapipe/framework/port:opencv_imgproc",
|
||||
"//mediapipe/framework/port:opencv_video",
|
||||
"//mediapipe/framework/port:vector",
|
||||
"@com_google_absl//absl/container:node_hash_set",
|
||||
"@com_google_absl//absl/memory",
|
||||
"@eigen_archive//:eigen",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "region_flow_visualization",
|
||||
srcs = ["region_flow_visualization.cc"],
|
||||
hdrs = ["region_flow_visualization.h"],
|
||||
deps = [
|
||||
":measure_time",
|
||||
":motion_models",
|
||||
":parallel_invoker",
|
||||
":region_flow",
|
||||
":region_flow_cc_proto",
|
||||
"//mediapipe/framework/port:integral_types",
|
||||
"//mediapipe/framework/port:logging",
|
||||
"//mediapipe/framework/port:opencv_core",
|
||||
"//mediapipe/framework/port:opencv_imgproc",
|
||||
"//mediapipe/framework/port:vector",
|
||||
"@com_google_absl//absl/strings",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "motion_analysis",
|
||||
srcs = ["motion_analysis.cc"],
|
||||
hdrs = ["motion_analysis.h"],
|
||||
deps = [
|
||||
":camera_motion",
|
||||
":camera_motion_cc_proto",
|
||||
":image_util",
|
||||
":measure_time",
|
||||
":motion_analysis_cc_proto",
|
||||
":motion_estimation",
|
||||
":motion_estimation_cc_proto",
|
||||
":motion_saliency",
|
||||
":motion_saliency_cc_proto",
|
||||
":push_pull_filtering",
|
||||
":region_flow",
|
||||
":region_flow_cc_proto",
|
||||
":region_flow_computation",
|
||||
":region_flow_computation_cc_proto",
|
||||
":region_flow_visualization",
|
||||
":streaming_buffer",
|
||||
"//mediapipe/framework/port:integral_types",
|
||||
"//mediapipe/framework/port:logging",
|
||||
"//mediapipe/framework/port:opencv_core",
|
||||
"//mediapipe/framework/port:opencv_imgproc",
|
||||
"//mediapipe/framework/port:vector",
|
||||
"@com_google_absl//absl/strings:str_format",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "flow_packager",
|
||||
srcs = ["flow_packager.cc"],
|
||||
hdrs = ["flow_packager.h"],
|
||||
deps = [
|
||||
":camera_motion",
|
||||
":camera_motion_cc_proto",
|
||||
":flow_packager_cc_proto",
|
||||
":motion_estimation",
|
||||
":motion_estimation_cc_proto",
|
||||
":motion_models",
|
||||
":motion_models_cc_proto",
|
||||
":region_flow",
|
||||
":region_flow_cc_proto",
|
||||
"//mediapipe/framework/port:integral_types",
|
||||
"//mediapipe/framework/port:logging",
|
||||
"//mediapipe/framework/port:vector",
|
||||
"@com_google_absl//absl/base:core_headers",
|
||||
"@com_google_absl//absl/strings",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "tracking",
|
||||
srcs = ["tracking.cc"],
|
||||
hdrs = ["tracking.h"],
|
||||
copts = PARALLEL_COPTS,
|
||||
linkopts = PARALLEL_LINKOPTS,
|
||||
deps = [
|
||||
":flow_packager_cc_proto",
|
||||
":measure_time",
|
||||
":motion_models",
|
||||
":motion_models_cc_proto",
|
||||
":motion_models_cv",
|
||||
":parallel_invoker",
|
||||
":region_flow",
|
||||
":tracking_cc_proto",
|
||||
"//mediapipe/framework/port:logging",
|
||||
"//mediapipe/framework/port:opencv_calib3d",
|
||||
"//mediapipe/framework/port:opencv_core",
|
||||
"//mediapipe/framework/port:opencv_imgproc",
|
||||
"//mediapipe/framework/port:vector",
|
||||
"@com_google_absl//absl/algorithm:container",
|
||||
"@com_google_absl//absl/memory",
|
||||
"@eigen_archive//:eigen",
|
||||
],
|
||||
alwayslink = 1,
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "box_tracker",
|
||||
srcs = ["box_tracker.cc"],
|
||||
hdrs = ["box_tracker.h"],
|
||||
deps = [
|
||||
":box_tracker_cc_proto",
|
||||
":flow_packager_cc_proto",
|
||||
":measure_time",
|
||||
":tracking",
|
||||
":tracking_cc_proto",
|
||||
"//mediapipe/framework/port:integral_types",
|
||||
"//mediapipe/framework/port:logging",
|
||||
"//mediapipe/framework/port:threadpool",
|
||||
"@com_google_absl//absl/strings",
|
||||
"@com_google_absl//absl/strings:str_format",
|
||||
"@com_google_absl//absl/synchronization",
|
||||
"@com_google_absl//absl/time",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "tracking_visualization_utilities",
|
||||
srcs = ["tracking_visualization_utilities.cc"],
|
||||
hdrs = ["tracking_visualization_utilities.h"],
|
||||
deps = [
|
||||
":box_tracker",
|
||||
":box_tracker_cc_proto",
|
||||
":flow_packager_cc_proto",
|
||||
":tracking",
|
||||
":tracking_cc_proto",
|
||||
"//mediapipe/framework/port:opencv_core",
|
||||
"//mediapipe/framework/port:opencv_imgproc",
|
||||
"@com_google_absl//absl/strings:str_format",
|
||||
],
|
||||
)
|
||||
|
||||
cc_test(
|
||||
name = "parallel_invoker_test",
|
||||
srcs = ["parallel_invoker_test.cc"],
|
||||
copts = PARALLEL_COPTS,
|
||||
linkopts = PARALLEL_LINKOPTS,
|
||||
deps = [
|
||||
":parallel_invoker",
|
||||
"//mediapipe/framework/port:gtest_main",
|
||||
"@com_google_absl//absl/synchronization",
|
||||
],
|
||||
)
|
||||
|
||||
cc_test(
|
||||
name = "image_util_test",
|
||||
srcs = [
|
||||
"image_util_test.cc",
|
||||
],
|
||||
deps = [
|
||||
":image_util",
|
||||
"//mediapipe/framework/port:gtest_main",
|
||||
],
|
||||
)
|
||||
|
||||
cc_test(
|
||||
name = "motion_models_test",
|
||||
srcs = ["motion_models_test.cc"],
|
||||
deps = [
|
||||
":motion_estimation",
|
||||
":motion_models",
|
||||
"//mediapipe/framework/deps:message_matchers",
|
||||
"//mediapipe/framework/port:gtest_main",
|
||||
"//mediapipe/framework/port:parse_text_proto",
|
||||
"//mediapipe/framework/port:vector",
|
||||
],
|
||||
)
|
||||
|
||||
cc_test(
|
||||
name = "region_flow_computation_test",
|
||||
srcs = ["region_flow_computation_test.cc"],
|
||||
copts = PARALLEL_COPTS,
|
||||
data = ["testdata/stabilize_test.png"],
|
||||
linkopts = PARALLEL_LINKOPTS,
|
||||
deps = [
|
||||
":region_flow",
|
||||
":region_flow_cc_proto",
|
||||
":region_flow_computation",
|
||||
"//mediapipe/framework/deps:file_path",
|
||||
"//mediapipe/framework/port:commandlineflags",
|
||||
"//mediapipe/framework/port:file_helpers",
|
||||
"//mediapipe/framework/port:gtest_main",
|
||||
"//mediapipe/framework/port:logging",
|
||||
"//mediapipe/framework/port:opencv_core",
|
||||
"//mediapipe/framework/port:opencv_highgui",
|
||||
"//mediapipe/framework/port:opencv_imgproc",
|
||||
"//mediapipe/framework/port:status",
|
||||
"//mediapipe/framework/port:vector",
|
||||
"@com_google_absl//absl/time",
|
||||
],
|
||||
)
|
||||
|
||||
cc_test(
|
||||
name = "box_tracker_test",
|
||||
timeout = "short",
|
||||
srcs = ["box_tracker_test.cc"],
|
||||
data = glob(["testdata/box_tracker/*"]),
|
||||
deps = [
|
||||
":box_tracker",
|
||||
"//mediapipe/framework/deps:file_path",
|
||||
"//mediapipe/framework/port:gtest_main",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "tracked_detection",
|
||||
srcs = [
|
||||
"tracked_detection.cc",
|
||||
],
|
||||
hdrs = [
|
||||
"tracked_detection.h",
|
||||
],
|
||||
deps = [
|
||||
"//mediapipe/framework/formats:rect_cc_proto",
|
||||
"//mediapipe/framework/port:vector",
|
||||
"@com_google_absl//absl/container:node_hash_map",
|
||||
],
|
||||
)
|
||||
|
||||
cc_test(
|
||||
name = "tracked_detection_test",
|
||||
srcs = [
|
||||
"tracked_detection_test.cc",
|
||||
],
|
||||
deps = [
|
||||
":tracked_detection",
|
||||
"//mediapipe/framework/port:gtest_main",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "tracked_detection_manager",
|
||||
srcs = [
|
||||
"tracked_detection_manager.cc",
|
||||
],
|
||||
hdrs = [
|
||||
"tracked_detection_manager.h",
|
||||
],
|
||||
deps = [
|
||||
":tracked_detection",
|
||||
"//mediapipe/framework/formats:rect_cc_proto",
|
||||
"@com_google_absl//absl/container:node_hash_map",
|
||||
],
|
||||
)
|
||||
2
mediapipe/util/tracking/OWNERS
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
jianingwei
|
||||
yegenzhi
|
||||
1062
mediapipe/util/tracking/box_tracker.cc
Normal file
372
mediapipe/util/tracking/box_tracker.h
Normal file
|
|
@ -0,0 +1,372 @@
|
|||
// Copyright 2019 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef MEDIAPIPE_UTIL_TRACKING_BOX_TRACKER_H_
|
||||
#define MEDIAPIPE_UTIL_TRACKING_BOX_TRACKER_H_
|
||||
|
||||
#include <inttypes.h>
|
||||
|
||||
#include <map>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/strings/str_format.h"
|
||||
#include "absl/synchronization/mutex.h"
|
||||
#include "mediapipe/framework/port/threadpool.h"
|
||||
#include "mediapipe/util/tracking/box_tracker.pb.h"
|
||||
#include "mediapipe/util/tracking/flow_packager.pb.h"
|
||||
#include "mediapipe/util/tracking/tracking.h"
|
||||
#include "mediapipe/util/tracking/tracking.pb.h"
|
||||
|
||||
namespace mediapipe {
|
||||
|
||||
// Describes a rectangle box at an instance of time.
|
||||
struct TimedBox {
|
||||
static const int kNumQuadVertices = 4;
|
||||
// Dimensions are in normalized coordinates [0, 1].
|
||||
float top = 0;
|
||||
float left = 0;
|
||||
float bottom = 0;
|
||||
float right = 0;
|
||||
// Rotation of box w.r.t. center in radians.
|
||||
float rotation = 0;
|
||||
int64 time_msec = 0;
|
||||
// Confidence of the tracked box in range [0, 1].
|
||||
float confidence = 0;
|
||||
std::vector<Vector2_f> quad_vertices;
|
||||
// Aspect ratio (width / height) for the tracked rectangle in physical space.
|
||||
float aspect_ratio = -1.0;
|
||||
// Whether we want this box to be potentially grouped with other boxes
|
||||
// to track together. This is useful for tracking small boxes that lie
|
||||
// on a plane. For example, when we detect a plane,
|
||||
// track the plane, then all boxes within the plane can share the same
|
||||
// homography transform.
|
||||
bool request_grouping = false;
|
||||
|
||||
bool operator<(const TimedBox& rhs) const {
|
||||
return time_msec < rhs.time_msec;
|
||||
}
|
||||
|
||||
// Returns (1.0 - alpha) * lhs + alpha * rhs;
|
||||
static TimedBox Blend(const TimedBox& lhs, const TimedBox& rhs, double alpha);
|
||||
|
||||
// Returns lhs * alpha + rhs * beta;
|
||||
static TimedBox Blend(const TimedBox& lhs, const TimedBox& rhs, double alpha,
|
||||
double beta);
|
||||
|
||||
std::string ToString() const {
|
||||
return absl::StrFormat(
|
||||
"top: %.3f left: %.3f bottom: %.3f right: %.3f "
|
||||
"rot: %.3f t: %d",
|
||||
top, left, bottom, right, rotation, static_cast<int64_t>(time_msec));
|
||||
}
|
||||
|
||||
// Returns corners of TimedBox in the requested domain.
|
||||
std::array<Vector2_f, 4> Corners(float width, float height) const;
|
||||
|
||||
static TimedBox FromProto(const TimedBoxProto& proto);
|
||||
TimedBoxProto ToProto() const;
|
||||
};
|
||||
|
||||
// TimedBox augment with internal states.
|
||||
struct InternalTimedBox : public TimedBox {
|
||||
InternalTimedBox() = default;
|
||||
// Convenience constructor.
|
||||
InternalTimedBox(const TimedBox& box, const MotionBoxState* state_)
|
||||
: TimedBox(box), state(state_) {}
|
||||
|
||||
// Corresponding MotionBoxState a TimedBox.
|
||||
std::shared_ptr<const MotionBoxState> state;
|
||||
};
|
||||
|
||||
// Initializes beginning tracking state from a TimedBox.
|
||||
void MotionBoxStateFromTimedBox(const TimedBox& box, MotionBoxState* state);
|
||||
|
||||
// Retrieves box position and time from a tracking state.
|
||||
void TimedBoxFromMotionBoxState(const MotionBoxState& state, TimedBox* box);
|
||||
|
||||
// Downgrade tracking degrees of TrackStepOptions to
|
||||
// TRACKING_DEGREE_OBJECT_ROTATION_SCALE if originally
|
||||
// specified TRACKING_DEGREE_OBJECT_PERSPECTIVE, but start pos
|
||||
// does not contain quad or contains invalid quad.
|
||||
template <typename T>
|
||||
void ChangeTrackingDegreesBasedOnStartPos(
|
||||
const T& start_pos, TrackStepOptions* track_step_options) {
|
||||
if (track_step_options->tracking_degrees() ==
|
||||
TrackStepOptions::TRACKING_DEGREE_OBJECT_PERSPECTIVE &&
|
||||
(!start_pos.has_quad() || start_pos.quad().vertices_size() != 8)) {
|
||||
track_step_options->set_tracking_degrees(
|
||||
TrackStepOptions::TRACKING_DEGREE_OBJECT_ROTATION_SCALE);
|
||||
VLOG(1) << "Originally specified TRACKING_DEGREE_OBJECT_PERSPECTIVE, but "
|
||||
"changed to TRACKING_DEGREE_OBJECT_ROTATION_SCALE";
|
||||
}
|
||||
}
|
||||
|
||||
// Set of boxes for a particular checkpoint.
|
||||
typedef std::deque<InternalTimedBox> PathSegment;
|
||||
|
||||
// Stores the PathSegment for each checkpoint time.
|
||||
typedef std::map<int, PathSegment> Path;
|
||||
|
||||
// Returns box at specified time for a specific path segment.
|
||||
// Returns true on success, otherwise box is left untouched.
|
||||
// Optionally returns closest known MotionBoxState if state is not null.
|
||||
bool TimedBoxAtTime(const PathSegment& segment, int64 time_msec, TimedBox* box,
|
||||
MotionBoxState* state = nullptr);
|
||||
|
||||
// Tracks timed boxes from cached TrackingDataChunks created by
|
||||
// FlowPackagerCalculator. For usage see accompanying test.
|
||||
class BoxTracker {
|
||||
public:
|
||||
// Initializes a new BoxTracker to work on cached TrackingData from a chunk
|
||||
// directory.
|
||||
BoxTracker(const std::string& cache_dir, const BoxTrackerOptions& options);
|
||||
|
||||
// Initializes a new BoxTracker to work on the passed TrackingDataChunks.
|
||||
// If copy_data is true, BoxTracker will retain its own copy of the data;
|
||||
// otherwise the passed pointer need to be valid for the lifetime of the
|
||||
// BoxTracker.
|
||||
BoxTracker(const std::vector<const TrackingDataChunk*>& tracking_data,
|
||||
bool copy_data, const BoxTrackerOptions& options);
|
||||
|
||||
// Add single TrackingDataChunk. This chunk must be correctly aligned with
|
||||
// existing chunks. If chunk starting timestamp is larger than next valid
|
||||
// chunk timestamp, empty chunks will be added to fill the gap. If copy_data
|
||||
// is true, BoxTracker will retain its own copy of the data; otherwise the
|
||||
// passed pointers need to be valid for the lifetime of the BoxTracker.
|
||||
void AddTrackingDataChunk(const TrackingDataChunk* chunk, bool copy_data);
|
||||
// Add new TrackingDataChunks. This method allows to streamline BoxTracker
|
||||
// usage. Instead of collecting all tracking chunks and then calling the
|
||||
// constructor, it makes possible to pass only minimally required chunks to
|
||||
// constructor, and then append new chunks when available.
|
||||
// Caller is responsible to guarantee that all chunks that are necessary for
|
||||
// tracking are at the disposal of BoxTracker before calling NewBoxTrack, i.e.
|
||||
// min_msec and max_msec parameters must be in the range of chunks passed to
|
||||
// BoxTracker.
|
||||
// Chunks will be added in the order they are specified in tracking_data. Each
|
||||
// chunk must be correctly aligned with existing chunks. If chunk starting
|
||||
// timestamp is larger than next valid chunk timestamp, empty chunks will be
|
||||
// added to fill the gap. If copy_data is true, BoxTracker will retain its own
|
||||
// copy of the data; otherwise the passed pointers need to be valid for the
|
||||
// lifetime of the BoxTracker.
|
||||
void AddTrackingDataChunks(
|
||||
const std::vector<const TrackingDataChunk*>& tracking_data,
|
||||
bool copy_data);
|
||||
|
||||
// Starts a new track for the specified box until tracking terminates or
|
||||
// until track length achieves max_length.
|
||||
// Does not block caller, returns immediately.
|
||||
// Note: Use positive integers for id, we reserve negative ones for debugging
|
||||
// and visualization purposes.
|
||||
void NewBoxTrack(const TimedBox& initial_pos, int id, int64 min_msec = 0,
|
||||
int64 max_msec = std::numeric_limits<int64>::max());
|
||||
|
||||
// Returns interval for which the state of the specified box is known.
|
||||
// (Returns -1, -1 if id is missing or no tracking has been done).
|
||||
std::pair<int64, int64> TrackInterval(int id);
|
||||
|
||||
// Returns box position for requested box at specified time.
|
||||
// Returns false if no such box exists.
|
||||
// Optional, if record_path_states is enabled in options outputs the
|
||||
// corresponding states for the resulting TimedBox. Note, as TimedBoxes are
|
||||
// generally interpolated from the underlying data and blended across
|
||||
// checkpoints, the returned states are the closest (snapped) known tracking
|
||||
// states for the left and right checkpoint path. Therefore the size of states
|
||||
// is either two or one (if only one checkpoint exists).
|
||||
bool GetTimedPosition(int id, int64 time_msec, TimedBox* result,
|
||||
std::vector<MotionBoxState>* states = nullptr);
|
||||
|
||||
// Returns chunk index for specified time.
|
||||
int ChunkIdxFromTime(int64 msec) const {
|
||||
return msec / options_.caching_chunk_size_msec();
|
||||
}
|
||||
|
||||
// Returns true if any tracking is ongoing for the specified id.
|
||||
bool IsTrackingOngoingForId(int id) LOCKS_EXCLUDED(status_mutex_);
|
||||
|
||||
// Returns true if any tracking is ongoing.
|
||||
bool IsTrackingOngoing() LOCKS_EXCLUDED(status_mutex_);
|
||||
|
||||
// Cancels all ongoing tracks. To avoid race conditions all NewBoxTrack's in
|
||||
// flight will also be canceled. Future NewBoxTrack's will be canceled.
|
||||
// NOTE: To resume execution, you have to call ResumeTracking() before
|
||||
// issuing more NewBoxTrack calls.
|
||||
void CancelAllOngoingTracks() LOCKS_EXCLUDED(status_mutex_);
|
||||
void ResumeTracking() LOCKS_EXCLUDED(status_mutex_);
|
||||
|
||||
// Waits for all ongoing tracks to complete.
|
||||
// Optionally accepts a timeout in microseconds (== 0 for infinite wait).
|
||||
// Returns true on success, false if timeout is reached.
|
||||
// NOTE: If WaitForAllOngoingTracks timed out, CancelAllOngoingTracks() must
|
||||
// be called before destructing the BoxTracker object or dangeling running
|
||||
// threads might try to access invalid data.
|
||||
bool WaitForAllOngoingTracks(int timeout_us = 0)
|
||||
LOCKS_EXCLUDED(status_mutex_);
|
||||
|
||||
// Debug function to obtain raw TrackingData closest to the specified
|
||||
// timestamp. This call will read from disk on every invocation so it is
|
||||
// expensive.
|
||||
// To not interfere with other tracking requests it is recommended that you
|
||||
// use a unique id here.
|
||||
// Returns true on success.
|
||||
bool GetTrackingData(int id, int64 request_time_msec,
|
||||
TrackingData* tracking_data,
|
||||
int* tracking_data_msec = nullptr);
|
||||
|
||||
private:
|
||||
// Asynchronous implementation function for box tracking. Schedules forward
|
||||
// and backward tracking.
|
||||
void NewBoxTrackAsync(const TimedBox& initial_pos, int id, int64 min_msec,
|
||||
int64 max_msec);
|
||||
|
||||
typedef std::pair<const TrackingDataChunk*, bool> AugmentedChunkPtr;
|
||||
// Attempts to read chunk at chunk_idx if it exists. Reads from cache
|
||||
// directory or from in memory cache.
|
||||
// Important: 2nd part of return value indicates if returned tracking data
|
||||
// will be owned by the caller (if true). In that case caller is responsible
|
||||
// for releasing the returned chunk.
|
||||
AugmentedChunkPtr ReadChunk(int id, int checkpoint, int chunk_idx);
|
||||
|
||||
// Attempts to read specified chunk from caching directory. Blocks and waits
|
||||
// until chunk is available or internal time out is reached.
|
||||
// Returns nullptr if data could not be read.
|
||||
std::unique_ptr<TrackingDataChunk> ReadChunkFromCache(int id, int checkpoint,
|
||||
int chunk_idx);
|
||||
|
||||
// Waits with timeout for chunkfile to become available. Returns true on
|
||||
// success, false if waited till timeout or when canceled.
|
||||
bool WaitForChunkFile(int id, int checkpoint, const std::string& chunk_file)
|
||||
LOCKS_EXCLUDED(status_mutex_);
|
||||
|
||||
// Determines closest index in passed TrackingDataChunk
|
||||
int ClosestFrameIndex(int64 msec, const TrackingDataChunk& chunk) const;
|
||||
|
||||
// Adds new TimedBox to specified checkpoint with state.
|
||||
void AddBoxResult(const TimedBox& box, int id, int checkpoint,
|
||||
const MotionBoxState& state);
|
||||
|
||||
// Callback can only handle 5 args max.
|
||||
// Set own_data to true for args to assume ownership.
|
||||
struct TrackingImplArgs {
|
||||
TrackingImplArgs(AugmentedChunkPtr chunk_ptr,
|
||||
const MotionBoxState& start_state_, int start_frame_,
|
||||
int chunk_idx_, int id_, int checkpoint_, bool forward_,
|
||||
bool first_call_, int64 min_msec_, int64 max_msec_)
|
||||
: start_state(start_state_),
|
||||
start_frame(start_frame_),
|
||||
chunk_idx(chunk_idx_),
|
||||
id(id_),
|
||||
checkpoint(checkpoint_),
|
||||
forward(forward_),
|
||||
first_call(first_call_),
|
||||
min_msec(min_msec_),
|
||||
max_msec(max_msec_) {
|
||||
if (chunk_ptr.second) {
|
||||
chunk_data_buffer.reset(chunk_ptr.first);
|
||||
}
|
||||
|
||||
chunk_data = chunk_ptr.first;
|
||||
}
|
||||
|
||||
TrackingImplArgs(const TrackingImplArgs&) = default;
|
||||
|
||||
// Storage for tracking data.
|
||||
std::shared_ptr<const TrackingDataChunk> chunk_data_buffer;
|
||||
|
||||
// Pointer to the actual tracking data. Usually points to the buffer
|
||||
// but can also point to external data for performance reasons.
|
||||
const TrackingDataChunk* chunk_data;
|
||||
|
||||
MotionBoxState start_state;
|
||||
int start_frame;
|
||||
int chunk_idx;
|
||||
int id;
|
||||
int checkpoint;
|
||||
bool forward = true;
|
||||
bool first_call = true;
|
||||
int64 min_msec; // minimum timestamp to track to
|
||||
int64 max_msec; // maximum timestamp to track to
|
||||
};
|
||||
|
||||
// Actual tracking algorithm.
|
||||
void TrackingImpl(const TrackingImplArgs& args);
|
||||
|
||||
// Ids are scheduled exclusively, run this method to acquire lock.
|
||||
// Returns false if id could not be scheduled (e.g. id got canceled during
|
||||
// waiting).
|
||||
bool WaitToScheduleId(int id) LOCKS_EXCLUDED(status_mutex_);
|
||||
|
||||
// Signals end of scheduling phase. Requires status mutex to be held.
|
||||
void DoneSchedulingId(int id) EXCLUSIVE_LOCKS_REQUIRED(status_mutex_);
|
||||
|
||||
// Removes all checkpoints within vicinity of new checkpoint.
|
||||
void RemoveCloseCheckpoints(int id, int checkpoint)
|
||||
EXCLUSIVE_LOCKS_REQUIRED(status_mutex_);
|
||||
|
||||
// Removes specific checkpoint.
|
||||
void ClearCheckpoint(int id, int checkpoint)
|
||||
EXCLUSIVE_LOCKS_REQUIRED(status_mutex_);
|
||||
|
||||
// Terminates tracking for specific id and checkpoint.
|
||||
void CancelTracking(int id, int checkpoint)
|
||||
EXCLUSIVE_LOCKS_REQUIRED(status_mutex_);
|
||||
|
||||
// Implementation function for IsTrackingOngoing assuming mutex is already
|
||||
// held.
|
||||
bool IsTrackingOngoingMutexHeld() EXCLUSIVE_LOCKS_REQUIRED(status_mutex_);
|
||||
|
||||
// Captures tracking status for each checkpoint
|
||||
struct TrackStatus {
|
||||
// Indicates that all current tracking processes should be canceled.
|
||||
bool canceled = false;
|
||||
|
||||
// Number of tracking requests than are currently ongoing.
|
||||
int tracks_ongoing = 0;
|
||||
};
|
||||
|
||||
private:
|
||||
// Stores computed tracking paths_ for all boxes.
|
||||
std::unordered_map<int, Path> paths_ GUARDED_BY(path_mutex_);
|
||||
absl::Mutex path_mutex_;
|
||||
|
||||
// For each id and each checkpoint stores current tracking status.
|
||||
std::unordered_map<int, std::map<int, TrackStatus>> track_status_
|
||||
GUARDED_BY(status_mutex_);
|
||||
|
||||
// Keeps track which ids are currently processing in NewBoxTrack.
|
||||
std::unordered_map<int, bool> new_box_track_ GUARDED_BY(status_mutex_);
|
||||
absl::Mutex status_mutex_;
|
||||
|
||||
bool canceling_ GUARDED_BY(status_mutex_) = false;
|
||||
|
||||
// Use to signal changes to status_condvar_;
|
||||
absl::CondVar status_condvar_ GUARDED_BY(status_mutex_);
|
||||
|
||||
BoxTrackerOptions options_;
|
||||
|
||||
// Caching directory for TrackingData stored on disk.
|
||||
std::string cache_dir_;
|
||||
|
||||
// Pointers to tracking data stored in memory.
|
||||
std::vector<const TrackingDataChunk*> tracking_data_;
|
||||
// Buffer for tracking data in case we retain a deep copy.
|
||||
std::vector<std::unique_ptr<TrackingDataChunk>> tracking_data_buffer_;
|
||||
|
||||
// Workers that run the tracking algorithm.
|
||||
std::unique_ptr<ThreadPool> tracking_workers_;
|
||||
};
|
||||
|
||||
} // namespace mediapipe
|
||||
|
||||
#endif // MEDIAPIPE_UTIL_TRACKING_BOX_TRACKER_H_
|
||||
87
mediapipe/util/tracking/box_tracker.proto
Normal file
|
|
@ -0,0 +1,87 @@
|
|||
// Copyright 2019 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
syntax = "proto2";
|
||||
|
||||
package mediapipe;
|
||||
|
||||
import "mediapipe/util/tracking/tracking.proto";
|
||||
|
||||
option java_package = "com.google.mediapipe.tracking";
|
||||
option java_outer_classname = "BoxTrackerProto";
|
||||
|
||||
message BoxTrackerOptions {
|
||||
// Chunk size for caching files. Should be equal to those
|
||||
// written by the FlowPackagerCalculator.
|
||||
optional int32 caching_chunk_size_msec = 1 [default = 2500];
|
||||
|
||||
// Chunk file format.
|
||||
optional string cache_file_format = 2 [default = "chunk_%04d"];
|
||||
|
||||
// Number of simultaneous tracking requests.
|
||||
optional int32 num_tracking_workers = 3 [default = 8];
|
||||
|
||||
// Maximum waiting time for next chunk, till function times out.
|
||||
optional int32 read_chunk_timeout_msec = 4 [default = 60000];
|
||||
|
||||
// If set, box tracker will record the state for each computed TimedBox
|
||||
// across all paths.
|
||||
optional bool record_path_states = 5 [default = false];
|
||||
|
||||
// Actual tracking options to be used for every step.
|
||||
optional TrackStepOptions track_step_options = 6;
|
||||
}
|
||||
|
||||
// Next tag: 13
|
||||
// Proto equivalent of struct TimedBox.
|
||||
message TimedBoxProto {
|
||||
// Normalized coords - in [0, 1]
|
||||
optional float top = 1;
|
||||
optional float left = 2;
|
||||
optional float bottom = 3;
|
||||
optional float right = 4;
|
||||
// Rotation of box w.r.t. center in radians.
|
||||
optional float rotation = 7;
|
||||
optional MotionBoxState.Quad quad = 9;
|
||||
optional int64 time_msec = 5 [default = 0];
|
||||
|
||||
// Unique per object id to disambiguate boxes.
|
||||
optional int32 id = 6 [default = -1];
|
||||
|
||||
// Confidence of box tracked in the range [0, 1], with 0 being least
|
||||
// confident, and 1 being most confident. A reasonable threshold is 0.5
|
||||
// to filter out unconfident boxes.
|
||||
optional float confidence = 8;
|
||||
|
||||
// Aspect ratio (width / height) for the tracked rectangle in physical space.
|
||||
// If this field is provided, quad tracking will be performed using
|
||||
// 6 degrees of freedom perspective transform between physical rectangle and
|
||||
// frame quad. Otherwise, 8 degrees of freedom homography tracking between
|
||||
// adjacent frames will be used.
|
||||
optional float aspect_ratio = 10;
|
||||
|
||||
// Whether or not to enable reacquisition functionality for this specific box.
|
||||
optional bool reacquisition = 11 [default = false];
|
||||
|
||||
// Whether we want this box to be potentially grouped with other boxes
|
||||
// to track together. This is useful for tracking small boxes that lie
|
||||
// on a plane. For example, when we detect a plane,
|
||||
// track the plane, then all boxes within the plane can share the same
|
||||
// homography transform.
|
||||
optional bool request_grouping = 12 [default = false];
|
||||
}
|
||||
|
||||
message TimedBoxProtoList {
|
||||
repeated TimedBoxProto box = 1;
|
||||
}
|
||||
117
mediapipe/util/tracking/box_tracker_test.cc
Normal file
|
|
@ -0,0 +1,117 @@
|
|||
// Copyright 2019 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "mediapipe/util/tracking/box_tracker.h"
|
||||
|
||||
#include "mediapipe/framework/deps/file_path.h"
|
||||
#include "mediapipe/framework/port/gtest.h"
|
||||
|
||||
namespace mediapipe {
|
||||
namespace {
|
||||
|
||||
constexpr double kWidth = 1280.0;
|
||||
constexpr double kHeight = 720.0;
|
||||
|
||||
// Ground truth test; testing tracking accuracy and multi-thread load testing.
|
||||
TEST(BoxTrackerTest, MovingBoxTest) {
|
||||
const std::string cache_dir =
|
||||
file::JoinPath("./", "/mediapipe/util/tracking/testdata/box_tracker");
|
||||
BoxTracker box_tracker(cache_dir, BoxTrackerOptions());
|
||||
|
||||
// Ground truth positions of the overlay (linear in between).
|
||||
// @ 0: (50, 100)
|
||||
// @ 3000: (50, 400)
|
||||
// @ 6000: (500, 400)
|
||||
// @ 9000: (1000, 50)
|
||||
// @ 12000: (50, 100)
|
||||
// @ 15000: (1000, 400)
|
||||
//
|
||||
// size of overlay: 220 x 252
|
||||
std::vector<Vector2_d> positions{
|
||||
{50.0 / kWidth, 100.0 / kHeight}, {50.0 / kWidth, 400.0 / kHeight},
|
||||
{500.0 / kWidth, 400.0 / kHeight}, {1000.0 / kWidth, 50.0 / kHeight},
|
||||
{50.0 / kWidth, 100.0 / kHeight}, {1000.0 / kWidth, 400.0 / kHeight},
|
||||
};
|
||||
|
||||
const Vector2_d overlay_sz(220.0 / kWidth, 252.0 / kHeight);
|
||||
|
||||
TimedBox initial_pos;
|
||||
initial_pos.left = positions[1].x();
|
||||
initial_pos.top = positions[1].y();
|
||||
initial_pos.right = initial_pos.left + overlay_sz.x();
|
||||
initial_pos.bottom = initial_pos.top + overlay_sz.y();
|
||||
initial_pos.time_msec = 3000;
|
||||
|
||||
// Test multithreading under load, ensure this does not crash or stall.
|
||||
box_tracker.NewBoxTrack(initial_pos, 0);
|
||||
// Cancel right after issuing.
|
||||
box_tracker.CancelAllOngoingTracks();
|
||||
|
||||
// Should not be scheduled.
|
||||
box_tracker.NewBoxTrack(initial_pos, 0);
|
||||
EXPECT_FALSE(box_tracker.IsTrackingOngoing());
|
||||
box_tracker.ResumeTracking();
|
||||
|
||||
box_tracker.NewBoxTrack(initial_pos, 0);
|
||||
// Two cancelations in a row should not block.
|
||||
box_tracker.CancelAllOngoingTracks();
|
||||
box_tracker.CancelAllOngoingTracks();
|
||||
box_tracker.ResumeTracking();
|
||||
|
||||
// Start again for real this time.
|
||||
box_tracker.NewBoxTrack(initial_pos, 0);
|
||||
|
||||
// Wait to terminate.
|
||||
box_tracker.WaitForAllOngoingTracks();
|
||||
|
||||
// Check that tracking did not abort.
|
||||
EXPECT_EQ(0, box_tracker.TrackInterval(0).first);
|
||||
EXPECT_GT(box_tracker.TrackInterval(0).second, 15000);
|
||||
|
||||
auto boxes_equal = [](const TimedBox& lhs, const TimedBox& rhs) {
|
||||
constexpr float kAccuracy = 0.015f;
|
||||
return (lhs.time_msec == rhs.time_msec &&
|
||||
std::abs(lhs.top - rhs.top) < kAccuracy &&
|
||||
std::abs(lhs.left - rhs.left) < kAccuracy &&
|
||||
std::abs(lhs.right - rhs.right) < kAccuracy &&
|
||||
std::abs(lhs.bottom - rhs.bottom) < kAccuracy);
|
||||
};
|
||||
|
||||
for (int k = 0; k < 15000; k += 33) {
|
||||
TimedBox box;
|
||||
EXPECT_TRUE(box_tracker.GetTimedPosition(0, k, &box));
|
||||
|
||||
// One groundtruth position every 3s, linear in between.
|
||||
const int rect_pos = k / 3000;
|
||||
const int dt = k - rect_pos * 3000;
|
||||
const int next_rect_pos = dt == 0 ? rect_pos : (rect_pos + 1);
|
||||
|
||||
// Blend to get initial position.
|
||||
const double alpha = dt / 3000.0;
|
||||
const Vector2_d gt_pos =
|
||||
(1.0 - alpha) * positions[rect_pos] + alpha * positions[next_rect_pos];
|
||||
|
||||
TimedBox gt_box;
|
||||
gt_box.time_msec = k;
|
||||
gt_box.top = gt_pos.y();
|
||||
gt_box.left = gt_pos.x();
|
||||
gt_box.right = gt_box.left + overlay_sz.x();
|
||||
gt_box.bottom = gt_box.top + overlay_sz.y();
|
||||
EXPECT_TRUE(boxes_equal(gt_box, box));
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
} // namespace mediapipe
|
||||
408
mediapipe/util/tracking/camera_motion.cc
Normal file
|
|
@ -0,0 +1,408 @@
|
|||
// Copyright 2019 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "mediapipe/util/tracking/camera_motion.h"
|
||||
|
||||
#include <numeric>
|
||||
|
||||
#include "absl/strings/str_format.h"
|
||||
#include "mediapipe/util/tracking/region_flow.h"
|
||||
|
||||
namespace mediapipe {
|
||||
|
||||
void CameraMotionToTranslation(const CameraMotion& camera_motion,
|
||||
TranslationModel* model) {
|
||||
if (camera_motion.has_translation()) {
|
||||
model->CopyFrom(camera_motion.translation());
|
||||
} else {
|
||||
model->Clear();
|
||||
}
|
||||
}
|
||||
|
||||
void CameraMotionToLinearSimilarity(const CameraMotion& camera_motion,
|
||||
LinearSimilarityModel* model) {
|
||||
if (camera_motion.has_linear_similarity()) {
|
||||
model->CopyFrom(camera_motion.linear_similarity());
|
||||
} else {
|
||||
TranslationModel translation;
|
||||
CameraMotionToTranslation(camera_motion, &translation);
|
||||
model->CopyFrom(LinearSimilarityAdapter::Embed(translation));
|
||||
}
|
||||
}
|
||||
|
||||
void CameraMotionToAffine(const CameraMotion& camera_motion,
|
||||
AffineModel* model) {
|
||||
if (camera_motion.has_affine()) {
|
||||
model->CopyFrom(camera_motion.affine());
|
||||
} else {
|
||||
LinearSimilarityModel similarity;
|
||||
CameraMotionToLinearSimilarity(camera_motion, &similarity);
|
||||
model->CopyFrom(AffineAdapter::Embed(similarity));
|
||||
}
|
||||
}
|
||||
|
||||
void CameraMotionToHomography(const CameraMotion& camera_motion,
|
||||
Homography* model) {
|
||||
if (camera_motion.has_homography()) {
|
||||
model->CopyFrom(camera_motion.homography());
|
||||
} else {
|
||||
AffineModel affine;
|
||||
CameraMotionToAffine(camera_motion, &affine);
|
||||
model->CopyFrom(HomographyAdapter::Embed(affine));
|
||||
}
|
||||
}
|
||||
|
||||
void CameraMotionToMixtureHomography(const CameraMotion& camera_motion,
|
||||
MixtureHomography* model) {
|
||||
if (camera_motion.has_mixture_homography()) {
|
||||
model->CopyFrom(camera_motion.mixture_homography());
|
||||
} else {
|
||||
Homography homography;
|
||||
CameraMotionToHomography(camera_motion, &homography);
|
||||
model->CopyFrom(MixtureHomographyAdapter::Embed(homography, 1));
|
||||
}
|
||||
}
|
||||
|
||||
CameraMotion ComposeCameraMotion(const CameraMotion& lhs,
|
||||
const CameraMotion& rhs) {
|
||||
CHECK_EQ(lhs.frame_width(), rhs.frame_width());
|
||||
CHECK_EQ(lhs.frame_height(), rhs.frame_height());
|
||||
|
||||
CameraMotion result = rhs;
|
||||
if (lhs.has_translation() || rhs.has_translation()) {
|
||||
*result.mutable_translation() =
|
||||
ModelCompose2(lhs.translation(), rhs.translation());
|
||||
}
|
||||
|
||||
if (lhs.has_similarity() || rhs.has_similarity()) {
|
||||
*result.mutable_similarity() =
|
||||
ModelCompose2(lhs.similarity(), rhs.similarity());
|
||||
}
|
||||
|
||||
if (lhs.has_linear_similarity() || rhs.has_linear_similarity()) {
|
||||
*result.mutable_linear_similarity() =
|
||||
ModelCompose2(lhs.linear_similarity(), rhs.linear_similarity());
|
||||
}
|
||||
|
||||
if (lhs.has_affine() || rhs.has_affine()) {
|
||||
*result.mutable_affine() = ModelCompose2(lhs.affine(), rhs.affine());
|
||||
}
|
||||
|
||||
if (lhs.has_homography() || rhs.has_homography()) {
|
||||
*result.mutable_homography() =
|
||||
ModelCompose2(lhs.homography(), rhs.homography());
|
||||
}
|
||||
|
||||
if (rhs.has_mixture_homography()) {
|
||||
if (lhs.has_mixture_homography()) {
|
||||
LOG(ERROR) << "Mixture homographies are not closed under composition, "
|
||||
<< "Only rhs mixtures composed with lhs homographies "
|
||||
<< "are supported.";
|
||||
} else if (lhs.type() <= CameraMotion::UNSTABLE_SIM) {
|
||||
// We only composit base model when stability is sufficient.
|
||||
*result.mutable_mixture_homography() =
|
||||
MixtureHomographyAdapter::ComposeLeft(rhs.mixture_homography(),
|
||||
lhs.homography());
|
||||
}
|
||||
} else if (lhs.has_mixture_homography()) {
|
||||
LOG(ERROR) << "Only rhs mixtures supported.";
|
||||
}
|
||||
|
||||
// Select max unstable type.
|
||||
result.set_type(std::max(lhs.type(), rhs.type()));
|
||||
result.set_average_magnitude(lhs.average_magnitude() +
|
||||
rhs.average_magnitude());
|
||||
result.set_translation_variance(
|
||||
std::max(lhs.translation_variance(), rhs.translation_variance()));
|
||||
result.set_similarity_inlier_ratio(
|
||||
std::min(lhs.similarity_inlier_ratio(), rhs.similarity_inlier_ratio()));
|
||||
|
||||
result.set_similarity_strict_inlier_ratio(
|
||||
std::min(lhs.similarity_strict_inlier_ratio(),
|
||||
rhs.similarity_strict_inlier_ratio()));
|
||||
|
||||
result.set_average_homography_error(
|
||||
std::max(lhs.average_homography_error(), rhs.average_homography_error()));
|
||||
result.set_homography_inlier_coverage(std::min(
|
||||
lhs.homography_inlier_coverage(), rhs.homography_inlier_coverage()));
|
||||
result.set_homography_strict_inlier_coverage(
|
||||
std::min(lhs.homography_strict_inlier_coverage(),
|
||||
rhs.homography_strict_inlier_coverage()));
|
||||
|
||||
// TODO: Overlay stuff.
|
||||
|
||||
result.set_flags(lhs.flags() | rhs.flags());
|
||||
result.set_timestamp_usec(
|
||||
std::max(lhs.timestamp_usec(), rhs.timestamp_usec()));
|
||||
result.set_match_frame(lhs.match_frame() + rhs.match_frame());
|
||||
|
||||
// TODO: Rest.
|
||||
return result;
|
||||
}
|
||||
|
||||
CameraMotion InvertCameraMotion(const CameraMotion& motion) {
|
||||
CameraMotion inverted = motion;
|
||||
if (motion.has_translation()) {
|
||||
*inverted.mutable_translation() = ModelInvert(motion.translation());
|
||||
}
|
||||
|
||||
if (motion.has_similarity()) {
|
||||
*inverted.mutable_similarity() = ModelInvert(motion.similarity());
|
||||
}
|
||||
|
||||
if (motion.has_linear_similarity()) {
|
||||
*inverted.mutable_linear_similarity() =
|
||||
ModelInvert(motion.linear_similarity());
|
||||
}
|
||||
|
||||
if (motion.has_affine()) {
|
||||
*inverted.mutable_affine() = ModelInvert(motion.affine());
|
||||
}
|
||||
|
||||
if (motion.has_homography()) {
|
||||
*inverted.mutable_homography() = ModelInvert(motion.homography());
|
||||
}
|
||||
|
||||
if (motion.has_mixture_homography()) {
|
||||
LOG(ERROR) << "Mixture homographies are not closed under inversion.";
|
||||
}
|
||||
|
||||
return inverted;
|
||||
}
|
||||
|
||||
void SubtractCameraMotionFromFeatures(
|
||||
const std::vector<CameraMotion>& camera_motions,
|
||||
std::vector<RegionFlowFeatureList*>* feature_lists) {
|
||||
CHECK(feature_lists != nullptr);
|
||||
CHECK_GE(camera_motions.size(), feature_lists->size());
|
||||
if (feature_lists->empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
bool use_mixtures = camera_motions[0].has_mixture_homography();
|
||||
|
||||
std::unique_ptr<MixtureRowWeights> row_weights;
|
||||
if (use_mixtures) {
|
||||
row_weights.reset(MixtureRowWeightsFromCameraMotion(
|
||||
camera_motions[0], (*feature_lists)[0]->frame_height()));
|
||||
}
|
||||
|
||||
for (int k = 0; k < feature_lists->size(); ++k) {
|
||||
Homography background_model;
|
||||
MixtureHomography background_model_mixture;
|
||||
if (use_mixtures) {
|
||||
CameraMotionToMixtureHomography(camera_motions[k],
|
||||
&background_model_mixture);
|
||||
} else {
|
||||
CameraMotionToHomography(camera_motions[k], &background_model);
|
||||
}
|
||||
|
||||
// Remove motion due to camera motion, leaving only foreground motion.
|
||||
for (auto& feature : *(*feature_lists)[k]->mutable_feature()) {
|
||||
const Vector2_f background_motion =
|
||||
(!use_mixtures ? HomographyAdapter::TransformPoint(
|
||||
background_model, FeatureLocation(feature))
|
||||
: MixtureHomographyAdapter::TransformPoint(
|
||||
background_model_mixture, *row_weights,
|
||||
FeatureLocation(feature))) -
|
||||
FeatureLocation(feature);
|
||||
const Vector2_f object_motion = FeatureFlow(feature) - background_motion;
|
||||
feature.set_dx(object_motion.x());
|
||||
feature.set_dy(object_motion.y());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
float ForegroundMotion(const CameraMotion& camera_motion,
|
||||
const RegionFlowFeatureList& feature_list) {
|
||||
if (camera_motion.has_mixture_homography()) {
|
||||
LOG(WARNING) << "Mixture homographies are present but function is only "
|
||||
<< "using homographies. Truncation error likely.";
|
||||
}
|
||||
|
||||
Homography background_motion;
|
||||
CameraMotionToHomography(camera_motion, &background_motion);
|
||||
|
||||
float foreground_motion = 0;
|
||||
for (auto& feature : feature_list.feature()) {
|
||||
const float error = (FeatureMatchLocation(feature) -
|
||||
HomographyAdapter::TransformPoint(
|
||||
background_motion, FeatureLocation(feature)))
|
||||
.Norm();
|
||||
foreground_motion += error;
|
||||
}
|
||||
|
||||
if (feature_list.feature_size() > 0) {
|
||||
foreground_motion *= 1.0f / feature_list.feature_size();
|
||||
}
|
||||
|
||||
return foreground_motion;
|
||||
}
|
||||
|
||||
void InitCameraMotionFromFeatureList(const RegionFlowFeatureList& feature_list,
|
||||
CameraMotion* camera_motion) {
|
||||
camera_motion->set_blur_score(feature_list.blur_score());
|
||||
camera_motion->set_frac_long_features_rejected(
|
||||
feature_list.frac_long_features_rejected());
|
||||
camera_motion->set_timestamp_usec(feature_list.timestamp_usec());
|
||||
camera_motion->set_match_frame(feature_list.match_frame());
|
||||
camera_motion->set_frame_width(feature_list.frame_width());
|
||||
camera_motion->set_frame_height(feature_list.frame_height());
|
||||
}
|
||||
|
||||
std::string CameraMotionFlagToString(const CameraMotion& camera_motion) {
|
||||
std::string text;
|
||||
if (camera_motion.flags() & CameraMotion::FLAG_SHOT_BOUNDARY) {
|
||||
text += "SHOT_BOUNDARY|";
|
||||
}
|
||||
|
||||
if (camera_motion.flags() & CameraMotion::FLAG_BLURRY_FRAME) {
|
||||
text += absl::StrFormat("BLURRY_FRAME %.2f|", camera_motion.bluriness());
|
||||
}
|
||||
|
||||
if (camera_motion.flags() & CameraMotion::FLAG_MAJOR_OVERLAY) {
|
||||
text += "MAJOR_OVERLAY|";
|
||||
}
|
||||
|
||||
if (camera_motion.flags() & CameraMotion::FLAG_SHARP_FRAME) {
|
||||
text += "SHARP_FRAME|";
|
||||
}
|
||||
|
||||
if (camera_motion.flags() & CameraMotion::FLAG_SHOT_FADE) {
|
||||
text += "SHOT_FADE|";
|
||||
}
|
||||
|
||||
if (camera_motion.flags() & CameraMotion::FLAG_DUPLICATED) {
|
||||
text += "DUPLICATED|";
|
||||
}
|
||||
return text;
|
||||
}
|
||||
|
||||
std::string CameraMotionTypeToString(const CameraMotion& motion) {
|
||||
switch (motion.type()) {
|
||||
case CameraMotion::VALID:
|
||||
return "VALID";
|
||||
case CameraMotion::UNSTABLE_HOMOG:
|
||||
return "UNSTABLE_HOMOG";
|
||||
case CameraMotion::UNSTABLE_SIM:
|
||||
return "UNSTABLE_SIM";
|
||||
case CameraMotion::UNSTABLE:
|
||||
return "UNSTABLE";
|
||||
case CameraMotion::INVALID:
|
||||
return "INVALID";
|
||||
}
|
||||
|
||||
return "NEVER HAPPENS WITH CLANG";
|
||||
}
|
||||
|
||||
float InlierCoverage(const CameraMotion& camera_motion,
|
||||
bool use_homography_coverage) {
|
||||
const int num_block_coverages = camera_motion.mixture_inlier_coverage_size();
|
||||
if (num_block_coverages == 0 || use_homography_coverage) {
|
||||
if (camera_motion.has_homography()) {
|
||||
return camera_motion.homography_inlier_coverage();
|
||||
} else {
|
||||
return 1.0f;
|
||||
}
|
||||
} else {
|
||||
return std::accumulate(camera_motion.mixture_inlier_coverage().begin(),
|
||||
camera_motion.mixture_inlier_coverage().end(),
|
||||
0.0f) *
|
||||
(1.0f / num_block_coverages);
|
||||
}
|
||||
}
|
||||
|
||||
template <class CameraMotionContainer>
|
||||
CameraMotion FirstCameraMotionForLooping(
|
||||
const CameraMotionContainer& camera_motions) {
|
||||
if (camera_motions.size() < 2) {
|
||||
LOG(ERROR) << "Not enough camera motions for refinement.";
|
||||
return CameraMotion();
|
||||
}
|
||||
|
||||
CameraMotion loop_motion = camera_motions[0];
|
||||
|
||||
// Only update motions present in first camera motion.
|
||||
bool has_translation = loop_motion.has_translation();
|
||||
bool has_similarity = loop_motion.has_linear_similarity();
|
||||
bool has_homography = loop_motion.has_homography();
|
||||
|
||||
TranslationModel translation;
|
||||
LinearSimilarityModel similarity;
|
||||
Homography homography;
|
||||
|
||||
for (int i = 1; i < camera_motions.size(); ++i) {
|
||||
const CameraMotion& motion = camera_motions[i];
|
||||
if (motion.has_mixture_homography()) {
|
||||
// TODO: Implement
|
||||
LOG(WARNING) << "This function does not validly apply mixtures; "
|
||||
<< "which are currently not closed under composition. ";
|
||||
}
|
||||
|
||||
switch (motion.type()) {
|
||||
case CameraMotion::INVALID:
|
||||
has_translation = false;
|
||||
has_similarity = false;
|
||||
has_homography = false;
|
||||
break;
|
||||
case CameraMotion::UNSTABLE:
|
||||
has_similarity = false;
|
||||
has_homography = false;
|
||||
break;
|
||||
case CameraMotion::UNSTABLE_SIM:
|
||||
has_homography = false;
|
||||
break;
|
||||
case CameraMotion::VALID:
|
||||
case CameraMotion::UNSTABLE_HOMOG:
|
||||
break;
|
||||
default:
|
||||
LOG(FATAL) << "Unknown CameraMotion::type.";
|
||||
}
|
||||
|
||||
// Only accumulate motions which are valid for the entire chain, otherwise
|
||||
// keep the pre-initialized motions.
|
||||
has_translation =
|
||||
has_translation && motion.has_translation() &&
|
||||
AccumulateInvertedModel(motion.translation(), &translation);
|
||||
has_similarity =
|
||||
has_similarity && motion.has_linear_similarity() &&
|
||||
AccumulateInvertedModel(motion.linear_similarity(), &similarity);
|
||||
has_homography = has_homography && motion.has_homography() &&
|
||||
AccumulateInvertedModel(motion.homography(), &homography);
|
||||
}
|
||||
|
||||
if (has_translation) {
|
||||
*loop_motion.mutable_translation() = translation;
|
||||
}
|
||||
if (has_similarity && IsInverseStable(similarity)) {
|
||||
*loop_motion.mutable_linear_similarity() = similarity;
|
||||
}
|
||||
if (has_homography && IsInverseStable(homography)) {
|
||||
*loop_motion.mutable_homography() = homography;
|
||||
}
|
||||
|
||||
VLOG(1) << "Looping camera motion refinement for "
|
||||
<< " translation:" << (has_translation ? "successful" : "failed")
|
||||
<< " similarity:" << (has_similarity ? "successful" : "failed")
|
||||
<< " homography:" << (has_homography ? "successful" : "failed");
|
||||
|
||||
return loop_motion;
|
||||
}
|
||||
|
||||
// Explicit instantiation.
|
||||
template CameraMotion FirstCameraMotionForLooping<std::vector<CameraMotion>>(
|
||||
const std::vector<CameraMotion>&);
|
||||
template CameraMotion FirstCameraMotionForLooping<std::deque<CameraMotion>>(
|
||||
const std::deque<CameraMotion>&);
|
||||
} // namespace mediapipe
|
||||
336
mediapipe/util/tracking/camera_motion.h
Normal file
|
|
@ -0,0 +1,336 @@
|
|||
// Copyright 2019 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef MEDIAPIPE_UTIL_TRACKING_CAMERA_MOTION_H_
|
||||
#define MEDIAPIPE_UTIL_TRACKING_CAMERA_MOTION_H_
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "mediapipe/util/tracking/camera_motion.pb.h"
|
||||
#include "mediapipe/util/tracking/motion_models.h"
|
||||
#include "mediapipe/util/tracking/region_flow.pb.h"
|
||||
|
||||
namespace mediapipe {
|
||||
|
||||
// Helper functions to extract specific models from CameraMotion.
|
||||
// Returned is always the requested model. In case, model is not present (i.e.
|
||||
// has_<motion model> fails), the highest degree of freedom model
|
||||
// (lower or equal to the requested model) that is present is embedded in the
|
||||
// requested model.
|
||||
// Presence of the model depends on wich models were requesteded to be
|
||||
// estimated (via MotionEstimationOptions, to initialize requested models to
|
||||
// identity, use ResetMotionModels above). For example, assume linear similarity
|
||||
// was not requested to be estimated, but affine was requested. If
|
||||
// CameraMotionToLinearSimilarity is called, has_linear_similarity would be
|
||||
// false and the function fall back returning a translation model.
|
||||
void CameraMotionToTranslation(const CameraMotion& camera_motion,
|
||||
TranslationModel* model);
|
||||
void CameraMotionToLinearSimilarity(const CameraMotion& camera_motion,
|
||||
LinearSimilarityModel* model);
|
||||
void CameraMotionToAffine(const CameraMotion& camera_motion,
|
||||
AffineModel* model);
|
||||
void CameraMotionToHomography(const CameraMotion& camera_motion,
|
||||
Homography* homography);
|
||||
void CameraMotionToMixtureHomography(const CameraMotion& camera_motion,
|
||||
MixtureHomography* mixture);
|
||||
|
||||
// TODO: Under development ...
|
||||
// Returns camera motion lhs * rhs. Initial camera motion is set to rhs
|
||||
// before composition.
|
||||
CameraMotion ComposeCameraMotion(const CameraMotion& lhs,
|
||||
const CameraMotion& rhs);
|
||||
|
||||
// Inverts every motion model that is set in CameraMotion.
|
||||
CameraMotion InvertCameraMotion(const CameraMotion& motion);
|
||||
|
||||
// Templated wrapper for above calls.
|
||||
template <class Model>
|
||||
Model CameraMotionToModel(const CameraMotion& camera_motion);
|
||||
|
||||
// Returns model from passed CameraMotion specified by unstable_type
|
||||
// (which must name a type != VALID, CHECK-ed) and embeds it in the specified
|
||||
// Model.
|
||||
template <class Model>
|
||||
Model UnstableCameraMotionToModel(const CameraMotion& camera_motion,
|
||||
CameraMotion::Type unstable_type);
|
||||
|
||||
// Projects passed model to lower degree of freedom model (embedded in original
|
||||
// type), as specified type. In case type is valid, function is effectively
|
||||
// identity function.
|
||||
// Only implemented for the following models:
|
||||
// - Translation
|
||||
// - LinearSimilarity
|
||||
// - AffineModel
|
||||
template <class Model>
|
||||
Model ProjectToTypeModel(const Model& model, float frame_width,
|
||||
float frame_height, CameraMotion::Type type);
|
||||
|
||||
// Substract camera motion (specifically highest, degree of freedom model,
|
||||
// that has been estimated reliably) from feature lists. Operates on vectors
|
||||
// for improved performance. Size of camera_motions can be larger than
|
||||
// feature_lists, in this case last camera motions are ignored.
|
||||
void SubtractCameraMotionFromFeatures(
|
||||
const std::vector<CameraMotion>& camera_motions,
|
||||
std::vector<RegionFlowFeatureList*>* feature_lists);
|
||||
|
||||
// Returns average motion magnitude after subtracting camera motion.
|
||||
float ForegroundMotion(const CameraMotion& camera_motion,
|
||||
const RegionFlowFeatureList& feature_list);
|
||||
|
||||
// Initializes a CameraMotion with its corresponding fields from a
|
||||
// RegionFlowFeatureList.
|
||||
void InitCameraMotionFromFeatureList(const RegionFlowFeatureList& feature_list,
|
||||
CameraMotion* camera_motion);
|
||||
|
||||
// Converts Camera motion flag to std::string.
|
||||
std::string CameraMotionFlagToString(const CameraMotion& motion);
|
||||
|
||||
// Converts Camera motion type to std::string. Used instead of builtin proto
|
||||
// function for mobile support.
|
||||
std::string CameraMotionTypeToString(const CameraMotion& motion);
|
||||
|
||||
// Returns inlier coverage either based on mixture (if present, in this case
|
||||
// return mean of block coverages) or else homography.
|
||||
// If neither is present, returns 0 to signal insufficient inliers.
|
||||
// If use_homography_coverage is set, uses homography even when mixture is
|
||||
// present.
|
||||
float InlierCoverage(const CameraMotion& camera_motion,
|
||||
bool use_homography_coverage);
|
||||
|
||||
// Downsamples passed motion models temporally by specified downsample_scale,
|
||||
// i.e. for models F_0, F_1, F_2, F_3, F_4 and downsample_scale of 2, models:
|
||||
// F_0 * F_1, F_2 * F_3 and F_4 are returned.
|
||||
// Optionally also performs downsampling of corresponding model_type returning
|
||||
// the least unstable for each composition.
|
||||
template <class Model>
|
||||
void DownsampleMotionModels(
|
||||
const std::vector<Model>& models,
|
||||
const std::vector<CameraMotion::Type>* model_type, // optional.
|
||||
int downsample_scale, std::vector<Model>* downsampled_models,
|
||||
std::vector<CameraMotion::Type>* downsampled_types);
|
||||
|
||||
// Compatible subsampling method to above DownsampleMotionModels.
|
||||
// Note, when downsampling for example:
|
||||
// F_0, F_1, F_2, F_3, F_4 by factor 3 via above function, downsampled result
|
||||
// will be F_0 * F_1 * F_2, F_3 * F_4
|
||||
// so we would need to pick entities at F_2 and F_4.
|
||||
// Template class Container must be SequenceContainer, like
|
||||
// std::vector, std::deque.
|
||||
template <class Container>
|
||||
void SubsampleEntities(const Container& input, int downsample_scale,
|
||||
Container* output);
|
||||
|
||||
// For perfect looping, this function computes the motion in the first frame
|
||||
// to be the inverse of the accumulated motion from frame 1 to N.
|
||||
// If a particular motion type is not available or not invertible at any
|
||||
// frame pair, the original motion for that type is retained.
|
||||
// Does not work if mixtures are present.
|
||||
template <class CameraMotionContainer> // STL container of CameraMotion's
|
||||
CameraMotion FirstCameraMotionForLooping(
|
||||
const CameraMotionContainer& container);
|
||||
|
||||
// Template implementation functions.
|
||||
|
||||
template <class Model>
|
||||
Model UnstableCameraMotionToModel(const CameraMotion& camera_motion,
|
||||
CameraMotion::Type unstable_type) {
|
||||
switch (unstable_type) {
|
||||
case CameraMotion::INVALID:
|
||||
return Model(); // Identity.
|
||||
|
||||
case CameraMotion::UNSTABLE: {
|
||||
return ModelAdapter<Model>::Embed(
|
||||
CameraMotionToModel<TranslationModel>(camera_motion));
|
||||
}
|
||||
|
||||
case CameraMotion::UNSTABLE_SIM: {
|
||||
return ModelAdapter<Model>::Embed(
|
||||
CameraMotionToModel<LinearSimilarityModel>(camera_motion));
|
||||
}
|
||||
|
||||
case CameraMotion::UNSTABLE_HOMOG: {
|
||||
return ModelAdapter<Model>::Embed(
|
||||
CameraMotionToModel<Homography>(camera_motion));
|
||||
}
|
||||
|
||||
case CameraMotion::VALID:
|
||||
LOG(FATAL) << "Specify a type != VALID";
|
||||
return Model();
|
||||
}
|
||||
}
|
||||
|
||||
template <>
|
||||
inline TranslationModel ProjectToTypeModel(const TranslationModel& model,
|
||||
float frame_width,
|
||||
float frame_height,
|
||||
CameraMotion::Type type) {
|
||||
switch (type) {
|
||||
case CameraMotion::INVALID:
|
||||
return TranslationModel(); // Identity.
|
||||
default:
|
||||
return model;
|
||||
}
|
||||
}
|
||||
|
||||
template <>
|
||||
inline LinearSimilarityModel ProjectToTypeModel(
|
||||
const LinearSimilarityModel& model, float frame_width, float frame_height,
|
||||
CameraMotion::Type type) {
|
||||
switch (type) {
|
||||
case CameraMotion::INVALID:
|
||||
return LinearSimilarityModel(); // Identity.
|
||||
|
||||
case CameraMotion::UNSTABLE:
|
||||
return LinearSimilarityAdapter::Embed(
|
||||
TranslationAdapter::ProjectFrom(model, frame_width, frame_height));
|
||||
|
||||
default:
|
||||
return model;
|
||||
}
|
||||
}
|
||||
|
||||
template <class Model>
|
||||
Model ProjectToTypeModel(const Model& model, float frame_width,
|
||||
float frame_height, CameraMotion::Type type) {
|
||||
switch (type) {
|
||||
case CameraMotion::INVALID:
|
||||
return Model(); // Identity.
|
||||
|
||||
case CameraMotion::UNSTABLE:
|
||||
return ModelAdapter<Model>::Embed(
|
||||
TranslationAdapter::ProjectFrom(model, frame_width, frame_height));
|
||||
|
||||
case CameraMotion::UNSTABLE_SIM:
|
||||
return ModelAdapter<Model>::Embed(LinearSimilarityAdapter::ProjectFrom(
|
||||
model, frame_width, frame_height));
|
||||
|
||||
// case UNSTABLE_HOMOG does not occur except for mixtures.
|
||||
|
||||
default:
|
||||
return model;
|
||||
}
|
||||
}
|
||||
|
||||
template <>
|
||||
inline MixtureHomography ProjectToTypeModel(const MixtureHomography&, float,
|
||||
float, CameraMotion::Type) {
|
||||
LOG(FATAL) << "Projection not supported for mixtures.";
|
||||
return MixtureHomography();
|
||||
}
|
||||
|
||||
template <class Model>
|
||||
void DownsampleMotionModels(
|
||||
const std::vector<Model>& models,
|
||||
const std::vector<CameraMotion::Type>* model_type, int downsample_scale,
|
||||
std::vector<Model>* downsampled_models,
|
||||
std::vector<CameraMotion::Type>* downsampled_types) {
|
||||
if (model_type) {
|
||||
CHECK_EQ(models.size(), model_type->size());
|
||||
CHECK(downsampled_models) << "Expecting output models.";
|
||||
}
|
||||
|
||||
CHECK(downsampled_models);
|
||||
downsampled_models->clear();
|
||||
if (downsampled_types) {
|
||||
downsampled_types->clear();
|
||||
}
|
||||
|
||||
const int num_models = models.size();
|
||||
|
||||
for (int model_idx = 0; model_idx < num_models;
|
||||
model_idx += downsample_scale) {
|
||||
const int last_idx =
|
||||
std::min<int>(model_idx + downsample_scale, num_models) - 1;
|
||||
|
||||
CameraMotion::Type sampled_type = CameraMotion::VALID;
|
||||
if (model_type) {
|
||||
// Get least stable model within downsample window (max operation).
|
||||
for (int i = model_idx; i <= last_idx; ++i) {
|
||||
sampled_type = std::max(sampled_type, model_type->at(i));
|
||||
}
|
||||
downsampled_types->push_back(sampled_type);
|
||||
}
|
||||
|
||||
// Concatenate models.
|
||||
Model composed = models[last_idx];
|
||||
|
||||
for (int i = last_idx - 1; i >= model_idx; --i) {
|
||||
composed = ModelCompose2(models[i], composed);
|
||||
}
|
||||
|
||||
downsampled_models->push_back(composed);
|
||||
}
|
||||
}
|
||||
|
||||
template <class Container>
|
||||
void SubsampleEntities(const Container& input, int downsample_factor,
|
||||
Container* output) {
|
||||
CHECK(output);
|
||||
output->clear();
|
||||
|
||||
if (input.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
for (int k = downsample_factor - 1; k < input.size();
|
||||
k += downsample_factor) {
|
||||
output->push_back(input[k]);
|
||||
}
|
||||
|
||||
if (input.size() % downsample_factor != 0) {
|
||||
// We need to add last constraint as termination.
|
||||
output->push_back(input.back());
|
||||
}
|
||||
}
|
||||
|
||||
template <>
|
||||
inline TranslationModel CameraMotionToModel(const CameraMotion& camera_motion) {
|
||||
TranslationModel model;
|
||||
CameraMotionToTranslation(camera_motion, &model);
|
||||
return model;
|
||||
}
|
||||
|
||||
template <>
|
||||
inline LinearSimilarityModel CameraMotionToModel(
|
||||
const CameraMotion& camera_motion) {
|
||||
LinearSimilarityModel model;
|
||||
CameraMotionToLinearSimilarity(camera_motion, &model);
|
||||
return model;
|
||||
}
|
||||
|
||||
template <>
|
||||
inline AffineModel CameraMotionToModel(const CameraMotion& camera_motion) {
|
||||
AffineModel model;
|
||||
CameraMotionToAffine(camera_motion, &model);
|
||||
return model;
|
||||
}
|
||||
|
||||
template <>
|
||||
inline Homography CameraMotionToModel(const CameraMotion& camera_motion) {
|
||||
Homography model;
|
||||
CameraMotionToHomography(camera_motion, &model);
|
||||
return model;
|
||||
}
|
||||
|
||||
template <>
|
||||
inline MixtureHomography CameraMotionToModel(
|
||||
const CameraMotion& camera_motion) {
|
||||
MixtureHomography model;
|
||||
CameraMotionToMixtureHomography(camera_motion, &model);
|
||||
return model;
|
||||
}
|
||||
|
||||
} // namespace mediapipe
|
||||
|
||||
#endif // MEDIAPIPE_UTIL_TRACKING_CAMERA_MOTION_H_
|
||||
198
mediapipe/util/tracking/camera_motion.proto
Normal file
|
|
@ -0,0 +1,198 @@
|
|||
// Copyright 2019 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Describes camera motion between two frames with various degree of freedom
|
||||
// parametric motion models.
|
||||
// In addition, stores features describing how reliable the estimated motion
|
||||
// model is.
|
||||
// Flags indicate several properties derived from the camera motion, e.g. if a
|
||||
// frame is sharp, blurry or contains overlays.
|
||||
|
||||
syntax = "proto2";
|
||||
|
||||
package mediapipe;
|
||||
|
||||
import "mediapipe/util/tracking/motion_models.proto";
|
||||
|
||||
// Next tag: 33
|
||||
message CameraMotion {
  // Background motion expressed in various models.
  // These are per-frame pair motions (from current to previous frame).
  // Models are expressed in the un-normalized domain frame_width x frame_height
  // that is passed to MotionEstimation (stored below).
  optional TranslationModel translation = 1;
  optional SimilarityModel similarity = 2;
  optional LinearSimilarityModel linear_similarity = 3;
  optional AffineModel affine = 4;
  optional Homography homography = 5;

  optional MixtureHomography mixture_homography = 8;

  // Frame dimensions camera motion was computed over.
  optional float frame_width = 31;
  optional float frame_height = 32;

  // Mixture homographies computed w.r.t. exponentially increasing
  // regularizers. Above mixture_homography member is selected from spectrum
  // based on amount of rolling shutter present in the video.
  repeated MixtureHomography mixture_homography_spectrum = 23;

  // Relative row sigma w.r.t. frame_height for mixture models.
  optional float mixture_row_sigma = 10;

  // Average of all motion vector magnitudes (without accounting for any motion
  // model), within 10th to 90th percentile (to remove outliers).
  optional float average_magnitude = 24 [default = 0.0];

  // Inlier-weighted variance of the translation model.
  // Specified w.r.t. the unnormalized video domain that motion models
  // are computed for.
  optional float translation_variance = 25 [default = 0.0];

  // Ratio of inliers w.r.t. regular and stricter thresholds. In [0, 1].
  optional float similarity_inlier_ratio = 29 [default = 0];
  optional float similarity_strict_inlier_ratio = 30 [default = 0];

  // Average registration error of homography in pixels.
  // Note: These two parameters default to zero in case homographies have not
  // been estimated.
  optional float average_homography_error = 11;

  // Fraction, in [0, 1], of homography inliers.
  optional float homography_inlier_coverage = 12;

  // Same as above but with stricter threshold.
  // (For details, see: MotionEstimationOptions::strict_coverage_scale).
  // Coverage is designed to measure the amount of significant outliers,
  // which can affect the validity of the estimated homography.
  // However, it does not discount small outliers, which occur in case
  // of small rolling shutter wobbles. For this a stricter version of coverage
  // is needed, which is essential for computing the rolling_shutter_guess,
  // i.e. the increase in coverage by using mixtures vs. homographies.
  optional float homography_strict_inlier_coverage = 22;

  // Per-block inlier fraction for mixtures.
  repeated float mixture_inlier_coverage = 13;

  // Set based on stability analysis indicating if frame is likely to originate
  // from a rolling shutter camera. (-1 is used to indicate frame was not
  // tested, e.g. due to mixture deemed unstable for analysis).
  // Guess is a scalar indicating by how much the mixture models (suitable for
  // rolling shutter distortions) increased inlier coverage compared to a single
  // homography. For example, a value of 1.3 indicates that the mixture models
  // increased inlier coverage by 30%.
  // If not -1, range is in [0, inf] (values slightly smaller than 1 are
  // possible due to suppression of noisy feature tracks during estimation).
  optional float rolling_shutter_guess = 14;

  // Indicating if CameraMotion is deemed to originate from rolling
  // shutter camera (index >= 0), and if so, denotes the index in the
  // mixture_homography_spectrum, where higher indices correspond to heavier
  // regularized motions. If motion is not deemed to originate from a rolling
  // shutter camera, index is set to -1.
  optional int32 rolling_shutter_motion_index = 16 [default = -1];

  // List of overlay indices (cell locations in column major format) over domain
  // of size overlay_domain x overlay_domain, where
  // overlay_domain is set by MotionEstimation to
  // MotionEstimationOptions::OverlayDetectionOptions::analysis_mask_size.
  // Overlay analysis is performed over a chunk of frames, as specified by
  // MotionEstimationOptions::overlay_analysis_chunk_size, with the resulting
  // overlay indices being assigned to each frame of the chunk.
  // Consequently it suffices to store the result only for the first frame
  // of every chunk. Subsequent frames store a single negative index relative
  // to the first chunk frame indicating where to locate the overlay indices.
  // Specifically, if for frame f, overlay_indices(0) == -2, overlay indices for
  // the corresponding chunk can be found at frame f - 2.
  // For details about how overlay indices are used to flag a frame to contain
  // an overlay, see MotionFilterOptions::OverlayOptions.
  repeated int32 overlay_indices = 17;
  optional int32 overlay_domain = 18 [default = 10];

  // CameraMotion type indicates whether highest degree of freedom (DOF)
  // model estimation was deemed stable, in which case CameraMotion::Type is set
  // to VALID.
  // If a model was deemed not stable (according to *StabilityBounds in
  // MotionEstimationOptions), it is set to the lower dof type which was deemed
  // stable.
  enum Type {
    VALID = 0;           // All requested motion models estimated reliably.
    UNSTABLE_HOMOG = 1;  // Fallback to homographies, mixture unreliable.
    UNSTABLE_SIM = 2;    // Fallback to similarity model, homography
                         // unreliable.
    UNSTABLE = 3;        // Fallback to translation model, similarity
                         // unreliable, legacy naming.
    INVALID = 4;         // Identity model, translation unreliable.
  }

  optional Type type = 6 [default = VALID];

  // If set, stores original type in case it was overridden (by filtering
  // functions, etc.).
  optional Type overridden_type = 15 [default = VALID];

  // Set of optional *bit* flags set for various purposes.
  enum Flags {
    FLAG_SHOT_BOUNDARY = 1;  // Set to indicate presence of a
                             // shot boundary.
    FLAG_BLURRY_FRAME = 2;
    FLAG_MAJOR_OVERLAY = 4;
    FLAG_SHARP_FRAME = 8;           // Set if frame is considered sharp
                                    // in a neighborhood of frames.
    FLAG_SINGULAR_ESTIMATION = 16;  // Indicates that estimation resulted
                                    // in singular optimization problem.
                                    // Used internally by MotionEstimation.
    // Indicates if shot boundary is part of a fade. If so, all frames of the
    // fade will be labeled with the FLAG but only the begin and end of the fade
    // will have the FLAG_SHOT_BOUNDARY set.
    FLAG_SHOT_FADE = 32;

    FLAG_DUPLICATED = 64;     // Set if frame is exact duplicate of
                              // previous frame.
    FLAG_CENTER_FRAME = 128;  // Indicates this frame is at the
                              // center of the sequence. Currently
                              // used to constrain stabilizing crop
                              // transform.
  }

  optional int32 flags = 19 [default = 0];

  // Same as in RegionFlowFeatureList (from region_flow.proto), measures blur
  // as average cornerness over textured areas. As it depends on the image
  // content, should only be used relative.
  optional float blur_score = 20;

  // Quantifies amount of blur. Specified as ratio w.r.t. sharpest matching
  // frame, i.e. 1 indicates no blur, values > 1 amount of blur w.r.t. sharpest
  // frame.
  optional float bluriness = 21 [default = 0.0];

  // Same as in RegionFlowFeatureList (from region_flow.proto). Stores fraction
  // of long feature tracks that got rejected for this frame.
  optional float frac_long_features_rejected = 26;

  // Same as in RegionFlowFeatureList (from region_flow.proto).
  // Timestamp in micro seconds of the underlying frame.
  optional int64 timestamp_usec = 27 [default = 0];

  // Same as in RegionFlowFeatureList (from region_flow.proto).
  // Denotes frame that motion was computed w.r.t. to, locally to the current
  // frame. Values < 0 indicate backward tracking, while values > 0 indicate
  // forward tracking. For example, match_frame = -1, indicates tracking is
  // from current to previous frame.
  optional int32 match_frame = 28 [default = 0];

  // Deprecated fields.
  extensions 9;
}
|
||||
1023
mediapipe/util/tracking/flow_packager.cc
Normal file
168
mediapipe/util/tracking/flow_packager.h
Normal file
|
|
@ -0,0 +1,168 @@
|
|||
// Copyright 2019 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef MEDIAPIPE_UTIL_TRACKING_FLOW_PACKAGER_H_
|
||||
#define MEDIAPIPE_UTIL_TRACKING_FLOW_PACKAGER_H_
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/strings/string_view.h"
|
||||
#include "mediapipe/framework/port/integral_types.h"
|
||||
#include "mediapipe/util/tracking/flow_packager.pb.h"
|
||||
#include "mediapipe/util/tracking/motion_estimation.pb.h"
|
||||
#include "mediapipe/util/tracking/region_flow.pb.h"
|
||||
|
||||
namespace mediapipe {
|
||||
|
||||
// Usage (output):
|
||||
// FlowPackager flow_packager((FlowPackagerOptions()));
|
||||
//
|
||||
// // Input: Feature lists and optional camera motion
|
||||
// vector<RegionFlowFeatureList> input_features; // Externally supplied.
|
||||
// vector<CameraMotion> input_motions;
|
||||
//
|
||||
// const int num_frames = input_features.size();
|
||||
//
|
||||
// // Can encode to either TrackingContainerFormat or use protos.
|
||||
// TrackingContainerFormat container;
|
||||
// TrackingContainerProto proto;
|
||||
//
|
||||
// for (int f = 0; f < num_frames; ++f) {
|
||||
// // Obtain tracking data.
|
||||
// TrackingData track_data;
|
||||
// flow_packager.PackFlow(input_features[f],
|
||||
// input_motions[f],
|
||||
// &track_data);
|
||||
//
|
||||
// // Encode tracking data.
|
||||
// BinaryTrackingData binary_data;
|
||||
// flow_packager.EncodeTrackingData(track_data,
|
||||
// &binary_data);
|
||||
//
|
||||
// // Add to either container format or proto.
|
||||
// // Container:
|
||||
// TrackingContainer* track_data_encoded = container.add_track_data();
|
||||
// flow_packager.BinaryTrackingDataToContainer(binary_data,
|
||||
// track_data_encoded);
|
||||
// // Proto:
|
||||
// proto.add_track_data()->CopyFrom(binary_data);
|
||||
// }
|
||||
//
|
||||
// // Write meta and term containers.
|
||||
// flow_packager.FinalizeTrackingContainerFormat(nullptr, &container);
// flow_packager.FinalizeTrackingContainerProto(nullptr, &proto);
|
||||
//
|
||||
// // Convert to binary std::string to stream out.
|
||||
// std::string output;
|
||||
// flow_packager.TrackingContainerFormatToBinary(container, &output);
|
||||
// // OR:
|
||||
// proto.SerializeToString(&output);
|
||||
|
||||
// Usage (input):
|
||||
// std::string input;
|
||||
// FlowPackager flow_packager((FlowPackagerOptions()));
|
||||
// TrackingContainerFormat container;
|
||||
// flow_packager.TrackingContainerFormatFromBinary(input, &container);
|
||||
//
|
||||
// // Obtain Track data.
|
||||
// vector<TrackingData> tracking_data; // To be used by tracking.
|
||||
// for (const auto& track_data_encoded : container.track_data()) {
|
||||
// tracking_data.push_back(TrackingData());
|
||||
// flow_packager.DecodeTrackingData(track_data_encoded,
|
||||
// &tracking_data.back());
|
||||
// }
|
||||
//
|
||||
// // Use tracking_data with Tracker.
|
||||
|
||||
class CameraMotion;
|
||||
class RegionFlowFeatureList;
|
||||
|
||||
class FlowPackager {
|
||||
public:
|
||||
explicit FlowPackager(const FlowPackagerOptions& options);
|
||||
FlowPackager(const FlowPackager&) = delete;
|
||||
FlowPackager& operator=(const FlowPackager&) = delete;
|
||||
|
||||
void PackFlow(const RegionFlowFeatureList& feature_list,
|
||||
const CameraMotion* camera_motion, // optional.
|
||||
TrackingData* tracking_data) const;
|
||||
|
||||
// Converts TrackingData to condensed binary representation.
|
||||
void EncodeTrackingData(const TrackingData& tracking_data,
|
||||
BinaryTrackingData* binary_data) const;
|
||||
|
||||
void DecodeTrackingData(const BinaryTrackingData& data,
|
||||
TrackingData* tracking_data) const;
|
||||
|
||||
void BinaryTrackingDataToContainer(const BinaryTrackingData& binary_data,
|
||||
TrackingContainer* container) const;
|
||||
|
||||
void BinaryTrackingDataFromContainer(const TrackingContainer& container,
|
||||
BinaryTrackingData* binary_data) const;
|
||||
|
||||
void DecodeMetaData(const TrackingContainer& data, MetaData* meta_ata) const;
|
||||
|
||||
// Fills in meta (first container) and termination data (last container).
|
||||
// Optionally, pass timestamps for each frame.
|
||||
void FinalizeTrackingContainerFormat(
|
||||
std::vector<uint32>* timestamps, // optional, can be null.
|
||||
TrackingContainerFormat* container_fromat);
|
||||
void FinalizeTrackingContainerProto(
|
||||
std::vector<uint32>* timestamps, // optional, can be null.
|
||||
TrackingContainerProto* proto);
|
||||
|
||||
// Fast encode to binary representation.
|
||||
void TrackingContainerFormatToBinary(
|
||||
const TrackingContainerFormat& container_format, std::string* binary);
|
||||
|
||||
// Fast decode from binary representation.
|
||||
void TrackingContainerFormatFromBinary(
|
||||
const std::string& binary, TrackingContainerFormat* container_format);
|
||||
|
||||
// Checks whether tracking data can be encoded in high profile mode without
|
||||
// duplicating any features. This occurs if the horizonal distance between two
|
||||
// features is less than 64.
|
||||
bool CompatibleForEncodeWithoutDuplication(
|
||||
const TrackingData& tracking_data) const;
|
||||
|
||||
// Helper function for test. Sorts according to scaled, integer based
|
||||
// lexicographical ordering.
|
||||
// Declared public for access by test.
|
||||
void SortRegionFlowFeatureList(float scale_x, float scale_y,
|
||||
RegionFlowFeatureList* feature_list) const;
|
||||
|
||||
// Removes binary encoded container from std::string and parses it to
|
||||
// container. Returns header std::string of the parsed container. Useful for
|
||||
// random seek.
|
||||
std::string SplitContainerFromString(absl::string_view* binary_data,
|
||||
TrackingContainer* container);
|
||||
|
||||
private:
|
||||
// Sets meta data for a set
|
||||
void InitializeMetaData(int num_frames, const std::vector<uint32>& msecs,
|
||||
const std::vector<int>& data_sizes,
|
||||
MetaData* meta_data) const;
|
||||
|
||||
// Serializes container to binary std::string and adds it to binary_data.
|
||||
void AddContainerToString(const TrackingContainer& container,
|
||||
std::string* binary_data);
|
||||
|
||||
private:
|
||||
FlowPackagerOptions options_;
|
||||
};
|
||||
|
||||
} // namespace mediapipe
|
||||
|
||||
#endif // MEDIAPIPE_UTIL_TRACKING_FLOW_PACKAGER_H_
|
||||
319
mediapipe/util/tracking/flow_packager.proto
Normal file
|
|
@ -0,0 +1,319 @@
|
|||
// Copyright 2019 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
syntax = "proto2";
|
||||
|
||||
package mediapipe;
|
||||
|
||||
import "mediapipe/util/tracking/motion_models.proto";
|
||||
import "mediapipe/util/tracking/region_flow.proto";
|
||||
|
||||
// Messages encapsulating compressed and uncompressed TrackingData.
|
||||
//
|
||||
// Uncompressed tracking data can be aggregated via an TrackingDataChunk
|
||||
// (e.g. to be cached to file per chunk). The whole chunk persists in memory
|
||||
// after reading.
|
||||
//
|
||||
// Compressed tracking data can be aggregated as binary encoded TrackingData
|
||||
// messages into two container formats (with support for random seeking):
|
||||
// 1) TrackingContainerProto:
|
||||
// Encoding using proto buffer wire format, using default proto
|
||||
// serialization and de-serialization to binary string.
|
||||
// The container uses the MetaData message to store the stream offsets and
|
||||
// milliseconds for each frame of TrackingData. TrackingData itself is custom
|
||||
// encoded to binary using FlowPackager::EncodeTrackingData and the resulting
|
||||
// binary blob wrapped in a BinaryTrackingData message.
|
||||
// 2) TrackingContainerFormat:
|
||||
// Encoding without any dependencies to protobuffers, for clients without
|
||||
// proto buffer support.
|
||||
// Encoding is based on encoding binary blobs of data wrapped into repeated
|
||||
// containers. The layout of a container is described by the message
|
||||
// TrackingContainer and serialized to binary data as described below
|
||||
// (without using proto encoding). Therefore, message TrackingContainer is
|
||||
// mostly for documentation purposes than for direct use.
|
||||
// The format is described by the proto message TrackingContainerFormat (used
|
||||
// internally by FlowPackager) however serialization and de-serialization
|
||||
// to binary string is performed using custom methods supplied by
|
||||
// FlowPackager (TrackingContainerFormatToBinary and
|
||||
// TrackingContainerFormatFromBinary).
|
||||
// The format stores the MetaData first as above, although using custom
|
||||
// encoding. TrackingData is encoded to binary as above using
|
||||
// FlowPackager::EncodeTrackingData and the resulting binary blob is stored
|
||||
// within a TrackingContainer.
|
||||
|
||||
// Next flag: 9
|
||||
message TrackingData {
  // Per-frame *bit* flags describing encode profile and frame properties.
  enum FrameFlags {
    FLAG_PROFILE_BASELINE = 0;
    FLAG_PROFILE_HIGH = 1;
    FLAG_HIGH_FIDELITY_VECTORS = 2;
    FLAG_BACKGROUND_UNSTABLE = 4;  // Background model could not be estimated.
    FLAG_DUPLICATED = 8;           // Frame is duplicated, i.e. identical to
                                   // previous one.
    // Indicates the beginning of a new chunk. In this case the track_id's
    // are not compatible w.r.t. previous one.
    FLAG_CHUNK_BOUNDARY = 16;
  }

  optional int32 frame_flags = 1 [default = 0];

  // Tracking data is resolution independent specified w.r.t.
  // specified domain.
  optional int32 domain_width = 2;
  optional int32 domain_height = 3;

  // Aspect ratio (w/h) of the original frame tracking data was computed from.
  optional float frame_aspect = 6 [default = 1.0];

  optional Homography background_model = 4;

  // Stores num_elements vectors of motion data. (x,y) position encoded via
  // row_indices and col_starts, as compressed sparse column matrix storage
  // format:
  // (https://en.wikipedia.org/wiki/Sparse_matrix#Compressed_sparse_column_.28CSC_or_CCS.29),
  // Vector data is stored as (dx, dy) position. Optionally we store the fitting
  // error and track id for each feature.
  message MotionData {
    optional int32 num_elements = 1;

    // #num_elements pairs (flow_x, flow_y) densely packed.
    repeated float vector_data = 2 [packed = true];

    // Stores corresponding track index for each feature. Features belonging
    // to the same track over time are assigned the same id.
    // NOTE: Due to size, tracking ids are never stored as compressed binary
    // tracking data.
    repeated int32 track_id = 3 [packed = true];

    // # num_elements row indices.
    repeated int32 row_indices = 4 [packed = true];

    // Start index in above array for each column (#domain_width + 1 entries).
    repeated int32 col_starts = 5 [packed = true];

    // Feature descriptors for num_elements feature points.
    repeated BinaryFeatureDescriptor feature_descriptors = 6;
  }

  optional MotionData motion_data = 5;

  // Total number of features in our analysis.
  optional uint32 global_feature_count = 7;

  // Average of all motion vector magnitudes (without accounting for any motion
  // model), within 10th to 90th percentile (to remove outliers).
  optional float average_motion_magnitude = 8;
}
|
||||
|
||||
// Aggregates uncompressed TrackingData for a consecutive range of frames.
message TrackingDataChunk {
  // One frame of tracking data plus its position in the stream.
  message Item {
    optional TrackingData tracking_data = 1;
    // Global frame index.
    optional int32 frame_idx = 2;
    // Corresponding timestamp.
    optional int64 timestamp_usec = 3;
    // Previous frame timestamp.
    optional int64 prev_timestamp_usec = 4;
  }

  repeated Item item = 1;

  // Set as marker for last chunk.
  optional bool last_chunk = 2 [default = false];

  // Set as marker for first chunk.
  optional bool first_chunk = 3 [default = false];
}
|
||||
|
||||
// TrackingData in compressed binary format. Obtainable via
|
||||
// FlowPackager::EncodeTrackingData. Details of binary encode are below.
|
||||
message BinaryTrackingData {  // TrackingContainer::header = "TRAK"
  // Custom binary encode of one TrackingData frame; layout documented below.
  optional bytes data = 1;
}
|
||||
|
||||
// Detailed explanation of binary Tracking data encode (LITTLE ENDIAN encode!)
|
||||
// TrackingData is stored in binary as a struct of the above fields and the
|
||||
// compressed motion data in sparse column matrix storage format.
|
||||
// (https://en.wikipedia.org/wiki/Sparse_matrix#Compressed_sparse_column_.28CSC_or_CCS.29)
|
||||
// Specifically, TrackingData is encoded as:
|
||||
// { frame_flags : 32 bit int (from member)
|
||||
// domain_width : 32 bit int (from member)
|
||||
// domain_height : 32 bit int (from member)
|
||||
// frame_aspect : 32 bit float (from member)
|
||||
//
|
||||
// background_model : 6 * 32 bit float (dx, dy, a, b, c, d of AffineModel)
|
||||
// scale : 32 bit float (scale vectors are multiplied with)
|
||||
// num_vectors : 32 bit int (from member num_elements)
|
||||
//
|
||||
// col_start_delta : (domain_width + 1) * 8 bit uint (col starts delta
|
||||
// encoded)
|
||||
// row_idx_size : 32 bit int (size of row_idx array <= num_vectors)
|
||||
// row_idx : row_idx_size * 8 bit uint
|
||||
// vector_size : 32 bit int (size of vector_data)
|
||||
// vector_data : vector_size * [8 bit | 16 bit] int
|
||||
// (depending on FLAG_HIGH_FIDELITY_VECTORS)
|
||||
// }
|
||||
//
|
||||
// >> Baseline encode <<
|
||||
// Scale is determined such that maximum vector value (maximum across x and y)
|
||||
// is mapped to highest 8 bit or 16 bit SIGNED int
|
||||
// (i.e. 7 or 15 bit resolution respectively).
|
||||
// Vector values are multiplied by this scale (storing float in int with
|
||||
// truncation) and (dx, dy) is packed as [dy | dx] into a 16bit or 32 bit word.
|
||||
// Unpacking requires therefore dividing the vector values by scale.
|
||||
//
|
||||
// Column starts are delta compressed, that is, col_start_delta[i] stores
|
||||
// col_starts(i) - col_starts(i - 1) from MotionData.
|
||||
//
|
||||
// Row indices are directly stored at 8 bit uints, that is row_idx_size ==
|
||||
// num_vectors in this case.
|
||||
//
|
||||
//
|
||||
// >> High profile encode <<
|
||||
// Scale is determined as above but for maximum vector deltas (maximum across x
|
||||
// and y of magnitude in difference between two adjacent vectors). Vector value
|
||||
// deltas are multiplied by this scale before encoding.
|
||||
//
|
||||
// Encoding is more complex compared to baseline. Instead of vector value, delta
|
||||
// vector values (difference in dx = ddx, difference in dy = ddy)
|
||||
// are multiplied by scale and storred packed as [ddy | ddx] into to 16bit or
|
||||
// 32bit word. Compression algorithm accounts for error accumulation, so
|
||||
// unpacking should first add deltas in integer domain (for x and y separately)
|
||||
// and then divide by scale to yield (an approximation) of the
|
||||
// original vector value.
|
||||
// Most importantly, not every vector value is stored, but only if the delta is
|
||||
// above the FlowPackagerOptions::high_profile_reuse_threshold, in which case we
|
||||
// advance to the next vector data. Otherwise the previous vector is used.
|
||||
//
|
||||
// The information whether to advance is stored for each vector in the
|
||||
// highest bit of the row index (FlowPackagerOptions::ADVANCE_FLAG). Row
|
||||
// indices are not stored as in the baseline profile directly, but as deltas
|
||||
// (reset at the beginning of every column). As deltas are small it is often
|
||||
// possible to store two deltas (if both are < 8) in a single byte. This is
|
||||
// indicated by the second highest flag in the row index
|
||||
// (FlowPackagerOptions::DOUBLE_INDEX_ENCODE). If set, row index stores
|
||||
// [row_delta_1 | row_delta_2] in the lower 6 bit. Note, that the advance flag
|
||||
// applies uniformly to both deltas in this case.
|
||||
// Sidenote (edge case): Due to the use of the top 2 bits as flags,
|
||||
// at times we cannot store the full row delta in the lower 6 bits.
|
||||
// In this case the vector is duplicated (using the ADVANCE_FLAG)
|
||||
// until the delta sum of duplicated vectors reaches the original delta.
|
||||
// Consequently, the compressed vector field in high profile may contain a few
|
||||
// vectors more than the original.
|
||||
//
|
||||
// Column starts are delta compressed as in baseline, but account for double
|
||||
// index encodes. Therefore each column delta is reduced by the number of double
|
||||
// index encodes occurring for this column. This has to be replicated on the
|
||||
// decoding side, each delta needs to be increased by the number of double index
|
||||
// encodes encountered during encoding.
|
||||
|
||||
// Stores offsets for random seek and time offsets for each frame of
|
||||
// TrackingData. Stream offsets are specified relative w.r.t. end of metadata
|
||||
// blob.
|
||||
// Offsets specify start of the corresponding binary encoded TrackingContainer
|
||||
// (for TrackingContainerFormat) or BinaryTrackingData proto (for
|
||||
// TrackingContainerProto).
|
||||
message MetaData {  // TrackingContainer::header = "META"
  // Total number of TrackingData frames in the stream.
  optional fixed32 num_frames = 2;

  // Seek information for one frame of TrackingData.
  message TrackOffset {
    optional fixed32 msec = 1;           // Time offset of the metadata in msec.
    optional fixed32 stream_offset = 2;  // Offset of TrackingContainer or
                                         // respectively BinaryTrackingData
                                         // in stream.
                                         // Specified w.r.t. end of the
                                         // Metadata.
  }

  repeated TrackOffset track_offsets = 3;
}
|
||||
|
||||
// TrackingContainer is self-describing container format to store arbitrary
|
||||
// chunks of binary data. Each container is typed via its 4 character header,
|
||||
// versioned via an int, and followed by the size of the binary data and the
|
||||
// actual data. Designed for clients without availability of protobuffer
|
||||
// support.
|
||||
// Note: This message is mainly used for documentation purposes and uses custom
|
||||
// encoding as specified by FlowPackager::TrackingContainerFormatToBinary.
|
||||
// Default binary size of a TrackingContainer (DO NOT CHANGE!):
|
||||
// header: 4 byte +
|
||||
// version: 4 byte +
|
||||
// size: 4 byte +
|
||||
// data #size
|
||||
// SUM: 12 + #size.
|
||||
message TrackingContainer {
  optional string header = 1;                  // 4 character header.
  optional fixed32 version = 2 [default = 1];  // Version information.
  optional fixed32 size = 3;  // Size of binary data held by container.
  optional bytes data = 4;    // Binary data encoded.

  // DO NOT alter layout of TrackingContainer.
  // Use version to extend or alter encoded binary data.
}
|
||||
|
||||
// Container format for clients without proto support (written via
|
||||
// FlowPackager::TrackingContainerFormatToBinary and read via
|
||||
// FlowPackager::TrackingContainerFormatFromBinary).
|
||||
// Proto here is an intermediate format for documentation and internal use.
|
||||
// Stores multiple TrackingContainers of different types.
|
||||
// Meta data is stored first, to facilitate random seek (via stream offset
|
||||
// positions) to arbitrary binary TrackingData. Termination container signals
|
||||
// of stream.
|
||||
message TrackingContainerFormat {
  optional TrackingContainer meta_data = 1;   // Wraps binary meta data, via
                                              // custom encode.
  repeated TrackingContainer track_data = 2;  // Wraps BinaryTrackingData.

  // Add new TrackingContainers above before end of stream indicator.
  // Zero sized termination container with TrackingContainer::header = "TERM".
  optional TrackingContainer term_data = 3;
}
|
||||
|
||||
// Simplified proto format of above TrackingContainerFormat. Instead of using
|
||||
// self-describing TrackingContainer's, we simply use the proto wire format for
|
||||
// encoding and decoding (proto format is typed and versioned via ids).
|
||||
message TrackingContainerProto {
  optional MetaData meta_data = 1;             // Seek and timestamp info.
  repeated BinaryTrackingData track_data = 2;  // One entry per frame.
}
|
||||
|
||||
// Options controlling compression and encoding.
|
||||
message FlowPackagerOptions {
  // Tracking data is resolution independent specified w.r.t.
  // specified domain. Only values <= 256 are supported if binary tracking data
  // is requested to be supported (see below).
  optional int32 domain_width = 1 [default = 256];
  optional int32 domain_height = 2 [default = 192];

  // Needs to be set for calls to FlowPackager::EncodeTrackingData. If encoding
  // is not required, can be set to false in which case a higher domain_width
  // can be used.
  optional bool binary_tracking_data_support = 6 [default = true];

  // If set, uses the high profile (delta-compressed) binary encode described
  // in the file-level comment.
  optional bool use_high_profile = 3 [default = false];

  // If set uses 16 bit encode for vector data, in BinaryTrackingData,
  // otherwise only 8 bits are used.
  optional bool high_fidelity_16bit_encode = 4 [default = true];

  // In high profile encode, re-use previously encoded vector when absolute
  // difference to current vector is below threshold.
  optional float high_profile_reuse_threshold = 5 [default = 0.5];

  // High profile encoding flags (bit layout of an encoded row index; see the
  // high profile encode description in the file-level comment).
  enum HighProfileEncoding {
    ADVANCE_FLAG = 0x80;         // Top bit: advance to next vector datum.
    DOUBLE_INDEX_ENCODE = 0x40;  // Second bit: two row deltas in one byte.
    INDEX_MASK = 0x3F;           // Lower 6 bits: row delta value(s).
  }
}
|
||||
93
mediapipe/util/tracking/frame_selection.proto
Normal file
|
|
@ -0,0 +1,93 @@
|
|||
// Copyright 2019 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
syntax = "proto2";
|
||||
|
||||
package mediapipe;
|
||||
|
||||
import "mediapipe/util/tracking/camera_motion.proto";
|
||||
import "mediapipe/util/tracking/frame_selection_solution_evaluator.proto";
|
||||
import "mediapipe/util/tracking/region_flow.proto";
|
||||
|
||||
// Stores selected timestamps and corresponding frame index.
message FrameSelectionTimestamp {
  optional int64 timestamp = 1;  // Timestamp of the selected frame.

  // Frame index of the selected frame in the initial video stream. If this
  // timestamp was manufactured, this will be the index of the initial frame.
  optional int32 frame_idx = 2;

  // If this timestamp was manufactured, the timestamp of the original frame.
  // -1 indicates the timestamp was not manufactured.
  optional int64 processed_from_timestamp = 3 [default = -1];
}
|
||||
|
||||
// Stores the result of the frame selection, with composited features.
// Next index: 6
message FrameSelectionResult {
  optional int64 timestamp = 1;  // Timestamp of the selected frame.

  // Frame index of the selected frame in the initial video stream. If this
  // timestamp was manufactured, this will be the index of the initial frame.
  optional int32 frame_idx = 2;

  // CameraMotion from selected item to previous selected item.
  optional CameraMotion camera_motion = 3;

  // Features from selected item to previous selected item.
  optional RegionFlowFeatureList features = 4;

  // If this FrameSelectionResult was the result of processing a previous one,
  // the timestamp of the original frame. -1 indicates no such processing.
  optional int64 processed_from_timestamp = 5 [default = -1];
}
|
||||
|
||||
// Next index: 7
message FrameSelectionCriterion {
  // Interval at which frames should be sampled; set to zero if sampling should
  // not be enforced (i.e. selection is performed w.r.t. other criteria).
  optional int32 sampling_rate = 1 [default = 0];

  // Bandwidth used during dynamic programming. The larger the bandwidth the
  // more accurate the result w.r.t. the specified sampling rate. Smaller
  // bandwidths bias the solution suboptimally to center around the mean
  // frame numbers of the sampling rate.
  // If in (0, 1), assumed to specify fraction of total number of input frames,
  // otherwise must be an integer.
  optional float bandwidth_frames = 2 [default = 50];

  // Search radius for dynamic programming (how many frames you are allowed to
  // search around the previous frame).
  optional int32 search_radius_frames = 3 [default = 1];

  // Allows one to specify custom solution selection criteria (i.e. different
  // way to choose the best row of the computed cost matrix).
  optional FrameSelectionSolutionEvaluatorType solution_evaluator = 4;

  // Outputs a fixed number of frames and automatically sets the appropriate
  // sampling rate. Set to 0 by default (i.e. not enabled).
  optional int32 max_output_frames = 5 [default = 0];
}
|
||||
|
||||
// Options for computing frame selection.
// TODO: Support multiple criteria if required. Currently uses only the
// first one.
message FrameSelectionOptions {
  // Selection criteria; only criterion(0) is currently used (see TODO above).
  repeated FrameSelectionCriterion criterion = 1;

  // FrameSelection buffers incoming CameraMotions for specified chunk size
  // and creates cost matrices upon reaching the limit.
  // TODO: Implement if necessary (currently nothing is cleared upon
  // reaching the limit).
  optional int32 chunk_size = 2 [default = 100];
}
|
||||
|
|
@ -0,0 +1,27 @@
|
|||
// Copyright 2019 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
syntax = "proto2";
|
||||
|
||||
package mediapipe;
|
||||
|
||||
// Extension point for options of concrete FrameSelectionSolutionEvaluator
// implementations.
message FrameSelectionSolutionEvaluatorOptions {
  extensions 20000 to max;
}
|
||||
|
||||
// Names the evaluator class to instantiate and carries its options.
message FrameSelectionSolutionEvaluatorType {
  // Class of type FrameSelectionSolution that computes the best row.
  optional string class_name = 1 [default = "FrameSelectionSolutionEvaluator"];
  optional FrameSelectionSolutionEvaluatorOptions options = 2;
}
|
||||
116
mediapipe/util/tracking/image_util.cc
Normal file
|
|
@ -0,0 +1,116 @@
|
|||
// Copyright 2019 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "mediapipe/util/tracking/image_util.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
|
||||
#include "mediapipe/framework/port/logging.h"
|
||||
#include "mediapipe/util/tracking/motion_models.h"
|
||||
#include "mediapipe/util/tracking/region_flow.h"
|
||||
|
||||
namespace mediapipe {
|
||||
|
||||
// Returns median of the L1 color distance between img_1 and img_2
|
||||
float FrameDifferenceMedian(const cv::Mat& img_1, const cv::Mat& img_2) {
|
||||
CHECK(img_1.size() == img_2.size());
|
||||
CHECK_EQ(img_1.channels(), img_2.channels());
|
||||
|
||||
std::vector<float> color_diffs;
|
||||
color_diffs.reserve(img_1.cols * img_1.rows);
|
||||
const int channels = img_1.channels();
|
||||
for (int j = 0; j < img_1.rows; ++j) {
|
||||
const uint8* src_1 = img_1.ptr<uint8>(j);
|
||||
const uint8* src_2 = img_2.ptr<uint8>(j);
|
||||
const int end_i = channels * img_1.cols;
|
||||
const float inverse = 1.0f / channels;
|
||||
for (int i = 0; i < end_i;) {
|
||||
float color_diff = 0.0f;
|
||||
for (int k = 0; k < channels; ++k, ++i) {
|
||||
color_diff +=
|
||||
std::abs(static_cast<int>(src_1[i]) - static_cast<int>(src_2[i]));
|
||||
}
|
||||
color_diffs.push_back(color_diff * inverse);
|
||||
}
|
||||
}
|
||||
|
||||
auto median = color_diffs.begin() + color_diffs.size() / 2;
|
||||
std::nth_element(color_diffs.begin(), median, color_diffs.end());
|
||||
return *median;
|
||||
}
|
||||
|
||||
void JetColoring(int steps, std::vector<Vector3_f>* color_map) {
|
||||
CHECK(color_map != nullptr);
|
||||
color_map->resize(steps);
|
||||
for (int i = 0; i < steps; ++i) {
|
||||
const float frac = 2.0f * (i * (1.0f / steps) - 0.5f);
|
||||
if (frac < -0.8f) {
|
||||
(*color_map)[i] = Vector3_f(0, 0, 0.6f + (frac + 1.0) * 2.0f) * 255.0f;
|
||||
} else if (frac >= -0.8f && frac < -0.25f) {
|
||||
(*color_map)[i] = Vector3_f(0, (frac + 0.8f) * 1.82f, 1.0f) * 255.0f;
|
||||
} else if (frac >= -0.25f && frac < 0.25f) {
|
||||
(*color_map)[i] =
|
||||
Vector3_f((frac + 0.25f) * 2.0f, 1.0f, 1.0 + (frac + 0.25f) * -2.0f) *
|
||||
255.0f;
|
||||
} else if (frac >= 0.25f && frac < 0.8f) {
|
||||
(*color_map)[i] =
|
||||
Vector3_f(1.0f, 1.0f + (frac - 0.25f) * -1.81f, 0.0f) * 255.0f;
|
||||
} else if (frac >= 0.8f) {
|
||||
(*color_map)[i] =
|
||||
Vector3_f(1.0f + (frac - 0.8f) * -2.0f, 0.0f, 0.0f) * 255.0f;
|
||||
} else {
|
||||
LOG(ERROR) << "Out of bound value. Should not occur.";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void RenderSaliency(const SalientPointFrame& salient_points,
|
||||
const cv::Scalar& line_color, int line_thickness,
|
||||
bool render_bounding_box, cv::Mat* image) {
|
||||
// Visualize salient points.
|
||||
for (const auto& point : salient_points.point()) {
|
||||
if (point.weight() > 0) {
|
||||
SalientPoint copy = point;
|
||||
ScaleSalientPoint(image->cols, image->rows, ©);
|
||||
Vector2_f pt(copy.norm_point_x(), copy.norm_point_y());
|
||||
|
||||
cv::ellipse(*image, cv::Point(pt.x(), pt.y()),
|
||||
cv::Size(copy.norm_major(), copy.norm_minor()),
|
||||
copy.angle() / M_PI * 180.0,
|
||||
0, // start angle
|
||||
360, // end angle
|
||||
line_color, line_thickness);
|
||||
|
||||
if (render_bounding_box) {
|
||||
std::vector<Vector2_f> ellipse_bounding_box;
|
||||
BoundingBoxFromEllipse(pt, copy.norm_major(), copy.norm_minor(),
|
||||
copy.angle(), &ellipse_bounding_box);
|
||||
|
||||
std::vector<cv::Point> corners;
|
||||
corners.reserve(4);
|
||||
for (const Vector2_f& corner : ellipse_bounding_box) {
|
||||
corners.emplace_back(cv::Point(corner.x(), corner.y()));
|
||||
}
|
||||
|
||||
for (int k = 0; k < 4; ++k) {
|
||||
cv::line(*image, corners[k], corners[(k + 1) % 4], line_color,
|
||||
line_thickness, CV_AA);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace mediapipe
|
||||
192
mediapipe/util/tracking/image_util.h
Normal file
|
|
@ -0,0 +1,192 @@
|
|||
// Copyright 2019 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef MEDIAPIPE_UTIL_TRACKING_IMAGE_UTIL_H_
|
||||
#define MEDIAPIPE_UTIL_TRACKING_IMAGE_UTIL_H_
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "mediapipe/framework/port/opencv_core_inc.h"
|
||||
#include "mediapipe/framework/port/opencv_imgproc_inc.h"
|
||||
#include "mediapipe/framework/port/vector.h"
|
||||
#include "mediapipe/util/tracking/motion_models.pb.h"
|
||||
#include "mediapipe/util/tracking/region_flow.pb.h"
|
||||
|
||||
namespace mediapipe {
|
||||
|
||||
// Returns median of the L1 color distance between img_1 and img_2.
// Images must agree in size and channel count.
float FrameDifferenceMedian(const cv::Mat& img_1, const cv::Mat& img_2);

// Matlab's jet color map (returned assuming RGB channel order in [0, 1]
// normalized intensity domain). For details: http://goo.gl/gmHKZ
// Returned as map with num_entries entries.
void JetColoring(int num_entries, std::vector<Vector3_f>* color_map);

// Draws a saliency point frame to a single frame.
// Optionally renders axis aligned bounding box for each SalientPointFrame.
void RenderSaliency(const SalientPointFrame& salient_points,
                    const cv::Scalar& line_color, int line_thickness,
                    bool render_bounding_box, cv::Mat* image);

// Templated CopyBorder methods for increased speed. In-place border copy
// for specified Mat of type T with channels. Passed matrix is assumed to be of
// full size, that is we copy the content at [border, cols - 2 * border] x
// [border, rows - 2 * border] to the full size.
template <typename T, int border, int channels>
void CopyMatBorder(cv::Mat* mat);

// Same as above for copying border only in X or Y.
template <typename T, int border, int channels>
void CopyMatBorderX(cv::Mat* mat);
template <typename T, int border, int channels>
void CopyMatBorderY(cv::Mat* mat);
|
||||
|
||||
template <typename T, int border, int channels>
void CopyMatBorder(cv::Mat* mat) {
  // Interior (valid) region dimensions; the surrounding `border` pixels are
  // filled by replicating interior content.
  const int width = mat->cols - 2 * border;
  const int height = mat->rows - 2 * border;

  // Maximum values we clamp at to avoid going out of bound small images.
  const int max_w = width - 1;
  const int max_h = height - 1;

  // Top rows.
  for (int r = 0; r < border; ++r) {
    // Source row: r-th interior row (clamped); destination: mirrored row
    // above the interior.
    const T* src_ptr =
        mat->ptr<T>(border + std::min(r, max_h)) + border * channels;
    T* dst_ptr = mat->ptr<T>(border - 1 - r);

    // Top left elems.
    for (int i = 0; i < border; ++i, dst_ptr += channels) {
      for (int j = 0; j < channels; ++j) {
        dst_ptr[j] = src_ptr[std::min(max_w, border - 1 - i) * channels + j];
      }
    }

    // src and dst should point to same column from here.
    DCHECK_EQ(0, (src_ptr - dst_ptr) * sizeof(T) % mat->step[0]);

    // Top row copy.
    memcpy(dst_ptr, src_ptr, width * channels * sizeof(dst_ptr[0]));
    src_ptr += width * channels;  // Points one behind end.
    dst_ptr += width * channels;

    // Top right elems.
    for (int i = 0; i < border; ++i, dst_ptr += channels) {
      // Step src back one pixel per column while still inside the interior.
      if (i <= max_w) {
        src_ptr -= channels;
      }
      for (int j = 0; j < channels; ++j) {
        dst_ptr[j] = src_ptr[j];
      }
    }
  }

  // Left and right border.
  for (int r = 0; r < height; ++r) {
    // Get pointers to left most and right most column within image.
    T* left_ptr = mat->ptr<T>(r + border) + border * channels;
    T* right_ptr = left_ptr + (width - 1) * channels;
    for (int i = 0; i < border; ++i) {
      for (int j = 0; j < channels; ++j) {
        left_ptr[-(i + 1) * channels + j] =
            left_ptr[std::min(max_w, i) * channels + j];
        right_ptr[(i + 1) * channels + j] =
            right_ptr[-std::min(max_w, i) * channels + j];
      }
    }
  }

  // Bottom rows (mirrors the top-row logic downwards).
  for (int r = 0; r < border; ++r) {
    const T* src_ptr = mat->ptr<T>(border + height - 1 - std::min(r, max_h)) +
                       border * channels;
    T* dst_ptr = mat->ptr<T>(border + height + r);

    // Bottom left elems.
    for (int i = 0; i < border; ++i, dst_ptr += channels) {
      for (int j = 0; j < channels; ++j) {
        dst_ptr[j] = src_ptr[(border - 1 - std::min(max_w, i)) * channels + j];
      }
    }

    // src and dst should point to same column from here.
    DCHECK_EQ(0, (dst_ptr - src_ptr) * sizeof(T) % mat->step[0]);
    memcpy(dst_ptr, src_ptr, width * channels * sizeof(dst_ptr[0]));
    src_ptr += width * channels;  // Points one behind the end.
    dst_ptr += width * channels;

    // Bottom right elems.
    for (int i = 0; i < border; ++i, dst_ptr += channels) {
      if (i <= max_w) {
        src_ptr -= channels;
      }
      for (int j = 0; j < channels; ++j) {
        dst_ptr[j] = src_ptr[j];
      }
    }
  }
}
|
||||
|
||||
template <typename T, int border, int channels>
void CopyMatBorderX(cv::Mat* mat) {
  // Copies only the left/right border columns; see CopyMatBorder for the
  // full 2D variant.
  const int width = mat->cols - 2 * border;
  const int height = mat->rows - 2 * border;

  // Maximum values we clamp at to avoid going out of bound small images.
  const int max_w = width - 1;

  // Left and right border.
  for (int r = 0; r < height; ++r) {
    // Pointers to left most and right most interior column of this row.
    T* left_ptr = mat->ptr<T>(r + border) + border * channels;
    T* right_ptr = left_ptr + (width - 1) * channels;
    for (int i = 0; i < border; ++i) {
      for (int j = 0; j < channels; ++j) {
        left_ptr[-(i + 1) * channels + j] =
            left_ptr[std::min(i, max_w) * channels + j];
        right_ptr[(i + 1) * channels + j] =
            right_ptr[-std::min(max_w, i) * channels + j];
      }
    }
  }
}
|
||||
|
||||
template <typename T, int border, int channels>
void CopyMatBorderY(cv::Mat* mat) {
  // Copies only the top/bottom border rows; see CopyMatBorder for the
  // full 2D variant.
  const int width = mat->cols - 2 * border;
  const int height = mat->rows - 2 * border;

  // Maximum values we clamp at to avoid going out of bound small images.
  const int max_h = height - 1;

  // Top rows: replicate the r-th interior row (clamped) upwards.
  for (int r = 0; r < border; ++r) {
    const T* src_ptr =
        mat->ptr<T>(border + std::min(max_h, r)) + border * channels;
    T* dst_ptr = mat->ptr<T>(border - 1 - r) + border * channels;
    memcpy(dst_ptr, src_ptr, width * channels * sizeof(dst_ptr[0]));
  }

  // Bottom rows: replicate the r-th-from-last interior row (clamped)
  // downwards.
  for (int r = 0; r < border; ++r) {
    const T* src_ptr = mat->ptr<T>(border + height - 1 - std::min(max_h, r)) +
                       border * channels;
    T* dst_ptr = mat->ptr<T>(border + height + r) + border * channels;
    memcpy(dst_ptr, src_ptr, width * channels * sizeof(dst_ptr[0]));
  }
}
|
||||
|
||||
} // namespace mediapipe
|
||||
|
||||
#endif // MEDIAPIPE_UTIL_TRACKING_IMAGE_UTIL_H_
|
||||
93
mediapipe/util/tracking/image_util_test.cc
Normal file
|
|
@ -0,0 +1,93 @@
|
|||
// Copyright 2019 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "mediapipe/util/tracking/image_util.h"
|
||||
|
||||
#include "mediapipe/framework/port/gtest.h"
|
||||
|
||||
namespace mediapipe {
|
||||
|
||||
// Fills the interior of *full_size with a per-column pattern, runs
// CopyMatBorder<float, border, 3>, then verifies that the interior is
// untouched and the border replicates interior content as expected.
template <int border>
void TestCopyBorder(cv::Mat* full_size) {
  full_size->setTo(0);

  const int width = full_size->cols - 2 * border;
  // Fill center with per pixel value [1, 2, 3].
  // Each interior pixel gets its column offset plus a per-channel fraction.
  for (int y = border; y < full_size->rows - border; ++y) {
    float* row_ptr = full_size->ptr<float>(y);
    for (int x = border; x < full_size->cols - border; ++x) {
      int value = x - border;
      row_ptr[3 * x] = value + 0.1f;
      row_ptr[3 * x + 1] = value + 0.2f;
      row_ptr[3 * x + 2] = value + 0.3f;
    }
  }

  CopyMatBorder<float, border, 3>(full_size);

  // Main part should not be modified.
  for (int y = border; y < full_size->rows - border; ++y) {
    float* row_ptr = full_size->ptr<float>(y);
    for (int x = border; x < full_size->cols - border; ++x) {
      int value = x - border;
      EXPECT_EQ(row_ptr[3 * x], value + 0.1f);
      EXPECT_EQ(row_ptr[3 * x + 1], value + 0.2f);
      EXPECT_EQ(row_ptr[3 * x + 2], value + 0.3f);
    }
  }

  // Full-frame check including the border.
  for (int y = 0; y < full_size->rows; ++y) {
    const float* row_ptr = full_size->ptr<float>(y);
    for (int x = 0; x < full_size->cols; ++x) {
      // Memory copy, expect exact floating point equality.
      int value = x - border;
      if (x < border) {
        // Cap to valid area.
        value = std::min(width - 1, (border - 1) - x);
      } else if (x >= full_size->cols - border) {
        // Last column minus distance from frame boundary, capped
        // to valid area.
        value = std::max(0, width - 1 - (x - full_size->cols + border));
      }
      EXPECT_EQ(row_ptr[3 * x], value + 0.1f);
      EXPECT_EQ(row_ptr[3 * x + 1], value + 0.2f);
      EXPECT_EQ(row_ptr[3 * x + 2], value + 0.3f);
    }
  }
}
|
||||
|
||||
// Exercises CopyMatBorder for border widths 1..5 on a frame much larger than
// the border.
TEST(PushPullFilteringTest, CopyBorder) {
  cv::Mat full_size(100, 50, CV_32FC3);
  TestCopyBorder<1>(&full_size);
  TestCopyBorder<2>(&full_size);
  TestCopyBorder<3>(&full_size);
  TestCopyBorder<4>(&full_size);
  TestCopyBorder<5>(&full_size);
}
|
||||
|
||||
// Exercises CopyMatBorder on frames so small that the interior is a single
// pixel (rows == cols == 2 * border + 1), hitting the clamping paths.
TEST(PushPullFilteringTest, CopyBorderSmallFrame) {
  cv::Mat full_size(3, 3, CV_32FC3);
  TestCopyBorder<1>(&full_size);

  full_size.create(5, 5, CV_32FC3);
  TestCopyBorder<2>(&full_size);

  full_size.create(7, 7, CV_32FC3);
  TestCopyBorder<3>(&full_size);

  full_size.create(9, 9, CV_32FC3);
  TestCopyBorder<4>(&full_size);
}
|
||||
|
||||
} // namespace mediapipe
|
||||
22
mediapipe/util/tracking/measure_time.cc
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
// Copyright 2019 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "mediapipe/util/tracking/measure_time.h"
|
||||
|
||||
// Compile-time switch: build with --copt=-DSET_FLAG_MEASURE_TIME to enable
// time measurements globally (see measure_time.h).
#ifdef SET_FLAG_MEASURE_TIME
// If set to true, outputs time measurements to LOG(INFO).
bool flags_measure_time = true;
#else
bool flags_measure_time = false;
#endif
|
||||
169
mediapipe/util/tracking/measure_time.h
Normal file
|
|
@ -0,0 +1,169 @@
|
|||
// Copyright 2019 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Helper class and macro to take time measurements within current scope.
|
||||
// Takes time measurement within current scope. Outputs to LOG(INFO) if
|
||||
// flag --measure_time is set or if build flag SET_FLAG_MEASURE_TIME is
|
||||
// defined (add --copt=-DSET_FLAG_MEASURE_TIME to your build command).
|
||||
// Additionally you can limit time measurements to specific files,
|
||||
// via the flag
|
||||
// --measure_time_filter="<comma separated list of substrings of file names>"
|
||||
// Example:
|
||||
// { // Scope to be measured
|
||||
// MEASURE_TIME << "Some additional logging and answers : " << 42;
|
||||
// ... // instructions.
|
||||
// }
|
||||
|
||||
#ifndef MEDIAPIPE_UTIL_TRACKING_MEASURE_TIME_H_
|
||||
#define MEDIAPIPE_UTIL_TRACKING_MEASURE_TIME_H_
|
||||
|
||||
#include <memory>
|
||||
#include <sstream>
|
||||
|
||||
#include "absl/strings/str_split.h"
|
||||
#include "absl/strings/string_view.h"
|
||||
#include "absl/synchronization/mutex.h"
|
||||
#include "absl/time/clock.h"
|
||||
#include "mediapipe/framework/port/integral_types.h"
|
||||
#include "mediapipe/framework/port/logging.h"
|
||||
|
||||
// Defined in measure_time.cc; true when built with SET_FLAG_MEASURE_TIME.
extern bool flags_measure_time;

namespace mediapipe {
class MeasureTimeFilter;
}  // namespace mediapipe

// Times the enclosing scope and logs on scope exit when measurement is
// enabled. Supports streaming extra context: MEASURE_TIME << "label";
#define MEASURE_TIME \
  MEASURE_TIME_PRE_IMPL(flags_measure_time, __LINE__, __FILE__)

// Add level of indirection to trigger __LINE__ macro expansion.
#define MEASURE_TIME_PRE_IMPL(show_output, line, file) \
  MEASURE_TIME_IMPL(show_output, line, file)

// Creates a per-call-site static Accumulator (leaked intentionally; lives for
// the process lifetime) and a scoped timer that logs on destruction. The
// trailing stream() call lets the caller append via operator<<.
#define MEASURE_TIME_IMPL(show_output, line, file) \
  std::unique_ptr<mediapipe::ScopedWallTimer> scoped_wall_timer_##line; \
  const bool activated##line = show_output; \
  if (activated##line) { \
    static mediapipe::ScopedWallTimer::Accumulator* \
        scoped_wall_timer_accum_##line = \
            new mediapipe::ScopedWallTimer::Accumulator; \
    scoped_wall_timer_##line.reset(new mediapipe::ScopedWallTimer( \
        file, line, show_output, scoped_wall_timer_accum_##line)); \
  } \
  if (activated##line) /* NOLINT */ \
    scoped_wall_timer_##line->stream()
|
||||
|
||||
namespace mediapipe {
|
||||
|
||||
// RAII wall-clock timer: measures time from construction to destruction and
// logs current / average / call count to LOG(INFO) on destruction when
// show_output is true. Intended to be used via the MEASURE_TIME macro.
class ScopedWallTimer {
 public:
  // Helper class for accumulating time across multiple calls to a scoped wall
  // timer. Thread-safe (except on ANDROID, for which Mutex is mocked out).
  class Accumulator {
   public:
    Accumulator() : accum_time_(0.0f), count_(0) {}
    Accumulator(const Accumulator&) = delete;
    Accumulator& operator=(const Accumulator&) = delete;

    // Accumulates passed_time into accumulators. Returns total time and number
    // of calls made.
    void Accumulate(double passed_time, double* accum_time, int* count) {
      absl::MutexLock lock(&mutex_);
      accum_time_ += passed_time;
      ++count_;
      *accum_time = accum_time_;
      *count = count_;
    }

   private:
    double accum_time_;  // Total accumulated time across calls.
    int count_;          // Number of Accumulate() calls.
    absl::Mutex mutex_;  // Guards accum_time_ and count_.
  };

  // Creates a new ScopedWallTimer for current file and line. LogMessage is only
  // initialized if show_output is set to true.
  ScopedWallTimer(const char* file, int line, bool show_output,
                  Accumulator* accumulator)
      : file_(file),
        line_(line),
        show_output_(show_output),
        accumulator_(accumulator) {
    if (show_output_) {
      CHECK(accumulator_);
      start_time_ = GetWallTime();
    }
  }
  ScopedWallTimer(const ScopedWallTimer&) = delete;
  ScopedWallTimer& operator=(const ScopedWallTimer&) = delete;

  // Destructor measures time and outputs to stream.
  // NOTE(review): GetWallTime() is in nanoseconds, so `* 1e-6` yields the
  // milliseconds printed in the message.
  ~ScopedWallTimer() {
    if (show_output_) {
      double passed_time = GetWallTime() - start_time_;
      double accum_time = 0.0;
      int count = 0;
      accumulator_->Accumulate(passed_time, &accum_time, &count);
      LOG(INFO) << stream_.str() << " TIMES: [Curr: " << passed_time * 1e-6
                << " ms, "
                << "Avg: " << accum_time * 1e-6 / std::max(1, count) << " ms, "
                << count << " calls]";
    }
  }

  // Stream for caller-supplied context that is prepended to the log message.
  std::ostream& stream() { return stream_; }

 private:
  const char* file_;  // Call-site file name.
  int line_;          // Call-site line number.
  bool show_output_;  // When false, construction/destruction are no-ops.
  // We need to buffer information passed via stream operator <<
  // While LogMessage is adequate for this, no good equivalent exists on
  // Android, so we employ a portable ostringstream for buffering.
  std::ostringstream stream_;
  int64 start_time_;  // Construction time; only set when show_output_.
  Accumulator* accumulator_;  // Not owned; shared across call-site calls.

  // Returns the current wall time in nanoseconds.
  int64 GetWallTime() { return absl::GetCurrentTimeNanos(); }
};
|
||||
|
||||
class MeasureTimeFilter {
|
||||
public:
|
||||
static const MeasureTimeFilter* get() {
|
||||
static MeasureTimeFilter instance;
|
||||
return &instance;
|
||||
}
|
||||
MeasureTimeFilter(const MeasureTimeFilter&) = delete;
|
||||
MeasureTimeFilter& operator=(const MeasureTimeFilter&) = delete;
|
||||
|
||||
bool Matches(const std::string& item) const {
|
||||
for (const std::string& match_item : match_items_) {
|
||||
if (item.find(match_item) != std::string::npos) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
private:
|
||||
explicit MeasureTimeFilter() {}
|
||||
explicit MeasureTimeFilter(const std::string& filter) {
|
||||
match_items_ = absl::StrSplit(filter, absl::ByChar(','));
|
||||
}
|
||||
std::vector<std::string> match_items_;
|
||||
};
|
||||
|
||||
} // namespace mediapipe
|
||||
|
||||
#endif // MEDIAPIPE_UTIL_TRACKING_MEASURE_TIME_H_
|
||||
850
mediapipe/util/tracking/motion_analysis.cc
Normal file
|
|
@ -0,0 +1,850 @@
|
|||
// Copyright 2019 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "mediapipe/util/tracking/motion_analysis.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <cstring>
|
||||
#include <deque>
|
||||
#include <memory>
|
||||
|
||||
#include "absl/strings/str_format.h"
|
||||
#include "mediapipe/framework/port/integral_types.h"
|
||||
#include "mediapipe/framework/port/logging.h"
|
||||
#include "mediapipe/framework/port/vector.h"
|
||||
#include "mediapipe/util/tracking/camera_motion.h"
|
||||
#include "mediapipe/util/tracking/camera_motion.pb.h"
|
||||
#include "mediapipe/util/tracking/image_util.h"
|
||||
#include "mediapipe/util/tracking/measure_time.h"
|
||||
#include "mediapipe/util/tracking/motion_saliency.pb.h"
|
||||
#include "mediapipe/util/tracking/region_flow.h"
|
||||
#include "mediapipe/util/tracking/region_flow.pb.h"
|
||||
#include "mediapipe/util/tracking/region_flow_computation.h"
|
||||
#include "mediapipe/util/tracking/region_flow_computation.pb.h"
|
||||
#include "mediapipe/util/tracking/region_flow_visualization.h"
|
||||
|
||||
namespace mediapipe {
|
||||
|
||||
// Constructs the motion-analysis pipeline for frames of the given size.
// Policy defaults are applied first (InitPolicyOptions), then the caller's
// options are merged back on top so explicit overrides win over the policy.
MotionAnalysis::MotionAnalysis(const MotionAnalysisOptions& options,
                               int frame_width, int frame_height)
    : options_(options),
      frame_width_(frame_width),
      frame_height_(frame_height) {
  // Init options by policy.
  InitPolicyOptions();
  // Merge back in any overridden options.
  options_.MergeFrom(options);

  // Core components: feature tracking and camera-motion estimation.
  region_flow_computation_.reset(new RegionFlowComputation(
      options_.flow_options(), frame_width_, frame_height_));
  motion_estimation_.reset(new MotionEstimation(options_.motion_options(),
                                                frame_width_, frame_height_));

  if (options_.compute_motion_saliency()) {
    motion_saliency_.reset(new MotionSaliency(options_.saliency_options(),
                                              frame_width_, frame_height_));

    // Compute overlap needed between clips if filtering or smoothing of
    // saliency is requested.
    if (options_.select_saliency_inliers()) {
      // 1.65 sigmas in each direction = 90% of variance is captured.
      overlap_size_ = std::max<int>(
          overlap_size_, options_.saliency_options().selection_frame_radius());
    }

    if (options_.filter_saliency()) {
      overlap_size_ = std::max<int>(
          overlap_size_,
          options_.saliency_options().filtering_sigma_time() * 1.65f);
    }
  }

  long_feature_stream_.reset(new LongFeatureStream);

  frame_num_ = 0;

  // Determine if feature descriptors need to be computed.
  // Required for irls smoothing, overlay detection and mixture homographies.
  const bool compute_mixtures =
      options_.motion_options().mix_homography_estimation() !=
      MotionEstimationOptions::ESTIMATION_HOMOG_MIX_NONE;

  const bool use_spatial_bias =
      options_.motion_options().estimation_policy() ==
          MotionEstimationOptions::TEMPORAL_LONG_FEATURE_BIAS &&
      options_.motion_options().long_feature_bias_options().use_spatial_bias();

  compute_feature_descriptors_ =
      options_.post_irls_smoothing() ||
      options_.motion_options().overlay_detection() || compute_mixtures ||
      use_spatial_bias;

  if (compute_feature_descriptors_) {
    CHECK_EQ(RegionFlowComputationOptions::FORMAT_RGB,
             options_.flow_options().image_format())
        << "Feature descriptors only support RGB currently.";
    // Previous frame is kept so match descriptors can be computed across
    // consecutive frames (see AddFrameGeneric).
    prev_frame_.reset(new cv::Mat(frame_height_, frame_width_, CV_8UC3));
  }

  // Setup streaming buffer. By default we buffer features and motion.
  // If saliency is computed, also buffer saliency and filtered/smoothed
  // output_saliency.
  std::vector<TaggedType> data_config{
      TaggedPointerType<RegionFlowFeatureList>("features"),
      TaggedPointerType<CameraMotion>("motion")};
  std::vector<TaggedType> data_config_saliency = data_config;
  data_config_saliency.push_back(
      TaggedPointerType<SalientPointFrame>("saliency"));
  data_config_saliency.push_back(
      TaggedPointerType<SalientPointFrame>("output_saliency"));

  // Store twice the overlap.
  buffer_.reset(new StreamingBuffer(
      options_.compute_motion_saliency() ? data_config_saliency : data_config,
      2 * overlap_size_));
}
|
||||
|
||||
// Applies preset option bundles selected via options_.analysis_policy().
// Each policy overwrites tracking/motion/flow sub-options with values tuned
// for its use case; the constructor later re-merges the caller's options so
// user-specified fields still take precedence. LEGACY leaves all defaults
// untouched.
void MotionAnalysis::InitPolicyOptions() {
  auto* flow_options = options_.mutable_flow_options();
  auto* tracking_options = flow_options->mutable_tracking_options();
  auto* motion_options = options_.mutable_motion_options();
  auto* feature_bias_options =
      motion_options->mutable_long_feature_bias_options();
  auto* translation_bounds =
      motion_options->mutable_stable_translation_bounds();
  auto* similarity_bounds = motion_options->mutable_stable_similarity_bounds();
  auto* homography_bounds = motion_options->mutable_stable_homography_bounds();

  switch (options_.analysis_policy()) {
    case MotionAnalysisOptions::ANALYSIS_POLICY_LEGACY:
      // Keep proto defaults unchanged.
      break;

    case MotionAnalysisOptions::ANALYSIS_POLICY_VIDEO:
      // Long track settings. Temporally consistent.
      options_.set_estimation_clip_size(64);

      tracking_options->set_internal_tracking_direction(
          TrackingOptions::FORWARD);
      tracking_options->set_tracking_policy(
          TrackingOptions::POLICY_LONG_TRACKS);

      feature_bias_options->set_use_spatial_bias(false);
      feature_bias_options->set_seed_priors_from_bias(true);

      similarity_bounds->set_min_inlier_fraction(0.15f);
      homography_bounds->set_min_inlier_coverage(0.25f);
      flow_options->set_verify_long_features(false);

      // Better features.
      tracking_options->set_adaptive_features_levels(3);
      tracking_options->set_use_cv_tracking_algorithm(true);

      // Speed.
      flow_options->set_downsample_mode(
          RegionFlowComputationOptions::DOWNSAMPLE_BY_SCHEDULE);

      motion_options->set_estimation_policy(
          MotionEstimationOptions::TEMPORAL_LONG_FEATURE_BIAS);
      motion_options->set_feature_density_normalization(true);
      motion_options->set_overlay_detection(true);
      motion_options->set_domain_limited_irls_scaling(true);
      motion_options->set_irls_weights_preinitialized(true);
      motion_options->mutable_irls_initialization()->set_activated(true);
      motion_options->mutable_long_feature_initialization()->set_activated(
          true);

      break;

    case MotionAnalysisOptions::ANALYSIS_POLICY_VIDEO_MOBILE:
      // Long track settings. Temporally consistent.
      options_.set_estimation_clip_size(32);
      tracking_options->set_internal_tracking_direction(
          TrackingOptions::FORWARD);
      tracking_options->set_tracking_policy(
          TrackingOptions::POLICY_LONG_TRACKS);

      motion_options->set_estimation_policy(
          MotionEstimationOptions::TEMPORAL_LONG_FEATURE_BIAS);
      motion_options->set_feature_density_normalization(true);
      motion_options->set_domain_limited_irls_scaling(true);
      motion_options->mutable_irls_initialization()->set_activated(true);
      motion_options->mutable_long_feature_initialization()->set_activated(
          true);

      feature_bias_options->set_use_spatial_bias(false);
      feature_bias_options->set_seed_priors_from_bias(true);

      similarity_bounds->set_inlier_threshold(2.0);
      similarity_bounds->set_min_inlier_fraction(0.15f);

      // Speed for mobile.
      tracking_options->set_tracking_window_size(8);
      tracking_options->set_tracking_iterations(8);
      tracking_options->set_fractional_tracking_distance(0.1);
      tracking_options->set_reuse_features_max_frame_distance(15);
      tracking_options->set_max_features(500);
      tracking_options->set_use_cv_tracking_algorithm(true);

      flow_options->set_downsample_mode(
          RegionFlowComputationOptions::DOWNSAMPLE_TO_MIN_SIZE);
      flow_options->set_round_downsample_factor(true);
      flow_options->set_downsampling_size(256);
      flow_options->set_pre_blur_sigma(0);
      flow_options->set_top_inlier_sets(1);
      flow_options->set_ransac_rounds_per_region(10);
      flow_options->mutable_visual_consistency_options()
          ->set_compute_consistency(false);
      flow_options->set_verify_long_features(false);

      // Avoid resetting features if a large amount is flagged.
      // In that case verify feature tracks to avoid perturbing the estimation.
      flow_options->set_verify_long_feature_acceleration(true);
      flow_options->set_verification_distance(5.0);
      flow_options->set_verify_long_feature_trigger_ratio(0.1);

      motion_options->set_use_exact_homography_estimation(false);
      motion_options->set_use_highest_accuracy_for_normal_equations(false);

      break;

    case MotionAnalysisOptions::ANALYSIS_POLICY_CAMERA_MOBILE:
      // Long track settings.
      tracking_options->set_internal_tracking_direction(
          TrackingOptions::FORWARD);
      tracking_options->set_tracking_policy(
          TrackingOptions::POLICY_LONG_TRACKS);

      motion_options->set_estimation_policy(
          MotionEstimationOptions::TEMPORAL_IRLS_MASK);
      motion_options->set_feature_density_normalization(true);
      motion_options->set_domain_limited_irls_scaling(true);
      motion_options->mutable_irls_initialization()->set_activated(true);
      motion_options->mutable_irls_initialization()->set_rounds(50);

      feature_bias_options->set_use_spatial_bias(false);
      feature_bias_options->set_seed_priors_from_bias(true);

      similarity_bounds->set_inlier_threshold(2.0);
      similarity_bounds->set_min_inlier_fraction(0.15f);

      // Speed for mobile.
      tracking_options->set_tracking_window_size(8);
      tracking_options->set_tracking_iterations(8);
      tracking_options->set_fractional_tracking_distance(0.1);
      tracking_options->set_reuse_features_max_frame_distance(10);
      tracking_options->set_reuse_features_min_survived_frac(0.6);
      tracking_options->set_max_features(240);
      tracking_options->set_adaptive_tracking_distance(true);
      tracking_options->set_use_cv_tracking_algorithm(true);

      // Assumes downsampled input.
      flow_options->set_pre_blur_sigma(0);
      flow_options->set_top_inlier_sets(1);
      flow_options->set_ransac_rounds_per_region(10);
      flow_options->mutable_visual_consistency_options()
          ->set_compute_consistency(false);
      flow_options->set_verify_long_features(false);

      motion_options->set_use_exact_homography_estimation(false);
      motion_options->set_use_highest_accuracy_for_normal_equations(false);

      // Low latency.
      options_.set_estimation_clip_size(1);
      break;

    case MotionAnalysisOptions::ANALYSIS_POLICY_HYPERLAPSE:
      // Long track settings. Temporally consistent.
      options_.set_estimation_clip_size(64);

      tracking_options->set_internal_tracking_direction(
          TrackingOptions::FORWARD);
      tracking_options->set_tracking_policy(
          TrackingOptions::POLICY_LONG_TRACKS);

      feature_bias_options->set_use_spatial_bias(false);
      feature_bias_options->set_seed_priors_from_bias(true);

      // Relaxed stability bounds.
      translation_bounds->set_max_motion_stdev(0.15);
      translation_bounds->set_max_acceleration(60.0);
      translation_bounds->set_frac_max_motion_magnitude(0.25);

      similarity_bounds->set_min_inlier_fraction(0.02f);
      similarity_bounds->set_min_inliers(5);
      similarity_bounds->set_lower_scale(0.5);
      similarity_bounds->set_upper_scale(2.0);
      similarity_bounds->set_limit_rotation(0.5);

      homography_bounds->set_lower_scale(0.5);
      homography_bounds->set_upper_scale(2.0);
      homography_bounds->set_limit_rotation(0.5);
      homography_bounds->set_limit_perspective(0.004);
      homography_bounds->set_min_inlier_coverage(0.1);

      // Relax constraining of homographies.
      motion_options->set_lin_sim_inlier_threshold(0.02);
      // Anticipate much higher registration errors.
      motion_options->set_irls_motion_magnitude_fraction(0.2);

      flow_options->set_verify_long_features(false);

      // Better features.
      tracking_options->set_adaptive_features_levels(3);

      // Speed.
      tracking_options->set_use_cv_tracking_algorithm(true);
      flow_options->set_downsample_mode(
          RegionFlowComputationOptions::DOWNSAMPLE_BY_SCHEDULE);

      // Less quality features.
      flow_options->set_absolute_inlier_error_threshold(4);
      flow_options->set_relative_inlier_error_threshold(0.2);

      motion_options->set_estimation_policy(
          MotionEstimationOptions::TEMPORAL_LONG_FEATURE_BIAS);
      motion_options->set_feature_density_normalization(true);

      // TODO: This needs to be activated and then tighten relaxed
      // thresholds above again.
      // motion_options->set_domain_limited_irls_scaling(true);

      motion_options->set_irls_weights_preinitialized(true);
      motion_options->mutable_irls_initialization()->set_activated(true);

      // More weight towards long feature tracks.
      motion_options->mutable_long_feature_initialization()->set_activated(
          true);
      motion_options->mutable_long_feature_initialization()
          ->set_min_length_percentile(0.975);
      motion_options->mutable_long_feature_initialization()
          ->set_upweight_multiplier(50);

      // 2nd pass, no need to do overlay again.
      motion_options->set_overlay_detection(false);

      break;
  }
}
|
||||
|
||||
// Adds a frame without any prior motion estimate. Convenience overload that
// forwards to AddFrameWithSeed using a default-constructed (identity-like)
// Homography as the seed transform. Returns false on tracking failure.
bool MotionAnalysis::AddFrame(const cv::Mat& frame, int64 timestamp_usec,
                              RegionFlowFeatureList* feature_list) {
  // No prior camera transform is assumed for this frame.
  const Homography no_seed;
  return AddFrameWithSeed(frame, timestamp_usec, no_seed, feature_list);
}
|
||||
|
||||
// Adds a frame seeded with an initial transform estimate. Thin forwarding
// layer over AddFrameGeneric with the optional hooks (rejection transform,
// external features, feature modifier) all disabled. Returns false on
// tracking failure.
bool MotionAnalysis::AddFrameWithSeed(const cv::Mat& frame,
                                      int64 timestamp_usec,
                                      const Homography& initial_transform,
                                      RegionFlowFeatureList* feature_list) {
  return AddFrameGeneric(frame, timestamp_usec, initial_transform,
                         /*rejection_transform=*/nullptr,
                         /*external_features=*/nullptr,
                         /*modify_features=*/nullptr, feature_list);
}
|
||||
|
||||
// Fully general frame-ingestion entry point: runs region-flow tracking on
// `frame`, retrieves the resulting feature list, optionally merges external
// features, optionally filters features against a rejection transform, and
// buffers the result for later motion estimation in GetResults().
// Returns false if tracking or feature retrieval fails.
bool MotionAnalysis::AddFrameGeneric(
    const cv::Mat& frame, int64 timestamp_usec,
    const Homography& initial_transform, const Homography* rejection_transform,
    const RegionFlowFeatureList* external_features,
    std::function<void(RegionFlowFeatureList*)>* modify_features,
    RegionFlowFeatureList* output_feature_list) {
  // Don't check input sizes here, RegionFlowComputation does that based
  // on its internal options.
  CHECK(feature_computation_) << "Calls to AddFrame* can NOT be mixed "
                              << "with AddFeatures";

  // Compute RegionFlow.
  {
    MEASURE_TIME << "CALL RegionFlowComputation::AddImage";
    if (!region_flow_computation_->AddImageWithSeed(frame, timestamp_usec,
                                                    initial_transform)) {
      LOG(ERROR) << "Error while computing region flow.";
      return false;
    }
  }

  // Buffer features.
  std::unique_ptr<RegionFlowFeatureList> feature_list;
  {
    MEASURE_TIME << "CALL RegionFlowComputation::RetrieveRegionFlowFeatureList";
    // Match descriptors need a previous frame, so skip them on frame 0.
    const bool compute_feature_match_descriptors =
        compute_feature_descriptors_ && frame_num_ > 0;

    // Determine which region flow should be computed in case multi frame
    // tracking has been requested.
    int max_track_index = 0;
    if (options_.flow_options().tracking_options().tracking_policy() ==
        TrackingOptions::POLICY_MULTI_FRAME) {
      max_track_index = std::min(
          options_.flow_options().tracking_options().multi_frames_to_track() -
              1,
          options_.track_index());
    }

    feature_list.reset(
        region_flow_computation_->RetrieveMultiRegionFlowFeatureList(
            std::min(std::max(0, frame_num_ - 1), max_track_index),
            compute_feature_descriptors_, compute_feature_match_descriptors,
            &frame,
            compute_feature_match_descriptors ? prev_frame_.get() : nullptr));

    if (feature_list == nullptr) {
      LOG(ERROR) << "Error retrieving feature list.";
      return false;
    }
  }

  if (external_features) {
    // Shift external track ids into a disjoint range so they cannot collide
    // with ids produced by the internal tracker.
    constexpr int kTrackIdShift = 1 << 20;
    for (const auto& external_feat : external_features->feature()) {
      auto* new_feat = feature_list->add_feature();
      *new_feat = external_feat;
      if (new_feat->track_id() >= 0) {
        new_feat->set_track_id(new_feat->track_id() + kTrackIdShift);
      }
    }
  }

  if (rejection_transform) {
    // Keep only features whose match location is consistent with the
    // rejection transform (within rejection_transform_threshold).
    RegionFlowFeatureList tmp_list;
    tmp_list.mutable_feature()->Swap(feature_list->mutable_feature());

    for (const auto& feature : tmp_list.feature()) {
      const Vector2_f diff =
          TransformPoint(*rejection_transform, FeatureLocation(feature)) -
          FeatureMatchLocation(feature);
      if (diff.Norm() < options_.rejection_transform_threshold()) {
        *feature_list->add_feature() = feature;
      }
    }
  }

  // NOTE: the caller's copy is taken before modify_features runs, so
  // output_feature_list does not reflect modifications.
  if (output_feature_list) {
    *output_feature_list = *feature_list;
  }

  if (modify_features) {
    (*modify_features)(feature_list.get());
  }

  buffer_->EmplaceDatum("features", feature_list.release());

  // Store frame for next call.
  if (compute_feature_descriptors_) {
    frame.copyTo(*prev_frame_);
  }

  ++frame_num_;

  return true;
}
|
||||
|
||||
// Buffers an externally computed feature list instead of tracking a frame.
// Once called, AddFrame*-style internal tracking must not be used for this
// instance (flag below enforces that in AddFrameGeneric).
void MotionAnalysis::AddFeatures(const RegionFlowFeatureList& features) {
  feature_computation_ = false;
  ++frame_num_;
  // Buffer a deep copy; the streaming buffer takes ownership.
  buffer_->EmplaceDatum("features", new RegionFlowFeatureList(features));
}
|
||||
|
||||
// Buffers a precomputed (features, motion) pair, bypassing both internal
// tracking and motion estimation. Requires that features and motions have
// been queued in lock-step (checked below), i.e. this call cannot be mixed
// with AddFrame*/AddFeatures.
void MotionAnalysis::EnqueueFeaturesAndMotions(
    const RegionFlowFeatureList& features, const CameraMotion& motion) {
  feature_computation_ = false;
  CHECK(buffer_->HaveEqualSize({"motion", "features"}))
      << "Can not be mixed with other Add* calls";
  // Buffer deep copies; the streaming buffer takes ownership.
  buffer_->EmplaceDatum("features", new RegionFlowFeatureList(features));
  buffer_->EmplaceDatum("motion", new CameraMotion(motion));
}
|
||||
|
||||
// Exposes the tracker's grayscale version of the most recent frame.
// Pure delegation to RegionFlowComputation.
cv::Mat MotionAnalysis::GetGrayscaleFrameFromResults() {
  cv::Mat grayscale_frame =
      region_flow_computation_->GetGrayscaleFrameFromResults();
  return grayscale_frame;
}
|
||||
|
||||
// Drains buffered results: estimates camera motions for any newly buffered
// feature lists (in parallel), optionally computes saliency, then emits
// completed frames via OutputResults. Returns the number of frames output.
// Unless `flush` is set, waits until at least estimation_clip_size() new
// feature lists are buffered before doing any work (returns 0 otherwise).
int MotionAnalysis::GetResults(
    bool flush, std::vector<std::unique_ptr<RegionFlowFeatureList>>* features,
    std::vector<std::unique_ptr<CameraMotion>>* camera_motion,
    std::vector<std::unique_ptr<SalientPointFrame>>* saliency) {
  MEASURE_TIME << "GetResults";

  const int num_features_lists = buffer_->BufferSize("features");
  // Feature lists before overlap_start_ were processed in a previous chunk.
  const int num_new_feature_lists = num_features_lists - overlap_start_;
  CHECK_GE(num_new_feature_lists, 0);

  if (!flush && num_new_feature_lists < options_.estimation_clip_size()) {
    // Nothing to compute, return.
    return 0;
  }

  const bool compute_saliency = options_.compute_motion_saliency();
  CHECK_EQ(compute_saliency, saliency != nullptr)
      << "Computing saliency requires saliency output and vice versa";

  // Estimate motions for newly buffered RegionFlowFeatureLists, which also
  // computes IRLS feature weights for foreground estimation, if needed
  // (otherwise could be externally added).
  const int num_motions_to_compute =
      buffer_->BufferSize("features") - buffer_->BufferSize("motion");

  if (num_motions_to_compute > 0) {
    std::vector<CameraMotion> camera_motions;
    std::vector<RegionFlowFeatureList*> feature_lists;
    for (int k = overlap_start_; k < num_features_lists; ++k) {
      feature_lists.push_back(
          buffer_->GetMutableDatum<RegionFlowFeatureList>("features", k));
    }

    // TODO: Result should be vector of unique_ptr.
    motion_estimation_->EstimateMotionsParallel(
        options_.post_irls_smoothing(), &feature_lists, &camera_motions);

    // Add solution to buffer.
    for (const auto& motion : camera_motions) {
      buffer_->EmplaceDatum("motion", new CameraMotion(motion));
    }
  }

  CHECK(buffer_->HaveEqualSize({"features", "motion"}));

  if (compute_saliency) {
    ComputeSaliency();
  }

  return OutputResults(flush, features, camera_motion, saliency);
}
|
||||
|
||||
// Moves completed frames out of the streaming buffer into the caller's
// vectors and updates overlap bookkeeping for the next chunk. Frames inside
// the retained overlap window are copied (they are still needed for temporal
// filtering); frames before it are released (ownership transferred).
// Returns the number of frames output.
int MotionAnalysis::OutputResults(
    bool flush, std::vector<std::unique_ptr<RegionFlowFeatureList>>* features,
    std::vector<std::unique_ptr<CameraMotion>>* camera_motion,
    std::vector<std::unique_ptr<SalientPointFrame>>* saliency) {
  const bool compute_saliency = options_.compute_motion_saliency();
  CHECK_EQ(compute_saliency, saliency != nullptr)
      << "Computing saliency requires saliency output and vice versa";
  CHECK(buffer_->HaveEqualSize({"features", "motion"}));

  // Discard prev. overlap (already output, just used for filtering here).
  buffer_->DiscardData(buffer_->AllTags(), prev_overlap_start_);
  prev_overlap_start_ = 0;

  // Output only frames not part of the overlap.
  const int num_output_frames =
      std::max(0, buffer_->MaxBufferSize() - (flush ? 0 : overlap_size_));

  if (features) {
    features->reserve(num_output_frames);
  }
  if (camera_motion) {
    camera_motion->reserve(num_output_frames);
  }
  if (saliency) {
    saliency->reserve(num_output_frames);
  }

  // Need to retain twice the overlap, for frames in that segment we need
  // to create a copy.
  const int new_overlap_start =
      std::max(0, num_output_frames - (flush ? 0 : overlap_size_));

  // Populate output.
  for (int k = 0; k < num_output_frames; ++k) {
    std::unique_ptr<RegionFlowFeatureList> out_features;
    std::unique_ptr<CameraMotion> out_motion;
    std::unique_ptr<SalientPointFrame> out_saliency;

    if (k >= new_overlap_start) {
      // Create copy (still needed in the buffer for the next chunk).
      out_features.reset(new RegionFlowFeatureList(
          *buffer_->GetDatum<RegionFlowFeatureList>("features", k)));
      out_motion.reset(
          new CameraMotion(*buffer_->GetDatum<CameraMotion>("motion", k)));
    } else {
      // Release datum.
      out_features =
          buffer_->ReleaseDatum<RegionFlowFeatureList>("features", k);
      out_motion = buffer_->ReleaseDatum<CameraMotion>("motion", k);
    }

    // output_saliency is temporary so we never need to buffer it.
    if (compute_saliency) {
      out_saliency =
          buffer_->ReleaseDatum<SalientPointFrame>("output_saliency", k);
    }

    if (options_.subtract_camera_motion_from_features()) {
      std::vector<RegionFlowFeatureList*> feature_view{out_features.get()};
      SubtractCameraMotionFromFeatures({*out_motion}, &feature_view);
    }

    if (features != nullptr) {
      features->push_back(std::move(out_features));
    }
    if (camera_motion != nullptr) {
      camera_motion->push_back(std::move(out_motion));
    }
    if (saliency != nullptr) {
      saliency->push_back(std::move(out_saliency));
    }
  }

  // Reset for next chunk.
  prev_overlap_start_ = num_output_frames - new_overlap_start;
  CHECK_GE(prev_overlap_start_, 0);

  CHECK(buffer_->TruncateBuffer(flush));

  overlap_start_ = buffer_->MaxBufferSize();
  return num_output_frames;
}
|
||||
|
||||
void MotionAnalysis::RenderResults(const RegionFlowFeatureList& feature_list,
|
||||
const CameraMotion& motion,
|
||||
const SalientPointFrame* saliency,
|
||||
cv::Mat* rendered_results) {
|
||||
#ifndef NO_RENDERING
|
||||
CHECK(rendered_results != nullptr);
|
||||
CHECK_EQ(frame_width_, rendered_results->cols);
|
||||
CHECK_EQ(frame_height_, rendered_results->rows);
|
||||
|
||||
const auto viz_options = options_.visualization_options();
|
||||
|
||||
// Visualize flow features if requested.
|
||||
if (viz_options.visualize_region_flow_features()) {
|
||||
cv::Scalar inlier_color(0, 255, 0);
|
||||
cv::Scalar outlier_color(255, 0, 0);
|
||||
if (feature_list.long_tracks()) {
|
||||
long_feature_stream_->AddFeatures(feature_list,
|
||||
true, // Check connectivity.
|
||||
true); // Purge non present ones.
|
||||
|
||||
VisualizeLongFeatureStream(
|
||||
*long_feature_stream_, inlier_color, outlier_color,
|
||||
viz_options.min_long_feature_track(),
|
||||
viz_options.max_long_feature_points(), 1.0f, 1.0f, rendered_results);
|
||||
} else {
|
||||
VisualizeRegionFlowFeatures(feature_list, inlier_color, outlier_color,
|
||||
true, 1.0f, 1.0f, rendered_results);
|
||||
}
|
||||
}
|
||||
|
||||
if (saliency != nullptr && viz_options.visualize_salient_points()) {
|
||||
static const cv::Scalar kColor(255, 0, 0);
|
||||
RenderSaliency(*saliency, kColor, viz_options.line_thickness(), false,
|
||||
rendered_results);
|
||||
}
|
||||
|
||||
if (viz_options.visualize_blur_analysis_region()) {
|
||||
VisualizeBlurAnalysisRegions(rendered_results);
|
||||
}
|
||||
|
||||
if (viz_options.visualize_stats()) {
|
||||
// Output general stats.
|
||||
std::string hud_text = absl::StrFormat(
|
||||
"H-cvg %.2f | H-err %4.2f | Avg.t %3.1f | dx: %+2.1f dy: %+2.1f "
|
||||
"Feat# %4d | %s | ",
|
||||
motion.homography_inlier_coverage(), motion.average_homography_error(),
|
||||
motion.average_magnitude(), motion.translation().dx(),
|
||||
motion.translation().dy(), feature_list.feature_size(),
|
||||
CameraMotionTypeToString(motion));
|
||||
hud_text += CameraMotionFlagToString(motion);
|
||||
|
||||
const float text_scale = frame_width_ * 5.e-4;
|
||||
cv::putText(*rendered_results, hud_text,
|
||||
cv::Point(0.02 * frame_width_, 0.975 * frame_height_),
|
||||
cv::FONT_HERSHEY_SIMPLEX, text_scale, cv::Scalar::all(255),
|
||||
text_scale * 3, CV_AA);
|
||||
|
||||
cv::putText(*rendered_results,
|
||||
absl::StrFormat("%6d", motion.timestamp_usec() / 1000),
|
||||
cv::Point(0.9 * frame_width_, 0.05 * frame_height_),
|
||||
cv::FONT_HERSHEY_SIMPLEX, text_scale, cv::Scalar::all(255),
|
||||
text_scale * 3, CV_AA);
|
||||
}
|
||||
#else
|
||||
LOG(FATAL) << "Code stripped out because of NO_RENDERING";
|
||||
#endif
|
||||
}
|
||||
|
||||
// Densifies per-feature foreground (IRLS-derived) weights into a full-frame
// 8-bit mask via push-pull interpolation. Lazily creates the push-pull
// filter on first use. `foreground_mask` is (re)allocated to frame size.
void MotionAnalysis::ComputeDenseForeground(
    const RegionFlowFeatureList& feature_list,
    const CameraMotion& camera_motion, cv::Mat* foreground_mask) {
  const auto& foreground_options = options_.foreground_options();

  if (foreground_push_pull_ == nullptr) {
    foreground_push_pull_.reset(
        new PushPullFilteringC1(cv::Size(frame_width_, frame_height_),
                                PushPullFilteringC1::BINOMIAL_5X5, false,
                                nullptr,    // Gaussian filter weights only.
                                nullptr,    // No mip map visualizer.
                                nullptr));  // No weight adjustment.
  }

  // Determine foreground weights for each feature.
  std::vector<float> foreground_weights;
  ForegroundWeightsFromFeatures(
      feature_list, foreground_options.foreground_threshold(),
      foreground_options.foreground_gamma(),
      foreground_options.threshold_coverage_scaling() ? &camera_motion
                                                      : nullptr,
      &foreground_weights);

  // Setup push pull map (with border). Ensure constructor used the right type.
  CHECK(foreground_push_pull_->filter_type() ==
            PushPullFilteringC1::BINOMIAL_5X5 ||
        foreground_push_pull_->filter_type() ==
            PushPullFilteringC1::GAUSSIAN_5X5);

  // 2-channel float map with a 2-pixel border on each side.
  cv::Mat foreground_map(frame_height_ + 4, frame_width_ + 4, CV_32FC2);
  std::vector<Vector2_f> feature_locations;
  std::vector<cv::Vec<float, 1>> feature_irls;

  for (int feat_idx = 0; feat_idx < foreground_weights.size(); ++feat_idx) {
    // Skip marked outliers.
    if (foreground_weights[feat_idx] == 0) {
      continue;
    }

    feature_locations.push_back(
        FeatureLocation(feature_list.feature(feat_idx)));
    feature_irls.push_back(cv::Vec<float, 1>(foreground_weights[feat_idx]));
  }

  // Run push pull.
  foreground_push_pull_->PerformPushPull(feature_locations, feature_irls, 0.2,
                                         cv::Point2i(0, 0),
                                         0,        // Default read out level.
                                         nullptr,  // Uniform weights.
                                         nullptr,  // No bilateral term.
                                         &foreground_map);

  // Convert to grayscale output, clamping scaled values to [0, 255].
  foreground_mask->create(frame_height_, frame_width_, CV_8U);
  for (int i = 0; i < frame_height_; ++i) {
    const float* src_ptr = foreground_map.ptr<float>(i);
    uint8* dst_ptr = foreground_mask->ptr<uint8>(i);
    for (int j = 0; j < frame_width_; ++j) {
      // Result is in first channel (second is confidence).
      dst_ptr[j] =
          std::max(0, std::min(255, static_cast<int>(src_ptr[2 * j] * 255.0f)));
    }
  }
}
|
||||
|
||||
// Alpha-composites the dense foreground mask onto `output` (assumed 3-channel,
// matching the mask's size). Foreground strength selects a color from a jet
// color map, or solid green if jet coloring is disabled in the options.
void MotionAnalysis::VisualizeDenseForeground(const cv::Mat& foreground_mask,
                                              cv::Mat* output) {
  CHECK(output != nullptr);
  CHECK(foreground_mask.size() == output->size());
  // Map foreground measure to color (green by default).
  std::vector<Vector3_f> color_map;
  if (options_.visualization_options().foreground_jet_coloring()) {
    JetColoring(1000, &color_map);
  } else {
    color_map.resize(1000, Vector3_f(0, 255, 0));
  }

  // Clamp to valid 8-bit pixel range.
  auto clamp = [](int value) -> int {
    return std::max(0, std::min(255, value));
  };

  // Burn-in alpha compositing.
  const float alpha = 1.3f;
  for (int i = 0; i < frame_height_; ++i) {
    uint8* image_ptr = output->ptr<uint8>(i);
    const uint8* foreground_ptr = foreground_mask.ptr<uint8>(i);

    for (int j = 0; j < frame_width_; ++j) {
      // Normalize mask value to [0, 1], then weight by alpha.
      const float norm_foreground = foreground_ptr[j] * (1.0 / 255.0f);
      const float foreground = norm_foreground * alpha;
      const float alpha_denom = 1.0f / (1.0f + foreground);

      Vector3_f color =
          color_map[static_cast<int>(norm_foreground * (color_map.size() - 1))];

      // Blend each channel: (pixel + color * w) / (1 + w).
      image_ptr[3 * j] =
          clamp((image_ptr[3 * j] + color[0] * foreground) * alpha_denom);
      image_ptr[3 * j + 1] =
          clamp((image_ptr[3 * j + 1] + color[1] * foreground) * alpha_denom);
      image_ptr[3 * j + 2] =
          clamp((image_ptr[3 * j + 2] + color[2] * foreground) * alpha_denom);
    }
  }
}
|
||||
|
||||
// Overlays the tracker's blur mask onto `input_view` (modified in place).
// Computes per-pixel minimum eigenvalues of the gradient covariance
// (cornerness) on a grayscale version of the view, lets
// RegionFlowComputation derive a blur mask from them, then blends the mask
// onto the input at 50% opacity.
void MotionAnalysis::VisualizeBlurAnalysisRegions(cv::Mat* input_view) {
  CHECK(input_view != nullptr);

  cv::Mat intensity;
  cv::cvtColor(*input_view, intensity, CV_RGB2GRAY);
  cv::Mat corner_values;
  cv::cornerMinEigenVal(intensity, corner_values, 3);

  cv::Mat mask;
  region_flow_computation_->ComputeBlurMask(*input_view, &corner_values, &mask);

  cv::Mat mask_3c;
  cv::cvtColor(mask, mask_3c, CV_GRAY2RGB);
  // Blend: 0.5 * input + 0.5 * mask - 128 (darkens non-mask regions).
  cv::addWeighted(*input_view, 0.5, mask_3c, 0.5, -128, *input_view);
}
|
||||
|
||||
// Computes per-frame motion saliency for newly buffered feature lists and
// prepares the temporary "output_saliency" channel: raw saliency is copied,
// then optionally inlier-selected and temporally filtered in place.
// Called from GetResults when compute_motion_saliency() is set.
void MotionAnalysis::ComputeSaliency() {
  MEASURE_TIME << "Saliency computation.";
  CHECK_EQ(overlap_start_, buffer_->BufferSize("saliency"));

  const int num_features_lists = buffer_->BufferSize("features");

  // Compute saliency only for newly buffered RegionFlowFeatureLists.
  for (int k = overlap_start_; k < num_features_lists; ++k) {
    std::vector<float> foreground_weights;
    ForegroundWeightsFromFeatures(
        *buffer_->GetDatum<RegionFlowFeatureList>("features", k),
        options_.foreground_options().foreground_threshold(),
        options_.foreground_options().foreground_gamma(),
        options_.foreground_options().threshold_coverage_scaling()
            ? buffer_->GetDatum<CameraMotion>("motion", k)
            : nullptr,
        &foreground_weights);

    std::unique_ptr<SalientPointFrame> saliency(new SalientPointFrame());
    motion_saliency_->SaliencyFromFeatures(
        *buffer_->GetDatum<RegionFlowFeatureList>("features", k),
        &foreground_weights, saliency.get());

    buffer_->AddDatum("saliency", std::move(saliency));
  }

  CHECK(buffer_->HaveEqualSize({"features", "motion", "saliency"}));

  // Clear output saliency and copy from saliency.
  buffer_->DiscardDatum("output_saliency",
                        buffer_->BufferSize("output_saliency"));

  for (int k = 0; k < buffer_->BufferSize("saliency"); ++k) {
    std::unique_ptr<SalientPointFrame> copy(new SalientPointFrame());
    *copy = *buffer_->GetDatum<SalientPointFrame>("saliency", k);
    buffer_->AddDatum("output_saliency", std::move(copy));
  }

  // Create view (raw pointers into the buffer; filtering mutates in place).
  std::vector<SalientPointFrame*> saliency_view =
      buffer_->GetMutableDatumVector<SalientPointFrame>("output_saliency");

  // saliency_frames are filtered after this point and ready for output.
  if (options_.select_saliency_inliers()) {
    motion_saliency_->SelectSaliencyInliers(&saliency_view, false);
  }

  if (options_.filter_saliency()) {
    motion_saliency_->FilterMotionSaliency(&saliency_view);
  }
}
|
||||
|
||||
} // namespace mediapipe
|
||||
229
mediapipe/util/tracking/motion_analysis.h
Normal file
|
|
@ -0,0 +1,229 @@
|
|||
// Copyright 2019 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Module for performing motion analysis on a video stream including computing
|
||||
// locally filtered (robust) feature tracking, camera motion estimation, and
|
||||
// dense foreground saliency estimation.
|
||||
// Module buffers frames internally (using an adaptive overlap to achieve
|
||||
// temporal consistency):
|
||||
//
|
||||
// Usage example:
|
||||
//
|
||||
// MotionAnalysisOptions options;
|
||||
// // Should always be a multiple of 8 for optimal parallel performance
|
||||
// options.set_estimation_clip_size(16);
|
||||
// MotionAnalysis motion_analysis(options, 960, 540);
|
||||
|
||||
// std::vector<cv::Mat> input_frames(N);
|
||||
// // Define output vectors.
|
||||
// std::vector<std::unique_ptr<RegionFlowFeatureList>> features;
|
||||
// std::vector<std::unique_ptr<CameraMotion>> camera_motion;
|
||||
// std::vector<std::unique_ptr<SalientPointFrame>> saliency;
|
||||
// std::vector<cv::Mat> rendered_results;  // Should be initialized with
//                                         // each input frame.
|
||||
//
|
||||
// for (int k = 0; k < N; ++k) {
|
||||
// motion_analysis.AddFrame(input_frames[k], 0);
|
||||
// // Outputs results, if new ones are available.
|
||||
// // Output will be all of the same lengths (Length returned by function).
|
||||
// if (motion_analysis.GetResults(k + 1 == N, // Flush, force output.
|
||||
// &features,
|
||||
// &camera_motion,
|
||||
// &saliency) > 0) { // Optional.
|
||||
// // Optionally render at i'th frame.
|
||||
// motion_analysis.RenderResults(*features[i],
|
||||
// *camera_motion[i],
|
||||
// saliency[i].get(),
|
||||
// &rendered_results[i]);
|
||||
//
|
||||
// // Output results...
|
||||
// }
|
||||
// }
|
||||
|
||||
#ifndef MEDIAPIPE_UTIL_TRACKING_MOTION_ANALYSIS_H_
|
||||
#define MEDIAPIPE_UTIL_TRACKING_MOTION_ANALYSIS_H_
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "mediapipe/framework/port/opencv_core_inc.h"
|
||||
#include "mediapipe/util/tracking/camera_motion.pb.h"
|
||||
#include "mediapipe/util/tracking/motion_analysis.pb.h"
|
||||
#include "mediapipe/util/tracking/motion_estimation.h"
|
||||
#include "mediapipe/util/tracking/motion_estimation.pb.h"
|
||||
#include "mediapipe/util/tracking/motion_saliency.h"
|
||||
#include "mediapipe/util/tracking/push_pull_filtering.h"
|
||||
#include "mediapipe/util/tracking/region_flow.h"
|
||||
#include "mediapipe/util/tracking/region_flow.pb.h"
|
||||
#include "mediapipe/util/tracking/region_flow_computation.h"
|
||||
#include "mediapipe/util/tracking/streaming_buffer.h"
|
||||
|
||||
namespace mediapipe {

// Single-channel push-pull filter with uniform weights, used to densify
// sparse foreground feature weights into a per-pixel foreground mask.
typedef PushPullFiltering<1, FilterWeightMultiplierOne> PushPullFlowC1;

class MotionAnalysis {
 public:
  MotionAnalysis(const MotionAnalysisOptions& options, int frame_width,
                 int frame_height);
  ~MotionAnalysis() = default;
  MotionAnalysis(const MotionAnalysis&) = delete;
  MotionAnalysis& operator=(const MotionAnalysis&) = delete;

  // Call with every frame. Timestamp is optional (set to zero if not needed).
  // Optionally outputs list of features extracted from this frame.
  // Returns true on success.
  bool AddFrame(const cv::Mat& frame, int64 timestamp_usec,
                RegionFlowFeatureList* feature_list = nullptr);

  // Same as above, but uses specified initial transform to seed
  // feature locations.
  bool AddFrameWithSeed(const cv::Mat& frame, int64 timestamp_usec,
                        const Homography& initial_transform,
                        RegionFlowFeatureList* feature_list = nullptr);

  // Generic function to perform motion analysis on the passed frame,
  // with initial_transform used as seed.
  // Optionally accepts external_features to be added to the computed ones,
  // and to reject all features that do not agree with the rejection transform
  // within a specified threshold (rejection_transform_threshold in options).
  // Also allows to modify feature locations before motion estimation by
  // supplying an appropriate callback - this is invoked *after* the rejection
  // transform.
  // Returns list of features extracted from this frame, *before* any
  // modification is applied. To yield modified features, simply
  // apply modify_features function to returned result.
  bool AddFrameGeneric(
      const cv::Mat& frame, int64 timestamp_usec,
      const Homography& initial_transform,
      const Homography* rejection_transform = nullptr,
      const RegionFlowFeatureList* external_features = nullptr,
      std::function<void(RegionFlowFeatureList*)>* modify_features = nullptr,
      RegionFlowFeatureList* feature_list = nullptr);

  // Instead of tracking passed frames, uses result directly as supplied by
  // features. Can not be mixed with above AddFrame* calls.
  void AddFeatures(const RegionFlowFeatureList& features);

  // Instead of tracking and computing camera motions, simply adds precomputed
  // features and camera motions to the internal buffers. Can not be mixed
  // with above Add* calls.
  // This is useful for just computing saliency via GetResults.
  void EnqueueFeaturesAndMotions(const RegionFlowFeatureList& features,
                                 const CameraMotion& motion);

  // Returns motion results (features, camera motions and saliency, all
  // optional).
  // Call after every AddFrame for optimal performance.
  // Returns number of available results. Note, this call will often return
  // zero, and only return results (multiple in this case) when chunk boundaries
  // are reached. The actual number returned depends on various smoothing
  // settings for saliency and features.
  // Set flush to true, to force output of all results (e.g. when the end of the
  // video stream is reached).
  // Note: Passing a non-zero argument for saliency requires
  // MotionAnalysisOptions::compute_motion_saliency to be set and
  // vice versa. (CHECKED)
  int GetResults(
      bool flush,  // Forces output.
      std::vector<std::unique_ptr<RegionFlowFeatureList>>* features = nullptr,
      std::vector<std::unique_ptr<CameraMotion>>* camera_motion = nullptr,
      std::vector<std::unique_ptr<SalientPointFrame>>* saliency = nullptr);

  // Exposes the grayscale image frame from the most recently created region
  // flow tracking data.
  cv::Mat GetGrayscaleFrameFromResults();

  // Renders features and saliency to rendered_results based on
  // VisualizationOptions onto pre-initialized rendered_results (in most cases
  // you want to create a copy of the input frame).
  // NOTE:
  // If features are requested to be rendered, this function should be
  // called serially with each frame, or wrong feature locations might be
  // rendered.
  void RenderResults(const RegionFlowFeatureList& features,
                     const CameraMotion& camera_motion,
                     const SalientPointFrame* saliency,  // Optional.
                     cv::Mat* rendered_results);

  // Determines dense foreground mask from features.
  // Returns foreground mask as CV_8U image, indicating probability of
  // foreground.
  void ComputeDenseForeground(const RegionFlowFeatureList& feature_list,
                              const CameraMotion& camera_motion,
                              cv::Mat* foreground_mask);

  // Overlays foreground mask over output as green burn in, or with
  // jet_coloring (based on options).
  void VisualizeDenseForeground(const cv::Mat& foreground_mask,
                                cv::Mat* output);

  // Masks out regions from input that are not used for blur analysis.
  void VisualizeBlurAnalysisRegions(cv::Mat* input);

  // Number of frames/features added so far.
  int NumFrames() const { return frame_num_; }

 private:
  // Applies the pre-configured analysis policy (if any) to options_.
  void InitPolicyOptions();

  // Compute saliency from buffered features and motions.
  void ComputeSaliency();

  // Outputs computed results from the streaming buffer to the optional
  // output args. Also performs overlap handling.
  int OutputResults(
      bool flush,  // Forces output.
      std::vector<std::unique_ptr<RegionFlowFeatureList>>* features = nullptr,
      std::vector<std::unique_ptr<CameraMotion>>* camera_motion = nullptr,
      std::vector<std::unique_ptr<SalientPointFrame>>* saliency = nullptr);

  MotionAnalysisOptions options_;
  int frame_width_ = 0;
  int frame_height_ = 0;
  int frame_num_ = 0;

  // Internal objects for actual motion analysis.
  std::unique_ptr<RegionFlowComputation> region_flow_computation_;
  std::unique_ptr<MotionEstimation> motion_estimation_;
  std::unique_ptr<MotionSaliency> motion_saliency_;
  std::unique_ptr<PushPullFlowC1> foreground_push_pull_;
  // Used for visualization if long feature tracks are present.
  std::unique_ptr<LongFeatureStream> long_feature_stream_;

  std::unique_ptr<StreamingBuffer> buffer_;

  // Indicates where previous overlap in above buffers starts (earlier data is
  // just to improve smoothing).
  int prev_overlap_start_ = 0;
  // Indicates where actual overlap starts (data after this has not been
  // output).
  int overlap_start_ = 0;

  // Buffers previous frame.
  std::unique_ptr<cv::Mat> prev_frame_;

  bool compute_feature_descriptors_ = false;

  // Amount of overlap between clips. Determined from saliency smoothing
  // and filtering options.
  int overlap_size_ = 0;

  // True while frames are being tracked via AddFrame*; set to false when
  // precomputed features/motions are enqueued instead.
  bool feature_computation_ = true;
};

}  // namespace mediapipe
|
||||
|
||||
#endif // MEDIAPIPE_UTIL_TRACKING_MOTION_ANALYSIS_H_
|
||||
154
mediapipe/util/tracking/motion_analysis.proto
Normal file
|
|
@ -0,0 +1,154 @@
|
|||
// Copyright 2019 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
syntax = "proto2";
|
||||
|
||||
package mediapipe;
|
||||
|
||||
import "mediapipe/util/tracking/motion_estimation.proto";
|
||||
import "mediapipe/util/tracking/motion_saliency.proto";
|
||||
import "mediapipe/util/tracking/region_flow_computation.proto";
|
||||
|
||||
// Settings for MotionAnalysis. This class computes sparse, locally consistent
// flow (referred to as region flow), camera motions, and foreground saliency
// (i.e. likely foreground objects moving different from the background).
// Next tag: 16
message MotionAnalysisOptions {
  // Pre-configured policies for MotionAnalysis.
  // For general use, it is recommended to select an appropriate policy
  // instead of customizing flow and motion options by hand.
  // Policies are being kept up to date with appropriate settings.
  enum AnalysisPolicy {
    // Default legacy options. Effectively a no-op.
    ANALYSIS_POLICY_LEGACY = 0;

    // Use for video.
    ANALYSIS_POLICY_VIDEO = 1;

    // Use for video on mobile.
    ANALYSIS_POLICY_VIDEO_MOBILE = 2;

    // Use if applied to camera stream on mobile, e.g.
    // low latency and high throughput.
    // ASSUMES DOWNSAMPLED INPUT, e.g. from GPU.
    ANALYSIS_POLICY_CAMERA_MOBILE = 3;

    // Use for sped up video / hyperlapse when adding frames with seeds
    // and rejection transforms. Mostly ups temporal consistency weights
    // and relaxes stability constraints.
    // Only recommended to be used as second pass after initial MotionAnalysis
    // and FrameSelection.
    ANALYSIS_POLICY_HYPERLAPSE = 4;
  }

  optional AnalysisPolicy analysis_policy = 14
      [default = ANALYSIS_POLICY_LEGACY];

  // Options for the actual motion stabilization
  // (in order of object usage).
  optional RegionFlowComputationOptions flow_options = 1;
  optional MotionEstimationOptions motion_options = 2;
  optional MotionSaliencyOptions saliency_options = 3;

  // Clip-size used for (parallelized) motion estimation.
  optional int32 estimation_clip_size = 4 [default = 16];

  // If set, camera motion is subtracted from features before output.
  // Effectively outputs residual motion w.r.t. background.
  optional bool subtract_camera_motion_from_features = 5 [default = false];

  // If flow_options().tracking_options().tracking_policy() equals
  // POLICY_MULTI_FRAME, this flag indicates which RegionFlowFeatureList to use.
  // Specifically, for frame C, we use the motion from C to C - 1 - track_index.
  optional int32 track_index = 6 [default = 0];

  // If set, compute motion saliency (regions of moving foreground).
  optional bool compute_motion_saliency = 7 [default = false];

  // Selects saliency inliers (only saliency locations with sufficient
  // spatial and temporal support are kept).
  // Only applied when compute_motion_saliency is set.
  optional bool select_saliency_inliers = 8 [default = true];

  // Performs spatio-temporal filtering of extracted foreground saliency. If
  // used with above selection of saliency inliers, filtering is performed
  // *after* inlier selection.
  // Only applied when compute_motion_saliency is set.
  optional bool filter_saliency = 9 [default = true];

  // If set, irls weights of motion estimation are spatio-temporally smoothed
  // after model estimation.
  optional bool post_irls_smoothing = 10 [default = false];

  // If a rejection_transform is passed to AddFrameGeneric, features that
  // do not agree with the transform within below threshold are removed.
  optional float rejection_transform_threshold = 13 [default = 20.0];

  // Adapts visualization for rendered_results when passed to GetResults.
  message VisualizationOptions {
    // Visualizes tracked region flow features, colored w.r.t. fitting error.
    optional bool visualize_region_flow_features = 1 [default = true];

    // Visualizes salient points. Only applicable if compute_motion_saliency is
    // set to true.
    optional bool visualize_salient_points = 2 [default = false];

    // Line thickness of ellipse when rendering salient points.
    optional int32 line_thickness = 5 [default = 4];

    // Instead of green burn in uses jet coloring to indicate magnitude of
    // foreground motion.
    optional bool foreground_jet_coloring = 3 [default = false];

    // If set, only keeps masks of pixels that are used for blur analysis, rest
    // is set to zero.
    optional bool visualize_blur_analysis_region = 4 [default = false];

    optional bool visualize_stats = 6 [default = true];

    // Only long feature tracks with specified minimum length are rendered.
    // Set to zero to consider all tracks.
    optional int32 min_long_feature_track = 7 [default = 0];

    // Only the last N points of a long feature track are rendered. Set to zero
    // to render all points.
    optional int32 max_long_feature_points = 8 [default = 0];
  }

  optional VisualizationOptions visualization_options = 11;

  // Describes how to compute foreground from features.
  message ForegroundOptions {
    // Indicates the *inverse* registration error (i.e. the irls weight)
    // that is deemed a complete inlier.
    // Weights in the interval [0, foreground_threshold] (corresponding to
    // pixel errors in the interval [1 / foreground_threshold, inf])
    // are mapped to 1 - [0, 1], i.e. foreground threshold is mapped to zero
    // with weights below the threshold being assigned values > 0.
    // Therefore, larger values will increase amount of detected foreground
    // as well as noise.
    optional float foreground_threshold = 1 [default = 0.5];

    // By using foreground_gamma < 1.0 you can increase resolution of small
    // foreground motion at the expense of the resolution of large foreground
    // motions.
    optional float foreground_gamma = 2 [default = 1.0];

    // Threshold is scaled by coverage, i.e. for frames with large registration
    // error less foreground is visualized.
    optional bool threshold_coverage_scaling = 3 [default = true];
  }

  optional ForegroundOptions foreground_options = 12;
}
|
||||
6009
mediapipe/util/tracking/motion_estimation.cc
Normal file
758
mediapipe/util/tracking/motion_estimation.h
Normal file
|
|
@ -0,0 +1,758 @@
|
|||
// Copyright 2019 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Fits several linear motion models to the tracked features obtained
|
||||
// from RegionFlowComputation.
|
||||
//
|
||||
// --- Multi-threaded usage (parallel motion estimation over frames) ---
|
||||
// assume input: vector<RegionFlowFeatureList*> feature_lists
|
||||
// // Can be obtained after getting RegionFlowFrame from RegionFlowComputation
|
||||
// // and executing GetRegionFlowFeatureList (region_flow.h)
|
||||
// MotionEstimation motion_estimation(MotionEstimationOptions(),
|
||||
// frame_width,
|
||||
// frame_height);
|
||||
// vector<CameraMotion> camera_motions;
|
||||
// motion_estimation.EstimateMotionsParallel(false, // no IRLS smoothing.
|
||||
// &feature_lists,
|
||||
// &camera_motions);
|
||||
// // RegionFlowFeatureList can be discarded or passed to Cropper.
|
||||
//
|
||||
//
|
||||
// --- DEPRECATED, per-frame usage ---
|
||||
// assume input: RegionFlowFrame* flow_frame // from RegionFlowComputation.
|
||||
//
|
||||
// // Initialize with standard options.
|
||||
// MotionEstimation motion_estimation(MotionEstimationOptions(),
|
||||
// frame_width,
|
||||
// frame_height);
|
||||
// CameraMotion estimated_motion;
|
||||
// motion_estimation.EstimateMotion(flow_frame,
|
||||
// NULL, // deprecated param.
|
||||
// NULL, // deprecated param.
|
||||
// &estimation_motion);
|
||||
//
|
||||
// // If features are not needed anymore flow_frame can be be discarded now.
|
||||
//
|
||||
// // Pass motion models in estimated_motion onto MotionStabilization,
|
||||
// // if stabilization is desired.
|
||||
|
||||
#ifndef MEDIAPIPE_UTIL_TRACKING_MOTION_ESTIMATION_H_
|
||||
#define MEDIAPIPE_UTIL_TRACKING_MOTION_ESTIMATION_H_
|
||||
|
||||
#include <algorithm>
|
||||
#include <deque>
|
||||
#include <list>
|
||||
#include <memory>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include "mediapipe/framework/port/integral_types.h"
|
||||
#include "mediapipe/framework/port/vector.h"
|
||||
#include "mediapipe/util/tracking/camera_motion.pb.h"
|
||||
#include "mediapipe/util/tracking/motion_estimation.pb.h"
|
||||
#include "mediapipe/util/tracking/motion_models.pb.h"
|
||||
#include "mediapipe/util/tracking/region_flow.h"
|
||||
|
||||
namespace mediapipe {
|
||||
|
||||
class Homography;
|
||||
class LinearSimilarityModel;
|
||||
class MixtureHomography;
|
||||
class MixtureRowWeights;
|
||||
class RegionFlowFeature;
|
||||
class RegionFlowFeatureList;
|
||||
class RegionFlowFrame;
|
||||
|
||||
class EstimateMotionIRLSInvoker;
|
||||
class InlierMask;
|
||||
class IrlsInitializationInvoker;
|
||||
// Thread local storage for pre-allocated memory.
|
||||
class MotionEstimationThreadStorage;
|
||||
class TrackFilterInvoker;
|
||||
|
||||
class MotionEstimation {
|
||||
public:
|
||||
MotionEstimation(const MotionEstimationOptions& options, int frame_width,
|
||||
int frame_height);
|
||||
virtual ~MotionEstimation();
|
||||
MotionEstimation(const MotionEstimation&) = delete;
|
||||
MotionEstimation& operator=(const MotionEstimation&) = delete;
|
||||
|
||||
// Can be used to re-initialize options between EstimateMotion /
|
||||
// EstimateMotionsParallel calls.
|
||||
void InitializeWithOptions(const MotionEstimationOptions& options);
|
||||
|
||||
// Estimates motion models from RegionFlowFeatureLists based on
|
||||
// MotionEstimationOptions, in a multithreaded manner (frame parallel).
|
||||
// The computed IRLS weights used on the last iteration of the highest
|
||||
// degree of freedom model are *written* to the irls_weight member for each
|
||||
// RegionFlowFeature in RegionFlowFeatureList which can be a useful
|
||||
// feature for later processing.
|
||||
// In addition the returned irls weights can be smoothed spatially
|
||||
// and temporally before they are output.
|
||||
// Note: The actual vector feature_lists is not modified.
|
||||
virtual void EstimateMotionsParallel(
|
||||
bool post_irls_weight_smoothing,
|
||||
std::vector<RegionFlowFeatureList*>* feature_lists,
|
||||
std::vector<CameraMotion>* camera_motions) const;
|
||||
|
||||
// DEPRECATED function, estimating Camera motion from a single
|
||||
// RegionFlowFrame.
|
||||
virtual void EstimateMotion(const RegionFlowFrame& region_flow_frame,
|
||||
const int*, // deprecated, must be NULL.
|
||||
const int*, // deprecated, must be NULL.
|
||||
CameraMotion* camera_motion) const;
|
||||
|
||||
// Public facing API to directly estimate motion models (as opposed to
|
||||
// a cascade of increasing degree of freedom motion models with appropiate
|
||||
// stability analysis via above EstimateMotionsParallel).
|
||||
// Use this if all you need is just the a specific motion
|
||||
// model describing/summarizing the motion of the RegionFlowFeatureList.
|
||||
// Returns false if model estimation failed (in this case an identity model
|
||||
// is set in camera_motion).
|
||||
// NOTE: All direct estimation functions assume normalized feature input,
|
||||
// i.e. transformed via NormalizeRegionFlowFeatureList.
|
||||
//
|
||||
// NOTE 2: For easy direct use see Fit* functions below class.
|
||||
bool EstimateTranslationModel(RegionFlowFeatureList* feature_list,
|
||||
CameraMotion* camera_motion);
|
||||
|
||||
bool EstimateLinearSimilarityModel(RegionFlowFeatureList* feature_list,
|
||||
CameraMotion* camera_motion);
|
||||
|
||||
bool EstimateAffineModel(RegionFlowFeatureList* feature_list,
|
||||
CameraMotion* camera_motion);
|
||||
|
||||
bool EstimateHomography(RegionFlowFeatureList* feature_list,
|
||||
CameraMotion* camera_motion);
|
||||
|
||||
bool EstimateMixtureHomography(RegionFlowFeatureList* feature_list,
|
||||
CameraMotion* camera_motion);
|
||||
|
||||
// Static function which sets motion models (requested in options) to identity
|
||||
// models.
|
||||
static void ResetMotionModels(const MotionEstimationOptions& options,
|
||||
CameraMotion* camera_motion);
|
||||
|
||||
// The following functions ResetTo* functions reset all models that are
|
||||
// present in camera_motion (tested via has_*) to identity or
|
||||
// the passed model (which is embedded in higher degree of freedom models
|
||||
// if applicable). CameraMotion::Type is set in accordance to function name.
|
||||
static void ResetToIdentity(CameraMotion* camera_motion,
|
||||
bool consider_valid = false); // Set to true
|
||||
// for type VALID
|
||||
|
||||
// Resets every specified model to embedded translation model.
|
||||
// CameraMotion type is set to UNSTABLE.
|
||||
static void ResetToTranslation(const TranslationModel& translation,
|
||||
CameraMotion* camera_motion);
|
||||
|
||||
// Resets every specified model with more or equal DOF than a similarity
|
||||
// to the passed model.
|
||||
// Camera Motion type is set to UNSTABLE_SIM.
|
||||
static void ResetToSimilarity(const LinearSimilarityModel& model,
|
||||
CameraMotion* camera_motion);
|
||||
|
||||
// Resets every specified model with more or equal DOF than a homography
|
||||
// to the passed model. If flag_as_unstable_model is set, camera motion type
|
||||
// is set to UNSTABLE_HOMOG.
|
||||
static void ResetToHomography(const Homography& model,
|
||||
bool flag_as_unstable_model,
|
||||
CameraMotion* camera_motion);
|
||||
|
||||
private:
|
||||
// Simple enum indicating with motion model should be estimated, mapped from
|
||||
// MotionEstimationOptions.
|
||||
enum MotionType {
|
||||
MODEL_AVERAGE_MAGNITUDE = 0,
|
||||
MODEL_TRANSLATION = 1,
|
||||
MODEL_LINEAR_SIMILARITY = 2,
|
||||
MODEL_AFFINE = 3,
|
||||
MODEL_HOMOGRAPHY = 4,
|
||||
MODEL_MIXTURE_HOMOGRAPHY = 5,
|
||||
// ... internal enum values used for mixture spectrum (up to 10 mixtures are
|
||||
// supported). Do not use directly.
|
||||
|
||||
// Change value if new motions are added.
|
||||
MODEL_NUM_VALUES = 16,
|
||||
};
|
||||
|
||||
// Determines shot boundaries from estimated motion models and input
|
||||
// feature_lists by setting the corresponding flag in CameraMotion.
|
||||
// Make sure, this function is called after motion estimation.
|
||||
void DetermineShotBoundaries(
|
||||
const std::vector<RegionFlowFeatureList*>& feature_lists,
|
||||
std::vector<CameraMotion>* camera_motions) const;
|
||||
|
||||
// Implementation function to estimate CameraMotion's from
|
||||
// RegionFlowFeatureLists.
|
||||
void EstimateMotionsParallelImpl(
|
||||
bool irls_weights_preinitialized,
|
||||
std::vector<RegionFlowFeatureList*>* feature_lists,
|
||||
std::vector<CameraMotion>* camera_motions) const;
|
||||
|
||||
struct SingleTrackClipData;
|
||||
struct EstimateModelOptions;
|
||||
|
||||
EstimateModelOptions DefaultModelOptions() const;
|
||||
|
||||
// Implementation function to estimate all motions for a specific type
|
||||
// across multiple single tracks. Motions are only estimated for those
|
||||
// CameraMotions with type less or equal to max_unstable_type. Flag
|
||||
// irls_weights_preinitialized enables some optimizations in case it is
|
||||
// set to false as features are not pre-initialized.
|
||||
// Optionally pass thread_storage.
|
||||
// Returns true if requested type was attempt to be estimated
|
||||
// (based on options) , false otherwise.
|
||||
bool EstimateMotionModels(
|
||||
const MotionType& max_type, const CameraMotion::Type& max_unstable_type,
|
||||
const EstimateModelOptions& options,
|
||||
const MotionEstimationThreadStorage* thread_storage, // optional.
|
||||
std::vector<SingleTrackClipData>* clip_datas) const;
|
||||
|
||||
// Multiplies input irls_weights by an upweight multiplier for each feature
|
||||
// that is part of a sufficiently large track (contribution of each track
|
||||
// length is by track_length_multiplier, mapping each track length
|
||||
// to an importance weight in [0, 1]).
|
||||
void LongFeatureInitialization(
|
||||
const RegionFlowFeatureList& feature_list,
|
||||
const LongFeatureInfo& feature_info,
|
||||
const std::vector<float>& track_length_importance,
|
||||
std::vector<float>* irls_weights) const;
|
||||
|
||||
// Multiplies input irls_weights by normalization factor that downweights
|
||||
// features is areas of high density.
|
||||
void FeatureDensityNormalization(const RegionFlowFeatureList& feature_list,
|
||||
std::vector<float>* irls_weights) const;
|
||||
|
||||
// Initializes irls weights (if
|
||||
// MotionEstimationOptions::irls_initialization::activated is true),
|
||||
// based on a multitude of options (RANSAC based pre-fitting of motion
|
||||
// models, homography initialization, etc.).
|
||||
// Processes one frame or all (if frame = -1) within a clip_data,
|
||||
void IrlsInitialization(const MotionType& type, int max_unstable_type,
|
||||
int frame, // Specify -1 for all frames.
|
||||
const EstimateModelOptions& options,
|
||||
SingleTrackClipData* clip_data) const;
|
||||
|
||||
// Estimation functions for models, called via options by EstimateMotion and
|
||||
// EstimateMotionsParallel.
|
||||
// NOTE: All direct estimation functions assume normalized feature input,
|
||||
// i.e. transformed via
|
||||
// TransformRegionFlowFeatureList(normalization_transform, feature_list);
|
||||
// where normalization_transform =
|
||||
// LinearSimilarityAdapter::NormalizationTransform(frame_width,
|
||||
// frame_height);
|
||||
//
|
||||
// Direct estimation functions perform estimation via iterated reweighted
|
||||
// least squares (IRLS). In this case specify number of iterations (10 is a
|
||||
// good default), and optionally the PriorFeatureWeights for each iteration.
|
||||
// The alphas specify, how much weight should be given to the
|
||||
// prior weight that the feature has before optimization. An alpha of zero
|
||||
// indicates, no prior weighting, whereas as an alpha of one corresponds to
|
||||
// full prior weighting. The actual prior is stored in priors.
|
||||
// Each iteration is reweighted by numerator / error, where error is the L2
|
||||
// fitting error after estimation and
|
||||
// numerator = (1.0 - alpha) * 1.0 + alpha * prior
|
||||
// Per-iteration blending weights (alphas) and per-feature prior weights used
// to bias IRLS estimation toward a pre-existing solution; see the comment
// block above for the exact reweighting formula.
struct PriorFeatureWeights {
  // Creates zero alphas for num_iterations rounds; no per-feature priors.
  explicit PriorFeatureWeights(int num_iterations)
      : alphas(num_iterations, 0.0f) {}

  // Creates zero alphas and a uniform prior of 1.0 for num_features features.
  PriorFeatureWeights(int num_iterations, int num_features)
      : alphas(num_iterations, 0.0f), priors(num_features, 1.0f) {}

  // True iff the stored vectors match the expected dimensions.
  bool HasCorrectDimension(int num_iterations, int num_features) const {
    return alphas.size() == static_cast<std::size_t>(num_iterations) &&
           priors.size() == static_cast<std::size_t>(num_features);
  }

  // True iff at least one iteration gives the prior non-zero influence.
  bool HasNonZeroAlpha() const {
    return std::any_of(alphas.begin(), alphas.end(),
                       [](float alpha) { return alpha > 0; });
  }

  // True iff per-feature prior weights were supplied.
  bool HasPrior() const { return !priors.empty(); }

  std::vector<float> alphas;  // Alpha for each IRLS round.
  std::vector<float> priors;  // Prior weight for each feature.

  // If set, the above alphas are not adjusted with iterations but always
  // treated as 1.0, giving full weight to the prior.
  bool use_full_prior = false;
};
|
||||
|
||||
// In addition, each estimation function can compute its corresponding
// stability features and store it in CameraMotion. These features are needed
// to test via the IsStable* functions further below.

// Estimates 2 DOF translation model.
// Note: feature_list is assumed to be normalized/transformed by
// LinearSimilarity::NormalizationTransform N. Returned irls weights and
// linear similarity are expressed in original frame, i.e. for estimated model
// M, M' = N^(-1) M N is returned.
void EstimateTranslationModelIRLS(
    int irls_rounds, bool compute_stability,
    RegionFlowFeatureList* feature_list,
    const PriorFeatureWeights* prior_weights,  // optional.
    CameraMotion* camera_motion) const;

// Estimates linear similarity from feature_list using irls_rounds iterative
// reweighted least squares iterations. For L2 estimation, use irls_round = 1.
// The irls_weight member of each RegionFlowFeature in feature_list will be
// set to the inverse residual w.r.t. estimated LinearSimilarityModel.
// Note: feature_list is assumed to be normalized/transformed by
// LinearSimilarity::NormalizationTransform N. Returned irls weights and
// linear similarity are expressed in original frame, i.e. for estimated model
// M, M' = N^(-1) M N is returned.
// Returns true if estimation was successful, otherwise returns false and sets
// the CameraMotion::type to INVALID.
bool EstimateLinearSimilarityModelIRLS(
    int irls_rounds, bool compute_stability,
    RegionFlowFeatureList* feature_list,
    const PriorFeatureWeights* prior_weights,  // optional.
    CameraMotion* camera_motion) const;

// Same as above for affine motion.
// Note: feature_list is assumed to be normalized/transformed by
// LinearSimilarity::NormalizationTransform N. Returned irls weights and
// affine model are expressed in original frame, i.e. for estimated model
// M, M' = N^(-1) M N is returned.
bool EstimateAffineModelIRLS(int irls_rounds,
                             RegionFlowFeatureList* feature_list,
                             CameraMotion* camera_motion) const;

// Same as above for homography.
// Note: feature_list is assumed to be normalized/transformed by
// LinearSimilarity::NormalizationTransform N. Returned irls weights and
// homography are expressed in original frame, i.e. for estimated model
// M, M' = N^(-1) M N is returned.
// Returns true if estimation was successful, otherwise returns false and sets
// the CameraMotion::type to INVALID.
bool EstimateHomographyIRLS(
    int irls_rounds, bool compute_stability,
    const PriorFeatureWeights* prior_weights,       // optional.
    MotionEstimationThreadStorage* thread_storage,  // optional.
    RegionFlowFeatureList* feature_list, CameraMotion* camera_motion) const;

// Same as above for mixture homography.
// Note: feature_list is assumed to be normalized/transformed by
// LinearSimilarity::NormalizationTransform N. Returned irls weights and
// mixture homography are expressed in original frame, i.e. for estimated
// model M, M' = N^(-1) M N is returned.
// Mixture model estimation customized by MotionEstimationOptions.
// Returns true if estimation was successful, otherwise returns false and sets
// the CameraMotion::type to INVALID.
// Supports computation for mixture spectrum, i.e. mixtures with different
// regularizers. For default regularizer pass
// MotionEstimationOptions::mixture_regularizer. Estimated motion will be
// stored in CameraMotion::mixture_homography_spectrum(spectrum_idx).
bool EstimateMixtureHomographyIRLS(
    int irls_rounds, bool compute_stability, float regularizer,
    int spectrum_idx,                               // 0 by default.
    const PriorFeatureWeights* prior_weights,       // optional.
    MotionEstimationThreadStorage* thread_storage,  // optional.
    RegionFlowFeatureList* feature_list, CameraMotion* camera_motion) const;

// Returns weighted variance for mean translation from feature_list (assumed
// to be in normalized coordinates). Returned variance is in unnormalized
// domain.
float TranslationVariance(const RegionFlowFeatureList& feature_list,
                          const Vector2_f& translation) const;

// Replaces each feature's irls weight by the robust min-filtered irls weight
// across its track.
void MinFilterIrlsWeightByTrack(SingleTrackClipData* clip_data) const;

// Performs filtering of irls weight across several tracking clip datas,
// to yield consistent irls weights.
void EnforceTrackConsistency(
    std::vector<SingleTrackClipData>* clip_datas) const;

// Initializes or modifies prior_weights for passed feature_list by
// biasing toward previous (filtered) IRLS weight for that feature.
// This enables temporal coherence.
void BiasLongFeatures(RegionFlowFeatureList* feature_list, MotionType type,
                      const EstimateModelOptions& model_options,
                      PriorFeatureWeights* prior_weights) const;

// Called by above function to determine the bias each feature is multiplied
// with.
void BiasFromFeatures(const RegionFlowFeatureList& feature_list,
                      MotionType type,
                      const EstimateModelOptions& model_options,
                      std::vector<float>* bias) const;
|
||||
|
||||
// Maps track index to tuple of spatial bias and number of similar
// looking long tracks.
typedef std::unordered_map<int, std::pair<float, float>> SpatialBiasMap;

// Computes the spatial bias for features in feature_list; results are stored
// in spatial_bias, keyed by track index (see SpatialBiasMap above).
void ComputeSpatialBias(MotionType type,
                        const EstimateModelOptions& model_options,
                        RegionFlowFeatureList* feature_list,
                        SpatialBiasMap* spatial_bias) const;

// Updates features weights in feature_list by temporally consistent bias.
void UpdateLongFeatureBias(MotionType type,
                           const EstimateModelOptions& model_options,
                           bool remove_terminated_tracks,
                           bool update_irls_observation,
                           RegionFlowFeatureList* feature_list) const;

// Bilateral filtering of irls weights across the passed list.
void SmoothIRLSWeights(std::deque<float>* irls) const;

// Helper function. Returns number of irls iterations for passed MotionType
// derived from current MotionEstimationOptions. Returns zero, if no
// estimation should be attempted.
int IRLSRoundsFromSettings(const MotionType& type) const;

// Partitions irls_rounds into several rounds with each having irls_per_round
// iterations each based on MotionEstimationOptions::EstimationPolicy.
// Post-condition: total_rounds * irls_per_round == irls_rounds.
void PolicyToIRLSRounds(int irls_rounds, int* total_rounds,
                        int* irls_per_round) const;

// Checks for the specified MotionType whether the estimated model is stable.
// If not, resets the features' irls weights to reset_irls_weights (optional)
// and resets the motion model in camera_motion to a lower degree of freedom
// model. In this case, CameraMotion::Type is flagged as UNSTABLE_* where *
// denotes the lower degree of freedom model.
// The model is only checked for those CameraMotions with type less than or
// equal to max_unstable_type.
void CheckModelStability(
    const MotionType& type, const CameraMotion::Type& max_unstable_type,
    const std::vector<std::vector<float>>* reset_irls_weights,
    std::vector<RegionFlowFeatureList*>* feature_lists,
    std::vector<CameraMotion>* camera_motions) const;

// Implementation function called by above function, to check for a single
// model.
void CheckSingleModelStability(const MotionType& type,
                               const CameraMotion::Type& max_unstable_type,
                               const std::vector<float>* reset_irls_weights,
                               RegionFlowFeatureList* feature_list,
                               CameraMotion* camera_motion) const;

// Projects motion model specified by type to lower degree of freedom models.
void ProjectMotionsDown(const MotionType& type,
                        std::vector<CameraMotion>* camera_motions) const;

// Filters passed feature_lists based on
// MotionEstimationOptions::irls_weight_filter.
void IRLSWeightFilter(
    std::vector<RegionFlowFeatureList*>* feature_lists) const;

// Inlier scale based on average motion magnitude and the fraction
// of the magnitude that is still considered an inlier.
// In general a residual of 1 pixel is assigned an IRLS weight of 1;
// this function returns a residual scale, such that a residual
// of distance_fraction * translation_magnitude equals an IRLS weight of 1
// if multiplied by returned scale.
float GetIRLSResidualScale(const float avg_motion_magnitude,
                           float distance_fraction) const;

// Accessor for the inverse of the feature-normalization transform.
const LinearSimilarityModel& InverseNormalizationTransform() const {
  return inv_normalization_transform_;
}

// Accessor for the feature-normalization transform.
const LinearSimilarityModel& NormalizationTransform() const {
  return normalization_transform_;
}

// Returns the domain that normalized features fall in.
Vector2_f NormalizedDomain() const { return normalized_domain_; }
|
||||
|
||||
// Returns index within the inlier mask for each feature point.
// Also returns for each bin a normalizer to account for the different number
// of features per bin during weighting.
void ComputeFeatureMask(const RegionFlowFeatureList& feature_list,
                        std::vector<int>* mask_indices,
                        std::vector<float>* bin_normalizer) const;

// Runs multiple rounds of RANSAC, resetting outlier IRLS weight to
// a low score.
// Optionally can perform temporally consistent selection if inlier_mask is
// specified.
// Returns best model across all iterations in best_model and true if
// estimated model was deemed stable.
bool GetTranslationIrlsInitialization(
    RegionFlowFeatureList* feature_list,
    const EstimateModelOptions& model_options, float avg_camera_motion,
    InlierMask* inlier_mask,  // optional.
    TranslationModel* best_model) const;

// Same as above for linear similarities.
bool GetSimilarityIrlsInitialization(
    RegionFlowFeatureList* feature_list,
    const EstimateModelOptions& model_options, float avg_camera_motion,
    InlierMask* inlier_mask,  // optional.
    LinearSimilarityModel* best_model) const;

// Computes number of inliers and strict inliers (satisfying much stricter
// threshold) for a given feature list after model fitting.
void ComputeSimilarityInliers(const RegionFlowFeatureList& feature_list,
                              int* num_inliers,
                              int* num_strict_inliers) const;

// Initializes irls weights based on setting
// MotionEstimationOptions::homography_irls_weight_initialization.
void GetHomographyIRLSCenterWeights(const RegionFlowFeatureList& feature_list,
                                    std::vector<float>* center_weights) const;

// Checks for unreasonably large accelerations between frames as specified by
// MotionEstimationOptions::StableTranslationBounds.
void CheckTranslationAcceleration(
    std::vector<CameraMotion>* camera_motions) const;

// The functions below test whether the passed model is deemed stable
// according to several heuristics set by Stable[MODEL]Bounds in
// MotionEstimationOptions.
bool IsStableTranslation(const TranslationModel& normalized_translation,
                         float translation_variance,
                         const RegionFlowFeatureList& features) const;

// Tests if passed similarity is stable. Pass number of inliers from
// ComputeSimilarityInliers.
bool IsStableSimilarity(const LinearSimilarityModel& model,
                        const RegionFlowFeatureList& features,
                        int num_inliers) const;

bool IsStableHomography(const Homography& homography,
                        float average_homography_error,
                        float inlier_coverage) const;

bool IsStableMixtureHomography(
    const MixtureHomography& homography, float min_block_inlier_coverage,
    const std::vector<float>& block_inlier_coverage) const;

// Computes fraction (in [0, 1]) of inliers w.r.t. frame area using a grid of
// occupancy cells. A feature is considered an inlier if its irls_weight is
// larger or equal to min_inlier_score.
float GridCoverage(const RegionFlowFeatureList& feature_list,
                   float min_inlier_score,
                   MotionEstimationThreadStorage* thread_storage) const;

// Estimates per scanline-block coverage of mixture. If
// assume_rolling_shutter_camera is set, low textured features are allowed to
// have higher error as registration errors would not be as visible here.
void ComputeMixtureCoverage(const RegionFlowFeatureList& feature_list,
                            float min_inlier_score,
                            bool assume_rolling_shutter_camera,
                            MotionEstimationThreadStorage* thread_storage,
                            CameraMotion* camera_motion) const;

// Returns average motion magnitude as mean of the translation magnitude from
// the 10th to 90th percentile.
void EstimateAverageMotionMagnitude(const RegionFlowFeatureList& feature_list,
                                    CameraMotion* camera_motion) const;

// Returns per iteration weight of the feature's irls weight initialization.
float IRLSPriorWeight(int iteration, int irls_rounds) const;

// Implementation function for EstimateMixtureHomographyIRLS above. Estimates
// mixture homography from features and returns true if estimation was
// non-degenerate.
bool MixtureHomographyFromFeature(
    const TranslationModel& translation, int irls_rounds, float regularizer,
    const PriorFeatureWeights* prior_weights,  // optional.
    RegionFlowFeatureList* feature_list,
    MixtureHomography* mix_homography) const;

// Determines overlay indices (spatial bin locations that are likely to be
// affected by overlays) and stores them in corresponding member in
// CameraMotion. Features that fall within these bins will be assigned a
// weight of zero.
void DetermineOverlayIndices(
    bool irls_weights_preinitialized,
    std::vector<CameraMotion>* camera_motions,
    std::vector<RegionFlowFeatureList*>* feature_lists) const;

// Determines features likely to be part of a static overlay, by setting their
// irls weight to zero.
// Returns fraction of the image domain that is considered to be occupied by
// overlays and specific overlay cell indices in overlay_indices.
float OverlayAnalysis(const std::vector<TranslationModel>& translations,
                      std::vector<RegionFlowFeatureList*>* feature_lists,
                      std::vector<int>* overlay_indices) const;

// Smooths feature's irls_weights spatio-temporally.
void PostIRLSSmoothing(
    const std::vector<CameraMotion>& camera_motions,
    std::vector<RegionFlowFeatureList*>* feature_lists) const;

// Initializes LUT for gaussian weighting. By default discretizes the domain
// [0, max_range] into 4K bins, returning scale to map from a value in the
// domain to the corresponding bin. If scale is nullptr max_range bins are
// created instead (in this case scale would be 1.0, i.e. value equals bin
// index).
void InitGaussLUT(float sigma, float max_range, std::vector<float>* lut,
                  float* scale) const;

// Performs fast volumetric smoothing / filtering of irls weights. Weights are
// expected to be already binned using BuildFeatureGrid.
void RunTemporalIRLSSmoothing(
    const std::vector<FeatureGrid<RegionFlowFeature>>& feature_grid,
    const std::vector<std::vector<int>>& feature_taps_3,
    const std::vector<std::vector<int>>& feature_taps_5,
    const std::vector<float>& frame_confidence,
    std::vector<RegionFlowFeatureView>* feature_views) const;
|
||||
|
||||
private:
  // Active estimation options.
  MotionEstimationOptions options_;
  // Dimensions of the frames features are computed from.
  int frame_width_;
  int frame_height_;

  // Transform used to normalize features and its inverse (exposed via the
  // NormalizationTransform / InverseNormalizationTransform accessors).
  LinearSimilarityModel normalization_transform_;
  LinearSimilarityModel inv_normalization_transform_;

  // Transform from normalized features to irls domain.
  LinearSimilarityModel irls_transform_;

  // Frame dimensions transformed by normalization transform.
  Vector2_f normalized_domain_;
  std::unique_ptr<MixtureRowWeights> row_weights_;

  // For initialization biased towards previous frame.
  std::unique_ptr<InlierMask> inlier_mask_;
|
||||
|
||||
// Stores current bias for each track and the last K irls observations.
struct LongFeatureBias {
  // Seeds the observation buffer with the IRLS value corresponding to
  // initial_weight (IRLS value = 1 / pixel error).
  explicit LongFeatureBias(float initial_weight)
      : bias(initial_weight), irls_values{1.0f / initial_weight} {}

  LongFeatureBias() : LongFeatureBias(1.0f) {}

  // Current bias, stores pixel error, i.e. 1 / IRLS.
  float bias = 1.0f;

  // Recently observed IRLS values; ring buffer.
  std::vector<float> irls_values;

  int total_observations = 1;
};
|
||||
|
||||
// Maps track id to LongFeatureBias.
typedef std::unordered_map<int, LongFeatureBias> LongFeatureBiasMap;

// Bias map indexed by MotionType. Mutable so bias state can be updated from
// const estimation methods (e.g. UpdateLongFeatureBias).
mutable std::vector<LongFeatureBiasMap> long_feature_bias_maps_ =
    std::vector<LongFeatureBiasMap>(static_cast<int>(MODEL_NUM_VALUES));
|
||||
|
||||
// Lookup tables and scale for FeatureBias computation.
// Each scale maps a domain value to a bin index in its companion LUT
// (presumably populated via InitGaussLUT -- confirm in the implementation).
struct FeatureBiasLUT {
  // For ComputeSpatialBias weighting.
  std::vector<float> spatial_lut;
  // Scales default to 0; previously they were left uninitialized, so reading
  // them before population was undefined behavior.
  float spatial_scale = 0.0f;
  std::vector<float> color_lut;
  float color_scale = 0.0f;

  // For BiasFromFeature computation.
  std::vector<float> bias_weight_lut;
  float bias_weight_scale = 0.0f;
};
|
||||
|
||||
FeatureBiasLUT feature_bias_lut_;

// Counts the number of consecutive duplicate frames for each motion model.
mutable std::vector<int> num_duplicate_frames_ =
    std::vector<int>(static_cast<int>(MODEL_NUM_VALUES));

// Helper classes that need access to private estimation state.
friend class EstimateMotionIRLSInvoker;
friend class IrlsInitializationInvoker;
friend class TrackFilterInvoker;
friend class MotionEstimationThreadStorage;
};
|
||||
|
||||
// Meta-data set in the header of filter streams to communicate information used
// during camera motion estimation.
struct CameraMotionStreamHeader {
  CameraMotionStreamHeader() : frame_width(0), frame_height(0) {}
  int32 frame_width;   // Frame width; 0 until set by the stream producer.
  int32 frame_height;  // Frame height; 0 until set by the stream producer.
};
|
||||
|
||||
// Direct fitting functions. Each fits the named model to the correspondences
// in features.
TranslationModel FitTranslationModel(const RegionFlowFeatureList& features);

LinearSimilarityModel FitLinearSimilarityModel(
    const RegionFlowFeatureList& features);

AffineModel FitAffineModel(const RegionFlowFeatureList& features);

Homography FitHomography(const RegionFlowFeatureList& features);

MixtureHomography FitMixtureHomography(const RegionFlowFeatureList& features);

// Templated fitting functions. Dispatches at compile time to the matching
// direct fitting function above via the explicit specializations below.
template <class Model>
Model FitModel(const RegionFlowFeatureList& features);
|
||||
|
||||
// Explicit specializations forwarding to the direct fitting functions above.
template <>
inline TranslationModel FitModel(const RegionFlowFeatureList& features) {
  return FitTranslationModel(features);
}

template <>
inline LinearSimilarityModel FitModel(const RegionFlowFeatureList& features) {
  return FitLinearSimilarityModel(features);
}

template <>
inline AffineModel FitModel(const RegionFlowFeatureList& features) {
  return FitAffineModel(features);
}

template <>
inline Homography FitModel(const RegionFlowFeatureList& features) {
  return FitHomography(features);
}

template <>
inline MixtureHomography FitModel(const RegionFlowFeatureList& features) {
  return FitMixtureHomography(features);
}
|
||||
|
||||
// Generic projection function that projects models in an arbitrary direction
|
||||
// (that is from lower to higher or vice versa) via fast model fits, without
|
||||
// any error bound checking.
|
||||
// MixtureRowWeights are only necessary for ToModel == MixtureHomography.
|
||||
template <class ToModel, class FromModel>
|
||||
ToModel ProjectViaFit(const FromModel& model, int frame_width, int frame_height,
|
||||
MixtureRowWeights* row_weights = nullptr,
|
||||
int grid_dim = 10) {
|
||||
// Build a grid of features.
|
||||
const float dx = frame_width * 1.0f / grid_dim;
|
||||
const float dy = frame_height * 1.0f / grid_dim;
|
||||
|
||||
// Create region flow from grid.
|
||||
RegionFlowFeatureList grid_features;
|
||||
grid_features.set_frame_width(frame_width);
|
||||
grid_features.set_frame_height(frame_height);
|
||||
grid_features.set_match_frame(-1);
|
||||
|
||||
for (int k = 0; k <= grid_dim; ++k) {
|
||||
for (int l = 0; l <= grid_dim; ++l) {
|
||||
auto* feat = grid_features.add_feature();
|
||||
feat->set_x(l * dx);
|
||||
feat->set_y(k * dy);
|
||||
}
|
||||
}
|
||||
|
||||
RegionFlowFeatureListViaTransform(model, &grid_features, 1.0f,
|
||||
0.0f, // Replace flow.
|
||||
false, // Don't change feature loc.
|
||||
row_weights);
|
||||
return FitModel<ToModel>(grid_features);
|
||||
}
|
||||
|
||||
} // namespace mediapipe
|
||||
|
||||
#endif // MEDIAPIPE_UTIL_TRACKING_MOTION_ESTIMATION_H_
|
||||
670
mediapipe/util/tracking/motion_estimation.proto
Normal file
|
|
@ -0,0 +1,670 @@
|
|||
// Copyright 2019 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Settings for MotionEstimation.
|
||||
syntax = "proto2";
|
||||
|
||||
package mediapipe;
|
||||
|
||||
// Note: In general for Estimation modes, the prefix are used as follows:
|
||||
// L2: minimize squared norm of error
|
||||
// IRLS: iterative reweighted least square, L2 minimization using multiple
|
||||
// iterations, downweighting outliers.
|
||||
// Next tag: 69
|
||||
message MotionEstimationOptions {
|
||||
// Specifies which camera models should be estimated, translation is always
|
||||
// estimated.
|
||||
optional bool estimate_translation_irls = 1 [default = true];
|
||||
|
||||
enum LinearSimilarityEstimation {
|
||||
ESTIMATION_LS_NONE = 0;
|
||||
ESTIMATION_LS_L2 = 1; // L2 estimation
|
||||
ESTIMATION_LS_IRLS = 4; // good performance, robust to outliers.
|
||||
|
||||
// DEPRECATED modes.
|
||||
ESTIMATION_LS_L2_RANSAC = 2; // DEPRECATED, use IRLS instead.
|
||||
ESTIMATION_LS_L1 = 3; // DEPRECATED, use IRLS instead, or static
|
||||
// function MotionEstimation::
|
||||
// EstimateLinearSimilarityL1
|
||||
}
|
||||
|
||||
optional LinearSimilarityEstimation linear_similarity_estimation = 3
|
||||
[default = ESTIMATION_LS_IRLS];
|
||||
|
||||
enum AffineEstimation {
|
||||
ESTIMATION_AFFINE_NONE = 0;
|
||||
ESTIMATION_AFFINE_L2 = 1;
|
||||
ESTIMATION_AFFINE_IRLS = 2;
|
||||
}
|
||||
|
||||
optional AffineEstimation affine_estimation = 30
|
||||
[default = ESTIMATION_AFFINE_NONE];
|
||||
|
||||
enum HomographyEstimation {
|
||||
ESTIMATION_HOMOG_NONE = 0;
|
||||
ESTIMATION_HOMOG_L2 = 1;
|
||||
ESTIMATION_HOMOG_IRLS = 2;
|
||||
}
|
||||
|
||||
optional HomographyEstimation homography_estimation = 5
|
||||
[default = ESTIMATION_HOMOG_IRLS];
|
||||
|
||||
// By default, homography estimation minimizes an objective that is not
|
||||
// strictly the L2 distance between matched points. If the flag is set, each
|
||||
// row of the linear system is scaled with the exact denominator which results
|
||||
// in an objective that minimizes the L2 distance.
|
||||
optional bool homography_exact_denominator_scaling = 53 [default = false];
|
||||
|
||||
// Per default, we use exact solver for over-determined system using
|
||||
// well-conditioned QR decomposition. For better speed, set value to false to
|
||||
// use estimation via normal equations.
|
||||
optional bool use_exact_homography_estimation = 54 [default = true];
|
||||
|
||||
// If set uses double instead of float when computing normal equations.
|
||||
optional bool use_highest_accuracy_for_normal_equations = 55 [default = true];
|
||||
|
||||
// Regularizer for perspective part of the homography. If zero, no
|
||||
// regularization is performed. Should be >= 0.
|
||||
optional float homography_perspective_regularizer = 61 [default = 0];
|
||||
|
||||
// Note: Mixture models have high DOF are much more affected by outliers
|
||||
// than models above. It is recommended that if IRLS estimation is NOT used,
|
||||
// that mixture_regularizer is increased by a factor >=3.
|
||||
enum MixtureHomographyEstimation {
|
||||
ESTIMATION_HOMOG_MIX_NONE = 0;
|
||||
ESTIMATION_HOMOG_MIX_L2 = 1;
|
||||
ESTIMATION_HOMOG_MIX_IRLS = 2; // robust to outliers.
|
||||
}
|
||||
|
||||
optional MixtureHomographyEstimation mix_homography_estimation = 12
|
||||
[default = ESTIMATION_HOMOG_MIX_NONE];
|
||||
|
||||
// If row-wise mixture models are estimated, determines number of them.
|
||||
// Note, changing number of mixtures, interpolation sigma and regularizer
|
||||
// is very likely to impact the stability analysis for mixtures and rolling
|
||||
// shutter scoring. At least MixtureHomographyBounds would need to be adjusted
|
||||
// to the new values.
|
||||
optional int32 num_mixtures = 13 [default = 10];
|
||||
|
||||
// If row-wise mixture models are estimated, determines how much each point is
|
||||
// influenced by its neighboring mixtures. Specified as relative sigma
|
||||
// (standard deviation) w.r.t. frame_height.
|
||||
optional float mixture_row_sigma = 14 [default = 0.1];
|
||||
|
||||
// Mixture estimation uses L2 regularizer to assure that adjacent mixture
|
||||
// models are similar.
|
||||
optional float mixture_regularizer = 15 [default = 1e-4];
|
||||
|
||||
// Mixtures are estimated across a spectrum of exponentially increasingly
|
||||
// regularizers. In particular the regularizer at level L is given as
|
||||
// mixture_regularizer * mixture_regularizer_base^L.
|
||||
// A maximum of 10 levels are supported (checked!).
|
||||
// Note: When changing the number of levels you probably want to adapt the
|
||||
// MotionStabilizationOptions::rolling_shutter_increment value as well,
|
||||
// as the number of levels directly controls the highest threshold for
|
||||
// the rolling shutter index analysis.
|
||||
optional float mixture_regularizer_levels = 42 [default = 3];
|
||||
optional float mixture_regularizer_base = 43 [default = 2.2];
|
||||
optional int32 mixture_rs_analysis_level = 44 [default = 2];
|
||||
|
||||
// IRLS rounds to down-weight outliers (default across all models).
|
||||
// Note: IRLS in combination with full mixture models (as opposed to the
|
||||
// default reduced ones) is somewhat expensive.
|
||||
optional int32 irls_rounds = 17 [default = 10];
|
||||
|
||||
// If set to > 0 (always needs to be less than 1.0), influence of supplied prior
// irls weights is linearly decreased from the specified prior scale (weight
|
||||
// 1.0) to prior_scale. Effectively, biases the solution to the
|
||||
// supplied prior features.
|
||||
// Note: Without irls_weights_preinitialized set to true, this option is
|
||||
// effectively a no op.
|
||||
// TODO: Retire this option.
|
||||
optional float irls_prior_scale = 50 [default = 0.2];
|
||||
|
||||
// Determine how to normalize irls weights w.r.t. average motion magnitude.
|
||||
// In general a residual of 1 pixel is assigned an IRLS weight of 1.
|
||||
// However as larger motions in general are affected by a larger error, we
|
||||
// normalize irls weights, such that a residual
|
||||
// of distance of irls_motion_magnitude_fraction times
|
||||
// <average translation magnitude> equals an IRLS weight of 1.
|
||||
// Must be larger than zero.
|
||||
optional float irls_motion_magnitude_fraction = 31 [default = 0.08];
|
||||
|
||||
// Scale that is applied for mixture (where error is expected to be bigger).
|
||||
optional float irls_mixture_fraction_scale = 68 [default = 1.5];
|
||||
|
||||
// By default, irls weight of all features are set uniformly to one before
|
||||
// estimating EACH model, refining them in subsequent irls iterations.
|
||||
// If flag below is set, input irls weights are used instead for each motion
|
||||
// model.
|
||||
optional bool irls_weights_preinitialized = 39 [default = false];
|
||||
|
||||
// If weights are pre-initialized optionally min filter weights along track
|
||||
// ids when long tracks are used. This can be used to consistently label
|
||||
// outliers in time before estimation.
|
||||
optional bool filter_initialized_irls_weights = 67 [default = false];
|
||||
|
||||
// If activated, irls weight of outlier features are reset. Outliers are
|
||||
// defined as those features, for which the best model fit after #rounds
|
||||
// iterations of RANSAC did NOT yield an error lower than cutoff.
|
||||
// Only applies to translation and similarity estimation.
|
||||
message IrlsOutlierInitialization {
|
||||
optional bool activated = 1 [default = false];
|
||||
|
||||
optional int32 rounds = 2 [default = 100];
|
||||
optional float cutoff = 3 [default = 0.003];
|
||||
}
|
||||
optional IrlsOutlierInitialization irls_initialization = 56;
|
||||
|
||||
// Normalizes feature's irls weights prior to estimation such that
|
||||
// feature in high density areas are downweighted. Multiplicative in case
|
||||
// irls_weights_preinitialized is set to true.
|
||||
optional bool feature_density_normalization = 62 [default = false];
|
||||
|
||||
// A regular grid of size feature_mask_size x feature_mask_size
|
||||
// is used to normalize features w.r.t. their density.
|
||||
optional int32 feature_mask_size = 63 [default = 10];
|
||||
|
||||
// In addition to above outlier and density initialization, long features
|
||||
// that are present for a specified ratio of the analysis interval can be
|
||||
// upweighted. This greatly improves temporal consistency.
|
||||
message LongFeatureInitialization {
|
||||
optional bool activated = 1 [default = false];
|
||||
|
||||
// Tracks with a length greater than or equal to the specified percentile
|
||||
// are upweighted by the specified upweight_multiplier.
|
||||
optional float min_length_percentile = 2 [default = 0.95];
|
||||
|
||||
// Features passing above test have their irls weight increased by the
|
||||
// specified multiplier prior to estimation.
|
||||
optional float upweight_multiplier = 3 [default = 5];
|
||||
}
|
||||
|
||||
optional LongFeatureInitialization long_feature_initialization = 66;
|
||||
|
||||
// Irls initialization can be performed in a temporally dependent manner
// (if estimation_policy() == TEMPORALLY_DEPENDENT), where
// the previous frame's motion estimation biases the IrlsInitialization of the
// currently processed frame. In particular the location and magnitude of
// inliers is used during the RANSAC selection stage, to favor those features
// that agree with the prior, represented as confidence mask of inliers
// (using same dimension as above feature_mask_size).
// After estimation, the prior is updated.
message IrlsMaskOptions {
  // Amount prior is decayed after each iteration.
  optional float decay = 2 [default = 0.7];

  // Score that each inlier adds to the current prior. Specified w.r.t. total
  // number of features, i.e. each feature increases a bin's score by
  // inlier_score.
  optional float inlier_score = 3 [default = 0.4];

  // Each inlier scores at least this value regardless of the inlier mask
  // (additive).
  optional float base_score = 4 [default = 0.2];

  // Motions are scored relative to previous motion. Threshold denotes
  // absolute minimum of denominator.
  optional float min_translation_norm = 5 [default = 2e-3];

  // Translation is updated in every step by blending it with the previous
  // estimated translation. (alpha is within 0 to 1, where 0 indicates to use
  // only measured translation, i.e. no blending).
  optional float translation_blend_alpha = 6 [default = 0.7];

  // Every time translation is updated, prior (in [0, 1]) is increased by the
  // specified amount.
  optional float translation_prior_increase = 7 [default = 0.2];

  // Deprecated fields.
  extensions 1;
}

optional IrlsMaskOptions irls_mask_options = 57;

// Describes how long feature tracks are leveraged for joint estimation across
// many frames.
message JointTrackEstimationOptions {
  // For each frame-pair motion model, describing the motion between frame
  // I and I - 1, estimate in addition several additional motion
  // models along long feature tracks describing the motion between frame
  // I and I - k * motion_stride (additional models are not output,
  // but help to filter irls weights).
  // Specifies total number of estimated motion models per frame-pair. Must be
  // greater than zero.
  optional int32 num_motion_models = 1 [default = 3];

  // Spacing in frames for additional motion models.
  optional int32 motion_stride = 2 [default = 15];

  // If set, performs temporal smoothing across frames of the obtained irls
  // weights.
  optional bool temporal_smoothing = 3 [default = false];

  // TODO: Specify which filter is used during temporal smoothing.
  // TODO: Limit joint estimation to static scenes
  //       (don't violate rigidity of alignment in case of wide
  //       baselines). Adapt stride and num_motion_models
  //       accordingly.
}

optional JointTrackEstimationOptions joint_track_estimation = 59;
|
||||
|
||||
// Options being used to bias IRLS features if estimation mode
// TEMPORAL_LONG_FEATURE_BIAS is being used.
// Next Tag: 15
message LongFeatureBiasOptions {
  // Estimation is performed multiple times, alternating between model
  // estimation and smooth temporal feature biasing for the specified number
  // of rounds.
  optional int32 total_rounds = 13 [default = 1];

  // Controls how fast the bias for a track gets updated, in case feature is
  // an inlier. Use higher values for less decay of background motion over
  // time.
  optional float inlier_bias = 1 [default = 0.98];

  // Same as above for outliers (or features with low prior), i.e. those that
  // got recently seeded.
  optional float outlier_bias = 2 [default = 0.7];

  // Number of elements after which we deem estimation to be stable.
  // Used to control weight of bias if fewer than the specified number have
  // been observed. Also used as maximum ring buffer size (only most recent
  // number of observations are kept). Must be > 0.
  optional int32 num_irls_observations = 3 [default = 10];

  // Change in irls weight magnitude (from outlier to inlier) above which we
  // reset the current bias.
  optional float max_irls_change_ratio = 4 [default = 10.0];

  // Irls weight above which we consider it to be an inlier for bias update
  // purposes (see above inlier and outlier bias). By default, outliers are
  // allowed to update their bias faster than inliers. Must be > 0.
  optional float inlier_irls_weight = 5 [default = 0.2];

  // Standard deviation used during feature initialization. Current bias of a
  // track is used to pre-weight features via gaussian weighting with
  // specified standard deviation.
  optional float bias_stdev = 12 [default = 1.0];

  // When seeding new tracks (on the first frame), we bilaterally pool
  // neighboring feature biases as seed. Details are controlled by options
  // below. If false, the feature's estimation error is used instead
  // (faster, but less spatially smooth).
  // If activated it is advised to use a patch descriptor radius of at least
  // 20 pixels.
  optional bool use_spatial_bias = 6 [default = true];

  // Newly observed tracks' biases are seeded by similar looking features
  // in close spatial proximity. For efficiency a grid is used to determine
  // proximity.
  // Grid size in normalized coordinates w.r.t. frame domain.
  optional float grid_size = 7 [default = 0.04];

  // Sigmas for combining feature biases.
  optional float spatial_sigma = 8 [default = 0.02];
  optional float color_sigma = 9 [default = 20.0];

  // Defines what we consider to be a long track. Features spawned around
  // locations of similar looking long tracks are considered to have
  // high prior, e.g. their initialization is given more weight.
  optional int32 long_track_threshold = 10 [default = 30];

  // Determines which fraction of long tracks is considered to be sufficient
  // for a highly confident bias seed.
  optional float long_track_confidence_fraction = 11 [default = 0.25];

  // If activated, uses the irls weights from the estimation of the lower
  // degree of freedom model to seed the bias of the higher degree of freedom
  // model. This improves rigidity of the computed motion.
  optional bool seed_priors_from_bias = 14 [default = false];
}

optional LongFeatureBiasOptions long_feature_bias_options = 64;
|
||||
|
||||
// Controls how multiple models via EstimateMotionsParallel are estimated.
enum EstimationPolicy {
  INDEPENDENT_PARALLEL = 1;  // Models are estimated independently across
                             // frames in parallel.
  TEMPORAL_IRLS_MASK = 2;    // Previous frame's estimation biases
                             // current one, controlled via above
                             // IrlsMaskOptions.
  TEMPORAL_LONG_FEATURE_BIAS = 4;  // Frame's estimation is biased along
                                   // long features, controlled via above
                                   // LongFeatureBiasOptions.
  JOINTLY_FROM_TRACKS = 3;  // Estimation is performed jointly over
                            // chunks of frames, exercising a consistent
                            // irls weight PER track. Expects as input
                            // RegionFlowFeatureList's with long_tracks.
                            // Controlled via above
                            // JointTrackEstimationOptions.
}
optional EstimationPolicy estimation_policy = 58
    [default = INDEPENDENT_PARALLEL];

optional int32 coverage_grid_size = 51 [default = 10];

// Degree of freedom of estimated homography mixtures. If desired, specific
// parts of the homography can be held constant across the mixture.
// For fast draft TRANSLATION_MIXTURE is recommended, for high quality
// SKEW_ROTATION_MIXTURE.
enum MixtureModelMode {
  FULL_MIXTURE = 0;           // 8 dof * num_mixtures
  TRANSLATION_MIXTURE = 1;    // 6 dof + 2 dof * num_mixtures
  SKEW_ROTATION_MIXTURE = 2;  // 4 dof + 4 dof * num_mixtures
}

optional MixtureModelMode mixture_model_mode = 23
    [default = SKEW_ROTATION_MIXTURE];

// If specified, only features that agree with the estimated linear similarity
// will be used to estimate the homography.
// If set, linear_similarity_estimation can not be ESTIMATION_NONE! (checked)
optional bool use_only_lin_sim_inliers_for_homography = 6 [default = true];

// Max. deviation to be considered an inlier w.r.t. estimated similarity for
// above flag. This value is set w.r.t. normalized frame diameter.
// TODO: Should take GetIRLSResidualScale into account.
optional float lin_sim_inlier_threshold = 20 [default = 0.003];
|
||||
|
||||
// If any parameter of the input flow or estimated translation exceeds these
// thresholds we deem the motion INVALID.
message TranslationBounds {
  // Absolute minimum of features present.
  optional int32 min_features = 1 [default = 3];

  // Max magnitude of the translation expressed w.r.t. frame diameter.
  optional float frac_max_motion_magnitude = 2 [default = 0.15];

  // Motion magnitude is only tested for if standard deviation of estimated
  // translation exceeds threshold.
  optional float max_motion_stdev_threshold = 4 [default = 0.01];

  // Max standard deviation of the estimated translation (normalized to frame
  // diameter).
  optional float max_motion_stdev = 3 [default = 0.065];

  // Maximum acceleration between frames. Specified relative to minimum
  // velocity across two adjacent frames (absolute minimum of 0.001 is
  // enforced, ~1 pix for 480p).
  // If exceeded for one frame, the whole batch passed to
  // EstimateMotionsParallel is labeled unstable.
  optional float max_acceleration = 5 [default = 20.0];
}

optional TranslationBounds stable_translation_bounds = 32;

// If any test/bound is violated, the motion is deemed UNSTABLE.
message SimilarityBounds {
  // Input frame has to be labeled stable, i.e. enough features and coverage
  // present.
  optional bool only_stable_input = 1 [default = true];

  // Minimum number of inlier features (absolute and as fraction of total
  // number of features).
  // TODO: Dataset run setting this to 0.15
  optional float min_inlier_fraction = 2 [default = 0.2];
  optional float min_inliers = 3 [default = 30];

  // Bounds on valid similarities. We use larger values compared to
  // homographies. Note: Bounds are necessary, to guarantee invertibility
  // of the resulting similarity.
  optional float lower_scale = 4 [default = 0.8];
  optional float upper_scale = 5 [default = 1.25];     // 1 / 0.8.
  optional float limit_rotation = 6 [default = 0.25];  // 15 degrees.

  // Thresholds for a feature to be considered inlier w.r.t similarity
  // transform, expressed in terms of pixel residual error. Max of absolute
  // and fractional thresholds is used.
  // Ratio of inliers that pass regular and strict thresholds are stored in
  // CameraMotion.
  //
  // TODO: Just use lin_sim_inlier_threshold directly, however that
  // recomputes the error, and requires regression testing. Using an extra
  // fractional inlier threshold for now.
  //
  // Absolute in pixels.
  optional float inlier_threshold = 7 [default = 4.0];
  // Scaled by frame diameter.
  optional float frac_inlier_threshold = 8 [default = 0];

  // TODO: Revisit after frame selection change.
  // Absolute in pixels.
  optional float strict_inlier_threshold = 9 [default = 0.5];
}

optional SimilarityBounds stable_similarity_bounds = 33;
|
||||
|
||||
// If any parameter of the estimated homography exceeds these bounds,
// we deem it UNSTABLE_SIM and use estimated similarity instead.
message HomographyBounds {
  optional float lower_scale = 1 [default = 0.8];
  optional float upper_scale = 2 [default = 1.25];     // 1 / 0.8.
  optional float limit_rotation = 3 [default = 0.25];  // 15 degrees.
  optional float limit_perspective = 4 [default = 0.0004];

  // Inlier coverage is only tested for if average homography error exceeds
  // registration_thresholds. Max of the following two thresholds is used.
  //
  // Absolute in pixels.
  optional float registration_threshold = 5 [default = 0.1];
  // Scaled by frame diameter.
  optional float frac_registration_threshold = 8 [default = 0];

  // Minimum fraction of inlier features w.r.t. frame area.
  optional float min_inlier_coverage = 6 [default = 0.3];

  // Grid coverage inlier threshold. Pixel errors below this
  // threshold are considered inliers. Defined w.r.t. frame diameter, approx.
  // 1.5 for 16:9 SD video (480p), i.e. threshold is multiplied by frame
  // diameter.
  optional float frac_inlier_threshold = 7 [default = 2.0e-3];
}

optional HomographyBounds stable_homography_bounds = 11;

// If any parameter of the estimated homography mixture exceeds these bounds,
// we deem it UNSTABLE_HOMOG and use the estimated homography instead.
message MixtureHomographyBounds {
  // Minimum fraction of inlier features w.r.t. block area.
  optional float min_inlier_coverage = 1 [default = 0.4];

  // Each block is tested to be stable, regarding the outliers.
  // A frame is labeled unstable, if more or equal than the specified adjacent
  // blocks are labeled outliers.
  optional int32 max_adjacent_outlier_blocks = 2 [default = 5];

  // Maximum number of adjacent empty blocks (no inliers).
  optional int32 max_adjacent_empty_blocks = 3 [default = 3];

  // Grid coverage inlier threshold. See identical parameter in
  // HomographyBounds.
  optional float frac_inlier_threshold = 7 [default = 2.5e-3];
}

optional MixtureHomographyBounds stable_mixture_homography_bounds = 34;
|
||||
|
||||
// Scale for stricter coverage evaluation. Used for rolling shutter guess
// computation, by only using high quality inliers. Larger values reflect
// stricter coverage.
// Specifically, when computing coverage via GridCoverage call,
// frac_inlier_threshold is reduced (divided) by specified scale below.
optional float strict_coverage_scale = 41 [default = 1.333];

// By default frames with zero trackable features (e.g. at the beginning,
// empty frame or shot boundary) are set identity model but still labeled as
// valid. If set to false, these frames are flagged as invalid, which can be
// useful to locate shot boundaries, etc.
optional bool label_empty_frames_as_valid = 22 [default = true];

// Settings for temporal smoothing of irls weights in optional
// post-processing step.
// In normalized coordinates w.r.t. frame domain.
optional float feature_grid_size = 24 [default = 0.05];
optional float spatial_sigma = 25 [default = 0.01];

// Frame diameter across which smoothing is performed.
optional int32 temporal_irls_diameter = 26 [default = 20];
optional float temporal_sigma = 27 [default = 5];  // in frames.

// Bilateral weight (for un-normalized color domain [0, .. 255]).
optional float feature_sigma = 28 [default = 30.0];

// If set to false 3 taps are used.
optional bool filter_5_taps = 29 [default = false];

// If set, during temporal smoothing, each frame is weighted by its
// confidence, defined as the square coverage (or square mean mixture
// coverage). Therefore, low confidence fits do not erroneously propagate
// over time. In addition, if the confidence is below the specified
// confidence_threshold (relative to the maximum coverage observed in the
// test interval), irls weights are reset to 1, i.e. biased to
// agree with the (unknown) background motion.
optional bool frame_confidence_weighting = 48 [default = true];
optional float reset_confidence_threshold = 49 [default = 0.4];

// Filters irls weights before smoothing them according to specified
// operation.
enum IRLSWeightFilter {
  IRLS_FILTER_NONE = 0;
  IRLS_FILTER_TEXTURE = 1;
  IRLS_FILTER_CORNER_RESPONSE = 2;
}

// Calls TextureFilteredRegionFlowFeatureIRLSWeights on computed irls weights
// before smoothing them.
optional IRLSWeightFilter irls_weight_filter = 35
    [default = IRLS_FILTER_NONE];

// Attempts to detect overlays, i.e. static elements burned-into the video
// that potentially corrupt motion estimation.
optional bool overlay_detection = 36 [default = false];
// Overlay detection is performed over specified number of frames.
optional int32 overlay_analysis_chunk_size = 37 [default = 8];

message OverlayDetectionOptions {
  // Potential overlay features are aggregated over a mask with cells
  // mask_size x mask_size as specified below.
  optional int32 analysis_mask_size = 1 [default = 10];

  // There are two types of candidates of overlay features, strict and loose
  // ones. Strict candidates are used to determine if a grid bin should be
  // considered an overlay cell. If the grid is an overlay cell, *all*
  // candidates, i.e. strict and loose ones will be flagged as overlay
  // features by setting their corresponding irls weight to zero.

  // A feature is a strict overlay feature if its motion is less than
  // near_zero_motion AND less than max_translation_ratio times the
  // estimated translation magnitude at that frame AND its texturedness is
  // sufficiently high.
  optional float strict_near_zero_motion = 2 [default = 0.2];
  optional float strict_max_translation_ratio = 3 [default = 0.2];
  // Minimum texturedness of a feature to be considered an overlay.
  // Motivation: Overlays are mostly text or graphics, i.e. have visually
  // distinguished features.
  optional float strict_min_texturedness = 5 [default = 0.1];

  // A feature is a loose overlay feature if its motion is less than
  // loose_near_zero_motion.
  optional float loose_near_zero_motion = 4 [default = 1.0];

  // Minimum fraction of strict overlay features within a cell to be
  // considered an overlay cell.
  optional float overlay_min_ratio = 6 [default = 0.3];

  // Absolute minimum number of strict overlay features within a cell to be
  // considered an overlay cell.
  optional float overlay_min_features = 7 [default = 10];
}

optional OverlayDetectionOptions overlay_detection_options = 38;
|
||||
|
||||
// Shot boundaries are introduced in 3 different scenarios:
// a) Frame has zero tracked features w.r.t. previous frame.
// b) Estimated motion is deemed invalid (CameraMotion::INVALID).
// c) Visual consistency is above threshold of two adjacent frames.
message ShotBoundaryOptions {
  // After cases a & b are determined from features/camera motion, they
  // are verified by ensuring visual consistency is above specified threshold,
  // if visual consistency has been computed. Only if this is the case will
  // the frame be labeled as shot boundary. Motivation is, that there should
  // always be some (even small) measurable increase in the frame difference
  // at a shot boundary.
  // Verification is only performed if visual_consistency has been evaluated
  // (value >= 0).
  optional float motion_consistency_threshold = 1 [default = 0.02];

  // Threshold for case c). Sometimes, motion estimation will miss shot
  // boundaries. We define shot boundaries for which the visual consistency is
  // higher than the specified threshold for at least two adjacent frames.
  optional float appearance_consistency_threshold = 2 [default = 0.075];
}

optional ShotBoundaryOptions shot_boundary_options = 60;

// By default, irls weights of each feature are overwritten with refined irls
// weights of the last iteration for the highest degree of freedom model that
// was estimated stable. If set to false, original irls weights are retained.
// Note: If overlay detection is activated, features deemed overlays
// have their irls weight set to zero, regardless of this setting.
// Similarly, an IRLSWeightFilter is applied if requested, regardless
// of this setting.
optional bool output_refined_irls_weights = 40 [default = true];

// Weight initialization for homography estimation. This is to bias homography
// estimation either to foreground or background.
enum HomographyIrlsWeightInitialization {
  IRLS_WEIGHT_CONSTANT_ONE = 1;     // Constant, treat all features equally.
  IRLS_WEIGHT_CENTER_GAUSSIAN = 2;  // Weight features in the center higher.
                                    // Tends to lock onto foreground.
  IRLS_WEIGHT_PERIMETER_GAUSSIAN = 3;  // Weight features around the
                                       // perimeter higher, tends to lock onto
                                       // background and improves rigidity of
                                       // the perspective degree of freedom.
}

// IRLS weights for homography estimation are initialized based on the
// specified options. If option irls_weights_preinitialized is set,
// weights are multiplied instead of reset.
optional HomographyIrlsWeightInitialization
    homography_irls_weight_initialization = 45
    [default = IRLS_WEIGHT_PERIMETER_GAUSSIAN];

// If set to false use L1 norm irls weights instead of L0 norm irls weights.
optional bool irls_use_l0_norm = 46 [default = true];

// IRLS weights are determined in a limited domain (in particular helpful
// for stabilization analysis on HD videos).
// TODO: Make this the default.
optional bool domain_limited_irls_scaling = 65 [default = false];

// For comparison and debugging purposes. Simply estimates requested models
// without checking their stability via the stable_*_bounds parameters.
// However, invertibility is still checked to avoid invalid data being passed
// to later stages of the stabilizer.
optional bool deactivate_stable_motion_estimation = 47 [default = false];

// Projects higher order motions if estimated correctly down to lower order
// motions, therefore replacing the previously estimated motions.
optional bool project_valid_motions_down = 52 [default = false];

// DEPRECATED functionality. Use static functions as indicated instead.
//
// Non-linear similarity, use MotionEstimation::EstimateSimilarityModelL2.
optional bool estimate_similarity = 2 [deprecated = true];

// Deprecated fields.
extensions 7, 8, 16;
|
||||
}
|
||||
924
mediapipe/util/tracking/motion_models.cc
Normal file
|
|
@ -0,0 +1,924 @@
|
|||
// Copyright 2019 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "mediapipe/util/tracking/motion_models.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include <cmath>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
|
||||
#include "Eigen/Core"
|
||||
#include "Eigen/Dense"
|
||||
#include "absl/strings/str_format.h"
|
||||
|
||||
// Set to true to use Catmull-Rom mixture weights instead of Gaussian weights
// for homography mixture estimation.
bool flags_catmull_rom_mixture_weights = false;
|
||||
|
||||
namespace mediapipe {
|
||||
|
||||
std::string ModelAdapter<TranslationModel>::ToString(
|
||||
const TranslationModel& model) {
|
||||
return absl::StrFormat("%7f", model.dx()) + " " +
|
||||
absl::StrFormat("%7f", model.dy());
|
||||
}
|
||||
|
||||
AffineModel ModelAdapter<TranslationModel>::ToAffine(
|
||||
const TranslationModel& model) {
|
||||
return AffineAdapter::FromArgs(model.dx(), model.dy(), 1, 0, 0, 1);
|
||||
}
|
||||
|
||||
TranslationModel ModelAdapter<TranslationModel>::FromAffine(
|
||||
const AffineModel& model) {
|
||||
DCHECK_EQ(model.a(), 1);
|
||||
DCHECK_EQ(model.b(), 0);
|
||||
DCHECK_EQ(model.c(), 0);
|
||||
DCHECK_EQ(model.d(), 1);
|
||||
|
||||
return TranslationAdapter::FromArgs(model.dx(), model.dy());
|
||||
}
|
||||
|
||||
Homography ModelAdapter<TranslationModel>::ToHomography(
|
||||
const TranslationModel& model) {
|
||||
return HomographyAdapter::FromAffine(ToAffine(model));
|
||||
}
|
||||
|
||||
TranslationModel ModelAdapter<TranslationModel>::FromHomography(
|
||||
const Homography& model) {
|
||||
return TranslationAdapter::FromAffine(HomographyAdapter::ToAffine(model));
|
||||
}
|
||||
|
||||
void ModelAdapter<TranslationModel>::GetJacobianAtPoint(const Vector2_f& pt,
|
||||
float* jacobian) {
|
||||
DCHECK(jacobian);
|
||||
jacobian[0] = 1;
|
||||
jacobian[1] = 0;
|
||||
jacobian[2] = 0;
|
||||
jacobian[3] = 1;
|
||||
}
|
||||
|
||||
TranslationModel ModelAdapter<TranslationModel>::NormalizationTransform(
|
||||
float frame_width, float frame_height) {
|
||||
// Independent of frame size.
|
||||
return TranslationModel();
|
||||
}
|
||||
|
||||
TranslationModel ModelAdapter<TranslationModel>::Maximum(
|
||||
const TranslationModel& lhs, const TranslationModel& rhs) {
|
||||
return FromArgs(std::max(lhs.dx(), rhs.dx()), std::max(lhs.dy(), rhs.dy()));
|
||||
}
|
||||
|
||||
TranslationModel ModelAdapter<TranslationModel>::ProjectFrom(
|
||||
const LinearSimilarityModel& model, float frame_width, float frame_height) {
|
||||
return LinearSimilarityAdapter::ProjectToTranslation(model, frame_width,
|
||||
frame_height);
|
||||
}
|
||||
|
||||
TranslationModel ModelAdapter<TranslationModel>::ProjectFrom(
|
||||
const AffineModel& model, float frame_width, float frame_height) {
|
||||
return ProjectFrom(AffineAdapter::ProjectToLinearSimilarity(
|
||||
model, frame_width, frame_height),
|
||||
frame_width, frame_height);
|
||||
}
|
||||
|
||||
TranslationModel ModelAdapter<TranslationModel>::ProjectFrom(
|
||||
const Homography& model, float frame_width, float frame_height) {
|
||||
return ProjectFrom(
|
||||
HomographyAdapter::ProjectToAffine(model, frame_width, frame_height),
|
||||
frame_width, frame_height);
|
||||
}
|
||||
|
||||
// Similarity model.
|
||||
SimilarityModel ModelAdapter<SimilarityModel>::FromArgs(float dx, float dy,
|
||||
float scale,
|
||||
float rotation) {
|
||||
SimilarityModel model;
|
||||
model.set_dx(dx);
|
||||
model.set_dy(dy);
|
||||
model.set_scale(scale);
|
||||
model.set_rotation(rotation);
|
||||
return model;
|
||||
}
|
||||
|
||||
SimilarityModel ModelAdapter<SimilarityModel>::FromFloatPointer(
|
||||
const float* args, bool identity_parametrization) {
|
||||
DCHECK(args);
|
||||
SimilarityModel model;
|
||||
model.set_dx(args[0]);
|
||||
model.set_dy(args[1]);
|
||||
model.set_scale((identity_parametrization ? 1.f : 0.f) + args[2]);
|
||||
model.set_rotation(args[3]);
|
||||
return model;
|
||||
}
|
||||
|
||||
SimilarityModel ModelAdapter<SimilarityModel>::FromDoublePointer(
|
||||
const double* args, bool identity_parametrization) {
|
||||
DCHECK(args);
|
||||
SimilarityModel model;
|
||||
model.set_dx(args[0]);
|
||||
model.set_dy(args[1]);
|
||||
model.set_scale((identity_parametrization ? 1.f : 0.f) + args[2]);
|
||||
model.set_rotation(args[3]);
|
||||
return model;
|
||||
}
|
||||
|
||||
Vector2_f ModelAdapter<SimilarityModel>::TransformPoint(
|
||||
const SimilarityModel& model, const Vector2_f& pt) {
|
||||
// Setup rotation part.
|
||||
const float c_r = std::cos(model.rotation());
|
||||
const float c_s = std::sin(model.rotation());
|
||||
const Vector2_f pt_rot(c_r * pt.x() - c_s * pt.y(),
|
||||
c_s * pt.x() + c_r * pt.y());
|
||||
|
||||
return pt_rot * model.scale() + Vector2_f(model.dx(), model.dy());
|
||||
}
|
||||
|
||||
SimilarityModel ModelAdapter<SimilarityModel>::Invert(
|
||||
const SimilarityModel& model) {
|
||||
bool success = true;
|
||||
const SimilarityModel result = InvertChecked(model, &success);
|
||||
if (!success) {
|
||||
LOG(ERROR) << "Model not invertible. Returning identity.";
|
||||
return SimilarityModel();
|
||||
} else {
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
SimilarityModel ModelAdapter<SimilarityModel>::InvertChecked(
|
||||
const SimilarityModel& model, bool* success) {
|
||||
SimilarityModel inv_model;
|
||||
inv_model.set_rotation(-model.rotation());
|
||||
|
||||
if (fabs(model.scale()) > kDetInvertibleEps) {
|
||||
inv_model.set_scale(1.0 / model.scale());
|
||||
*success = true;
|
||||
} else {
|
||||
*success = false;
|
||||
VLOG(1) << "Model is not invertible.";
|
||||
return SimilarityModel();
|
||||
}
|
||||
|
||||
// Rotate and scale [dx, dy] by inv_model.
|
||||
const float c_r = std::cos(inv_model.rotation());
|
||||
const float c_s = std::sin(inv_model.rotation());
|
||||
const Vector2_f pt_rot(c_r * model.dx() - c_s * model.dy(),
|
||||
c_s * model.dx() + c_r * model.dy());
|
||||
|
||||
const Vector2_f inv_trans = -pt_rot * inv_model.scale();
|
||||
|
||||
inv_model.set_dx(inv_trans.x());
|
||||
inv_model.set_dy(inv_trans.y());
|
||||
|
||||
return inv_model;
|
||||
}
|
||||
|
||||
SimilarityModel ModelAdapter<SimilarityModel>::Compose(
    const SimilarityModel& lhs, const SimilarityModel& rhs) {
  // Under composition, scales multiply and rotation angles add.
  SimilarityModel composed;
  composed.set_scale(lhs.scale() * rhs.scale());
  composed.set_rotation(lhs.rotation() + rhs.rotation());

  // Map the rhs translation through the lhs rotation and scale, then offset
  // by the lhs translation.
  const float cos_l = std::cos(lhs.rotation());
  const float sin_l = std::sin(lhs.rotation());
  const Vector2_f rotated(cos_l * rhs.dx() - sin_l * rhs.dy(),
                          sin_l * rhs.dx() + cos_l * rhs.dy());

  const Vector2_f translation =
      rotated * lhs.scale() + Vector2_f(lhs.dx(), lhs.dy());

  composed.set_dx(translation.x());
  composed.set_dy(translation.y());
  return composed;
}
|
||||
|
||||
float ModelAdapter<SimilarityModel>::GetParameter(const SimilarityModel& model,
                                                  int id) {
  // Parameter order: 0 -> dx, 1 -> dy, 2 -> scale, 3 -> rotation.
  if (id == 0) return model.dx();
  if (id == 1) return model.dy();
  if (id == 2) return model.scale();
  if (id == 3) return model.rotation();

  LOG(FATAL) << "Parameter id is out of bounds";
  return 0;
}
|
||||
|
||||
std::string ModelAdapter<SimilarityModel>::ToString(
    const SimilarityModel& model) {
  // Space-separated parameters: dx dy scale rotation.
  return absl::StrFormat("%7f %7f %7f %7f", model.dx(), model.dy(),
                         model.scale(), model.rotation());
}
|
||||
|
||||
SimilarityModel ModelAdapter<SimilarityModel>::NormalizationTransform(
    float frame_width, float frame_height) {
  // Normalize by the frame diagonal.
  const float diagonal = std::hypot(frame_width, frame_height);
  DCHECK_NE(diagonal, 0);
  return SimilarityAdapter::FromArgs(0, 0, 1.0 / diagonal, 0);
}
|
||||
|
||||
TranslationModel ModelAdapter<SimilarityModel>::ProjectToTranslation(
    const SimilarityModel& model, float frame_width, float frame_height) {
  // Delegate via the linear-similarity representation of the model.
  const LinearSimilarityModel linear =
      LinearSimilarityAdapter::FromSimilarity(model);
  return LinearSimilarityAdapter::ProjectToTranslation(linear, frame_width,
                                                       frame_height);
}
|
||||
|
||||
std::string ModelAdapter<LinearSimilarityModel>::ToString(
    const LinearSimilarityModel& model) {
  // Space-separated parameters: dx dy a b.
  return absl::StrFormat("%7f %7f %7f %7f", model.dx(), model.dy(), model.a(),
                         model.b());
}
|
||||
|
||||
AffineModel ModelAdapter<LinearSimilarityModel>::ToAffine(
    const LinearSimilarityModel& model) {
  // The linear similarity matrix [a -b; b a] maps onto the affine layout
  // [a b; c d].
  const float a = model.a();
  const float b = model.b();
  return AffineAdapter::FromArgs(model.dx(), model.dy(), a, -b, b, a);
}
|
||||
|
||||
LinearSimilarityModel ModelAdapter<LinearSimilarityModel>::FromAffine(
    const AffineModel& model) {
  // Only valid when the affine model has similarity structure [a b; -b a].
  DCHECK_EQ(model.a(), model.d());
  DCHECK_EQ(model.b(), -model.c());

  return LinearSimilarityAdapter::FromArgs(model.dx(), model.dy(), model.a(),
                                           -model.b());
}
|
||||
|
||||
Homography ModelAdapter<LinearSimilarityModel>::ToHomography(
    const LinearSimilarityModel& model) {
  // Embed via the intermediate affine representation.
  const AffineModel affine = ToAffine(model);
  return HomographyAdapter::FromAffine(affine);
}
|
||||
|
||||
LinearSimilarityModel ModelAdapter<LinearSimilarityModel>::FromHomography(
    const Homography& model) {
  // Down-convert via the intermediate affine representation.
  const AffineModel affine = HomographyAdapter::ToAffine(model);
  return LinearSimilarityAdapter::FromAffine(affine);
}
|
||||
|
||||
SimilarityModel ModelAdapter<LinearSimilarityModel>::ToSimilarity(
    const LinearSimilarityModel& model) {
  // Recover scale as the norm of (a, b) and rotation as its angle.
  const float magnitude = std::hypot(model.a(), model.b());
  const float angle = std::atan2(model.b(), model.a());
  return SimilarityAdapter::FromArgs(model.dx(), model.dy(), magnitude, angle);
}
|
||||
|
||||
LinearSimilarityModel ModelAdapter<LinearSimilarityModel>::FromSimilarity(
    const SimilarityModel& model) {
  // a = scale * cos(rotation), b = scale * sin(rotation).
  const float a = model.scale() * std::cos(model.rotation());
  const float b = model.scale() * std::sin(model.rotation());
  return LinearSimilarityAdapter::FromArgs(model.dx(), model.dy(), a, b);
}
|
||||
|
||||
LinearSimilarityModel ModelAdapter<LinearSimilarityModel>::ScaleParameters(
    const LinearSimilarityModel& model_in, float scale) {
  // Multiplies every parameter (a, b, dx, dy) by the given factor.
  LinearSimilarityModel scaled = model_in;
  scaled.set_a(scaled.a() * scale);
  scaled.set_b(scaled.b() * scale);
  scaled.set_dx(scaled.dx() * scale);
  scaled.set_dy(scaled.dy() * scale);
  return scaled;
}
|
||||
|
||||
LinearSimilarityModel ModelAdapter<LinearSimilarityModel>::AddIdentity(
    const LinearSimilarityModel& model_in) {
  // Adds the identity transform, which for this parametrization only
  // affects the diagonal term a.
  LinearSimilarityModel result = model_in;
  result.set_a(result.a() + 1);
  return result;
}
|
||||
|
||||
void ModelAdapter<LinearSimilarityModel>::GetJacobianAtPoint(
    const Vector2_f& pt, float* jacobian) {
  DCHECK(jacobian);
  const float px = pt.x();
  const float py = pt.y();
  // Row for x': derivatives w.r.t. (dx, dy, a, b).
  jacobian[0] = 1;
  jacobian[1] = 0;
  jacobian[2] = px;
  jacobian[3] = -py;
  // Row for y': derivatives w.r.t. (dx, dy, a, b).
  jacobian[4] = 0;
  jacobian[5] = 1;
  jacobian[6] = py;
  jacobian[7] = px;
}
|
||||
|
||||
LinearSimilarityModel
ModelAdapter<LinearSimilarityModel>::NormalizationTransform(
    float frame_width, float frame_height) {
  // Normalize by the frame diagonal.
  const float diagonal = std::hypot(frame_width, frame_height);
  DCHECK_NE(diagonal, 0);
  return LinearSimilarityAdapter::FromArgs(0, 0, 1.0 / diagonal, 0);
}
|
||||
|
||||
LinearSimilarityModel ModelAdapter<LinearSimilarityModel>::Maximum(
    const LinearSimilarityModel& lhs, const LinearSimilarityModel& rhs) {
  // Element-wise maximum of all four parameters.
  const float max_dx = std::max(lhs.dx(), rhs.dx());
  const float max_dy = std::max(lhs.dy(), rhs.dy());
  const float max_a = std::max(lhs.a(), rhs.a());
  const float max_b = std::max(lhs.b(), rhs.b());
  return FromArgs(max_dx, max_dy, max_a, max_b);
}
|
||||
|
||||
TranslationModel ModelAdapter<LinearSimilarityModel>::ProjectToTranslation(
    const LinearSimilarityModel& model, float frame_width, float frame_height) {
  // Conjugate the model so it is expressed w.r.t. the frame center; the
  // centered model's translation is the desired projection.
  LinearSimilarityModel to_center = LinearSimilarityAdapter::FromArgs(
      frame_width * 0.5f, frame_height * 0.5f, 1, 0);
  LinearSimilarityModel from_center = LinearSimilarityAdapter::FromArgs(
      -frame_width * 0.5f, -frame_height * 0.5f, 1, 0);

  // Express model w.r.t. center.
  LinearSimilarityModel centered =
      ModelCompose3(from_center, model, to_center);

  // No need to shift back to top-left after decomposition, as translations
  // are independent from coordinate origin.
  return TranslationAdapter::FromArgs(centered.dx(), centered.dy());
}
|
||||
|
||||
std::string ModelAdapter<AffineModel>::ToString(const AffineModel& model) {
  // Space-separated parameters: dx dy a b c d.
  return absl::StrFormat("%7f %7f %7f %7f %7f %7f", model.dx(), model.dy(),
                         model.a(), model.b(), model.c(), model.d());
}
|
||||
|
||||
AffineModel ModelAdapter<AffineModel>::NormalizationTransform(
    float frame_width, float frame_height) {
  // Normalize by the frame diagonal; both diagonal terms get the same scale.
  const float diagonal = std::hypot(frame_width, frame_height);
  DCHECK_NE(diagonal, 0);
  const float inv_diagonal = 1.0f / diagonal;
  return AffineAdapter::FromArgs(0, 0, inv_diagonal, 0, 0, inv_diagonal);
}
|
||||
|
||||
Homography ModelAdapter<AffineModel>::ToHomography(const AffineModel& model) {
  // Embed the affine part in the top two rows; perspective terms are zero.
  float params[8] = {model.a(), model.b(), model.dx(),
                     model.c(), model.d(), model.dy(), 0, 0};
  return HomographyAdapter::FromFloatPointer(params, false);
}
|
||||
|
||||
AffineModel ModelAdapter<AffineModel>::FromHomography(const Homography& model) {
  // Only valid for homographies without perspective terms.
  DCHECK_EQ(model.h_20(), 0);
  DCHECK_EQ(model.h_21(), 0);

  float params[6] = {model.h_02(), model.h_12(),   // dx, dy
                     model.h_00(), model.h_01(),   // a, b
                     model.h_10(), model.h_11()};  // c, d
  return FromFloatPointer(params, false);
}
|
||||
|
||||
AffineModel ModelAdapter<AffineModel>::ScaleParameters(
    const AffineModel& model_in, float scale) {
  // Multiplies every parameter (a, b, c, d, dx, dy) by the given factor.
  AffineModel scaled = model_in;
  scaled.set_a(scaled.a() * scale);
  scaled.set_b(scaled.b() * scale);
  scaled.set_c(scaled.c() * scale);
  scaled.set_d(scaled.d() * scale);
  scaled.set_dx(scaled.dx() * scale);
  scaled.set_dy(scaled.dy() * scale);
  return scaled;
}
|
||||
|
||||
AffineModel ModelAdapter<AffineModel>::AddIdentity(
    const AffineModel& model_in) {
  // Adds the identity transform, i.e. increments both diagonal terms.
  AffineModel result = model_in;
  result.set_a(result.a() + 1);
  result.set_d(result.d() + 1);
  return result;
}
|
||||
|
||||
void ModelAdapter<AffineModel>::GetJacobianAtPoint(const Vector2_f& pt,
                                                   float* jacobian) {
  DCHECK(jacobian);
  const float px = pt.x();
  const float py = pt.y();
  // Row for x': derivatives w.r.t. (dx, dy, a, b, c, d).
  jacobian[0] = 1;
  jacobian[1] = 0;
  jacobian[2] = px;
  jacobian[3] = py;
  jacobian[4] = 0;
  jacobian[5] = 0;

  // Row for y': derivatives w.r.t. (dx, dy, a, b, c, d).
  jacobian[6] = 0;
  jacobian[7] = 1;
  jacobian[8] = 0;
  jacobian[9] = 0;
  jacobian[10] = px;
  jacobian[11] = py;
}
|
||||
|
||||
AffineModel ModelAdapter<AffineModel>::Maximum(const AffineModel& lhs,
                                               const AffineModel& rhs) {
  // Element-wise maximum of all six parameters.
  return FromArgs(std::max(lhs.dx(), rhs.dx()), std::max(lhs.dy(), rhs.dy()),
                  std::max(lhs.a(), rhs.a()), std::max(lhs.b(), rhs.b()),
                  std::max(lhs.c(), rhs.c()), std::max(lhs.d(), rhs.d()));
}
|
||||
|
||||
LinearSimilarityModel ModelAdapter<LinearSimilarityModel>::ProjectFrom(
    const AffineModel& model, float frame_width, float frame_height) {
  // Delegate to the affine adapter's projection.
  return AffineAdapter::ProjectToLinearSimilarity(model, frame_width,
                                                  frame_height);
}
|
||||
|
||||
LinearSimilarityModel ModelAdapter<LinearSimilarityModel>::ProjectFrom(
    const Homography& model, float frame_width, float frame_height) {
  // Project in two stages: homography -> affine -> linear similarity.
  const AffineModel affine =
      AffineAdapter::ProjectFrom(model, frame_width, frame_height);
  return ProjectFrom(affine, frame_width, frame_height);
}
|
||||
|
||||
// Projects an affine model to the closest linear similarity by expressing the
// model w.r.t. the frame center, orthonormalizing its linear part (method B
// below), and mapping the result back to the top-left origin.
LinearSimilarityModel ModelAdapter<AffineModel>::ProjectToLinearSimilarity(
    const AffineModel& model, float frame_width, float frame_height) {
  AffineModel center_trans = AffineAdapter::FromArgs(
      frame_width * 0.5f, frame_height * 0.5f, 1, 0, 0, 1);
  AffineModel inv_center_trans = AffineAdapter::FromArgs(
      -frame_width * 0.5f, -frame_height * 0.5f, 1, 0, 0, 1);

  // Express model w.r.t. center.
  AffineModel center_model =
      ModelCompose3(inv_center_trans, model, center_trans);

  // Determine average scale.
  const float scale = std::sqrt(AffineAdapter::Determinant(center_model));

  // Goal is approximate matrix:
  // (a  b        (a'  -b'
  //  c  d)  with  b'   a')
  //
  //  :=           :=
  //  v_1 v_2
  // After normalization by the scale, a' = cos(u) and b' = sin(u)
  // therefore, the columns on the right hand side have norm 1 and are
  // orthogonal.
  //
  // ==> Orthonormalize v_1 and v_2

  Vector2_f x_map(center_model.a(), center_model.c());  // == v_1
  Vector2_f y_map(center_model.b(), center_model.d());  // == v_2
  x_map.Normalize();
  y_map.Normalize();

  // Two approaches here
  // A) Perform Grahm Schmidt, i.e. QR decomp / polar decomposition, i.e.:
  //    y_map' = y_map - <y_map, x_map> * x_map;
  //    ==> no error in x_map, error increasing with larger y
  // B) Compute the middle vector between x_map and y_map and create
  //    orthogonal system from that
  //    (rotate by -45': [ 1  1
  //                      -1  1] * 1/sqrt(2)
  //    Error equally distributed between x and y.
  //
  // Comparing approach A and B:
  //
  //                video 1 (gleicher4)     video 2 (pool dance)
  // Method B
  // Max diff : dx: 1.6359973               4.600112
  //            dy: 2.1076841               11.51579
  //            a:  1.004498                1.01036
  //            b:  0.0047194548            0.027450036
  // Method A
  // Max diff : dx: 4.3549204               14.145205
  //            dy: 2.4496114               7.7804289
  //            a:  1.0136091               1.043335
  //            b:  0.0024313219            0.0065769218

  // Using method B (empirically lower error above):
  Vector2_f middle = (x_map + y_map).Normalize();

  Vector2_f a_b = Vector2_f(middle.x() + middle.y(),  // see above matrix.
                            middle.y() - middle.x())
                      .Normalize();
  // Rebuild a similarity-structured affine model from (a', b') scaled back
  // to the average scale, keeping the centered translation.
  AffineModel lin_approx = AffineAdapter::FromArgs(
      center_model.dx(), center_model.dy(), scale * a_b.x(), -scale * a_b.y(),
      scale * a_b.y(), scale * a_b.x());

  // Map back to top-left origin.
  return LinearSimilarityAdapter::FromAffine(
      ModelCompose3(center_trans, lin_approx, inv_center_trans));
}
|
||||
|
||||
AffineModel ModelAdapter<AffineModel>::ProjectFrom(const Homography& model,
                                                   float frame_width,
                                                   float frame_height) {
  // Delegate to the homography adapter's projection.
  return HomographyAdapter::ProjectToAffine(model, frame_width, frame_height);
}
|
||||
|
||||
// Inverts the homography via Eigen; sets *success to false (and returns the
// identity) if the matrix is not safely invertible or the inverse cannot be
// expressed with the h_22 = 1 parametrization.
Homography ModelAdapter<Homography>::InvertChecked(const Homography& model,
                                                   bool* success) {
  // Could do adjoint method and do it by hand. Use Eigen for now, not that
  // crucial at this point.

  // Build the full 3x3 matrix, filling in the implicit h_22 = 1.
  Eigen::Matrix3d model_mat;
  model_mat(0, 0) = model.h_00();
  model_mat(0, 1) = model.h_01();
  model_mat(0, 2) = model.h_02();
  model_mat(1, 0) = model.h_10();
  model_mat(1, 1) = model.h_11();
  model_mat(1, 2) = model.h_12();
  model_mat(2, 0) = model.h_20();
  model_mat(2, 1) = model.h_21();
  model_mat(2, 2) = 1.0f;

  // NOTE(review): the determinant is compared without taking its absolute
  // value, so orientation-reversing homographies (negative determinant) are
  // rejected as well — confirm this is intended; the SimilarityModel
  // InvertChecked uses fabs() for the analogous check.
  if (model_mat.determinant() < kDetInvertibleEps) {
    VLOG(1) << "Homography not invertible, det is zero.";
    *success = false;
    return Homography();
  }

  Eigen::Matrix3d inv_model_mat = model_mat.inverse();

  // The inverse must be re-normalized so its (2, 2) entry is 1; if that entry
  // is zero the h_22 = 1 parametrization breaks down.
  if (inv_model_mat(2, 2) == 0) {
    LOG(ERROR) << "Degenerate homography. See proto.";
    *success = false;
    return Homography();
  }

  *success = true;
  Homography inv_model;
  // Normalize all entries by the (2, 2) element.
  const float scale = 1.0f / inv_model_mat(2, 2);
  inv_model.set_h_00(inv_model_mat(0, 0) * scale);
  inv_model.set_h_01(inv_model_mat(0, 1) * scale);
  inv_model.set_h_02(inv_model_mat(0, 2) * scale);
  inv_model.set_h_10(inv_model_mat(1, 0) * scale);
  inv_model.set_h_11(inv_model_mat(1, 1) * scale);
  inv_model.set_h_12(inv_model_mat(1, 2) * scale);
  inv_model.set_h_20(inv_model_mat(2, 0) * scale);
  inv_model.set_h_21(inv_model_mat(2, 1) * scale);

  return inv_model;
}
|
||||
|
||||
std::string ModelAdapter<Homography>::ToString(const Homography& model) {
  // Space-separated parameters in row-major order (h_22 == 1 is implicit).
  return absl::StrFormat("%7f %7f %7f %7f %7f %7f %7f %7f", model.h_00(),
                         model.h_01(), model.h_02(), model.h_10(),
                         model.h_11(), model.h_12(), model.h_20(),
                         model.h_21());
}
|
||||
|
||||
AffineModel ModelAdapter<Homography>::ToAffine(const Homography& model) {
  // Only valid when the perspective terms vanish.
  DCHECK_EQ(model.h_20(), 0);
  DCHECK_EQ(model.h_21(), 0);
  AffineModel affine;
  affine.set_a(model.h_00());
  affine.set_b(model.h_01());
  affine.set_dx(model.h_02());
  affine.set_c(model.h_10());
  affine.set_d(model.h_11());
  affine.set_dy(model.h_12());
  return affine;
}
|
||||
|
||||
// Converts an affine model to a homography by embedding it in the top two
// rows; the perspective terms h_20, h_21 remain zero.
Homography ModelAdapter<Homography>::FromAffine(const AffineModel& model) {
  return Embed(model);
}
|
||||
|
||||
// Returns true iff the homography has no perspective component, i.e. it is
// exactly representable as an affine model.
bool ModelAdapter<Homography>::IsAffine(const Homography& model) {
  return model.h_20() == 0 && model.h_21() == 0;
}
|
||||
|
||||
void ModelAdapter<Homography>::GetJacobianAtPoint(const Vector2_f& pt,
                                                  float* jacobian) {
  DCHECK(jacobian);
  const float px = pt.x();
  const float py = pt.y();
  // Row for x': derivatives w.r.t. (h_00 .. h_21); the perspective
  // parameters contribute the -px * px and -px * py entries.
  jacobian[0] = px;
  jacobian[1] = py;
  jacobian[2] = 1;
  jacobian[3] = 0;
  jacobian[4] = 0;
  jacobian[5] = 0;
  jacobian[6] = -px * px;
  jacobian[7] = -px * py;

  // Row for y'.
  jacobian[8] = 0;
  jacobian[9] = 0;
  jacobian[10] = 0;
  jacobian[11] = px;
  jacobian[12] = py;
  jacobian[13] = 1;
  jacobian[14] = -px * py;
  jacobian[15] = -py * py;
}
|
||||
|
||||
Homography ModelAdapter<Homography>::NormalizationTransform(
    float frame_width, float frame_height) {
  // Normalize by the frame diagonal; perspective terms stay zero.
  const float diagonal = std::hypot(frame_width, frame_height);
  DCHECK_NE(diagonal, 0);
  const float inv_diagonal = 1.0f / diagonal;
  return HomographyAdapter::FromArgs(inv_diagonal, 0, 0, 0, inv_diagonal, 0, 0,
                                     0);
}
|
||||
|
||||
AffineModel ModelAdapter<Homography>::ProjectToAffine(const Homography& model,
                                                      float frame_width,
                                                      float frame_height) {
  // Translation to the frame center and its inverse.
  Homography to_center;
  to_center.set_h_02(frame_width * 0.5f);
  to_center.set_h_12(frame_height * 0.5f);

  Homography from_center;
  from_center.set_h_02(-frame_width * 0.5f);
  from_center.set_h_12(-frame_height * 0.5f);

  // Express the model w.r.t. the center, drop the perspective terms there,
  // then map back to the top-left origin and convert to affine.
  Homography centered = ModelCompose3(from_center, model, to_center);
  centered.set_h_20(0);
  centered.set_h_21(0);

  return ToAffine(ModelCompose3(to_center, centered, from_center));
}
|
||||
|
||||
namespace {
|
||||
|
||||
// Returns true if non-empty intersection is present. In this case, start and
|
||||
// end are clipped to rect, specifically after clipping the line
|
||||
// [start, end] is inside the rectangle or incident to the boundary
|
||||
// of the rectangle. If strict is set to true, [start, end] is strictly inside
|
||||
// the rectangle, i.e. not incident to the boundary (minus start and end
|
||||
// point which still can be incident).
|
||||
// Implemented using Liang-Barsky algorithm.
|
||||
// http://en.wikipedia.org/wiki/Liang%E2%80%93Barsky
|
||||
inline bool ClipLine(const Vector2_f& rect, bool strict, Vector2_f* start,
                     Vector2_f* end) {
  // Parametrize the segment as start + t * diff, t in [0, 1]; p[k]/q[k]
  // encode the four half-plane constraints of the rectangle.
  Vector2_f diff = *end - *start;
  float p[4] = {-diff.x(), diff.x(), -diff.y(), diff.y()};

  // Bounds are (x_min, y_min) = (0, 0)
  //            (x_max, y_max) = rect
  float q[4] = {start->x() - 0, rect.x() - start->x(), start->y() - 0,
                rect.y() - start->y()};

  // Compute parametric intersection points.
  float near = -1e10;
  float far = 1e10;
  for (int k = 0; k < 4; ++k) {
    if (fabs(p[k]) < 1e-6f) {
      // Line is parallel to one axis of rectangle.
      // In strict mode a segment lying exactly on the boundary (q == 0) is
      // also rejected.
      if ((strict && q[k] <= 0.0f) || q[k] < 0.0f) {
        // Line is outside rectangle.
        return false;
      } else {
        // Possible intersection along other dimensions.
        continue;
      }
    } else {
      // Line is not parallel -> compute intersection.
      float intersect = q[k] / p[k];
      // Sign of p determines if near or far parameter.
      if (p[k] < 0.0f) {
        near = std::max(near, intersect);
      } else {
        far = std::min(far, intersect);
      }
    }
  }

  if (near > far) {
    // Line is outside of rectangle.
    return false;
  }

  // Clip near and far to valid line segment interval [0, 1].
  far = std::min(far, 1.0f);
  near = std::max(near, 0.0f);

  if (near <= far) {
    // Non-empty intersection. Single points are considered valid intersection.
    // Note: *end is updated before *start, since both are computed from the
    // original *start; reversing the order would corrupt the result.
    *end = *start + diff * far;
    *start = *start + diff * near;
    return true;
  } else {
    // Empty intersection.
    return false;
  }
}
|
||||
|
||||
} // namespace.
|
||||
|
||||
// Computes the area of overlap between rect as transformed by model_1 and
// rect as transformed by model_2, normalized by the average of the two
// transformed areas. Returns 0 for empty rectangles, degenerate models, or
// non-invertible model differences.
template <class Model>
float ModelMethods<Model>::NormalizedIntersectionArea(const Model& model_1,
                                                      const Model& model_2,
                                                      const Vector2_f& rect) {
  const float rect_area = rect.x() * rect.y();
  if (rect_area <= 0) {
    LOG(WARNING) << "Empty rectangle passed -> empty intersection.";
    return 0.0f;
  }

  // The four edges of the rectangle [0, rect], traversed in order.
  std::vector<std::pair<Vector2_f, Vector2_f>> lines(4);
  lines[0] = std::make_pair(Vector2_f(0, 0), Vector2_f(0, rect.y()));
  lines[1] =
      std::make_pair(Vector2_f(0, rect.y()), Vector2_f(rect.x(), rect.y()));
  lines[2] =
      std::make_pair(Vector2_f(rect.x(), rect.y()), Vector2_f(rect.x(), 0));
  lines[3] = std::make_pair(Vector2_f(rect.x(), 0), Vector2_f(0, 0));

  float model_1_area = 0.0f;
  float model_2_area = 0.0f;
  for (int k = 0; k < 4; ++k) {
    Vector2_f start_1 = Adapter::TransformPoint(model_1, lines[k].first);
    Vector2_f end_1 = Adapter::TransformPoint(model_1, lines[k].second);
    // Use trapezoidal rule for polygon area.
    model_1_area += 0.5 * (end_1.y() + start_1.y()) * (end_1.x() - start_1.x());
    Vector2_f start_2 = Adapter::TransformPoint(model_2, lines[k].first);
    Vector2_f end_2 = Adapter::TransformPoint(model_2, lines[k].second);
    model_2_area += 0.5 * (end_2.y() + start_2.y()) * (end_2.x() - start_2.x());
  }

  const float average_area = 0.5f * (model_1_area + model_2_area);
  if (average_area <= 0) {
    LOG(WARNING) << "Degenerative models passed -> empty intersection.";
    return 0.0f;
  }

  // First, clip transformed rectangle against origin defined by model_1.
  // diff maps model_1's coordinate system onto model_2's.
  bool success = true;
  Model diff = ModelDiffChecked(model_2, model_1, &success);
  if (!success) {
    LOG(WARNING) << "Model difference is singular -> empty intersection.";
    return 0.0f;
  }

  float area = 0.0f;
  for (int k = 0; k < 4; ++k) {
    Vector2_f start_1 = Adapter::TransformPoint(diff, lines[k].first);
    Vector2_f end_1 = Adapter::TransformPoint(diff, lines[k].second);
    if (ClipLine(rect, false, &start_1, &end_1)) {
      // Non-empty intersection.
      // Transform intersection back to world coordinate system.
      const Vector2_f start = Adapter::TransformPoint(model_1, start_1);
      const Vector2_f end = Adapter::TransformPoint(model_1, end_1);
      // Use trapezoidal rule for polygon area without explicit ordering of
      // vertices.
      area += 0.5 * (end.y() + start.y()) * (end.x() - start.x());
    }
  }

  // Second, clip transformed rectangle against origin defined by model_2.
  Model inv_diff = Adapter::InvertChecked(diff, &success);
  if (!success) {
    LOG(WARNING) << "Model difference is singular -> empty intersection.";
    return 0.0f;
  }

  for (int k = 0; k < 4; ++k) {
    Vector2_f start_2 = Adapter::TransformPoint(inv_diff, lines[k].first);
    Vector2_f end_2 = Adapter::TransformPoint(inv_diff, lines[k].second);
    // Use strict comparison to address degenerate case of incident rectangles
    // in which case intersection would be counted twice if non-strict
    // comparison is employed.
    if (ClipLine(rect, true, &start_2, &end_2)) {
      // Transform start and end back to origin.
      const Vector2_f start = Adapter::TransformPoint(model_2, start_2);
      const Vector2_f end = Adapter::TransformPoint(model_2, end_2);
      area += 0.5 * (end.y() + start.y()) * (end.x() - start.x());
    }
  }

  // Normalize w.r.t. average rectangle area.
  return area / average_area;
}
|
||||
|
||||
// Precomputes per-row mixing weights over num_models models, either from a
// Catmull-Rom spline (when flags_catmull_rom_mixture_weights is set) or from
// normalized Gaussians centered at evenly spaced row mid-points.
MixtureRowWeights::MixtureRowWeights(int frame_height, int margin, float sigma,
                                     float y_scale, int num_models)
    : frame_height_(frame_height),
      y_scale_(y_scale),
      margin_(margin),
      sigma_(sigma),
      num_models_(num_models) {
  mid_points_.resize(num_models_);

  // Compute mid_point (row idx) for each model.
  if (flags_catmull_rom_mixture_weights) {
    // Spacing between spline knots; num_models - 1 intervals span the frame.
    const float model_height =
        static_cast<float>(frame_height_) / (num_models - 1);

    // Use Catmull-rom spline.
    // Compute weighting matrix.
    weights_.resize(frame_height * num_models);
    float spline_weights[4];

    // No margin support for splines.
    if (margin_ > 0) {
      LOG(WARNING) << "No margin support when flag catmull_rom_mixture_weights "
                   << "is set. Margin is reset to zero, it is recommended "
                   << "that RowWeightsBoundChecked is used to prevent "
                   << "segfaults.";
      margin_ = 0;
    }

    for (int i = 0; i < frame_height; ++i) {
      float* weight_ptr = &weights_[i * num_models];

      // Position of row i in knot units; int_pos is the left knot index,
      // dy the fractional offset within the interval.
      float float_pos = static_cast<float>(i) / model_height;
      int int_pos = float_pos;
      memset(weight_ptr, 0, sizeof(weight_ptr[0]) * num_models);

      float dy = float_pos - int_pos;

      // Weights sum to one, for all choices of dy.
      // For definition see
      // en.wikipedia.org/wiki/Cubic_Hermite_spline#Catmull.E2.80.93Rom_spline
      spline_weights[0] = 0.5f * (dy * ((2.0f - dy) * dy - 1.0f));
      spline_weights[1] = 0.5f * (dy * dy * (3.0f * dy - 5.0f) + 2.0f);
      spline_weights[2] = 0.5f * (dy * ((4.0f - 3.0f * dy) * dy + 1.0f));
      spline_weights[3] = 0.5f * (dy * dy * (dy - 1.0f));

      // Distribute the four tap weights to neighboring knots; the boundary
      // knots are doubled so the weights still sum to one at the edges.
      weight_ptr[int_pos] += spline_weights[1];
      if (int_pos > 0) {
        weight_ptr[int_pos - 1] += spline_weights[0];
      } else {
        weight_ptr[int_pos] += spline_weights[0];  // Double knot.
      }

      CHECK_LT(int_pos, num_models - 1);
      weight_ptr[int_pos + 1] += spline_weights[2];
      if (int_pos + 1 < num_models - 1) {
        weight_ptr[int_pos + 2] += spline_weights[3];
      } else {
        weight_ptr[int_pos + 1] += spline_weights[3];  // Double knot.
      }
    }
  } else {
    // Gaussian weights.
    const float model_height = static_cast<float>(frame_height_) / num_models;

    for (int i = 0; i < num_models; ++i) {
      mid_points_[i] = (i + 0.5f) * model_height;
    }

    // Compute gaussian weights.
    // Rows extend margin_ pixels above and below the frame.
    const int num_values = frame_height_ + 2 * margin_;
    std::vector<float> row_dist_weights(num_values);
    const float common = -0.5f / (sigma * sigma);
    for (int i = 0; i < num_values; ++i) {
      row_dist_weights[i] = std::exp(common * i * i);
    }

    // Compute weighting matrix.
    weights_.resize(num_values * num_models);
    for (int i = 0; i < num_values; ++i) {
      float* weight_ptr = &weights_[i * num_models];
      float weight_sum = 0;

      // Gaussian weights via lookup.
      // NOTE(review): the lookup index abs(i - margin_ - mid_points_[j])
      // depends on the element type of mid_points_ and on which abs()
      // overload is selected — confirm the intended truncation/rounding to
      // an integer index.
      for (int j = 0; j < num_models; ++j) {
        weight_ptr[j] = row_dist_weights[abs(i - margin_ - mid_points_[j])];
        weight_sum += weight_ptr[j];
      }

      // Normalize.
      DCHECK_GT(weight_sum, 0);
      const float inv_weight_sum = 1.0f / weight_sum;
      for (int j = 0; j < num_models; ++j) {
        weight_ptr[j] *= inv_weight_sum;
      }
    }
  }
}
|
||||
|
||||
float MixtureRowWeights::WeightThreshold(float frac_blocks) {
|
||||
const float model_height = static_cast<float>(frame_height_) / num_models_;
|
||||
|
||||
const float y = model_height * frac_blocks + mid_points_[0];
|
||||
const float* row_weights = RowWeightsClamped(y / y_scale_);
|
||||
return row_weights[0];
|
||||
}
|
||||
|
||||
// Explicit instantiations of ModelMethods.
|
||||
template class ModelMethods<TranslationModel>;
|
||||
template class ModelMethods<SimilarityModel>;
|
||||
template class ModelMethods<LinearSimilarityModel>;
|
||||
template class ModelMethods<AffineModel>;
|
||||
template class ModelMethods<Homography>;
|
||||
|
||||
} // namespace mediapipe
|
||||
1849
mediapipe/util/tracking/motion_models.h
Normal file
122
mediapipe/util/tracking/motion_models.proto
Normal file
|
|
@ -0,0 +1,122 @@
|
|||
// Copyright 2019 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Motion models, that are supported and estimated by MotionStabilization.
|
||||
// Note, that transformations represent the motion of the
|
||||
// feature points from the previous frame to the current one.
|
||||
// Note that the actual camera movement, is the inverse of this
|
||||
// transformation.
|
||||
// Check carefully, which movement (feature or camera) is required.
|
||||
// Note that for each model the default values always specify an identity
|
||||
// transform. Follow this rule when adding new models.
|
||||
|
||||
syntax = "proto2";
|
||||
|
||||
package mediapipe;
|
||||
|
||||
// Here x represents a 2D point in the image plane, in the following
|
||||
// coordinate system:
|
||||
// *----> x
|
||||
// |
|
||||
// |
|
||||
// v y
|
||||
|
||||
// Simple translational model:
|
||||
// I * x + [dx; dy] with I being 2x2 identity transform.
|
||||
message TranslationModel {
  optional float dx = 1 [default = 0];  // Translation along x.
  optional float dy = 2 [default = 0];  // Translation along y.
}
|
||||
|
||||
// Non-linear similarity model (w.r.t. to its parametrization).
|
||||
// c_r := cos(rotation);
|
||||
// s_r := sin(rotation);
|
||||
// Transformation applied to x:
|
||||
// [scale 0; * [c_r -s_r; * x + [dx;
|
||||
// 0 scale] s_r c_r] dy]
|
||||
message SimilarityModel {
  optional float dx = 1 [default = 0];        // Translation along x.
  optional float dy = 2 [default = 0];        // Translation along y.
  optional float scale = 3 [default = 1];     // Isotropic scale factor.
  optional float rotation = 4 [default = 0];  // angle in [-pi, pi].
}
|
||||
|
||||
// Linear similarity model:
|
||||
// [a -b; * x + [dx;
|
||||
// b a] dy]
|
||||
message LinearSimilarityModel {
  optional float dx = 1 [default = 0];  // Translation along x.
  optional float dy = 2 [default = 0];  // Translation along y.
  optional float a = 3 [default = 1];   // Diagonal term of [a -b; b a].
  optional float b = 4 [default = 0];   // Off-diagonal term of [a -b; b a].
}
|
||||
|
||||
// Affine according to
|
||||
// ( [a b * x + [dx;
|
||||
// ( c d] dy]
|
||||
message AffineModel {
  optional float dx = 1 [default = 0];  // Translation along x.
  optional float dy = 2 [default = 0];  // Translation along y.
  // Entries of the 2x2 linear part [a b; c d], row-major.
  optional float a = 3 [default = 1];
  optional float b = 4 [default = 0];
  optional float c = 5 [default = 0];
  optional float d = 6 [default = 1];
}
|
||||
|
||||
// Homography according to
|
||||
// [h_00 h_01 h_02;
|
||||
// h_10 h_11 h_12;
|
||||
// h_20 h_21 1];
|
||||
// Note: The parametrization with h_22 = 1 does not always hold, e.g.
|
||||
// if the origin (0, 0, 1) gets mapped to the line at infinity
|
||||
// (0, 0, 1). However for video we expect small perspective
|
||||
// changes between frames and this parametrization improves
|
||||
// robustness greatly as it removes an additional DOF.
|
||||
// Therefore, all methods in motion_stabilization should not be
|
||||
// used for general wide-baseline matching of frames.
|
||||
message Homography {
  // Entries of the 3x3 matrix in row-major order; h_22 is fixed to 1 (see
  // comment above the message).
  optional float h_00 = 1 [default = 1];
  optional float h_01 = 2 [default = 0];
  optional float h_02 = 3 [default = 0];
  optional float h_10 = 4 [default = 0];
  optional float h_11 = 5 [default = 1];
  optional float h_12 = 6 [default = 0];
  optional float h_20 = 7 [default = 0];  // Perspective term.
  optional float h_21 = 8 [default = 0];  // Perspective term.
}
|
||||
|
||||
// Mixture models with higher degrees of freedom, according to
|
||||
// \sum_i model(i) * weight(i), where weights are passed during transform and
|
||||
// are expected to sum to one.
|
||||
message MixtureLinearSimilarity {
  // Component models; per-model weights are supplied at transform time and
  // are expected to sum to one.
  repeated LinearSimilarityModel model = 1;
}
|
||||
|
||||
message MixtureAffine {
  // Component models; per-model weights are supplied at transform time and
  // are expected to sum to one.
  repeated AffineModel model = 1;
}
|
||||
|
||||
message MixtureHomography {
  // Component models; per-model weights are supplied at transform time and
  // are expected to sum to one.
  repeated Homography model = 1;
  // Specifies which degree of freedom vary across mixture.
  // Can be used to implement several transformation functions quicker.
  enum VariableDOF {
    ALL_DOF = 0;          // All dof are variable.
    TRANSLATION_DOF = 1;  // Only translation (h_02, h_12) varies.
    SKEW_ROTATION_DOF = 2;  // Translation (h_02, h_12), and skew-rotation
                            // (h_01, h_10) vary.
    CONST_DOF = 3;  // Mixture is constant.
  }
  optional VariableDOF dof = 2 [default = ALL_DOF];
}
|
||||
57
mediapipe/util/tracking/motion_models_cv.cc
Normal file
|
|
@ -0,0 +1,57 @@
|
|||
// Copyright 2019 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "mediapipe/util/tracking/motion_models_cv.h"
|
||||
|
||||
namespace mediapipe {
|
||||
|
||||
void ModelCvConvert<TranslationModel>::ToCvMat(const TranslationModel& model,
|
||||
cv::Mat* matrix) {
|
||||
ModelCvConvert<AffineModel>::ToCvMat(
|
||||
ModelAdapter<TranslationModel>::ToAffine(model), matrix);
|
||||
}
|
||||
|
||||
void ModelCvConvert<LinearSimilarityModel>::ToCvMat(
|
||||
const LinearSimilarityModel& model, cv::Mat* matrix) {
|
||||
ModelCvConvert<AffineModel>::ToCvMat(
|
||||
ModelAdapter<LinearSimilarityModel>::ToAffine(model), matrix);
|
||||
}
|
||||
|
||||
void ModelCvConvert<AffineModel>::ToCvMat(const AffineModel& model,
|
||||
cv::Mat* matrix) {
|
||||
matrix->create(2, 3, CV_32FC1);
|
||||
matrix->at<float>(0, 0) = model.a();
|
||||
matrix->at<float>(0, 1) = model.b();
|
||||
matrix->at<float>(0, 2) = model.dx();
|
||||
matrix->at<float>(1, 0) = model.c();
|
||||
matrix->at<float>(1, 1) = model.d();
|
||||
matrix->at<float>(1, 2) = model.dy();
|
||||
}
|
||||
|
||||
void ModelCvConvert<Homography>::ToCvMat(const Homography& model,
|
||||
cv::Mat* matrix) {
|
||||
CHECK(matrix != nullptr);
|
||||
matrix->create(3, 3, CV_32FC1);
|
||||
matrix->at<float>(0, 0) = model.h_00();
|
||||
matrix->at<float>(0, 1) = model.h_01();
|
||||
matrix->at<float>(0, 2) = model.h_02();
|
||||
matrix->at<float>(1, 0) = model.h_10();
|
||||
matrix->at<float>(1, 1) = model.h_11();
|
||||
matrix->at<float>(1, 2) = model.h_12();
|
||||
matrix->at<float>(2, 0) = model.h_20();
|
||||
matrix->at<float>(2, 1) = model.h_21();
|
||||
matrix->at<float>(2, 2) = 1.0;
|
||||
}
|
||||
|
||||
} // namespace mediapipe
|
||||
63
mediapipe/util/tracking/motion_models_cv.h
Normal file
|
|
@ -0,0 +1,63 @@
|
|||
// Copyright 2019 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef MEDIAPIPE_UTIL_TRACKING_MOTION_MODELS_CV_H_
|
||||
#define MEDIAPIPE_UTIL_TRACKING_MOTION_MODELS_CV_H_
|
||||
|
||||
#include "mediapipe/framework/port/opencv_core_inc.h"
|
||||
#include "mediapipe/util/tracking/motion_models.h"
|
||||
#include "mediapipe/util/tracking/motion_models.pb.h" // NOLINT
|
||||
|
||||
namespace mediapipe {
|
||||
|
||||
template <class Model>
|
||||
class ModelCvConvert {};
|
||||
|
||||
// Specialized implementations, with additional functionality if needed.
|
||||
template <>
|
||||
class ModelCvConvert<TranslationModel> {
|
||||
public:
|
||||
// Returns 2x3 floating point cv::Mat with model parameters.
|
||||
static void ToCvMat(const TranslationModel& model, cv::Mat* matrix);
|
||||
};
|
||||
|
||||
template <>
|
||||
class ModelCvConvert<LinearSimilarityModel> {
|
||||
public:
|
||||
// Returns 2x3 floating point cv::Mat with model parameters.
|
||||
static void ToCvMat(const LinearSimilarityModel& model, cv::Mat* matrix);
|
||||
};
|
||||
|
||||
template <>
|
||||
class ModelCvConvert<AffineModel> {
|
||||
public:
|
||||
// Returns 2x3 floating point cv::Mat with model parameters.
|
||||
static void ToCvMat(const AffineModel& model, cv::Mat* matrix);
|
||||
};
|
||||
|
||||
template <>
|
||||
class ModelCvConvert<Homography> {
|
||||
public:
|
||||
// Returns 3x3 floating point cv::Mat with model parameters.
|
||||
static void ToCvMat(const Homography& model, cv::Mat* matrix);
|
||||
};
|
||||
|
||||
template <class Model>
|
||||
void ModelToCvMat(const Model& model, cv::Mat* matrix) {
|
||||
ModelCvConvert<Model>::ToCvMat(model, matrix);
|
||||
}
|
||||
|
||||
} // namespace mediapipe
|
||||
|
||||
#endif // MEDIAPIPE_UTIL_TRACKING_MOTION_MODELS_CV_H_
|
||||
335
mediapipe/util/tracking/motion_models_test.cc
Normal file
|
|
@ -0,0 +1,335 @@
|
|||
// Copyright 2019 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "mediapipe/util/tracking/motion_models.h"
|
||||
|
||||
#include "mediapipe/framework/deps/message_matchers.h"
|
||||
#include "mediapipe/framework/port/gmock.h"
|
||||
#include "mediapipe/framework/port/gtest.h"
|
||||
#include "mediapipe/framework/port/parse_text_proto.h"
|
||||
#include "mediapipe/framework/port/proto_ns.h"
|
||||
#include "mediapipe/framework/port/vector.h"
|
||||
#include "mediapipe/util/tracking/motion_estimation.h"
|
||||
|
||||
namespace mediapipe {
|
||||
namespace {
|
||||
|
||||
static const float kArrayFloat[] = {1, 2, 3, 4, 5, 6, 7, 8};
|
||||
static const double kArrayDouble[] = {1, 2, 3, 4, 5, 6, 7, 8};
|
||||
|
||||
class MotionModelsTest : public ::testing::Test {};
|
||||
|
||||
// Test from/to array and parameter indexing functions.
|
||||
template <class Model>
|
||||
void CheckFromArrayAndGetParameter(const char* model_zero_string,
|
||||
const char* model_identity_string) {
|
||||
Model model_zero;
|
||||
Model model_identity;
|
||||
|
||||
ASSERT_TRUE(
|
||||
proto_ns::TextFormat::ParseFromString(model_zero_string, &model_zero));
|
||||
ASSERT_TRUE(proto_ns::TextFormat::ParseFromString(model_identity_string,
|
||||
&model_identity));
|
||||
|
||||
typedef ModelAdapter<Model> Adapter;
|
||||
|
||||
EXPECT_THAT(Adapter::FromFloatPointer(kArrayFloat, false),
|
||||
mediapipe::EqualsProto(model_zero));
|
||||
|
||||
EXPECT_THAT(Adapter::FromFloatPointer(kArrayFloat, true),
|
||||
mediapipe::EqualsProto(model_identity));
|
||||
|
||||
EXPECT_THAT(Adapter::FromDoublePointer(kArrayDouble, false),
|
||||
mediapipe::EqualsProto(model_zero));
|
||||
|
||||
EXPECT_THAT(Adapter::FromDoublePointer(kArrayDouble, true),
|
||||
mediapipe::EqualsProto(model_identity));
|
||||
|
||||
ASSERT_LE(Adapter::NumParameters(), 8);
|
||||
for (int i = 0; i < Adapter::NumParameters(); ++i) {
|
||||
EXPECT_EQ(kArrayFloat[i], Adapter::GetParameter(model_zero, i));
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(MotionModelsTest, FromArrayAndGetParameter) {
|
||||
CheckFromArrayAndGetParameter<TranslationModel>("dx: 1 dy: 2", "dx: 1 dy: 2");
|
||||
|
||||
CheckFromArrayAndGetParameter<SimilarityModel>(
|
||||
"dx: 1 dy: 2 scale: 3 rotation: 4", "dx: 1 dy: 2 scale: 4 rotation: 4");
|
||||
|
||||
CheckFromArrayAndGetParameter<LinearSimilarityModel>("dx: 1 dy: 2 a: 3 b: 4",
|
||||
"dx: 1 dy: 2 a: 4 b: 4");
|
||||
|
||||
CheckFromArrayAndGetParameter<AffineModel>("dx: 1 dy: 2 a: 3 b: 4 c: 5 d: 6",
|
||||
"dx: 1 dy: 2 a: 4 b: 4 c: 5 d: 7");
|
||||
|
||||
CheckFromArrayAndGetParameter<Homography>(
|
||||
"h_00: 1 h_01: 2 h_02: 3 "
|
||||
"h_10: 4 h_11: 5 h_12: 6 "
|
||||
"h_20: 7 h_21: 8 ",
|
||||
"h_00: 2 h_01: 2 h_02: 3 "
|
||||
"h_10: 4 h_11: 6 h_12: 6 "
|
||||
"h_20: 7 h_21: 8 ");
|
||||
}
|
||||
|
||||
// Test point transformations.
|
||||
template <class Model>
|
||||
void CheckTransformPoint(const char* model_string, float x_in, float y_in,
|
||||
float x_out, float y_out) {
|
||||
Model model;
|
||||
ASSERT_TRUE(proto_ns::TextFormat::ParseFromString(model_string, &model));
|
||||
|
||||
const Vector2_f output =
|
||||
ModelAdapter<Model>::TransformPoint(model, Vector2_f(x_in, y_in));
|
||||
|
||||
EXPECT_NEAR(x_out, output.x(), 1e-5);
|
||||
EXPECT_NEAR(y_out, output.y(), 1e-5);
|
||||
}
|
||||
|
||||
TEST_F(MotionModelsTest, TransformPoint) {
|
||||
CheckTransformPoint<TranslationModel>("dx: 0 dy: 0", 1, 1, 1, 1);
|
||||
CheckTransformPoint<TranslationModel>("dx: 1 dy: -1", 1, 1, 2, 0);
|
||||
|
||||
CheckTransformPoint<SimilarityModel>(
|
||||
"dx: 0 dy: 0 scale: 1 rotation: 1.57079633", 1, 2, -2, 1);
|
||||
CheckTransformPoint<SimilarityModel>(
|
||||
"dx: 1 dy: -1 scale: 1 rotation: 1.57079633", 1, 2, -1, 0);
|
||||
CheckTransformPoint<SimilarityModel>(
|
||||
"dx: 1 dy: -1 scale: 2 rotation: 1.57079633", 1, 2, -3, 1);
|
||||
|
||||
CheckTransformPoint<LinearSimilarityModel>("dx: 0 dy: 0 a: 1 b: -0.5", 1, 2,
|
||||
2, 1.5);
|
||||
CheckTransformPoint<LinearSimilarityModel>("dx: 0.5 dy: -0.5 a: 1 b: 0.5", 1,
|
||||
2, 0.5, 2);
|
||||
CheckTransformPoint<LinearSimilarityModel>("dx: 0.5 dy: -0.5 a: 0.5 b: 0.5",
|
||||
1, 2, 0, 1);
|
||||
|
||||
CheckTransformPoint<AffineModel>("dx: 0 dy: 0 a: 1 b: 0.5 c: -0.5 d: 1", 1, 2,
|
||||
2, 1.5);
|
||||
CheckTransformPoint<AffineModel>("dx: 0.5 dy: -0.5 a: 2 b: -0.5 c: 0.5 d: 1",
|
||||
1, 2, 1.5, 2);
|
||||
CheckTransformPoint<AffineModel>("dx: 1 dy: -1 a: 2 b: -2 c: 1 d: -1", 1, 2,
|
||||
-1, -2);
|
||||
|
||||
// Transformations by Homography are followed by divsion by the 3rd element.
|
||||
// Test division by value != 1.
|
||||
CheckTransformPoint<Homography>(
|
||||
"h_00: 1 h_01: 2 h_02: 3 "
|
||||
"h_10: 4 h_11: 3 h_12: 6 "
|
||||
"h_20: 7 h_21: 8 ",
|
||||
1, 2, 8.0 / 24.0, 16.0 / 24.0);
|
||||
// Test division by 1.
|
||||
CheckTransformPoint<Homography>(
|
||||
"h_00: 1 h_01: 2 h_02: 3 "
|
||||
"h_10: 4 h_11: 3 h_12: 6 "
|
||||
"h_20: 2 h_21: -1 ",
|
||||
1, 2, 8.0, 16.0);
|
||||
}
|
||||
|
||||
// Test model inversions.
|
||||
template <class Model>
|
||||
void CheckInvert(const char* model_string, const char* inv_model_string) {
|
||||
Model model;
|
||||
ASSERT_TRUE(proto_ns::TextFormat::ParseFromString(model_string, &model));
|
||||
|
||||
Model inv_model_expected;
|
||||
ASSERT_TRUE(proto_ns::TextFormat::ParseFromString(inv_model_string,
|
||||
&inv_model_expected));
|
||||
|
||||
typedef ModelAdapter<Model> Adapter;
|
||||
const Model inv_model_actual = Adapter::Invert(model);
|
||||
|
||||
for (int i = 0; i < Adapter::NumParameters(); ++i) {
|
||||
EXPECT_NEAR(Adapter::GetParameter(inv_model_expected, i),
|
||||
Adapter::GetParameter(inv_model_actual, i), 1e-5)
|
||||
<< "Parameter index: " << i << " of total " << Adapter::NumParameters();
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(MotionModelsTest, Invert) {
|
||||
CheckInvert<TranslationModel>("dx: 1 dy: -2", "dx: -1 dy: 2");
|
||||
|
||||
CheckInvert<SimilarityModel>("dx: 0 dy: 0 scale: 1 rotation: 1.57079633",
|
||||
"dx: 0 dy: 0 scale: 1 rotation: -1.57079633");
|
||||
CheckInvert<SimilarityModel>("dx: 1 dy: -2 scale: 1 rotation: 1.57079633",
|
||||
"dx: 2 dy: 1 scale: 1 rotation: -1.57079633");
|
||||
CheckInvert<SimilarityModel>("dx: 1 dy: -2 scale: 0.5 rotation: 1.57079633",
|
||||
"dx: 4 dy: 2 scale: 2 rotation: -1.57079633");
|
||||
|
||||
CheckInvert<LinearSimilarityModel>("dx: 1 dy: 2 a: 3 b: 4 ",
|
||||
"dx: -0.44 dy: -0.08 a: 0.12 b: -0.16 ");
|
||||
|
||||
// Test division by value != 1.
|
||||
CheckInvert<Homography>(
|
||||
"h_00: 1 h_01: 2 h_02: 3 "
|
||||
"h_10: -3 h_11: -2 h_12: -1 "
|
||||
"h_20: 8 h_21: -1 ",
|
||||
"h_00: -0.75 h_01: -1.25 h_02: 1 "
|
||||
"h_10: -1.25 h_11: -5.75 h_12: -2 "
|
||||
"h_20: 4.75 h_21: 4.25 ");
|
||||
// Test division by 1.
|
||||
CheckInvert<Homography>(
|
||||
"h_00: -0.75 h_01: -1.25 h_02: 1 "
|
||||
"h_10: -1.25 h_11: -5.75 h_12: -2 "
|
||||
"h_20: 4.75 h_21: 4.25 ",
|
||||
"h_00: 1 h_01: 2 h_02: 3 "
|
||||
"h_10: -3 h_11: -2 h_12: -1 "
|
||||
"h_20: 8 h_21: -1 ");
|
||||
}
|
||||
|
||||
// Test model compositions.
|
||||
template <class Model>
|
||||
void CheckCompose(const char* model_string1, const char* model_string2,
|
||||
const char* composed_string) {
|
||||
Model model1;
|
||||
ASSERT_TRUE(proto_ns::TextFormat::ParseFromString(model_string1, &model1));
|
||||
|
||||
Model model2;
|
||||
ASSERT_TRUE(proto_ns::TextFormat::ParseFromString(model_string2, &model2));
|
||||
|
||||
Model composed_expected;
|
||||
ASSERT_TRUE(proto_ns::TextFormat::ParseFromString(composed_string,
|
||||
&composed_expected));
|
||||
|
||||
typedef ModelAdapter<Model> Adapter;
|
||||
const Model composed_actual = Adapter::Compose(model1, model2);
|
||||
|
||||
for (int i = 0; i < Adapter::NumParameters(); ++i) {
|
||||
EXPECT_NEAR(Adapter::GetParameter(composed_expected, i),
|
||||
Adapter::GetParameter(composed_actual, i), 1e-5)
|
||||
<< "Parameter index: " << i << " of total " << Adapter::NumParameters();
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(MotionModelsTest, Compose) {
|
||||
CheckCompose<TranslationModel>("dx: 1 dy: -2", "dx: -3 dy: 4",
|
||||
"dx: -2 dy: 2");
|
||||
|
||||
CheckCompose<SimilarityModel>(
|
||||
"dx: 1 dy: 2 scale: 0.5 rotation: 1.57079633 ",
|
||||
"dx: -2 dy: -1 scale: 2 rotation: -1.57079633 ",
|
||||
"dx: 1.5 dy: 1 scale: 1 rotation: 0 ");
|
||||
|
||||
CheckCompose<LinearSimilarityModel>("dx: 1 dy: 2 a: 0.5 b: 0.5 ",
|
||||
"dx: -2 dy: -1 a: 2 b: -0.5 ",
|
||||
"dx: 0.5 dy: 0.5 a: 1.25 b: 0.75 ");
|
||||
|
||||
// Test division by value != 1.
|
||||
CheckCompose<Homography>(
|
||||
"h_00: 1 h_01: 2 h_02: 3 "
|
||||
"h_10: 4 h_11: 5 h_12: 6 "
|
||||
"h_20: 1 h_21: -1 ",
|
||||
"h_00: -3 h_01: -2 h_02: -1 "
|
||||
"h_10: -4 h_11: -5 h_12: -2 "
|
||||
"h_20: 7 h_21: 8 ",
|
||||
"h_00: 5 h_01: 6 h_02: -1 "
|
||||
"h_10: 5 h_11: 7.5 h_12: -4 "
|
||||
"h_20: 4 h_21: 5.5 ");
|
||||
// Test division by 1.
|
||||
CheckCompose<Homography>(
|
||||
"h_00: 1 h_01: 2 h_02: 3 "
|
||||
"h_10: 4 h_11: 5 h_12: 6 "
|
||||
"h_20: 2 h_21: -1 ",
|
||||
"h_00: -3 h_01: -2 h_02: -1 "
|
||||
"h_10: -4 h_11: -5 h_12: -2 "
|
||||
"h_20: 7 h_21: 8 ",
|
||||
"h_00: 10 h_01: 12 h_02: -2 "
|
||||
"h_10: 10 h_11: 15 h_12: -8 "
|
||||
"h_20: 5 h_21: 9 ");
|
||||
}
|
||||
|
||||
// Test conversions between models and their affine representations, and
|
||||
// vice-versa.
|
||||
template <class Model>
|
||||
void CheckToFromAffine(const char* model_string, const char* affine_string) {
|
||||
Model model;
|
||||
ASSERT_TRUE(proto_ns::TextFormat::ParseFromString(model_string, &model));
|
||||
|
||||
AffineModel affine;
|
||||
ASSERT_TRUE(proto_ns::TextFormat::ParseFromString(affine_string, &affine));
|
||||
|
||||
typedef ModelAdapter<Model> Adapter;
|
||||
EXPECT_THAT(Adapter::ToAffine(model), mediapipe::EqualsProto(affine));
|
||||
EXPECT_THAT(Adapter::FromAffine(affine), mediapipe::EqualsProto(model));
|
||||
}
|
||||
|
||||
TEST_F(MotionModelsTest, ToFromAffine) {
|
||||
CheckToFromAffine<TranslationModel>("dx: 1 dy: 2",
|
||||
"dx: 1 dy: 2 a: 1 b: 0 c: 0 d: 1");
|
||||
|
||||
CheckToFromAffine<LinearSimilarityModel>("dx: 1 dy: 2 a: 3 b: -4",
|
||||
"dx: 1 dy: 2 a: 3 b: 4 c: -4 d: 3");
|
||||
|
||||
CheckToFromAffine<AffineModel>("dx: 1 dy: 2 a: 3 b: 4 c: 5 d: 6",
|
||||
"dx: 1 dy: 2 a: 3 b: 4 c: 5 d: 6");
|
||||
|
||||
CheckToFromAffine<Homography>(
|
||||
"h_00: 3 h_01: 4 h_02: 1 "
|
||||
"h_10: 5 h_11: 6 h_12: 2 "
|
||||
"h_20: 0 h_21: 0 ",
|
||||
"dx: 1 dy: 2 a: 3 b: 4 c: 5 d: 6");
|
||||
Homography homography;
|
||||
ASSERT_TRUE(
|
||||
proto_ns::TextFormat::ParseFromString("h_00: 3 h_01: 4 h_02: 1 "
|
||||
"h_10: 5 h_11: 6 h_12: 2 "
|
||||
"h_20: 0 h_21: 0 ",
|
||||
&homography));
|
||||
|
||||
EXPECT_TRUE(HomographyAdapter::IsAffine(homography));
|
||||
homography.set_h_20(7);
|
||||
homography.set_h_21(8);
|
||||
EXPECT_FALSE(HomographyAdapter::IsAffine(homography));
|
||||
}
|
||||
|
||||
TEST_F(MotionModelsTest, ProjectModels) {
|
||||
// Express models w.r.t. center for easy testing.
|
||||
LinearSimilarityModel center_trans =
|
||||
LinearSimilarityAdapter::FromArgs(50, 50, 1, 0);
|
||||
LinearSimilarityModel inv_center_trans =
|
||||
LinearSimilarityAdapter::FromArgs(-50, -50, 1, 0);
|
||||
|
||||
// 20 x 10 translation with scaling of factor 2 and rotation.
|
||||
LinearSimilarityModel lin_sim =
|
||||
LinearSimilarityAdapter::FromArgs(20, 10, 2 * cos(0.2), 2 * sin(0.2));
|
||||
|
||||
LinearSimilarityModel lin_sim_center =
|
||||
ModelCompose3(center_trans, lin_sim, inv_center_trans);
|
||||
|
||||
TranslationModel translation =
|
||||
TranslationAdapter::ProjectFrom(lin_sim_center, 100, 100);
|
||||
EXPECT_NEAR(translation.dx(), 20, 1e-3);
|
||||
EXPECT_NEAR(translation.dy(), 10, 1e-3);
|
||||
|
||||
translation = ProjectViaFit<TranslationModel>(lin_sim_center, 100, 100);
|
||||
EXPECT_NEAR(translation.dx(), 20, 1e-3);
|
||||
EXPECT_NEAR(translation.dy(), 10, 1e-3);
|
||||
|
||||
Homography homog = HomographyAdapter::FromArgs(
|
||||
1, 0, 10, 0, 1, 20, 5e-3, 1e-3); // Perspective transform: yaw + pitch.
|
||||
|
||||
Homography homog_center =
|
||||
ModelCompose3(HomographyAdapter::Embed(center_trans), homog,
|
||||
HomographyAdapter::Embed(inv_center_trans));
|
||||
// Rendering:
|
||||
// https://www.wolframalpha.com/input/?i=ListPlot%5B%7B+%7B7,-7%7D,+%7B108,16%7D,+%7B104,96%7D,+%7B12.5,+125%7D,+%7B7,-7%7D%5D
|
||||
|
||||
translation = TranslationAdapter::ProjectFrom(homog_center, 100, 100);
|
||||
EXPECT_NEAR(translation.dx(), 10, 1e-3);
|
||||
EXPECT_NEAR(translation.dy(), 20, 1e-3);
|
||||
|
||||
// TODO: Investigate how ProjectViaFit can yield similar result.
|
||||
}
|
||||
|
||||
} // namespace
|
||||
} // namespace mediapipe
|
||||
700
mediapipe/util/tracking/motion_saliency.cc
Normal file
|
|
@ -0,0 +1,700 @@
|
|||
// Copyright 2019 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "mediapipe/util/tracking/motion_saliency.h"
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <deque>
|
||||
#include <iterator>
|
||||
#include <list>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "mediapipe/framework/port/logging.h"
|
||||
#include "mediapipe/util/tracking/camera_motion.h"
|
||||
#include "mediapipe/util/tracking/measure_time.h"
|
||||
#include "mediapipe/util/tracking/region_flow.h"
|
||||
#include "mediapipe/util/tracking/region_flow.pb.h"
|
||||
|
||||
namespace mediapipe {
|
||||
|
||||
MotionSaliency::MotionSaliency(const MotionSaliencyOptions& options,
|
||||
int frame_width, int frame_height)
|
||||
: options_(options),
|
||||
frame_width_(frame_width),
|
||||
frame_height_(frame_height) {}
|
||||
|
||||
MotionSaliency::~MotionSaliency() {}
|
||||
|
||||
void MotionSaliency::SaliencyFromFeatures(
|
||||
const RegionFlowFeatureList& feature_list,
|
||||
std::vector<float>* irls_weights, // optional.
|
||||
SalientPointFrame* salient_frame) {
|
||||
CHECK(salient_frame);
|
||||
CHECK_EQ(frame_width_, feature_list.frame_width());
|
||||
CHECK_EQ(frame_height_, feature_list.frame_height());
|
||||
|
||||
if (irls_weights) {
|
||||
CHECK_EQ(feature_list.feature_size(), irls_weights->size());
|
||||
}
|
||||
|
||||
if (feature_list.feature_size() < 1) {
|
||||
return;
|
||||
}
|
||||
|
||||
float max_irls_weight = 0;
|
||||
if (irls_weights) {
|
||||
max_irls_weight =
|
||||
*std::max_element(irls_weights->begin(), irls_weights->end());
|
||||
} else {
|
||||
struct FeatureIRLSComparator {
|
||||
bool operator()(const RegionFlowFeature& lhs,
|
||||
const RegionFlowFeature& rhs) const {
|
||||
return lhs.irls_weight() < rhs.irls_weight();
|
||||
}
|
||||
};
|
||||
max_irls_weight =
|
||||
std::max_element(feature_list.feature().begin(),
|
||||
feature_list.feature().end(), FeatureIRLSComparator())
|
||||
->irls_weight();
|
||||
}
|
||||
|
||||
// Max weight is too small for meaningful mode finding, terminate.
|
||||
if (max_irls_weight < 1e-2f) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Discard small weights that just slow clustering down.
|
||||
const float irls_cutoff = max_irls_weight * 1e-2f;
|
||||
|
||||
int feat_idx = 0;
|
||||
|
||||
// Create SalientLocation's from input feature_list.
|
||||
std::vector<SalientLocation> features;
|
||||
for (const auto& src_feature : feature_list.feature()) {
|
||||
const float weight =
|
||||
irls_weights ? (*irls_weights)[feat_idx] : src_feature.irls_weight();
|
||||
++feat_idx;
|
||||
|
||||
// Discard all features with small measure or zero weight from mode finding.
|
||||
if (weight < irls_cutoff) {
|
||||
continue;
|
||||
}
|
||||
|
||||
features.push_back(SalientLocation(FeatureLocation(src_feature), weight));
|
||||
}
|
||||
|
||||
DetermineSalientFrame(features, salient_frame);
|
||||
}
|
||||
|
||||
void MotionSaliency::SaliencyFromPoints(const std::vector<Vector2_f>* points,
|
||||
const std::vector<float>* weights,
|
||||
SalientPointFrame* salient_frame) {
|
||||
// TODO: Handle vectors of size zero.
|
||||
CHECK(salient_frame);
|
||||
CHECK_EQ(points->size(), weights->size());
|
||||
|
||||
float max_weight = *std::max_element(weights->begin(), weights->end());
|
||||
|
||||
// Max weight is too small for meaningful mode finding, terminate.
|
||||
if (max_weight < 1e-2f) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Discard small weights that just slow clustering down.
|
||||
const float weight_cutoff = max_weight * 1e-2f;
|
||||
|
||||
// Create SalientLocation's from input points.
|
||||
std::vector<SalientLocation> features;
|
||||
for (int point_idx = 0; point_idx < points->size(); ++point_idx) {
|
||||
const float weight = (*weights)[point_idx];
|
||||
// Discard all features with small measure or zero weight from mode finding.
|
||||
if (weight < weight_cutoff) {
|
||||
continue;
|
||||
}
|
||||
|
||||
features.push_back(SalientLocation((*points)[point_idx], weight));
|
||||
}
|
||||
|
||||
DetermineSalientFrame(features, salient_frame);
|
||||
}
|
||||
|
||||
// We only keep those salient points that have neighbors along the temporal
|
||||
// dimension.
|
||||
void MotionSaliency::SelectSaliencyInliers(
|
||||
std::vector<SalientPointFrame*>* motion_saliency,
|
||||
bool rescale_to_median_saliency_weight) {
|
||||
float scale = 1.0;
|
||||
|
||||
if (rescale_to_median_saliency_weight) {
|
||||
// Compute median saliency weight across all frames, to rescale saliency.
|
||||
std::vector<float> saliency_weights;
|
||||
for (int i = 0; i < motion_saliency->size(); ++i) {
|
||||
for (const auto& salient_point : (*motion_saliency)[i]->point()) {
|
||||
saliency_weights.push_back(salient_point.weight());
|
||||
}
|
||||
}
|
||||
|
||||
// Nothing to filter in the frame chunk.
|
||||
if (saliency_weights.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
auto median_iter = saliency_weights.begin() + saliency_weights.size() / 2;
|
||||
std::nth_element(saliency_weights.begin(), median_iter,
|
||||
saliency_weights.end());
|
||||
|
||||
const float median_weight = *median_iter;
|
||||
if (median_weight > 0) {
|
||||
scale = options_.saliency_weight() / median_weight;
|
||||
}
|
||||
}
|
||||
|
||||
SaliencyPointList inlier_saliency(motion_saliency->size());
|
||||
const float sq_support_distance = options_.selection_support_distance() *
|
||||
options_.selection_support_distance();
|
||||
|
||||
// Test each point salient point for inlierness.
|
||||
for (int i = 0; i < motion_saliency->size(); ++i) {
|
||||
for (const auto& salient_point : (*motion_saliency)[i]->point()) {
|
||||
int support = 0;
|
||||
Vector2_f salient_location(salient_point.norm_point_x(),
|
||||
salient_point.norm_point_y());
|
||||
|
||||
// Find supporting points (saliency points close enough to current one)
|
||||
// in adjacent frames. Linear Complexity.
|
||||
for (int j = std::max<int>(0, i - options_.selection_frame_radius()),
|
||||
end_j = std::min<int>(i + options_.selection_frame_radius(),
|
||||
motion_saliency->size() - 1);
|
||||
|
||||
j <= end_j; ++j) {
|
||||
if (i == j) {
|
||||
continue;
|
||||
}
|
||||
|
||||
for (const auto& compare_point : (*motion_saliency)[j]->point()) {
|
||||
Vector2_f compare_location(compare_point.norm_point_x(),
|
||||
compare_point.norm_point_y());
|
||||
|
||||
if ((salient_location - compare_location).Norm2() <=
|
||||
sq_support_distance) {
|
||||
++support;
|
||||
}
|
||||
}
|
||||
} // end neighbor frames iteration.
|
||||
|
||||
if (support >= options_.selection_minimum_support()) {
|
||||
SalientPoint* scaled_point = inlier_saliency[i].add_point();
|
||||
scaled_point->CopyFrom(salient_point);
|
||||
scaled_point->set_weight(scaled_point->weight() * scale);
|
||||
}
|
||||
} // end point traversal.
|
||||
} // end frame traversal.
|
||||
|
||||
for (int k = 0; k < motion_saliency->size(); ++k) {
|
||||
(*motion_saliency)[k]->Swap(&inlier_saliency[k]);
|
||||
}
|
||||
}
|
||||
|
||||
void MotionSaliency::FilterMotionSaliency(
|
||||
std::vector<SalientPointFrame*>* saliency_point_list) {
|
||||
CHECK(saliency_point_list != nullptr);
|
||||
const float sigma_time = options_.filtering_sigma_time();
|
||||
const float sigma_space = options_.filtering_sigma_space();
|
||||
|
||||
const int time_radius = ceil(sigma_time * 1.5);
|
||||
const int time_diameter = 2 * time_radius + 1;
|
||||
|
||||
// Create lookup table for weights.
|
||||
std::vector<float> time_weights(time_diameter);
|
||||
const float time_coeff = -0.5f / (sigma_time * sigma_time);
|
||||
|
||||
for (int i = -time_radius, time_idx = 0; i <= time_radius; ++i, ++time_idx) {
|
||||
time_weights[time_idx] = std::exp(time_coeff * i * i);
|
||||
}
|
||||
|
||||
// Ignore points further than 1.65 sigmas away (includes 90% of distribution).
|
||||
const float space_cutoff = 1.65 * sigma_space;
|
||||
const float space_exp_scale = -0.5f / (sigma_space * sigma_space);
|
||||
|
||||
// Copy saliency points.
|
||||
const int num_frames = saliency_point_list->size();
|
||||
std::vector<SalientPointFrame> points(num_frames + 2 * time_radius);
|
||||
|
||||
for (int k = 0; k < saliency_point_list->size(); ++k) {
|
||||
points[time_radius + k].CopyFrom(*(*saliency_point_list)[k]);
|
||||
}
|
||||
|
||||
// Copy border.
|
||||
std::copy(points.rbegin() + time_radius, points.rbegin() + 2 * time_radius,
|
||||
points.end() - time_radius);
|
||||
std::copy(points.begin() + time_radius, points.begin() + 2 * time_radius,
|
||||
points.rend() - time_radius);
|
||||
|
||||
// Apply filter.
|
||||
for (int i = time_radius; i < num_frames + time_radius; ++i) {
|
||||
const int frame_idx = i - time_radius;
|
||||
for (auto& sample_point :
|
||||
*(*saliency_point_list)[frame_idx]->mutable_point()) {
|
||||
Vector2_f point_sum(0, 0);
|
||||
// Sum for left, bottom, right, top tuple.
|
||||
Vector4_f bound_sum;
|
||||
Vector3_f ellipse_sum(0, 0, 0); // Captures major, minor and angle.
|
||||
float weight_sum = 0;
|
||||
float filter_sum = 0;
|
||||
|
||||
const float sample_angle = sample_point.angle();
|
||||
for (int k = i - time_radius, time_idx = 0; k <= i + time_radius;
|
||||
++k, ++time_idx) {
|
||||
for (const auto& test_point : points[k].point()) {
|
||||
const float diff = std::hypot(
|
||||
test_point.norm_point_y() - sample_point.norm_point_y(),
|
||||
test_point.norm_point_x() - sample_point.norm_point_x());
|
||||
if (diff > space_cutoff) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const float weight = time_weights[time_idx] * test_point.weight() *
|
||||
std::exp(diff * diff * space_exp_scale);
|
||||
|
||||
filter_sum += weight;
|
||||
point_sum +=
|
||||
Vector2_f(test_point.norm_point_x(), test_point.norm_point_y()) *
|
||||
weight;
|
||||
bound_sum += Vector4_f(test_point.left(), test_point.bottom(),
|
||||
test_point.right(), test_point.top()) *
|
||||
weight;
|
||||
weight_sum += test_point.weight() * weight;
|
||||
|
||||
// Ensure test_point and sample are less than pi / 2 apart.
|
||||
float test_angle = test_point.angle();
|
||||
if (fabs(test_angle - sample_angle) > M_PI / 2) {
|
||||
if (sample_angle > M_PI / 2) {
|
||||
test_angle += M_PI;
|
||||
} else {
|
||||
test_angle -= M_PI;
|
||||
}
|
||||
}
|
||||
|
||||
ellipse_sum += Vector3_f(test_point.norm_major(),
|
||||
test_point.norm_minor(), test_angle) *
|
||||
weight;
|
||||
}
|
||||
}
|
||||
|
||||
if (filter_sum > 0) {
|
||||
const float inv_filter_sum = 1.0f / filter_sum;
|
||||
point_sum *= inv_filter_sum;
|
||||
bound_sum *= inv_filter_sum;
|
||||
weight_sum *= inv_filter_sum;
|
||||
ellipse_sum *= inv_filter_sum;
|
||||
}
|
||||
|
||||
sample_point.set_norm_point_x(point_sum.x());
|
||||
sample_point.set_norm_point_y(point_sum.y());
|
||||
sample_point.set_left(bound_sum.x());
|
||||
sample_point.set_bottom(bound_sum.y());
|
||||
sample_point.set_right(bound_sum.z());
|
||||
sample_point.set_top(bound_sum.w());
|
||||
|
||||
sample_point.set_weight(weight_sum);
|
||||
sample_point.set_norm_major(ellipse_sum.x());
|
||||
sample_point.set_norm_minor(ellipse_sum.y());
|
||||
sample_point.set_angle(ellipse_sum.z());
|
||||
|
||||
if (sample_point.angle() > M_PI) {
|
||||
sample_point.set_angle(sample_point.angle() - M_PI);
|
||||
}
|
||||
if (sample_point.angle() < 0) {
|
||||
sample_point.set_angle(sample_point.angle() + M_PI);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void MotionSaliency::CollapseMotionSaliency(
|
||||
const SaliencyPointList& input_saliency, const Vector4_f& bounds,
|
||||
SaliencyPointList* output_saliency) {
|
||||
CHECK(output_saliency);
|
||||
output_saliency->clear();
|
||||
output_saliency->resize(input_saliency.size());
|
||||
|
||||
for (int f = 0; f < input_saliency.size(); ++f) { // traverse frames.
|
||||
Vector2_f mean_saliency(0, 0);
|
||||
float weight_sum = 0;
|
||||
for (const auto& salient_point : input_saliency[f].point()) {
|
||||
mean_saliency +=
|
||||
Vector2_f(salient_point.norm_point_x(), salient_point.norm_point_y());
|
||||
weight_sum += 1;
|
||||
}
|
||||
|
||||
if (weight_sum > 0) {
|
||||
SalientPoint* collapsed = (*output_saliency)[f].add_point();
|
||||
collapsed->set_norm_point_x(mean_saliency.x() / weight_sum);
|
||||
collapsed->set_norm_point_y(mean_saliency.y() / weight_sum);
|
||||
collapsed->set_left(bounds.x());
|
||||
collapsed->set_bottom(bounds.y());
|
||||
collapsed->set_right(bounds.z());
|
||||
collapsed->set_top(bounds.w());
|
||||
collapsed->set_weight(1.0f);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
namespace {

// Describes feature mode for a feature in a RegionFlowFeatureList stored
// at index feature_idx.
struct FeatureMode {
  Vector2_f location;  // Converged mode location in the image domain.
  float irls_weight;   // Weight of the feature this mode was seeded from.
  int feature_idx;     // Index of the seeding feature; -1 marks "processed".
  int mode_bin;        // Row-major grid bin the converged location falls in.
};

// Determines mode for each feature in feature_view.
// Returns modes as list of pointers in mode_ptrs. Actual modes are stored
// binned into a grid of equal size as the passed FeatureGrid
// (bins of size grid_resolution x grid_resolution).
// Each feature's mode is found by mean-shift iteration: starting at the
// feature location, the center is repeatedly replaced by the weighted mean
// of nearby features (spatial Gaussian from space_lut times feature weight)
// until movement falls below the convergence radius or kMaxIter is reached.
void DetermineFeatureModes(
    const FeatureFrame<MotionSaliency::SalientLocation>& features,
    float grid_resolution, const Vector2_i& grid_dims, float band_width,
    const FeatureGrid<MotionSaliency::SalientLocation>& feature_grid,
    const std::vector<std::vector<int>>& feature_taps,
    const std::vector<float>& space_lut, float space_scale,
    std::vector<std::list<FeatureMode>>* mode_grid,
    std::vector<FeatureMode*>* mode_ptrs) {
  CHECK(mode_grid);
  CHECK(mode_ptrs);
  const int num_features = features.size();
  // One mode is emitted per feature; avoid reallocation during push_back.
  mode_ptrs->reserve(num_features);

  const float grid_scale = 1.0f / grid_resolution;
  int feature_idx = 0;
  const int kMaxIter = 100;
  // Set convergence radius to 0.1% of bandwidth.
  const float sq_conv_radius = band_width * band_width * 1e-6f;
  for (const auto& feature_ptr : features) {
    // Mean-shift: start at the feature's own location.
    Vector2_f center = feature_ptr->pt;
    int iter = 0;
    for (; iter < kMaxIter; ++iter) {
      // Bin containing the current center (row-major index).
      const int bin_x = center.x() * grid_scale;
      const int bin_y = center.y() * grid_scale;
      const int grid_loc = bin_y * grid_dims.x() + bin_x;

      float sum_weight = 0;
      Vector2_f new_center;
      // Accumulate weighted mean over features in the neighboring bins
      // (feature_taps lists the bins to consider for each grid location).
      for (const auto& bin : feature_taps[grid_loc]) {
        for (const auto& test_feat_ptr : feature_grid[bin]) {
          const float dist = (test_feat_ptr->pt - center).Norm();
          // Spatial Gaussian (via LUT, indexed by scaled distance) times
          // the feature's saliency weight.
          const float weight = space_lut[static_cast<int>(dist * space_scale)] *
                               test_feat_ptr->weight;
          sum_weight += weight;
          new_center += weight * test_feat_ptr->pt;
        }
      }

      if (sum_weight > 0) {
        new_center *= (1.0f / sum_weight);
        if ((center - new_center).Norm2() < sq_conv_radius) {
          // Converged: movement below threshold.
          center = new_center;
          break;
        } else {
          center = new_center;
        }
      } else {
        // The seeding feature itself lies within the taps, so this path
        // indicates an inconsistent grid; bail out at the current center.
        LOG(WARNING) << "No features found in band_width radius, "
                     << "should not happen. ";
        break;
      }
    }

    // Bin the converged mode into the grid.
    const int mode_bin_x = center.x() * grid_scale;
    const int mode_bin_y = center.y() * grid_scale;
    const int mode_grid_loc = mode_bin_y * grid_dims.x() + mode_bin_x;
    FeatureMode mode{center, feature_ptr->weight, feature_idx, mode_grid_loc};

    // std::list gives stable addresses, so pointers into mode_grid stay
    // valid as more modes are appended.
    (*mode_grid)[mode_grid_loc].push_back(mode);
    FeatureMode* added_mode = &(*mode_grid)[mode_grid_loc].back();
    (*mode_ptrs).push_back(added_mode);
    ++feature_idx;
  }
}

}  // namespace.
|
||||
|
||||
// Locates modes in the passed SalientLocations via mean-shift, then merges
// nearby modes (within band_width), computing for each surviving mode its
// weighted location, total assignment weight, and an ellipse (major/minor
// axis plus angle) from the weighted scatter of contributing feature
// locations. Modes are returned sorted by decreasing assignment weight.
// The input vector is not mutated (pointers to its elements are taken).
void MotionSaliency::SalientModeFinding(std::vector<SalientLocation>* locations,
                                        std::vector<SalientMode>* modes) {
  CHECK(modes);
  CHECK(locations);
  if (locations->empty()) {
    return;
  }

  // Scale band_width to image domain.
  const float band_width =
      hypot(frame_width_, frame_height_) * options_.mode_band_width();

  // Select all salient locations with non-zero weight.
  FeatureFrame<SalientLocation> salient_features;
  salient_features.reserve(locations->size());
  for (auto& loc : *locations) {
    if (loc.weight > 1e-6) {
      salient_features.push_back(&loc);
    }
  }

  const int num_features = salient_features.size();
  if (num_features == 0) {
    return;
  }

  // Build feature grid according to bandwidth.
  std::vector<FeatureGrid<SalientLocation>> feature_grids;
  std::vector<std::vector<int>> feature_taps;

  // Guarantee at least 1.5 sigmas in each direction are captured with
  // tap 3 filtering (86 % of the data).
  const float grid_resolution = 1.5f * band_width;
  Vector2_i grid_dims;
  BuildFeatureGrid(
      frame_width_, frame_height_, grid_resolution, {salient_features},
      [](const SalientLocation& l) -> Vector2_f { return l.pt; }, &feature_taps,
      nullptr, &grid_dims, &feature_grids);

  // Just one frame input, expect one grid as output.
  CHECK_EQ(1, feature_grids.size());
  const auto& feature_grid = feature_grids[0];

  // Setup Gaussian LUT for smoothing in space, using 2^10 discretization bins.
  const int lut_bins = 1 << 10;
  std::vector<float> space_lut(lut_bins);

  // Using 3 tap smoothing, max distance is 2 bin diagonals.
  // We use maximum of 2 * sqrt(2) * bin_radius plus 1% room in case maximum
  // value is attained.
  const float max_space_diff = sqrt(2.0) * 2.f * grid_resolution * 1.01f;

  const float space_bin_size = max_space_diff / lut_bins;
  const float space_scale = 1.0f / space_bin_size;
  // Gaussian with sigma == band_width, evaluated over the LUT's domain.
  const float space_coeff = -0.5f / (band_width * band_width);
  for (int i = 0; i < lut_bins; ++i) {
    const float value = i * space_bin_size;
    space_lut[i] = std::exp(value * value * space_coeff);
  }

  // Store modes for each grid bin (to be averaged later).
  std::vector<std::list<FeatureMode>> mode_grid(grid_dims.x() * grid_dims.y());
  std::vector<FeatureMode*> mode_ptrs;

  DetermineFeatureModes(salient_features, grid_resolution, grid_dims,
                        band_width, feature_grid, feature_taps, space_lut,
                        space_scale, &mode_grid, &mode_ptrs);

  // Read out modes, ordered by decreasing weight.
  struct FeatureModeComparator {
    bool operator()(const FeatureMode* mode_lhs,
                    const FeatureMode* mode_rhs) const {
      return mode_lhs->irls_weight > mode_rhs->irls_weight;
    }
  };

  // Sort pointers, to keep order immutable during flagging operations.
  std::sort(mode_ptrs.begin(), mode_ptrs.end(), FeatureModeComparator());

  // Greedy merge: starting from the highest-weight unprocessed mode, absorb
  // all modes within band_width and accumulate weighted first and second
  // moments of the contributing feature locations.
  for (int m = 0; m < mode_ptrs.size(); ++m) {
    // We mark a mode as processed by assigning -1 to its index.
    if (mode_ptrs[m]->feature_idx < 0) {
      continue;
    }

    FeatureMode* mode = mode_ptrs[m];

    // Average modes within band_width based on irls_weight * spatial weight.
    double sum_weight = mode->irls_weight;
    double mode_x = sum_weight * mode->location.x();
    double mode_y = sum_weight * mode->location.y();

    // Weighted moments of feature locations (for the ellipse fit below).
    const Vector2_f& feat_loc = salient_features[mode->feature_idx]->pt;
    double feat_x = sum_weight * feat_loc.x();
    double feat_y = sum_weight * feat_loc.y();
    double feat_xx = sum_weight * feat_loc.x() * feat_loc.x();
    double feat_xy = sum_weight * feat_loc.x() * feat_loc.y();
    double feat_yy = sum_weight * feat_loc.y() * feat_loc.y();

    mode->feature_idx = -1;  // Flag as processed, does not change order
                             // of traversal.

    // Scan neighboring bins of this mode's bin for absorbable modes.
    for (const auto& bin : feature_taps[mode->mode_bin]) {
      for (auto& test_mode : mode_grid[bin]) {
        if (test_mode.feature_idx >= 0) {
          const float dist = (test_mode.location - mode->location).Norm();
          if (dist <= band_width) {
            const Vector2_f test_loc =
                salient_features[test_mode.feature_idx]->pt;
            const float weight =
                space_lut[static_cast<int>(dist * space_scale)] *
                test_mode.irls_weight;

            sum_weight += weight;
            mode_x += weight * test_mode.location.x();
            mode_y += weight * test_mode.location.y();

            const float test_loc_x_w = weight * test_loc.x();
            const float test_loc_y_w = weight * test_loc.y();
            feat_x += test_loc_x_w;
            feat_y += test_loc_y_w;

            feat_xx += test_loc_x_w * test_loc.x();
            feat_xy += test_loc_x_w * test_loc.y();
            feat_yy += test_loc_y_w * test_loc.y();

            // Flag as processed, does not change order of traversal.
            test_mode.feature_idx = -1;
          }
        }
      }
    }

    // Only keep modes with sufficient accumulated weight.
    if (sum_weight >= options_.min_irls_mode_weight()) {
      double inv_sum_weight = 1.0f / sum_weight;
      mode_x *= inv_sum_weight;
      mode_y *= inv_sum_weight;
      feat_x *= inv_sum_weight;
      feat_y *= inv_sum_weight;
      feat_xx *= inv_sum_weight;
      feat_xy *= inv_sum_weight;
      feat_yy *= inv_sum_weight;

      // Covariance matrix.
      // Scatter of feature locations about the mode location:
      // [a bc; bc d] with a = E[x^2] - 2 E[x] mx + mx^2, etc.
      const float a = feat_xx - 2.0 * feat_x * mode_x + mode_x * mode_x;
      const float bc =
          feat_xy - feat_x * mode_y - feat_y * mode_x + mode_x * mode_y;
      const float d = feat_yy - 2.0 * feat_y * mode_y + mode_y * mode_y;

      Vector2_f axis_magnitude;
      float angle;
      if (!EllipseFromCovariance(a, bc, d, &axis_magnitude, &angle)) {
        // Degenerate covariance; fall back to a unit circle.
        angle = 0;
        axis_magnitude = Vector2_f(1, 1);
      } else {
        // Normalize angle into [0, pi].
        if (angle < 0) {
          angle += M_PI;
        }
        CHECK_GE(angle, 0);
        CHECK_LE(angle, M_PI + 1e-3);
      }

      SalientMode irls_mode;
      irls_mode.location = Vector2_f(mode_x, mode_y);
      irls_mode.assignment_weight = sum_weight;
      irls_mode.axis_magnitude = axis_magnitude;
      irls_mode.angle = angle;
      modes->push_back(irls_mode);
    }
  }

  // Sort modes by decreasing weight.
  struct ModeWeightCompare {
    bool operator()(const SalientMode& lhs, const SalientMode& rhs) const {
      return lhs.assignment_weight > rhs.assignment_weight;
    }
  };

  std::sort(modes->begin(), modes->end(), ModeWeightCompare());
}
|
||||
|
||||
// Determines the salient frame for a list of SalientLocations by performing
|
||||
// mode finding and scales each point based on frame size.
|
||||
void MotionSaliency::DetermineSalientFrame(
|
||||
std::vector<SalientLocation> locations, SalientPointFrame* salient_frame) {
|
||||
CHECK(salient_frame);
|
||||
|
||||
std::vector<SalientMode> modes;
|
||||
{
|
||||
MEASURE_TIME << "Mode finding";
|
||||
SalientModeFinding(&locations, &modes);
|
||||
}
|
||||
|
||||
const float denom_x = 1.0f / frame_width_;
|
||||
const float denom_y = 1.0f / frame_height_;
|
||||
|
||||
// Convert to salient points.
|
||||
for (int mode_idx = 0,
|
||||
mode_sz = std::min<int>(modes.size(), options_.num_top_irls_modes());
|
||||
mode_idx < mode_sz; ++mode_idx) {
|
||||
SalientPoint* pt = salient_frame->add_point();
|
||||
pt->set_norm_point_x(modes[mode_idx].location.x());
|
||||
pt->set_norm_point_y(modes[mode_idx].location.y());
|
||||
pt->set_left(options_.bound_left());
|
||||
pt->set_bottom(options_.bound_bottom());
|
||||
pt->set_right(options_.bound_right());
|
||||
pt->set_top(options_.bound_top());
|
||||
|
||||
pt->set_norm_major(modes[mode_idx].axis_magnitude.x());
|
||||
pt->set_norm_minor(modes[mode_idx].axis_magnitude.y());
|
||||
pt->set_angle(modes[mode_idx].angle);
|
||||
pt->set_weight(modes[mode_idx].assignment_weight *
|
||||
options_.saliency_weight());
|
||||
|
||||
ScaleSalientPoint(denom_x, denom_y, pt);
|
||||
}
|
||||
}
|
||||
|
||||
void ForegroundWeightsFromFeatures(const RegionFlowFeatureList& feature_list,
|
||||
float foreground_threshold,
|
||||
float foreground_gamma,
|
||||
const CameraMotion* camera_motion,
|
||||
std::vector<float>* weights) {
|
||||
CHECK(weights != nullptr);
|
||||
weights->clear();
|
||||
|
||||
constexpr float kEpsilon = 1e-4f;
|
||||
|
||||
CHECK_GT(foreground_threshold, 0.0f);
|
||||
if (camera_motion) {
|
||||
foreground_threshold *=
|
||||
std::max(kEpsilon, InlierCoverage(*camera_motion, false));
|
||||
}
|
||||
|
||||
const float weight_denom = 1.0f / foreground_threshold;
|
||||
|
||||
// Map weights to foreground measure and determine minimum irls weight.
|
||||
for (const auto& feature : feature_list.feature()) {
|
||||
// Skip marked outliers.
|
||||
if (feature.irls_weight() == 0) {
|
||||
weights->push_back(0.0f);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Maps an irls_weight of magnitude weight_denom (from above) to zero,
|
||||
// with values below weight_denom assigned linearly mapped (zero is mapped
|
||||
// ot 1). Avoid mapping to zero as it used to mark outliers.
|
||||
const float foreground_measure =
|
||||
std::max(0.0f, 1.0f - feature.irls_weight() * weight_denom);
|
||||
|
||||
if (std::abs(foreground_gamma - 1.0f) < 1e-3f) {
|
||||
weights->push_back(std::max(kEpsilon, foreground_measure));
|
||||
} else {
|
||||
weights->push_back(
|
||||
std::max(kEpsilon, std::pow(foreground_measure, foreground_gamma)));
|
||||
}
|
||||
}
|
||||
CHECK_EQ(feature_list.feature_size(), weights->size());
|
||||
}
|
||||
|
||||
} // namespace mediapipe
|
||||
150
mediapipe/util/tracking/motion_saliency.h
Normal file
|
|
@ -0,0 +1,150 @@
|
|||
// Copyright 2019 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Computes MotionSaliency points that can be used for stabilization and
|
||||
// retargeting.
|
||||
|
||||
#ifndef MEDIAPIPE_UTIL_TRACKING_MOTION_SALIENCY_H_
|
||||
#define MEDIAPIPE_UTIL_TRACKING_MOTION_SALIENCY_H_
|
||||
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "mediapipe/framework/port/vector.h"
|
||||
#include "mediapipe/util/tracking/motion_saliency.pb.h"
|
||||
#include "mediapipe/util/tracking/region_flow.h"
|
||||
|
||||
namespace mediapipe {
|
||||
class RegionFlowFeatureList;
|
||||
class RegionFlowFrame;
|
||||
class SalientPointFrame;
|
||||
} // namespace mediapipe
|
||||
|
||||
namespace mediapipe {
|
||||
|
||||
// Computes salient points (modes of high-foregroundness feature clusters)
// from region flow features or raw points, for use in stabilization and
// retargeting. Operates in the image domain of the frame size passed at
// construction; emitted SalientPoints are normalized by the frame size.
class MotionSaliency {
 public:
  MotionSaliency(const MotionSaliencyOptions& options, int frame_width,
                 int frame_height);
  ~MotionSaliency();

  // Finds modes in the RegionFlowFeatureList (clusters for high IRLS weight,
  // per default features agreeing with the background motion).
  // Optionally, per feature irls weights can be supplied instead of using the
  // features weight to adapt modes that will be found, e.g. see
  // ForegroundWeightsFromFeatures below.
  void SaliencyFromFeatures(const RegionFlowFeatureList& feature_list,
                            std::vector<float>* irls_weights,  // optional.
                            SalientPointFrame* salient_frame);

  // Finds saliency points (modes) from a list of points and their respective
  // weights, outputs a SalientPointFrame.
  void SaliencyFromPoints(const std::vector<Vector2_f>* points,
                          const std::vector<float>* weights,
                          SalientPointFrame* salient_frame);

  // Selects saliency inliers, by searching for close-by salient points
  // (within fractional MotionSaliencyOptions::filtering_support_distance)
  // across adjacent frames (considered are
  // #MotionSaliencyOptions::filtering_frame_radius before and after the
  // current frame).
  // If at least #MotionSaliencyOptions::filtering_minimum_support
  // supporting points are found the tested salient point is kept, otherwise
  // discarded.
  // If desired performs rescaling, such that the median salient point weight
  // equals MotionSaliencyOptions::saliency_weight().
  void SelectSaliencyInliers(std::vector<SalientPointFrame*>* motion_saliency,
                             bool rescale_to_median_saliency_weight);

  // Averages all salient points (unweighted average) per frame. The resulting
  // mean salient point is assigned weight one, and the specified normalized
  // bounds (as tuple left, bottom, right, top).
  void CollapseMotionSaliency(const SaliencyPointList& input_saliency,
                              const Vector4_f& bounds,
                              SaliencyPointList* output_saliency);

  // Smooths saliency in space and time.
  void FilterMotionSaliency(
      std::vector<SalientPointFrame*>* saliency_point_list);

  // Aggregates location in image domain and salient weight.
  struct SalientLocation {
    SalientLocation() {}
    SalientLocation(const Vector2_f& _pt, float _weight)
        : pt(_pt), weight(_weight) {}
    Vector2_f pt;        // Location in the image domain (pixels).
    float weight = 0;    // Saliency weight; zero-weight locations are ignored.
  };

 private:
  // Locates modes in a set of SalientLocation's
  // (using mean shift with bilateral weights, i.e. weight * spatial
  // gaussian weighting).
  // Only modes for which the sum of total saliency weight is
  // above min_irls_mode_sum are returned.
  // Returns modes in the image domain as 2D points, sum of their
  // assignment weights and spatial extent along major and minor axis.
  // Modes are sorted w.r.t. their assignment irls weights (from highest to
  // lowest).
  struct SalientMode {
    Vector2_f location;  // Mode location in the image domain.
    // Total sum of irls weights assigned to this mode.
    float assignment_weight = 0;
    // Magnitude of major and minor axis stored in x and y, respectively.
    Vector2_f axis_magnitude;
    // Angle in radians w.r.t. x-axis.
    float angle = 0;
  };

  // Note: input vector locations is not mutated by function.
  void SalientModeFinding(std::vector<SalientLocation>* locations,
                          std::vector<SalientMode>* modes);

  // Determines the salient frame for a list of SalientLocations by performing
  // mode finding and scaling each point based on frame size.
  void DetermineSalientFrame(std::vector<SalientLocation> locations,
                             SalientPointFrame* salient_frame);

  MotionSaliencyOptions options_;  // Copied at construction.
  int frame_width_;                // Frame dimensions used for normalization.
  int frame_height_;
};
|
||||
|
||||
// Returns foregroundness weights in [0, 1] for each feature, by mapping irls
// weight to foreground score in [0, 1].
// In particular, the foreground threshold indicates the *inverse* registration
// error (i.e. the irls weight) that is deemed a complete inlier.
// Weights in the interval [0, foreground_threshold] (corresponding to
// pixel errors in the interval [1 / foreground_threshold, inf])
// are mapped to 1 - [0, 1], i.e. the foreground threshold is mapped to zero
// with weights below the threshold being assigned values > 0.
// Therefore, larger values will increase the amount of detected foreground
// as well as noise.
// In addition, foreground_gamma values < 1 can be used to increase the
// resolution of small foreground motions (irls weight close to the
// foreground_threshold) at the expense of larger foreground motions
// (irls weight close to zero).
// If the optional parameter camera_motion is specified, the passed foreground
// threshold is scaled by the InlierCoverage of the camera_motion
// (which is in [0, 1]). That is, for unstable frames with small coverage,
// the threshold is tighter and fewer features are considered foreground.
void ForegroundWeightsFromFeatures(
    const RegionFlowFeatureList& feature_list,
    float foreground_threshold,  // 0.5 is a good default value.
    float foreground_gamma,      // use 1.0 for default
    const CameraMotion* camera_motion,  // optional, can be nullptr.
    std::vector<float>* weights);
|
||||
|
||||
} // namespace mediapipe
|
||||
|
||||
#endif // MEDIAPIPE_UTIL_TRACKING_MOTION_SALIENCY_H_
|
||||
71
mediapipe/util/tracking/motion_saliency.proto
Normal file
|
|
@ -0,0 +1,71 @@
|
|||
// Copyright 2019 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
syntax = "proto2";
|
||||
|
||||
package mediapipe;
|
||||
|
||||
// Next tag: 17
message MotionSaliencyOptions {
  // Standard normalized bounds and weights used to initialize salient points.
  // See region_flow.proto for details.
  optional float bound_left = 1 [default = 0.3];
  optional float bound_bottom = 2 [default = 0.3];
  optional float bound_right = 15 [default = 0.3];
  optional float bound_top = 16 [default = 0.3];

  // Multiplier applied to a mode's assignment weight when emitting points.
  optional float saliency_weight = 3 [default = 20];

  // If set, scales saliency_weight by flow magnitude.
  optional bool scale_weight_by_flow_magnitude = 8 [default = false];

  // Minimum number of features within a region to be considered salient.
  // Only applicable for functions accepting RegionFlowFrames.
  optional int32 min_features = 4 [default = 5];

  // If set, only considers regions flagged as foreground.
  optional bool use_only_foreground_regions = 9 [default = false];

  // Specifies roughly number of foreground features mapped to one mode,
  // for mode to be considered salient.
  optional float min_irls_mode_weight = 10 [default = 10];

  // Only returns the top N irls modes.
  optional int32 num_top_irls_modes = 11 [default = 3];

  // Mode finding is performed with a fraction radius of 10% of frame
  // diameter by default.
  optional float mode_band_width = 12 [default = 0.10];

  // We filter salient points along the temporal dimension only, keeping those
  // that have sufficient support (in form of neighboring salient points). For
  // every salient point in frame n, all points in frames
  // [n - selection_frame_radius, n + selection_frame_radius] are tested,
  // whether they support the current test point.
  optional int32 selection_frame_radius = 5 [default = 5];

  // Fractional distance to be considered a supporting salient point for a test
  // point.
  optional float selection_support_distance = 6 [default = 0.2];

  // Minimum number of supporting salient points that need to be present in
  // order for a point to be considered an inlier.
  optional int32 selection_minimum_support = 7 [default = 4];

  // Sigma in space (normalized domain).
  optional float filtering_sigma_space = 13 [default = 0.05];

  // Sigma in time (in frames).
  optional float filtering_sigma_time = 14 [default = 5];
}
|
||||
36
mediapipe/util/tracking/parallel_invoker.cc
Normal file
|
|
@ -0,0 +1,36 @@
|
|||
// Copyright 2019 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "mediapipe/util/tracking/parallel_invoker.h"
|
||||
|
||||
// Choose between ThreadPool, OpenMP and serial execution.
// Note only one parallel_using_* directive can be active.
// Defaults to PARALLEL_INVOKER_MAX_VALUE (no explicit choice); presumably
// remapped to a supported mode by CheckAndSetInvokerOptions — verify.
int flags_parallel_invoker_mode = PARALLEL_INVOKER_MAX_VALUE;
// Worker count used when the singleton ThreadPool is first created.
int flags_parallel_invoker_max_threads = 4;
|
||||
|
||||
namespace mediapipe {
|
||||
|
||||
#if defined(PARALLEL_INVOKER_ACTIVE)
|
||||
ThreadPool* ParallelInvokerThreadPool() {
|
||||
static ThreadPool* pool = []() -> ThreadPool* {
|
||||
ThreadPool* new_pool =
|
||||
new ThreadPool("ParallelInvoker", flags_parallel_invoker_max_threads);
|
||||
new_pool->StartWorkers();
|
||||
return new_pool;
|
||||
}();
|
||||
return pool;
|
||||
}
|
||||
#endif
|
||||
|
||||
} // namespace mediapipe
|
||||
507
mediapipe/util/tracking/parallel_invoker.h
Normal file
|
|
@ -0,0 +1,507 @@
|
|||
// Copyright 2019 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Parallel for loop execution.
|
||||
// For details adapt parallel_using_* flags defined in parallel_invoker.cc.
|
||||
|
||||
// Usage example (for 1D):
|
||||
|
||||
// Define Functor or lambda function that implements:
|
||||
// void operator()(const BlockedRange & range) const;
|
||||
// (in addition functor needs to be copyable).
|
||||
|
||||
// Execute a for loop in parallel from 0 to N via:
|
||||
// ParallelFor(0, // start_index
|
||||
// num_frames, // end_index, exclusive
|
||||
// 1 // number of elements processed per iteration
|
||||
// [](const BlockedRange& range) {
|
||||
// // Process per-thread sub-range
|
||||
// for (int i = range.begin(); i < range.end(); ++i) {
|
||||
// // Process i'th item.
|
||||
// }
|
||||
// }
|
||||
|
||||
// Specific implementation to copy a vector of images in parallel.
|
||||
// class CopyInvoker {
|
||||
// public:
|
||||
// CopyInvoker(const vector<cv::Mat>& inputs,
|
||||
// vector<cv::Mat*>* outputs)
|
||||
// : inputs_(inputs), outputs_(outputs) {
|
||||
// }
|
||||
// CopyInvoker(const CopyInvoker& rhs)
|
||||
// : inputs_(rhs.inputs_), outputs_(rhs.outputs) {
|
||||
// }
|
||||
// void operator()(const BlockedRange& range) {
|
||||
// for (int frame = range.begin(); frame < range.end(); ++frame) {
|
||||
// inputs_[frame].copyTo(*(*outputs_)[frame]);
|
||||
// }
|
||||
// }
|
||||
// private:
|
||||
// const vector<cv::Mat>& inputs_;
|
||||
// vector<cv::Mat*>* outputs_;
|
||||
// }
|
||||
|
||||
// vector<cv::Mat> inputs;
|
||||
// vector<cv::Mat*> outputs;
|
||||
// ParallelFor(0, num_frames, 1, CopyInvoker(inputs, &outputs));
|
||||
//
|
||||
// OR (with lambdas):
|
||||
// ParallelFor(0, num_frames, 1,
|
||||
// [&inputs, &outputs](const BlockedRange& range) {
|
||||
// for (int frame = range.begin(); frame < range.end(); ++frame) {
|
||||
// inputs[frame].copyTo(*(outputs)[frame]);
|
||||
// }
|
||||
// }
|
||||
|
||||
#ifndef MEDIAPIPE_UTIL_TRACKING_PARALLEL_INVOKER_H_
|
||||
#define MEDIAPIPE_UTIL_TRACKING_PARALLEL_INVOKER_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "absl/synchronization/mutex.h"
|
||||
#include "mediapipe/framework/port/logging.h"
|
||||
|
||||
#ifdef PARALLEL_INVOKER_ACTIVE
|
||||
#include "mediapipe/framework/port/threadpool.h"
|
||||
|
||||
#ifdef __APPLE__
|
||||
#include <dispatch/dispatch.h>
|
||||
#include <stdatomic.h>
|
||||
#endif
|
||||
|
||||
#endif // PARALLEL_INVOKER_ACTIVE
|
||||
|
||||
// Specifies parallelization implementation to use.
// Selected at runtime via flags_parallel_invoker_mode (declared below).
enum PARALLEL_INVOKER_MODE {
  PARALLEL_INVOKER_NONE = 0,         // Uses single threaded execution
  PARALLEL_INVOKER_THREAD_POOL = 1,  // Uses //thread/threadpool
  PARALLEL_INVOKER_OPENMP = 2,       // Uses OpenMP (requires compiler support)
  PARALLEL_INVOKER_GCD = 3,          // Uses GCD (Apple)
  PARALLEL_INVOKER_MAX_VALUE = 4,    // Increase when adding more modes
};
|
||||
|
||||
extern int flags_parallel_invoker_mode;
|
||||
extern int flags_parallel_invoker_max_threads;
|
||||
|
||||
// Note flag: Parallel processing only activated if
|
||||
// PARALLEL_INVOKER_ACTIVE is defined.
|
||||
|
||||
namespace mediapipe {
|
||||
|
||||
// Partitions the range [begin, end) into equal blocks of size grain_size
// each (except the last one, which may hold fewer than grain_size elements).
class BlockedRange {
 public:
  BlockedRange(int begin, int end, int grain_size)
      : begin_(begin), end_(end), grain_size_(grain_size) {}

  // Inclusive start of the range.
  int begin() const { return begin_; }

  // Exclusive end of the range.
  int end() const { return end_; }

  // Number of elements processed per block.
  int grain_size() const { return grain_size_; }

 private:
  int begin_ = 0;
  int end_ = 0;
  int grain_size_ = 0;
};
|
||||
|
||||
// Partitions the range row_range x col_range into equal
// blocks of size row_range.grain_size() x col_range.grain_size() each
// (except last column and row might be of size less than grain_size in one
// or both of their dimensions).
class BlockedRange2D {
 public:
  BlockedRange2D(const BlockedRange& rows, const BlockedRange& cols)
      : rows_(rows), cols_(cols) {}

  // Row span of this block.
  const BlockedRange& rows() const { return rows_; }
  // Column span of this block.
  const BlockedRange& cols() const { return cols_; }

 private:
  // Stored by value; the referenced ranges need not outlive this object.
  BlockedRange rows_;
  BlockedRange cols_;
};
|
||||
|
||||
#ifdef PARALLEL_INVOKER_ACTIVE
|
||||
|
||||
// Singleton ThreadPool for parallel invoker.
|
||||
ThreadPool* ParallelInvokerThreadPool();
|
||||
|
||||
#ifdef __APPLE__
|
||||
// Enable to allow GCD as an option beside ThreadPool.
|
||||
#define USE_PARALLEL_INVOKER_GCD 1
|
||||
#define CHECK_GCD_PARALLEL_WORK_COUNT DEBUG
|
||||
|
||||
// Bundles a copy of an Invoker with the BlockedRange it should process, for
// handoff to GCD (libdispatch) workers as an opaque context pointer (see
// ParallelForGCDTask below). When CHECK_GCD_PARALLEL_WORK_COUNT is enabled,
// additionally counts how many tasks retrieved the invoker, so the number of
// launched GCD tasks can be validated in debug builds.
template <class Invoker>
class ParallelInvokerGCDContext {
 public:
  ParallelInvokerGCDContext(const Invoker& invoker, const BlockedRange& rows)
      : local_invoker_(invoker), rows_(rows) {
#if CHECK_GCD_PARALLEL_WORK_COUNT
    count_ = 0;
#endif
  }

  // Returns the stored invoker. In counting builds, each retrieval is
  // tallied atomically (workers call this concurrently).
  const Invoker& invoker() {
#if CHECK_GCD_PARALLEL_WORK_COUNT
    // Implicitly tracking the # of launched tasks at invoker retrieval.
    atomic_fetch_add(&count_, 1);
#endif
    return local_invoker_;
  }
  // Full range to be partitioned across tasks.
  const BlockedRange& rows() const { return rows_; }
#if CHECK_GCD_PARALLEL_WORK_COUNT
  // Number of tasks that retrieved the invoker so far.
  const int count() { return atomic_load(&count_); }
#endif

 private:
  Invoker local_invoker_;     // Copied; Invoker must be copyable.
  const BlockedRange& rows_;  // Not owned; must outlive this context.
#if CHECK_GCD_PARALLEL_WORK_COUNT
  _Atomic(int32_t) count_;
#endif
};
|
||||
|
||||
// Extends ParallelInvokerGCDContext with a column range for 2D loops:
// work is partitioned across rows only, and every task receives the full
// column range (see ParallelForGCDTask2D below).
template <class Invoker>
class ParallelInvokerGCDContext2D : public ParallelInvokerGCDContext<Invoker> {
 public:
  ParallelInvokerGCDContext2D(const Invoker& invoker, const BlockedRange& rows,
                              const BlockedRange& cols)
      : ParallelInvokerGCDContext<Invoker>(invoker, rows), cols_(cols) {}

  // Column span passed unchanged to every task.
  const BlockedRange& cols() const { return cols_; }

 private:
  BlockedRange cols_;  // Stored by value, unlike rows (held by the base).
};
|
||||
|
||||
// GCD trampoline for 1D loops: called once per task index with the context
// created by the dispatcher. Computes this task's sub-range — the index'th
// block of grain_size elements, clamped to the overall range end — and runs
// the invoker on it.
template <class Invoker>
static void ParallelForGCDTask(void* context, size_t index) {
  ParallelInvokerGCDContext<Invoker>* invoker_context =
      static_cast<ParallelInvokerGCDContext<Invoker>*>(context);
  const BlockedRange& all_tasks = invoker_context->rows();
  int start = all_tasks.begin() + index * all_tasks.grain_size();
  int end = std::min(all_tasks.end(), start + all_tasks.grain_size());
  BlockedRange this_task(start, end, all_tasks.grain_size());

  const Invoker& invoker = invoker_context->invoker();
  invoker(this_task);
}
|
||||
|
||||
template <class Invoker>
|
||||
static void ParallelForGCDTask2D(void* context, size_t index) {
|
||||
ParallelInvokerGCDContext2D<Invoker>* invoker_context =
|
||||
static_cast<ParallelInvokerGCDContext2D<Invoker>*>(context);
|
||||
// Partitioning across rows.
|
||||
const BlockedRange& all_tasks = invoker_context->rows();
|
||||
int start = all_tasks.begin() + index * all_tasks.grain_size();
|
||||
int end = std::min(all_tasks.end(), start + all_tasks.grain_size());
|
||||
BlockedRange this_task(start, end, all_tasks.grain_size());
|
||||
|
||||
const Invoker& invoker = invoker_context->invoker();
|
||||
invoker(BlockedRange2D(this_task, invoker_context->cols()));
|
||||
}
|
||||
#endif // __APPLE__
|
||||
|
||||
#endif // PARALLEL_INVOKER_ACTIVE
|
||||
// Simple wrapper for compatibility with below ParallelFor function.
|
||||
template <class Invoker>
|
||||
void SerialFor(size_t start, size_t end, size_t grain_size,
|
||||
const Invoker& invoker) {
|
||||
invoker(BlockedRange(start, end, 1));
|
||||
}
|
||||
|
||||
// Validates the requested parallel invoker mode (flags_parallel_invoker_mode)
// against what the current platform / build configuration supports, and
// rewrites it to a supported fallback where necessary. Called before every
// parallel dispatch in ParallelFor / ParallelFor2D.
inline void CheckAndSetInvokerOptions() {
#if defined(PARALLEL_INVOKER_ACTIVE)
#if defined(__ANDROID__)
  // If unsupported option is selected, force usage of OpenMP if detected, and
  // ThreadPool otherwise.
  if (flags_parallel_invoker_mode != PARALLEL_INVOKER_NONE &&
      flags_parallel_invoker_mode != PARALLEL_INVOKER_THREAD_POOL &&
      flags_parallel_invoker_mode != PARALLEL_INVOKER_OPENMP) {
#if defined(_OPENMP)
    LOG(WARNING) << "Unsupported invoker mode selected on Android. "
                 << "OpenMP linkage detected, so falling back to OpenMP";
    flags_parallel_invoker_mode = PARALLEL_INVOKER_OPENMP;
#else   // _OPENMP
    // Fallback mode for active parallel invoker without OpenMP is ThreadPool.
    LOG(WARNING) << "Unsupported invoker mode selected on Android. "
                 << "Falling back to ThreadPool";
    flags_parallel_invoker_mode = PARALLEL_INVOKER_THREAD_POOL;
#endif  // _OPENMP
  }
#endif  // __ANDROID__

#if defined(__APPLE__) || defined(__EMSCRIPTEN__)
  // Force usage of ThreadPool if unsupported option is selected.
  // (OpenMP is not supported on iOS, due to missing clang support).
  if (flags_parallel_invoker_mode != PARALLEL_INVOKER_NONE &&
#if defined(USE_PARALLEL_INVOKER_GCD)
      flags_parallel_invoker_mode != PARALLEL_INVOKER_GCD &&
#endif  // USE_PARALLEL_INVOKER_GCD
      flags_parallel_invoker_mode != PARALLEL_INVOKER_THREAD_POOL) {
    LOG(WARNING) << "Unsupported invoker mode selected on iOS. "
                 << "Falling back to ThreadPool mode";
    flags_parallel_invoker_mode = PARALLEL_INVOKER_THREAD_POOL;
  }
#endif  // __APPLE__ || __EMSCRIPTEN__

#if !defined(__APPLE__) && !defined(__EMSCRIPTEN__) && !defined(__ANDROID__)
  // Desktop builds: ThreadPool is the only supported parallel mode.
  // NOTE(review): this unconditionally overrides the flag, including
  // PARALLEL_INVOKER_NONE requests, on desktop platforms.
  flags_parallel_invoker_mode = PARALLEL_INVOKER_THREAD_POOL;
#endif  // !__APPLE__ && !__EMSCRIPTEN__ && !__ANDROID__

  // If OpenMP is requested, make sure we can actually use it, and fall back
  // to ThreadPool if not.
  if (flags_parallel_invoker_mode == PARALLEL_INVOKER_OPENMP) {
#if !defined(_OPENMP)
    LOG(ERROR) << "OpenMP invoker mode selected but not compiling with OpenMP "
               << "enabled. Falling back to ThreadPool";
    flags_parallel_invoker_mode = PARALLEL_INVOKER_THREAD_POOL;
#endif  // _OPENMP
  }

#else   // PARALLEL_INVOKER_ACTIVE
  // Parallel support compiled out; only serial execution is available.
  if (flags_parallel_invoker_mode != PARALLEL_INVOKER_NONE) {
    LOG(ERROR) << "Parallel execution requested but PARALLEL_INVOKER_ACTIVE "
               << "compile flag is not set. Falling back to single threaded "
               << "execution.";
    flags_parallel_invoker_mode = PARALLEL_INVOKER_NONE;
  }
#endif  // PARALLEL_INVOKER_ACTIVE

  // Final sanity check: mode must be a valid enum value.
  CHECK_LT(flags_parallel_invoker_mode, PARALLEL_INVOKER_MAX_VALUE)
      << "Invalid invoker mode specified.";
  CHECK_GE(flags_parallel_invoker_mode, 0) << "Invalid invoker mode specified.";
}
|
||||
|
||||
// Performs parallel iteration from [start to end), scheduling grain_size
// iterations per thread. For each iteration
// invoker(BlockedRange(thread_local_start, thread_local_end))
// is called. Each thread is given its local copy of invoker, i.e.
// invoker needs to have copy constructor defined.
template <class Invoker>
void ParallelFor(size_t start, size_t end, size_t grain_size,
                 const Invoker& invoker) {
#ifdef PARALLEL_INVOKER_ACTIVE
  CheckAndSetInvokerOptions();
  switch (flags_parallel_invoker_mode) {
#if defined(__APPLE__)
    case PARALLEL_INVOKER_GCD: {
      // Number of grain_size-sized chunks (rounded up).
      int iterations_remain = (end - start + grain_size - 1) / grain_size;
      CHECK_GT(iterations_remain, 0);
      if (iterations_remain == 1) {
        // Execute invoker serially.
        invoker(BlockedRange(start, std::min(end, start + grain_size), 1));
      } else {
        BlockedRange all_tasks(start, end, grain_size);
        ParallelInvokerGCDContext<Invoker> context(invoker, all_tasks);
        dispatch_queue_t concurrent_queue =
            dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0);
        // dispatch_apply_f blocks until all submitted tasks have finished.
        dispatch_apply_f(iterations_remain, concurrent_queue, &context,
                         ParallelForGCDTask<Invoker>);
#if CHECK_GCD_PARALLEL_WORK_COUNT
        CHECK_EQ(iterations_remain, context.count());
#endif
      }
      break;
    }
#endif  // __APPLE__

    case PARALLEL_INVOKER_THREAD_POOL: {
      int iterations_remain = (end - start + grain_size - 1) / grain_size;
      CHECK_GT(iterations_remain, 0);
      if (iterations_remain == 1) {
        // Execute invoker serially.
        invoker(BlockedRange(start, std::min(end, start + grain_size), 1));
        break;
      }

      // Shared completion state: each worker decrements iterations_remain
      // and signals once it reaches zero.
      struct {
        absl::Mutex mutex;
        absl::CondVar completed;
        int iterations_remain GUARDED_BY(mutex);
      } loop;
      {
        absl::MutexLock lock(&loop.mutex);
        loop.iterations_remain = iterations_remain;
      }

      // NOTE(review): loop variable is int while start/end are size_t;
      // ranges beyond INT_MAX would overflow -- confirm callers stay small.
      for (int x = start; x < end; x += grain_size) {
        // invoker captured by copy so each task owns its own instance.
        auto loop_func = [x, end, grain_size, &loop, invoker]() {
          // Execute invoker.
          invoker(BlockedRange(x, std::min(end, x + grain_size), 1));

          // Decrement counter.
          absl::MutexLock lock(&loop.mutex);
          --loop.iterations_remain;
          if (loop.iterations_remain == 0) {
            loop.completed.SignalAll();
          }
        };

        // Attempt to run in parallel, if busy run serial to avoid deadlocking.
        // This can happen during nested invocation of ParallelFor, as if the
        // loop iteration itself is calling ParallelFor we might deadlock if
        // we can not guarantee for the iteration to be scheduled.
        // NOTE(review): the serial fallback described above must live inside
        // ThreadPool::Schedule; it is not visible here -- confirm against the
        // ThreadPool implementation.
        ParallelInvokerThreadPool()->Schedule(loop_func);
      }

      // Wait on termination of all iterations.
      loop.mutex.Lock();
      while (loop.iterations_remain > 0) {
        loop.completed.Wait(&loop.mutex);
      }
      loop.mutex.Unlock();
      break;
    }

    case PARALLEL_INVOKER_OPENMP: {
      // Use thread-local copy of invoker.
      Invoker local_invoker(invoker);
      // Note: this path hands one element per BlockedRange to the invoker
      // and leaves chunking to the OpenMP runtime; grain_size is unused.
#pragma omp parallel for firstprivate(local_invoker) \
    num_threads(flags_parallel_invoker_max_threads)
      for (int x = start; x < end; ++x) {
        local_invoker(BlockedRange(x, x + 1, 1));
      }
      break;
    }

    case PARALLEL_INVOKER_NONE: {
      SerialFor(start, end, grain_size, invoker);
      break;
    }

    case PARALLEL_INVOKER_MAX_VALUE: {
      LOG(FATAL) << "Impossible.";
      break;
    }
  }
#else
  SerialFor(start, end, grain_size, invoker);
#endif  // PARALLEL_INVOKER_ACTIVE
}
|
||||
|
||||
// Simple wrapper for compatibility with below ParallelFor2D function.
|
||||
template <class Invoker>
|
||||
void SerialFor2D(size_t start_row, size_t end_row, size_t start_col,
|
||||
size_t end_col, size_t grain_size, const Invoker& invoker) {
|
||||
invoker(BlockedRange2D(BlockedRange(start_row, end_row, 1),
|
||||
BlockedRange(start_col, end_col, 1)));
|
||||
}
|
||||
|
||||
// Same as above ParallelFor for 2D iteration. Rows are distributed across
// workers; each worker receives the full column range.
template <class Invoker>
void ParallelFor2D(size_t start_row, size_t end_row, size_t start_col,
                   size_t end_col, size_t grain_size, const Invoker& invoker) {
#ifdef PARALLEL_INVOKER_ACTIVE
  CheckAndSetInvokerOptions();
  switch (flags_parallel_invoker_mode) {
#if defined(__APPLE__)
    case PARALLEL_INVOKER_GCD: {
      // Rows are split into grain_size-sized chunks (rounded up).
      const int iterations_remain =
          (end_row - start_row + grain_size - 1) / grain_size;
      CHECK_GT(iterations_remain, 0);
      if (iterations_remain == 1) {
        // Execute invoker serially.
        invoker(BlockedRange2D(BlockedRange(start_row, end_row, 1),
                               BlockedRange(start_col, end_col, 1)));
      } else {
        BlockedRange all_tasks(start_row, end_row, grain_size);
        ParallelInvokerGCDContext2D<Invoker> context(
            invoker, all_tasks, BlockedRange(start_col, end_col, grain_size));
        dispatch_queue_t concurrent_queue =
            dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0);
        // dispatch_apply_f blocks until all submitted tasks have finished.
        dispatch_apply_f(iterations_remain, concurrent_queue, &context,
                         ParallelForGCDTask2D<Invoker>);
#if CHECK_GCD_PARALLEL_WORK_COUNT
        CHECK_EQ(iterations_remain, context.count());
#endif
      }
      break;
    }
#endif  // __APPLE__

    case PARALLEL_INVOKER_THREAD_POOL: {
      // One task per row; grain_size is not used for chunking in this path.
      int iterations_remain = end_row - start_row;  // Guarded by loop_mutex
      CHECK_GT(iterations_remain, 0);
      if (iterations_remain == 1) {
        // Execute invoker serially.
        invoker(BlockedRange2D(BlockedRange(start_row, end_row, 1),
                               BlockedRange(start_col, end_col, 1)));
        break;
      }

      absl::Mutex loop_mutex;
      absl::CondVar loop_completed;

      for (int y = start_row; y < end_row; ++y) {
        // invoker captured by copy so each task owns its own instance.
        auto loop_func = [y, start_col, end_col, &loop_mutex, &loop_completed,
                          &iterations_remain, invoker]() {
          // Execute invoker.
          invoker(BlockedRange2D(BlockedRange(y, y + 1, 1),
                                 BlockedRange(start_col, end_col, 1)));

          // Decrement counter.
          absl::MutexLock lock(&loop_mutex);
          --iterations_remain;
          if (iterations_remain == 0) {
            loop_completed.Signal();
          }
        };

        // Attempt to run in parallel, if busy run serial to avoid deadlocking.
        // NOTE(review): any serial fallback must live inside
        // ThreadPool::Schedule; it is not visible here -- confirm.
        ParallelInvokerThreadPool()->Schedule(loop_func);
      }

      // Wait on termination of all iterations.
      loop_mutex.Lock();
      while (iterations_remain > 0) {
        loop_completed.Wait(&loop_mutex);
      }
      loop_mutex.Unlock();
      break;
    }

    case PARALLEL_INVOKER_OPENMP: {
      // Use thread-local copy of invoker.
      Invoker local_invoker(invoker);
#pragma omp parallel for firstprivate(local_invoker) \
    num_threads(flags_parallel_invoker_max_threads)
      for (int y = start_row; y < end_row; ++y) {
        local_invoker(BlockedRange2D(BlockedRange(y, y + 1, 1),
                                     BlockedRange(start_col, end_col, 1)));
      }
      break;
    }

    case PARALLEL_INVOKER_NONE: {
      SerialFor2D(start_row, end_row, start_col, end_col, grain_size, invoker);
      break;
    }

    case PARALLEL_INVOKER_MAX_VALUE: {
      LOG(FATAL) << "Impossible.";
      break;
    }
  }
#else
  SerialFor2D(start_row, end_row, start_col, end_col, grain_size, invoker);
#endif  // PARALLEL_INVOKER_ACTIVE
}
|
||||
|
||||
} // namespace mediapipe
|
||||
|
||||
#endif // MEDIAPIPE_UTIL_TRACKING_PARALLEL_INVOKER_H_
|
||||
28
mediapipe/util/tracking/parallel_invoker_forbid_mixed.cc
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
// Copyright 2019 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Guard to ensure clients do not link against both,
// single and parallel version.
// Exactly one of the two definitions below is compiled into each library
// variant; linking both variants into one binary produces a duplicate-symbol
// error for this function at link time.
#ifdef PARALLEL_INVOKER_ACTIVE
int LinkageAgainstBothSingleAndParallelStabilizationVersionsDetected() {
  return 0;
}

#endif  // PARALLEL_INVOKER_ACTIVE

#ifdef PARALLEL_INVOKER_INACTIVE
int LinkageAgainstBothSingleAndParallelStabilizationVersionsDetected() {
  return 1;
}
#endif  // PARALLEL_INVOKER_INACTIVE
|
||||
60
mediapipe/util/tracking/parallel_invoker_test.cc
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
// Copyright 2019 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "mediapipe/util/tracking/parallel_invoker.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <numeric>
|
||||
|
||||
#include "absl/synchronization/mutex.h"
|
||||
#include "mediapipe/framework/port/gtest.h"
|
||||
|
||||
namespace mediapipe {
|
||||
namespace {
|
||||
|
||||
void RunParallelTest() {
|
||||
absl::Mutex numbers_mutex;
|
||||
std::vector<int> numbers;
|
||||
const int kArraySize = 5000;
|
||||
|
||||
// Fill number array in parallel.
|
||||
ParallelFor(0, kArraySize, 1,
|
||||
[&numbers_mutex, &numbers](const BlockedRange& b) {
|
||||
for (int k = b.begin(); k != b.end(); ++k) {
|
||||
absl::MutexLock lock(&numbers_mutex);
|
||||
numbers.push_back(k);
|
||||
}
|
||||
});
|
||||
|
||||
std::vector<int> expected(kArraySize);
|
||||
std::iota(expected.begin(), expected.end(), 0);
|
||||
EXPECT_TRUE(
|
||||
std::is_permutation(expected.begin(), expected.end(), numbers.begin()));
|
||||
}
|
||||
|
||||
// Exercises ParallelFor in OpenMP mode. If the build lacks OpenMP support,
// CheckAndSetInvokerOptions silently falls back to ThreadPool.
TEST(ParallelInvokerTest, PhotosTest) {
  flags_parallel_invoker_mode = PARALLEL_INVOKER_OPENMP;

  RunParallelTest();
}
|
||||
|
||||
// Exercises ParallelFor in ThreadPool mode.
TEST(ParallelInvokerTest, ThreadPoolTest) {
  flags_parallel_invoker_mode = PARALLEL_INVOKER_THREAD_POOL;

  // Needs to be run in opt mode to pass.
  RunParallelTest();
}
|
||||
|
||||
} // namespace
|
||||
} // namespace mediapipe
|
||||
1296
mediapipe/util/tracking/push_pull_filtering.h
Normal file
39
mediapipe/util/tracking/push_pull_filtering.proto
Normal file
|
|
@ -0,0 +1,39 @@
|
|||
// Copyright 2019 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
syntax = "proto2";
|
||||
|
||||
package mediapipe;
|
||||
|
||||
// Options for bilateral push-pull filtering.
message PushPullOptions {
  // Sigma for color difference.
  optional float bilateral_sigma = 1 [default = 20.0];

  // Determines how fast confident values can propagate. Filters are normalized,
  // such that confidence dissipates quickly instead of propagating.
  // To ensure confidence propagates the importance weight is scaled by the
  // scalars specified below. Larger values yield quicker propagation.
  optional float pull_propagation_scale = 3 [default = 8.0];
  optional float push_propagation_scale = 4 [default = 8.0];

  // Above bilateral sigma is scaled at each level by the specified scale
  // (for push and pull phase). This is because iterative downsampling of the
  // guidance image introduces errors, making bilateral weighting increasingly
  // erroneous.
  optional float pull_bilateral_scale = 5 [default = 0.7];
  optional float push_bilateral_scale = 6 [default = 0.9];

  // Deprecated fields.
  extensions 2;
}
|
||||
799
mediapipe/util/tracking/region_flow.cc
Normal file
|
|
@ -0,0 +1,799 @@
|
|||
// Copyright 2019 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "mediapipe/util/tracking/region_flow.h"
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <cmath>
|
||||
#include <memory>
|
||||
#include <numeric>
|
||||
|
||||
#include "absl/strings/str_cat.h"
|
||||
#include "mediapipe/framework/port/integral_types.h"
|
||||
#include "mediapipe/util/tracking/measure_time.h"
|
||||
#include "mediapipe/util/tracking/parallel_invoker.h"
|
||||
|
||||
namespace mediapipe {
|
||||
|
||||
namespace {
|
||||
|
||||
bool IsPointWithinBounds(const Vector2_f& pt, float bounds, int frame_width,
|
||||
int frame_height) {
|
||||
// Ensure stability under float -> rounding operations.
|
||||
if (pt.x() - 0.5f >= bounds && pt.x() + 0.5f <= frame_width - 1 - bounds &&
|
||||
pt.y() - 0.5f >= bounds && pt.y() + 0.5f <= frame_height - 1 - bounds) {
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace.
|
||||
|
||||
// Flattens the features of all regions in region_flow_frame into a single
// RegionFlowFeatureList, copying frame-level metadata (dimensions, stability
// flag, blur score). If distance_from_border > 0, features whose location OR
// match location lies closer than that many pixels to the frame border are
// dropped.
void GetRegionFlowFeatureList(const RegionFlowFrame& region_flow_frame,
                              int distance_from_border,
                              RegionFlowFeatureList* flow_feature_list) {
  CHECK(flow_feature_list);
  flow_feature_list->clear_feature();
  const int frame_width = region_flow_frame.frame_width();
  const int frame_height = region_flow_frame.frame_height();
  flow_feature_list->set_frame_width(frame_width);
  flow_feature_list->set_frame_height(frame_height);
  flow_feature_list->set_unstable(region_flow_frame.unstable_frame());
  flow_feature_list->set_distance_from_border(distance_from_border);
  flow_feature_list->set_blur_score(region_flow_frame.blur_score());

  for (const auto& region_flow : region_flow_frame.region_flow()) {
    for (const auto& feature : region_flow.feature()) {
      if (distance_from_border > 0) {
        // Skip features where either endpoint of the flow vector lies too
        // close to the frame border.
        if (!IsPointWithinBounds(FeatureLocation(feature), distance_from_border,
                                 frame_width, frame_height) ||
            !IsPointWithinBounds(FeatureMatchLocation(feature),
                                 distance_from_border, frame_width,
                                 frame_height)) {
          continue;
        }
      }

      flow_feature_list->add_feature()->CopyFrom(feature);
    }
  }
}
|
||||
|
||||
float RegionFlowFeatureDistance(const PatchDescriptor& patch_desc_1,
|
||||
const PatchDescriptor& patch_desc_2) {
|
||||
DCHECK_EQ(patch_desc_1.data_size(), patch_desc_2.data_size());
|
||||
DCHECK_GE(patch_desc_1.data_size(), 3);
|
||||
|
||||
constexpr int kNumMeans = 3;
|
||||
float sq_distance_sum = 0;
|
||||
for (int n = 0; n < kNumMeans; ++n) {
|
||||
const float distance = patch_desc_1.data(n) - patch_desc_2.data(n);
|
||||
sq_distance_sum += distance * distance;
|
||||
}
|
||||
|
||||
return std::sqrt(sq_distance_sum);
|
||||
}
|
||||
|
||||
void ResetRegionFlowFeatureIRLSWeights(
|
||||
float value, RegionFlowFeatureList* flow_feature_list) {
|
||||
for (auto& feature : *flow_feature_list->mutable_feature()) {
|
||||
feature.set_irls_weight(value);
|
||||
}
|
||||
}
|
||||
|
||||
double RegionFlowFeatureIRLSSum(const RegionFlowFeatureList& feature_list) {
|
||||
double sum = 0.0;
|
||||
for (const auto& feature : feature_list.feature()) {
|
||||
sum += feature.irls_weight();
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
void ClampRegionFlowFeatureIRLSWeights(float lower, float upper,
|
||||
RegionFlowFeatureView* feature_view) {
|
||||
for (auto& feature_ptr : *feature_view) {
|
||||
if (feature_ptr->irls_weight() < lower) {
|
||||
feature_ptr->set_irls_weight(lower);
|
||||
} else if (feature_ptr->irls_weight() > upper) {
|
||||
feature_ptr->set_irls_weight(upper);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Computes a per-feature texturedness score in [0, 1] (0 = textureless,
// 1 = highly textured) from the L1 color standard deviation stored in each
// feature's descriptor. Features without variance information (negative
// stdev) keep the default score of 1.0; a warning is logged for at most the
// first two such features process-wide. If use_15percent_as_max is set,
// roughly 15% of the per-channel maximum stdev already maps to a score of 1.
void ComputeRegionFlowFeatureTexturedness(
    const RegionFlowFeatureList& flow_feature_list, bool use_15percent_as_max,
    std::vector<float>* texturedness) {
  CHECK(texturedness != nullptr);
  // Default score of 1.0 for every feature.
  *texturedness = std::vector<float>(flow_feature_list.feature_size(), 1.0f);

  int texture_idx = 0;
  for (auto feature = flow_feature_list.feature().begin();
       feature != flow_feature_list.feature().end(); ++feature, ++texture_idx) {
    const float feature_stdev_l1 =
        PatchDescriptorColorStdevL1(feature->feature_descriptor());

    if (feature_stdev_l1 < 0.0f) {
      // Negative stdev signals a descriptor without variance data. The
      // immediately-invoked lambda restricts the warning to the first two
      // occurrences (static counter).
      LOG_IF(WARNING,
             []() {
               static int k = 0;
               return k++ < 2;
             }())
          << "Feature descriptor does not contain variance information. Was "
          << "ComputeRegionFlowFeatureDescriptors called?";
      continue;
    }

    // feature_stdev_l1 is within [0, 3 * 128 = 384]
    float alpha = feature_stdev_l1 / 384.f;

    // In [0, 1], 0 = low texture, 1 = high texture. Scale such that around
    // 15% of per channel maximum stdev is considered totally textured
    // (1 / 0.15 * 3 ~ 18);
    if (use_15percent_as_max) {
      alpha = std::min(1.0f, alpha * 18.f);
    }

    (*texturedness)[texture_idx] = alpha;
  }
}
|
||||
|
||||
// Re-weights feature IRLS weights by texturedness: low-texture features whose
// weight is below low_texture_outlier_clamp get clamped to that value; all
// other features have their weight divided by their texturedness (plus
// epsilon), which boosts low-texture features more strongly. Features with a
// weight of exactly zero (hard outliers) are skipped entirely.
void TextureFilteredRegionFlowFeatureIRLSWeights(
    float low_texture_threshold, float low_texture_outlier_clamp,
    RegionFlowFeatureList* flow_feature_list) {
  std::vector<float> texturedness;
  ComputeRegionFlowFeatureTexturedness(*flow_feature_list, true, &texturedness);

  int texture_idx = 0;
  for (auto feature = flow_feature_list->mutable_feature()->begin();
       feature != flow_feature_list->mutable_feature()->end();
       ++feature, ++texture_idx) {
    // Zero weight marks a hard outlier; leave it untouched.
    if (feature->irls_weight() == 0.0f) {
      continue;
    }

    if (texturedness[texture_idx] < low_texture_threshold &&
        feature->irls_weight() < low_texture_outlier_clamp) {
      feature->set_irls_weight(low_texture_outlier_clamp);
    } else {
      feature->set_irls_weight(feature->irls_weight() /
                               (texturedness[texture_idx] + 1.e-6f));
    }
  }
}
|
||||
|
||||
void CornerFilteredRegionFlowFeatureIRLSWeights(
|
||||
float low_corner_threshold, float low_corner_outlier_clamp,
|
||||
RegionFlowFeatureList* flow_feature_list) {
|
||||
for (auto feature = flow_feature_list->mutable_feature()->begin();
|
||||
feature != flow_feature_list->mutable_feature()->end(); ++feature) {
|
||||
if (feature->irls_weight() == 0.0f) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const float corner_response = feature->corner_response();
|
||||
|
||||
if (corner_response < low_corner_threshold &&
|
||||
feature->irls_weight() < low_corner_threshold) {
|
||||
feature->set_irls_weight(low_corner_outlier_clamp);
|
||||
} else {
|
||||
feature->set_irls_weight(feature->irls_weight() /
|
||||
(corner_response + 1.e-6f));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void GetRegionFlowFeatureIRLSWeights(
|
||||
const RegionFlowFeatureList& flow_feature_list,
|
||||
std::vector<float>* irls_weights) {
|
||||
CHECK(irls_weights != nullptr);
|
||||
irls_weights->clear();
|
||||
irls_weights->reserve(flow_feature_list.feature_size());
|
||||
for (auto feature = flow_feature_list.feature().begin();
|
||||
feature != flow_feature_list.feature().end(); ++feature) {
|
||||
irls_weights->push_back(feature->irls_weight());
|
||||
}
|
||||
}
|
||||
|
||||
void SetRegionFlowFeatureIRLSWeights(const std::vector<float>& irls_weights,
|
||||
RegionFlowFeatureList* flow_feature_list) {
|
||||
CHECK(flow_feature_list != nullptr);
|
||||
CHECK_EQ(irls_weights.size(), flow_feature_list->feature_size());
|
||||
int idx = 0;
|
||||
for (auto feature = flow_feature_list->mutable_feature()->begin();
|
||||
feature != flow_feature_list->mutable_feature()->end();
|
||||
++feature, ++idx) {
|
||||
feature->set_irls_weight(irls_weights[idx]);
|
||||
}
|
||||
}
|
||||
|
||||
int CountIgnoredRegionFlowFeatures(
|
||||
const RegionFlowFeatureList& flow_feature_list, float threshold) {
|
||||
int count = 0;
|
||||
for (auto feature = flow_feature_list.feature().begin();
|
||||
feature != flow_feature_list.feature().end(); ++feature) {
|
||||
if (feature->irls_weight() <= threshold) {
|
||||
++count;
|
||||
}
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
// Strict weak ordering of RegionFlows by region_id; used with std::sort and
// std::lower_bound below to enable binary search by id.
struct RegionFlowLocator {
  bool operator()(const RegionFlow& lhs, const RegionFlow& rhs) const {
    return lhs.region_id() < rhs.region_id();
  }
};
|
||||
|
||||
} // namespace.
|
||||
|
||||
// Binary-searches flow_frame for the RegionFlow with the given region_id and
// returns it, or nullptr if absent. Requires flow_frame.region_flow() to be
// sorted by region_id (see SortRegionFlowById).
const RegionFlow* GetRegionFlowById(int region_id,
                                    const RegionFlowFrame& flow_frame) {
  // Sentinel value used purely as a search key.
  RegionFlow region_flow;
  region_flow.set_region_id(region_id);

  const auto& region_pos = std::lower_bound(flow_frame.region_flow().begin(),
                                            flow_frame.region_flow().end(),
                                            region_flow, RegionFlowLocator());

  // lower_bound returns the first element >= key; verify an exact match.
  if (region_pos == flow_frame.region_flow().end() ||
      region_pos->region_id() != region_id) {
    return nullptr;  // nullptr instead of NULL, consistent with file style.
  }
  return &(*region_pos);
}
|
||||
|
||||
// Mutable counterpart of GetRegionFlowById: binary-searches *flow_frame
// (which must be sorted by region_id) and returns a mutable pointer to the
// matching RegionFlow, or nullptr if absent.
RegionFlow* GetMutableRegionFlowById(int region_id,
                                     RegionFlowFrame* flow_frame) {
  // Sentinel value used purely as a search key.
  RegionFlow region_flow;
  region_flow.set_region_id(region_id);

  auto region_pos = std::lower_bound(flow_frame->mutable_region_flow()->begin(),
                                     flow_frame->mutable_region_flow()->end(),
                                     region_flow, RegionFlowLocator());

  // lower_bound returns the first element >= key; verify an exact match.
  if (region_pos == flow_frame->mutable_region_flow()->end() ||
      region_pos->region_id() != region_id) {
    return nullptr;  // nullptr instead of NULL, consistent with file style.
  }
  return &(*region_pos);
}
|
||||
|
||||
// Sorts the regions in-place by ascending region_id, establishing the
// precondition for binary-search lookup via GetRegionFlowById /
// GetMutableRegionFlowById.
void SortRegionFlowById(RegionFlowFrame* flow_frame) {
  std::sort(flow_frame->mutable_region_flow()->begin(),
            flow_frame->mutable_region_flow()->end(), RegionFlowLocator());
}
|
||||
|
||||
// Copies region_flow_frame into *inverted_flow_frame with every flow vector
// reversed: each centroid/feature is moved to its match position and its
// flow negated, so the result describes motion in the opposite direction.
void InvertRegionFlow(const RegionFlowFrame& region_flow_frame,
                      RegionFlowFrame* inverted_flow_frame) {
  CHECK(inverted_flow_frame);
  inverted_flow_frame->CopyFrom(region_flow_frame);
  for (auto& region_flow : *inverted_flow_frame->mutable_region_flow()) {
    // Order matters: advance the centroid by the flow BEFORE negating it.
    region_flow.set_centroid_x(region_flow.centroid_x() + region_flow.flow_x());
    region_flow.set_centroid_y(region_flow.centroid_y() + region_flow.flow_y());
    region_flow.set_flow_x(-region_flow.flow_x());
    region_flow.set_flow_y(-region_flow.flow_y());

    // Same pattern per feature: move to match location, then negate flow.
    for (auto& feature : *region_flow.mutable_feature()) {
      feature.set_x(feature.x() + feature.dx());
      feature.set_y(feature.y() + feature.dy());
      feature.set_dx(-feature.dx());
      feature.set_dy(-feature.dy());
    }
  }
}
|
||||
|
||||
void InvertRegionFlowFeatureList(const RegionFlowFeatureList& feature_list,
|
||||
RegionFlowFeatureList* inverted_feature_list) {
|
||||
CHECK(inverted_feature_list);
|
||||
*inverted_feature_list = feature_list;
|
||||
for (auto& feature : *inverted_feature_list->mutable_feature()) {
|
||||
InvertRegionFlowFeature(&feature);
|
||||
}
|
||||
}
|
||||
|
||||
// Inverts a single feature in-place: the point is moved to its match
// location and the flow vector negated.
void InvertRegionFlowFeature(RegionFlowFeature* feature) {
  // Capture the match location before mutating x/y.
  Vector2_f pt_match = FeatureMatchLocation(*feature);
  feature->set_x(pt_match.x());
  feature->set_y(pt_match.y());
  // NOTE(review): FeatureFlow is read after x/y were updated; presumably it
  // depends only on dx/dy -- confirm against its definition.
  Vector2_f flow = FeatureFlow(*feature);
  feature->set_dx(-flow.x());
  feature->set_dy(-flow.y());
}
|
||||
|
||||
void LimitFeaturesToBounds(int frame_width, int frame_height, float bounds,
|
||||
RegionFlowFeatureList* feature_list) {
|
||||
RegionFlowFeatureList limited;
|
||||
for (const auto& feature : feature_list->feature()) {
|
||||
if (!IsPointWithinBounds(FeatureLocation(feature), bounds, frame_width,
|
||||
frame_height)) {
|
||||
continue;
|
||||
} else {
|
||||
limited.add_feature()->CopyFrom(feature);
|
||||
}
|
||||
}
|
||||
feature_list->Swap(&limited);
|
||||
}
|
||||
|
||||
// Maps all features into normalized coordinates via the width/height based
// normalization transform.
void NormalizeRegionFlowFeatureList(RegionFlowFeatureList* feature_list) {
  const int width = feature_list->frame_width();
  const int height = feature_list->frame_height();
  TransformRegionFlowFeatureList(
      LinearSimilarityAdapter::NormalizationTransform(width, height),
      feature_list);
}
|
||||
|
||||
// Inverse of NormalizeRegionFlowFeatureList: maps features back from
// normalized coordinates to pixel coordinates.
void DeNormalizeRegionFlowFeatureList(RegionFlowFeatureList* feature_list) {
  const int width = feature_list->frame_width();
  const int height = feature_list->frame_height();
  TransformRegionFlowFeatureList(
      ModelInvert(
          LinearSimilarityAdapter::NormalizationTransform(width, height)),
      feature_list);
}
|
||||
|
||||
// Scales a salient point's normalized position and elliptical extent by
// (scale_x, scale_y). Axis magnitudes are scaled anisotropically and the
// angle is re-derived from the scaled major axis, folded into [0, pi).
void ScaleSalientPoint(float scale_x, float scale_y, SalientPoint* sp) {
  sp->set_norm_point_x(sp->norm_point_x() * scale_x);
  sp->set_norm_point_y(sp->norm_point_y() * scale_y);

  // Reconstruct the orthogonal axis vectors from angle and magnitudes.
  const float cos_angle = std::cos(sp->angle());
  const float sin_angle = std::sin(sp->angle());
  Vector2_f major_axis = Vector2_f(cos_angle, sin_angle) * sp->norm_major();
  Vector2_f minor_axis = Vector2_f(-sin_angle, cos_angle) * sp->norm_minor();

  // Apply anisotropic scaling to each axis component.
  major_axis[0] *= scale_x;
  major_axis[1] *= scale_y;
  minor_axis[0] *= scale_x;
  minor_axis[1] *= scale_y;

  sp->set_norm_major(major_axis.Norm());
  sp->set_norm_minor(minor_axis.Norm());
  sp->set_angle(std::atan2(major_axis.y(), major_axis.x()));
  // atan2 yields (-pi, pi]; fold negative angles into [0, pi).
  if (sp->angle() < 0) {
    sp->set_angle(sp->angle() + M_PI);
  }
}
|
||||
|
||||
void ScaleSaliencyList(float scale, bool normalize_to_scale,
|
||||
SaliencyPointList* saliency_list) {
|
||||
CHECK(saliency_list != nullptr);
|
||||
for (auto& point_frame : *saliency_list) {
|
||||
ScaleSalientPointFrame(scale, normalize_to_scale, &point_frame);
|
||||
}
|
||||
}
|
||||
|
||||
void ScaleSalientPointFrame(float scale, bool normalize_to_scale,
|
||||
SalientPointFrame* saliency) {
|
||||
CHECK(saliency != nullptr);
|
||||
float saliency_scale = scale;
|
||||
if (normalize_to_scale) {
|
||||
float weight_sum = 0.0f;
|
||||
for (const auto& salient_point : saliency->point()) {
|
||||
weight_sum += salient_point.weight();
|
||||
}
|
||||
|
||||
if (weight_sum > 1e-6f) {
|
||||
saliency_scale /= weight_sum;
|
||||
}
|
||||
}
|
||||
|
||||
for (auto& salient_point : *saliency->mutable_point()) {
|
||||
salient_point.set_weight(salient_point.weight() * saliency_scale);
|
||||
}
|
||||
}
|
||||
|
||||
void ResetSaliencyBounds(float left, float bottom, float right, float top,
|
||||
SaliencyPointList* saliency_list) {
|
||||
CHECK(saliency_list != nullptr);
|
||||
for (auto& point_frame : *saliency_list) {
|
||||
for (auto& salient_point : *point_frame.mutable_point()) {
|
||||
salient_point.set_left(left);
|
||||
salient_point.set_bottom(bottom);
|
||||
salient_point.set_right(right);
|
||||
salient_point.set_top(top);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Computes the ellipse described by the symmetric 2x2 covariance matrix
// [a bc; bc d]: outputs the major/minor axis magnitudes (sqrt of the
// eigenvalues, measured in pixels) and the orientation of the major axis in
// radians. Returns false (with placeholder outputs) when the ellipse is too
// small for the axes to be reliable.
bool EllipseFromCovariance(float a, float bc, float d,
                           Vector2_f* axis_magnitude, float* angle) {
  CHECK(axis_magnitude != nullptr);
  CHECK(angle != nullptr);

  // Get trace and determinant.
  const float trace = a + d;
  const float det = a * d - bc * bc;

  // If area is very small (small axis in at least one direction)
  // axis are unreliable -> return false.
  if (det < 4) {  // Measured in sq. pixels.
    *axis_magnitude = Vector2_f(1, 1);
    *angle = 0;
    return false;
  }

  // Eigenvalues of a 2x2 matrix are trace / 2 +/- sqrt(trace^2 / 4 - det);
  // clamp the discriminant at zero to guard against numerical noise.
  const float discriminant = std::max(0.f, trace * trace * 0.25f - det);
  const float sqrt_discrm = std::sqrt(discriminant);

  // Get eigenvalues.
  float eig_1 = trace * 0.5f + sqrt_discrm;
  float eig_2 = trace * 0.5f - sqrt_discrm;

  // Compute eigenvectors.
  Vector2_f vec_1, vec_2;

  if (fabs(bc) < 1e-6) {
    // Right-most case, we already have diagonal matrix.
    vec_1.Set(1, 0);
    vec_2.Set(0, 1);
  } else {
    // Eigenvector for eigenvalue e of [a bc; bc d] is (e - d, bc).
    vec_1.Set(eig_1 - d, bc);
    vec_2.Set(eig_2 - d, bc);

    // Normalize. Norm is always > 0, as |bc| > 1e-6 via the branch above.
    vec_1 /= vec_1.Norm();
    vec_2 /= vec_2.Norm();
  }

  // Select positive eigenvector (magnitudes are lengths, so take absolute
  // values before the sqrt below).
  if (eig_1 < 0) {
    eig_1 *= -1.f;
  }

  if (eig_2 < 0) {
    eig_2 *= -1.f;
  }

  // Major first.
  if (eig_1 < eig_2) {
    using std::swap;
    swap(vec_1, vec_2);
    swap(eig_1, eig_2);
  }

  *axis_magnitude = Vector2_f(std::sqrt(eig_1), std::sqrt(eig_2));
  *angle = std::atan2(vec_1.y(), vec_1.x());

  // Reject degenerate ellipses whose minor axis is tiny.
  return eig_2 >= 1.5f;  // Measurement in pixels.
}
|
||||
|
||||
// Computes an approximate axis-aligned bounding box (4 corners) for an
// ellipse centered at `center` with the given axis magnitudes and
// orientation. The rotation is accounted for only coarsely: when the major
// axis points mostly along y (angle in [45, 135] degrees) the x/y extents
// are swapped, otherwise the major axis extent is used along x.
void BoundingBoxFromEllipse(const Vector2_f& center, float norm_major_axis,
                            float norm_minor_axis, float angle,
                            std::vector<Vector2_f>* bounding_box) {
  CHECK(bounding_box != nullptr);
  float dim_x;
  float dim_y;
  // Choose per-axis extents based on which image axis the major axis is
  // closer to.
  if (angle < M_PI * 0.25 || angle > M_PI * 0.75) {
    dim_x = norm_major_axis;
    dim_y = norm_minor_axis;
  } else {
    dim_y = norm_major_axis;
    dim_x = norm_minor_axis;
  }

  // Construct bounding box for axes-aligned ellipse centered at the origin.
  *bounding_box = {
      Vector2_f(-dim_x, -dim_y),
      Vector2_f(-dim_x, dim_y),
      Vector2_f(dim_x, dim_y),
      Vector2_f(dim_x, -dim_y),
  };

  // Translate the box to the ellipse center.
  for (Vector2_f& corner : *bounding_box) {
    corner += center;
  }
}
|
||||
|
||||
// Copies every field of *src EXCEPT its features into *dst (dst ends up with
// an empty feature list). To avoid copying the (potentially large) feature
// set, src's features are temporarily swapped into a scratch message during
// the copy and swapped back afterwards — which is why src is taken as a
// non-const pointer even though it is unchanged on return.
void CopyToEmptyFeatureList(RegionFlowFeatureList* src,
                            RegionFlowFeatureList* dst) {
  CHECK(src != nullptr);
  CHECK(dst != nullptr);

  // Swap out features for empty list.
  RegionFlowFeatureList empty_list;
  empty_list.mutable_feature()->Swap(src->mutable_feature());

  // Copy.
  dst->CopyFrom(*src);

  // Swap back.
  src->mutable_feature()->Swap(empty_list.mutable_feature());

  // src_features should be empty as in the beginning.
  CHECK_EQ(0, empty_list.feature_size());
}
|
||||
|
||||
// Intersects the long-feature tracks of *from with those of `to`, matching
// by track_id (both lists must stem from long-track computation). For every
// feature in `from` whose track also occurs in `to`, a copy is appended to
// *result with its flow (dx, dy) replaced by the vector from the feature's
// location in `from` to the corresponding location in `to` as reported by
// `to_location_eval`. Non-feature fields of *result are copied from *from.
// If `source_indices` is non-null, the index within from->feature() of each
// matched feature is appended to it (parallel to result->feature()).
void IntersectRegionFlowFeatureList(
    const RegionFlowFeatureList& to,
    std::function<Vector2_f(const RegionFlowFeature&)> to_location_eval,
    RegionFlowFeatureList* from, RegionFlowFeatureList* result,
    std::vector<int>* source_indices) {
  CHECK(from != nullptr);
  CHECK(result != nullptr);
  CHECK(from->long_tracks()) << "Intersection only works for long features";
  CHECK(to.long_tracks()) << "Intersection only works for long features";

  // Hash features in to, based on track_id.
  std::unordered_map<int, const RegionFlowFeature*> track_map;
  for (const auto& feature : to.feature()) {
    track_map[feature.track_id()] = &feature;
  }

  // Initialize result: copy all non-feature fields of *from, then reserve
  // for the worst case (every feature matches).
  CopyToEmptyFeatureList(from, result);
  const int num_from_features = from->feature_size();
  result->mutable_feature()->Reserve(num_from_features);

  int feature_idx = 0;
  for (const auto& feature : from->feature()) {
    auto find_location = track_map.find(feature.track_id());
    if (find_location != track_map.end()) {
      // Displacement from this feature's location to its location in `to`.
      const Vector2_f diff =
          to_location_eval(*find_location->second) - FeatureLocation(feature);

      RegionFlowFeature* new_feature = result->add_feature();
      new_feature->CopyFrom(feature);
      new_feature->set_dx(diff.x());
      new_feature->set_dy(diff.y());
      if (source_indices != nullptr) {
        source_indices->push_back(feature_idx);
      }
    }

    ++feature_idx;
  }
}
|
||||
|
||||
// Ingests one frame of long-feature matches into the per-track-id stream.
// Features without a valid track id are skipped with a rate-limited warning;
// all others are appended to tracks_ keyed by track id, with their flow
// flipped if needed so stored features always follow this stream's forward_
// orientation. With check_connectivity set, consecutive features of a track
// are verified to be spatially continuous and track ids may never reappear
// after removal. With purge_non_present_features set, tracks that received
// no feature in this frame are erased (and remembered in old_ids_ when
// connectivity checking is on). Requires |match_frame| == 1; the very first
// frame (match_frame == 0) is silently skipped.
void LongFeatureStream::AddFeatures(const RegionFlowFeatureList& feature_list,
                                    bool check_connectivity,
                                    bool purge_non_present_features) {
  if (!feature_list.long_tracks()) {
    LOG(ERROR) << "Feature stream should be used only used with long feature "
               << "tracks. Ensure POLICY_LONG_FEATURE was used for "
               << "RegionFlowComputation.";
    return;
  }

  if (feature_list.match_frame() == 0) {
    // Skip first frame.
    return;
  }

  if (std::abs(feature_list.match_frame()) != 1) {
    LOG(ERROR) << "Only matching frames one frame from current one are "
               << "supported";
    return;
  }

  // Record id of each track that is present in the current feature_list.
  std::unordered_set<int> present_tracks;
  for (auto feature : feature_list.feature()) {  // Copy feature.
    if (feature.track_id() < 0) {
      // Immediately-invoked lambda rate-limits this warning to the first two
      // occurrences across all calls.
      LOG_IF(WARNING, []() {
        static int k = 0;
        return k++ < 2;
      }()) << "Feature does not have a valid track id assigned. Ignoring.";
      continue;
    }
    present_tracks.insert(feature.track_id());
    if (check_connectivity) {
      // A new feature should never have been erased before.
      CHECK(old_ids_.find(feature.track_id()) == old_ids_.end())
          << "Feature : " << feature.track_id() << "was already removed.";
    }

    // Invert the features to be forward or backward according to forward_
    // flag.
    if ((!forward_ && feature_list.match_frame() > 0) ||
        (forward_ && feature_list.match_frame() < 0)) {
      InvertRegionFlowFeature(&feature);
    }

    auto find_pos = tracks_.find(feature.track_id());
    if (find_pos != tracks_.end()) {
      // Track is present, add to it.
      if (check_connectivity) {
        // The new feature's match location must coincide (up to noise) with
        // the last stored location of the track.
        CHECK_LT((FeatureLocation(find_pos->second.back()) -
                  FeatureMatchLocation(feature))
                     .Norm2(),
                 1e-4);
      }
      find_pos->second.push_back(feature);
    } else {
      // First observation of this track id: start a new track.
      tracks_[feature.track_id()] = std::vector<RegionFlowFeature>(1, feature);
    }
  }

  if (purge_non_present_features) {
    // Collect and erase all tracks that received no feature in this frame.
    std::vector<int> to_be_removed;
    for (const auto& track : tracks_) {
      if (present_tracks.find(track.first) == present_tracks.end()) {
        to_be_removed.push_back(track.first);
        if (check_connectivity) {
          old_ids_.insert(track.first);
        }
      }
    }
    for (int id : to_be_removed) {
      tracks_.erase(id);
    }
  }
}
|
||||
|
||||
// Flattens one track into a list of point locations. The match location at
// the far end of the track is included as an extra point (prepended for
// backward streams, appended for forward streams), so *result receives
// features.size() + 1 entries. Optionally also outputs one irls weight per
// point (the last weight is replicated to cover the extra point) and one
// flow vector per feature. Note the asymmetry: *result is appended to
// without clearing, while irls_weight and flow are cleared first. Empty
// tracks produce no output.
void LongFeatureStream::FlattenTrack(
    const std::vector<RegionFlowFeature>& features,
    std::vector<Vector2_f>* result, std::vector<float>* irls_weight,
    std::vector<Vector2_f>* flow) const {
  CHECK(result != nullptr);
  if (features.empty()) {
    return;
  }

  if (irls_weight) {
    irls_weight->clear();
  }

  if (flow) {
    flow->clear();
  }

  if (!forward_) {
    // Backward tracking, add first match.
    result->push_back(FeatureMatchLocation(features[0]));
  }

  for (const auto& feature : features) {
    result->push_back(FeatureLocation(feature));
    if (flow) {
      flow->push_back(FeatureFlow(feature));
    }
    if (irls_weight) {
      irls_weight->push_back(feature.irls_weight());
    }
  }

  if (forward_) {
    // Forward tracking, add last match.
    result->push_back(FeatureMatchLocation(features.back()));
  }

  // Replicate last irls weight.
  if (irls_weight) {
    irls_weight->push_back(irls_weight->back());
  }
}
|
||||
|
||||
const std::vector<RegionFlowFeature>* LongFeatureStream::TrackById(
|
||||
int id) const {
|
||||
auto track_pos = tracks_.find(id);
|
||||
if (track_pos == tracks_.end()) {
|
||||
return nullptr;
|
||||
} else {
|
||||
return &track_pos->second;
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<Vector2_f> LongFeatureStream::FlattenedTrackById(int id) const {
|
||||
const auto* track = TrackById(id);
|
||||
if (track != nullptr) {
|
||||
std::vector<Vector2_f> points;
|
||||
FlattenTrack(*track, &points, nullptr, nullptr);
|
||||
return points;
|
||||
} else {
|
||||
return std::vector<Vector2_f>();
|
||||
}
|
||||
}
|
||||
|
||||
void LongFeatureInfo::AddFeatures(const RegionFlowFeatureList& feature_list) {
|
||||
if (!feature_list.long_tracks()) {
|
||||
LOG(ERROR) << "Passed feature list was not computed with long tracks. ";
|
||||
return;
|
||||
}
|
||||
|
||||
for (const auto& feature : feature_list.feature()) {
|
||||
AddFeature(feature);
|
||||
}
|
||||
|
||||
IncrementFrame();
|
||||
}
|
||||
|
||||
void LongFeatureInfo::AddFeature(const RegionFlowFeature& feature) {
|
||||
if (feature.irls_weight() == 0) { // Skip outliers.
|
||||
return;
|
||||
}
|
||||
const int track_id = feature.track_id();
|
||||
if (track_id < 0) { // Skip unassigned ids.
|
||||
return;
|
||||
}
|
||||
auto insert_pos = track_info_.find(track_id);
|
||||
if (insert_pos == track_info_.end()) {
|
||||
track_info_[track_id].start = num_frames_;
|
||||
track_info_[track_id].length = 1;
|
||||
} else {
|
||||
++insert_pos->second.length;
|
||||
}
|
||||
}
|
||||
|
||||
void LongFeatureInfo::TrackLengths(const RegionFlowFeatureList& feature_list,
|
||||
std::vector<int>* track_lengths) const {
|
||||
CHECK(track_lengths);
|
||||
const int feature_size = feature_list.feature_size();
|
||||
track_lengths->resize(feature_size);
|
||||
for (int k = 0; k < feature_size; ++k) {
|
||||
(*track_lengths)[k] = TrackLength(feature_list.feature(k));
|
||||
}
|
||||
}
|
||||
|
||||
int LongFeatureInfo::TrackLength(const RegionFlowFeature& feature) const {
|
||||
const auto insert_pos = track_info_.find(feature.track_id());
|
||||
return insert_pos != track_info_.end() ? insert_pos->second.length : 0;
|
||||
}
|
||||
|
||||
int LongFeatureInfo::TrackStart(const RegionFlowFeature& feature) const {
|
||||
const auto insert_pos = track_info_.find(feature.track_id());
|
||||
return insert_pos != track_info_.end() ? insert_pos->second.start : -1;
|
||||
}
|
||||
|
||||
// Discards all accumulated per-track information and restarts frame
// counting.
void LongFeatureInfo::Reset() {
  num_frames_ = 0;
  track_info_.clear();
}
|
||||
|
||||
// Returns the track length at the given percentile (expected in [0, 1])
// across all tracks seen so far, or 0 when no tracks are present.
int LongFeatureInfo::GlobalTrackLength(float percentile) const {
  std::vector<int> track_lengths;
  track_lengths.reserve(track_info_.size());

  for (const auto& pair : track_info_) {
    track_lengths.push_back(pair.second.length);
  }

  if (track_lengths.empty()) {
    return 0;
  }

  // Clamp the selection index into [0, size - 1]; previously percentile >=
  // 1.0 produced an iterator equal to end() and dereferencing it was
  // undefined behavior.
  const int index =
      std::min<int>(track_lengths.size() - 1,
                    std::max<int>(0, percentile * track_lengths.size()));
  const auto percentile_item = track_lengths.begin() + index;

  std::nth_element(track_lengths.begin(), percentile_item, track_lengths.end());

  return *percentile_item;
}
|
||||
|
||||
// For each cell of a dim_x by dim_y grid, collects the linear indices of all
// cells within a square neighborhood of radius tap_radius, clipped at the
// grid boundary. The bin for cell (i, j) is stored at (*taps)[i * dim_x + j]
// and lists its taps in row-major order.
void GridTaps(int dim_x, int dim_y, int tap_radius,
              std::vector<std::vector<int>>* taps) {
  CHECK(taps);
  const int diameter = 2 * tap_radius + 1;
  taps->resize(dim_x * dim_y);

  for (int row = 0; row < dim_y; ++row) {
    // Vertical neighborhood bounds are invariant across the row.
    const int row_begin = std::max(0, row - tap_radius);
    const int row_end = std::min(dim_y - 1, row + tap_radius);
    for (int col = 0; col < dim_x; ++col) {
      const int col_begin = std::max(0, col - tap_radius);
      const int col_end = std::min(dim_x - 1, col + tap_radius);

      std::vector<int>& bin = (*taps)[row * dim_x + col];
      bin.clear();
      bin.reserve(diameter * diameter);
      for (int r = row_begin; r <= row_end; ++r) {
        for (int c = col_begin; c <= col_end; ++c) {
          bin.push_back(r * dim_x + c);
        }
      }
    }
  }
}
|
||||
|
||||
} // namespace mediapipe
|
||||