// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <memory>
|
|
#include <vector>
|
|
|
|
#include "absl/memory/memory.h"
|
|
#include "absl/synchronization/blocking_counter.h"
|
|
#include "mediapipe/calculators/image/feature_detector_calculator.pb.h"
|
|
#include "mediapipe/framework/calculator_framework.h"
|
|
#include "mediapipe/framework/formats/image_frame.h"
|
|
#include "mediapipe/framework/formats/image_frame_opencv.h"
|
|
#include "mediapipe/framework/formats/landmark.pb.h"
|
|
#include "mediapipe/framework/formats/video_stream_header.h"
|
|
#include "mediapipe/framework/port/integral_types.h"
|
|
#include "mediapipe/framework/port/logging.h"
|
|
#include "mediapipe/framework/port/opencv_core_inc.h"
|
|
#include "mediapipe/framework/port/opencv_features2d_inc.h"
|
|
#include "mediapipe/framework/port/opencv_imgproc_inc.h"
|
|
#include "mediapipe/framework/port/ret_check.h"
|
|
#include "mediapipe/framework/port/status.h"
|
|
#include "mediapipe/framework/port/threadpool.h"
|
|
#include "mediapipe/framework/tool/options_util.h"
|
|
#include "tensorflow/lite/interpreter.h"
|
|
|
|
namespace mediapipe {
|
|
|
|
const char kOptionsTag[] = "OPTIONS";
|
|
const int kPatchSize = 32;
|
|
const int kNumThreads = 16;
|
|
|
|
// A calculator to apply local feature detection.
|
|
// Input stream:
|
|
// IMAGE: Input image frame of type ImageFrame from video stream.
|
|
// Output streams:
|
|
// FEATURES: The detected keypoints from input image as vector<cv::KeyPoint>.
|
|
// PATCHES: Optional output the extracted patches as vector<cv::Mat>
|
|
class FeatureDetectorCalculator : public CalculatorBase {
|
|
public:
|
|
~FeatureDetectorCalculator() override = default;
|
|
|
|
static absl::Status GetContract(CalculatorContract* cc);
|
|
|
|
absl::Status Open(CalculatorContext* cc) override;
|
|
absl::Status Process(CalculatorContext* cc) override;
|
|
|
|
private:
|
|
FeatureDetectorCalculatorOptions options_;
|
|
cv::Ptr<cv::Feature2D> feature_detector_;
|
|
std::unique_ptr<mediapipe::ThreadPool> pool_;
|
|
|
|
// Create image pyramid based on input image.
|
|
void ComputeImagePyramid(const cv::Mat& input_image,
|
|
std::vector<cv::Mat>* image_pyramid);
|
|
|
|
// Extract the patch for single feature with image pyramid.
|
|
cv::Mat ExtractPatch(const cv::KeyPoint& feature,
|
|
const std::vector<cv::Mat>& image_pyramid);
|
|
};
|
|
|
|
REGISTER_CALCULATOR(FeatureDetectorCalculator);
|
|
|
|
absl::Status FeatureDetectorCalculator::GetContract(CalculatorContract* cc) {
|
|
if (cc->Inputs().HasTag("IMAGE")) {
|
|
cc->Inputs().Tag("IMAGE").Set<ImageFrame>();
|
|
}
|
|
if (cc->Outputs().HasTag("FEATURES")) {
|
|
cc->Outputs().Tag("FEATURES").Set<std::vector<cv::KeyPoint>>();
|
|
}
|
|
if (cc->Outputs().HasTag("LANDMARKS")) {
|
|
cc->Outputs().Tag("LANDMARKS").Set<NormalizedLandmarkList>();
|
|
}
|
|
if (cc->Outputs().HasTag("PATCHES")) {
|
|
cc->Outputs().Tag("PATCHES").Set<std::vector<TfLiteTensor>>();
|
|
}
|
|
return absl::OkStatus();
|
|
}
|
|
|
|
absl::Status FeatureDetectorCalculator::Open(CalculatorContext* cc) {
|
|
options_ =
|
|
tool::RetrieveOptions(cc->Options(), cc->InputSidePackets(), kOptionsTag)
|
|
.GetExtension(FeatureDetectorCalculatorOptions::ext);
|
|
feature_detector_ = cv::ORB::create(
|
|
options_.max_features(), options_.scale_factor(),
|
|
options_.pyramid_level(), kPatchSize - 1, 0, 2, cv::ORB::FAST_SCORE);
|
|
pool_ = absl::make_unique<mediapipe::ThreadPool>("ThreadPool", kNumThreads);
|
|
pool_->StartWorkers();
|
|
return absl::OkStatus();
|
|
}
|
|
|
|
absl::Status FeatureDetectorCalculator::Process(CalculatorContext* cc) {
|
|
const Timestamp& timestamp = cc->InputTimestamp();
|
|
if (timestamp == Timestamp::PreStream()) {
|
|
// Indicator packet.
|
|
return absl::OkStatus();
|
|
}
|
|
InputStream* input_frame = &(cc->Inputs().Tag("IMAGE"));
|
|
cv::Mat input_view = formats::MatView(&input_frame->Get<ImageFrame>());
|
|
cv::Mat grayscale_view;
|
|
cv::cvtColor(input_view, grayscale_view, cv::COLOR_RGB2GRAY);
|
|
|
|
std::vector<cv::KeyPoint> keypoints;
|
|
feature_detector_->detect(grayscale_view, keypoints);
|
|
if (keypoints.size() > options_.max_features()) {
|
|
keypoints.resize(options_.max_features());
|
|
}
|
|
|
|
if (cc->Outputs().HasTag("FEATURES")) {
|
|
auto features_ptr = absl::make_unique<std::vector<cv::KeyPoint>>(keypoints);
|
|
cc->Outputs().Tag("FEATURES").Add(features_ptr.release(), timestamp);
|
|
}
|
|
|
|
if (cc->Outputs().HasTag("LANDMARKS")) {
|
|
auto landmarks_ptr = absl::make_unique<NormalizedLandmarkList>();
|
|
for (int j = 0; j < keypoints.size(); ++j) {
|
|
auto feature_landmark = landmarks_ptr->add_landmark();
|
|
feature_landmark->set_x(keypoints[j].pt.x / grayscale_view.cols);
|
|
feature_landmark->set_y(keypoints[j].pt.y / grayscale_view.rows);
|
|
}
|
|
cc->Outputs().Tag("LANDMARKS").Add(landmarks_ptr.release(), timestamp);
|
|
}
|
|
|
|
if (cc->Outputs().HasTag("PATCHES")) {
|
|
std::vector<cv::Mat> image_pyramid;
|
|
ComputeImagePyramid(grayscale_view, &image_pyramid);
|
|
std::vector<cv::Mat> patch_mat;
|
|
patch_mat.resize(keypoints.size());
|
|
absl::BlockingCounter counter(keypoints.size());
|
|
for (int i = 0; i < keypoints.size(); i++) {
|
|
pool_->Schedule(
|
|
[this, &image_pyramid, &keypoints, &patch_mat, i, &counter] {
|
|
patch_mat[i] = ExtractPatch(keypoints[i], image_pyramid);
|
|
counter.DecrementCount();
|
|
});
|
|
}
|
|
counter.Wait();
|
|
const int batch_size = options_.max_features();
|
|
auto patches = absl::make_unique<std::vector<TfLiteTensor>>();
|
|
TfLiteTensor tensor;
|
|
tensor.type = kTfLiteFloat32;
|
|
tensor.dims = TfLiteIntArrayCreate(4);
|
|
tensor.dims->data[0] = batch_size;
|
|
tensor.dims->data[1] = kPatchSize;
|
|
tensor.dims->data[2] = kPatchSize;
|
|
tensor.dims->data[3] = 1;
|
|
int num_bytes = batch_size * kPatchSize * kPatchSize * sizeof(float);
|
|
tensor.data.data = malloc(num_bytes);
|
|
tensor.bytes = num_bytes;
|
|
tensor.allocation_type = kTfLiteArenaRw;
|
|
float* tensor_buffer = tensor.data.f;
|
|
for (int i = 0; i < keypoints.size(); i++) {
|
|
for (int j = 0; j < patch_mat[i].rows; ++j) {
|
|
for (int k = 0; k < patch_mat[i].cols; ++k) {
|
|
*tensor_buffer++ = patch_mat[i].at<uchar>(j, k) / 128.0f - 1.0f;
|
|
}
|
|
}
|
|
}
|
|
for (int i = keypoints.size() * kPatchSize * kPatchSize; i < num_bytes / 4;
|
|
i++) {
|
|
*tensor_buffer++ = 0;
|
|
}
|
|
|
|
patches->emplace_back(tensor);
|
|
cc->Outputs().Tag("PATCHES").Add(patches.release(), timestamp);
|
|
}
|
|
|
|
return absl::OkStatus();
|
|
}
|
|
|
|
void FeatureDetectorCalculator::ComputeImagePyramid(
|
|
const cv::Mat& input_image, std::vector<cv::Mat>* image_pyramid) {
|
|
cv::Mat tmp_image = input_image;
|
|
cv::Mat src_image = input_image;
|
|
for (int i = 0; i < options_.pyramid_level(); ++i) {
|
|
image_pyramid->push_back(src_image);
|
|
cv::resize(src_image, tmp_image, cv::Size(), 1.0f / options_.scale_factor(),
|
|
1.0f / options_.scale_factor());
|
|
src_image = tmp_image;
|
|
}
|
|
}
|
|
|
|
cv::Mat FeatureDetectorCalculator::ExtractPatch(
|
|
const cv::KeyPoint& feature, const std::vector<cv::Mat>& image_pyramid) {
|
|
cv::Mat img = image_pyramid[feature.octave];
|
|
float scale_factor = 1 / pow(options_.scale_factor(), feature.octave);
|
|
cv::Point2f center =
|
|
cv::Point2f(feature.pt.x * scale_factor, feature.pt.y * scale_factor);
|
|
cv::Mat rot = cv::getRotationMatrix2D(center, feature.angle, 1.0);
|
|
rot.at<double>(0, 2) += kPatchSize / 2 - center.x;
|
|
rot.at<double>(1, 2) += kPatchSize / 2 - center.y;
|
|
cv::Mat cropped_img;
|
|
// perform the affine transformation
|
|
cv::warpAffine(img, cropped_img, rot, cv::Size(kPatchSize, kPatchSize),
|
|
cv::INTER_LINEAR);
|
|
return cropped_img;
|
|
}
|
|
|
|
} // namespace mediapipe
|