b899d17f18
GitOrigin-RevId: 8e1da4611d93ccb7d9674713157d43be0348d98f
155 lines
5.7 KiB
C++
155 lines
5.7 KiB
C++
// Copyright 2019 The MediaPipe Authors.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
// Copyright 2019 The MediaPipe Authors.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
#include <cmath>
|
|
#include <vector>
|
|
|
|
#include "mediapipe/framework/calculator_framework.h"
|
|
#include "mediapipe/framework/formats/detection.pb.h"
|
|
#include "mediapipe/framework/formats/location.h"
|
|
#include "mediapipe/framework/port/ret_check.h"
|
|
|
|
namespace mediapipe {
|
|
|
|
namespace {
|
|
|
|
constexpr char kDetectionsTag[] = "DETECTIONS";
|
|
constexpr char kLetterboxPaddingTag[] = "LETTERBOX_PADDING";
|
|
|
|
} // namespace
|
|
|
|
// Adjusts detection locations on a letterboxed image to the corresponding
|
|
// locations on the same image with the letterbox removed. This is useful to map
|
|
// the detections inferred from a letterboxed image, for example, output of
|
|
// the ImageTransformationCalculator when the scale mode is FIT, back to the
|
|
// corresponding input image before letterboxing.
|
|
//
|
|
// Input:
|
|
// DETECTIONS: An std::vector<Detection> representing detections on an
|
|
// letterboxed image.
|
|
//
|
|
// LETTERBOX_PADDING: An std::array<float, 4> representing the letterbox
|
|
// padding from the 4 sides ([left, top, right, bottom]) of the letterboxed
|
|
// image, normalized to [0.f, 1.f] by the letterboxed image dimensions.
|
|
//
|
|
// Output:
|
|
// DETECTIONS: An std::vector<Detection> representing detections with their
|
|
// locations adjusted to the letterbox-removed (non-padded) image.
|
|
//
|
|
// Usage example:
|
|
// node {
|
|
// calculator: "DetectionLetterboxRemovalCalculator"
|
|
// input_stream: "DETECTIONS:detections"
|
|
// input_stream: "LETTERBOX_PADDING:letterbox_padding"
|
|
// output_stream: "DETECTIONS:adjusted_detections"
|
|
// }
|
|
class DetectionLetterboxRemovalCalculator : public CalculatorBase {
|
|
public:
|
|
static absl::Status GetContract(CalculatorContract* cc) {
|
|
RET_CHECK(cc->Inputs().HasTag(kDetectionsTag) &&
|
|
cc->Inputs().HasTag(kLetterboxPaddingTag))
|
|
<< "Missing one or more input streams.";
|
|
|
|
cc->Inputs().Tag(kDetectionsTag).Set<std::vector<Detection>>();
|
|
cc->Inputs().Tag(kLetterboxPaddingTag).Set<std::array<float, 4>>();
|
|
|
|
cc->Outputs().Tag(kDetectionsTag).Set<std::vector<Detection>>();
|
|
|
|
return absl::OkStatus();
|
|
}
|
|
|
|
absl::Status Open(CalculatorContext* cc) override {
|
|
cc->SetOffset(TimestampDiff(0));
|
|
|
|
return absl::OkStatus();
|
|
}
|
|
|
|
absl::Status Process(CalculatorContext* cc) override {
|
|
// Only process if there's input detections.
|
|
if (cc->Inputs().Tag(kDetectionsTag).IsEmpty()) {
|
|
return absl::OkStatus();
|
|
}
|
|
|
|
const auto& input_detections =
|
|
cc->Inputs().Tag(kDetectionsTag).Get<std::vector<Detection>>();
|
|
const auto& letterbox_padding =
|
|
cc->Inputs().Tag(kLetterboxPaddingTag).Get<std::array<float, 4>>();
|
|
|
|
const float left = letterbox_padding[0];
|
|
const float top = letterbox_padding[1];
|
|
const float left_and_right = letterbox_padding[0] + letterbox_padding[2];
|
|
const float top_and_bottom = letterbox_padding[1] + letterbox_padding[3];
|
|
|
|
auto output_detections = absl::make_unique<std::vector<Detection>>();
|
|
for (const auto& detection : input_detections) {
|
|
Detection new_detection;
|
|
new_detection.CopyFrom(detection);
|
|
LocationData::RelativeBoundingBox* relative_bbox =
|
|
new_detection.mutable_location_data()
|
|
->mutable_relative_bounding_box();
|
|
|
|
relative_bbox->set_xmin(
|
|
(detection.location_data().relative_bounding_box().xmin() - left) /
|
|
(1.0f - left_and_right));
|
|
relative_bbox->set_ymin(
|
|
(detection.location_data().relative_bounding_box().ymin() - top) /
|
|
(1.0f - top_and_bottom));
|
|
// The size of the bounding box will change as well.
|
|
relative_bbox->set_width(
|
|
detection.location_data().relative_bounding_box().width() /
|
|
(1.0f - left_and_right));
|
|
relative_bbox->set_height(
|
|
detection.location_data().relative_bounding_box().height() /
|
|
(1.0f - top_and_bottom));
|
|
|
|
// Adjust keypoints as well.
|
|
for (int i = 0;
|
|
i < new_detection.mutable_location_data()->relative_keypoints_size();
|
|
++i) {
|
|
auto* keypoint =
|
|
new_detection.mutable_location_data()->mutable_relative_keypoints(
|
|
i);
|
|
const float new_x = (keypoint->x() - left) / (1.0f - left_and_right);
|
|
const float new_y = (keypoint->y() - top) / (1.0f - top_and_bottom);
|
|
keypoint->set_x(new_x);
|
|
keypoint->set_y(new_y);
|
|
}
|
|
|
|
output_detections->emplace_back(new_detection);
|
|
}
|
|
|
|
cc->Outputs()
|
|
.Tag(kDetectionsTag)
|
|
.Add(output_detections.release(), cc->InputTimestamp());
|
|
return absl::OkStatus();
|
|
}
|
|
};
|
|
REGISTER_CALCULATOR(DetectionLetterboxRemovalCalculator);
|
|
|
|
} // namespace mediapipe
|