Migrate landmarks deduplication to mediapipe tasks.
PiperOrigin-RevId: 479681836
This commit is contained in:
parent
d90daa859f
commit
b616bc4427
22
mediapipe/tasks/cc/components/containers/BUILD
Normal file
22
mediapipe/tasks/cc/components/containers/BUILD
Normal file
|
@ -0,0 +1,22 @@
|
|||
# Copyright 2022 The MediaPipe Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
package(default_visibility = ["//mediapipe/tasks:internal"])
|
||||
|
||||
licenses(["notice"])
|
||||
|
||||
cc_library(
|
||||
name = "landmarks_detection",
|
||||
hdrs = ["landmarks_detection.h"],
|
||||
)
|
|
@ -0,0 +1,43 @@
|
|||
/* Copyright 2022 The MediaPipe Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef MEDIAPIPE_TASKS_CC_COMPONENTS_CONTAINERS_LANDMARKS_DETECTION_H_
|
||||
#define MEDIAPIPE_TASKS_CC_COMPONENTS_CONTAINERS_LANDMARKS_DETECTION_H_
|
||||
|
||||
#include <vector>
|
||||
|
||||
// Structs holding landmarks related data structure for hand landmarker, pose
|
||||
// detector, face mesher, etc.
|
||||
namespace mediapipe::tasks::components::containers {
|
||||
|
||||
// A single landmark point.
//
// x and y are in [0,1] range with origin in top left in input image space.
// If model provides z, z is in the same scale as x. origin is in the center
// of the face.
//
// Every coordinate defaults to zero so that a default-constructed Landmark
// never exposes indeterminate values. The struct remains an aggregate, so
// brace initialization (`Landmark{x, y, z}`) keeps working.
struct Landmark {
  float x = 0.0f;
  float y = 0.0f;
  float z = 0.0f;
};
|
||||
|
||||
// Axis-aligned rectangle described by its four edges.
//
// Edges are in [0, 1] range in input image space.
//
// Every edge defaults to zero so that a default-constructed Bound never
// exposes indeterminate values. The struct remains an aggregate, so brace
// initialization in member order (`Bound{left, top, right, bottom}`) keeps
// working.
struct Bound {
  float left = 0.0f;
  float top = 0.0f;
  float right = 0.0f;
  float bottom = 0.0f;
};
|
||||
|
||||
} // namespace mediapipe::tasks::components::containers
|
||||
#endif // MEDIAPIPE_TASKS_CC_COMPONENTS_CONTAINERS_LANDMARKS_DETECTION_H_
|
|
@ -80,6 +80,7 @@ cc_library(
|
|||
"//mediapipe/calculators/core:gate_calculator_cc_proto",
|
||||
"//mediapipe/calculators/core:pass_through_calculator",
|
||||
"//mediapipe/calculators/core:previous_loopback_calculator",
|
||||
"//mediapipe/calculators/image:image_properties_calculator",
|
||||
"//mediapipe/calculators/util:collection_has_min_size_calculator",
|
||||
"//mediapipe/calculators/util:collection_has_min_size_calculator_cc_proto",
|
||||
"//mediapipe/framework/api2:builder",
|
||||
|
@ -98,6 +99,7 @@ cc_library(
|
|||
"//mediapipe/tasks/cc/vision/hand_detector/proto:hand_detector_graph_options_cc_proto",
|
||||
"//mediapipe/tasks/cc/vision/hand_landmarker/calculators:hand_association_calculator",
|
||||
"//mediapipe/tasks/cc/vision/hand_landmarker/calculators:hand_association_calculator_cc_proto",
|
||||
"//mediapipe/tasks/cc/vision/hand_landmarker/calculators:hand_landmarks_deduplication_calculator",
|
||||
"//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarker_graph_options_cc_proto",
|
||||
"//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarks_detector_graph_options_cc_proto",
|
||||
],
|
||||
|
|
|
@ -15,7 +15,6 @@
|
|||
load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library")
|
||||
|
||||
package(default_visibility = [
|
||||
"//mediapipe/app/xeno:__subpackages__",
|
||||
"//mediapipe/tasks:internal",
|
||||
])
|
||||
|
||||
|
@ -46,4 +45,26 @@ cc_library(
|
|||
alwayslink = 1,
|
||||
)
|
||||
|
||||
# TODO: Enable this test
|
||||
cc_library(
|
||||
name = "hand_landmarks_deduplication_calculator",
|
||||
srcs = ["hand_landmarks_deduplication_calculator.cc"],
|
||||
hdrs = ["hand_landmarks_deduplication_calculator.h"],
|
||||
deps = [
|
||||
"//mediapipe/framework:calculator_framework",
|
||||
"//mediapipe/framework/api2:builder",
|
||||
"//mediapipe/framework/api2:node",
|
||||
"//mediapipe/framework/api2:port",
|
||||
"//mediapipe/framework/formats:classification_cc_proto",
|
||||
"//mediapipe/framework/formats:landmark_cc_proto",
|
||||
"//mediapipe/framework/formats:rect_cc_proto",
|
||||
"//mediapipe/tasks/cc/components/containers:landmarks_detection",
|
||||
"//mediapipe/tasks/cc/vision/utils:landmarks_duplicates_finder",
|
||||
"//mediapipe/tasks/cc/vision/utils:landmarks_utils",
|
||||
"@com_google_absl//absl/algorithm:container",
|
||||
"@com_google_absl//absl/container:flat_hash_set",
|
||||
"@com_google_absl//absl/memory",
|
||||
"@com_google_absl//absl/status:statusor",
|
||||
"@com_google_absl//absl/types:optional",
|
||||
],
|
||||
alwayslink = 1,
|
||||
)
|
||||
|
|
|
@ -0,0 +1,310 @@
|
|||
/* Copyright 2022 The MediaPipe Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#include "mediapipe/tasks/cc/vision/hand_landmarker/calculators/hand_landmarks_deduplication_calculator.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <limits>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/algorithm/container.h"
|
||||
#include "absl/container/flat_hash_set.h"
|
||||
#include "absl/memory/memory.h"
|
||||
#include "absl/status/statusor.h"
|
||||
#include "absl/types/optional.h"
|
||||
#include "mediapipe/framework/api2/builder.h"
|
||||
#include "mediapipe/framework/api2/node.h"
|
||||
#include "mediapipe/framework/api2/port.h"
|
||||
#include "mediapipe/framework/calculator_framework.h"
|
||||
#include "mediapipe/framework/formats/classification.pb.h"
|
||||
#include "mediapipe/framework/formats/landmark.pb.h"
|
||||
#include "mediapipe/framework/formats/rect.pb.h"
|
||||
#include "mediapipe/tasks/cc/components/containers/landmarks_detection.h"
|
||||
#include "mediapipe/tasks/cc/vision/utils/landmarks_duplicates_finder.h"
|
||||
#include "mediapipe/tasks/cc/vision/utils/landmarks_utils.h"
|
||||
|
||||
namespace mediapipe::api2 {
|
||||
namespace {
|
||||
|
||||
using ::mediapipe::api2::Input;
|
||||
using ::mediapipe::api2::Output;
|
||||
using ::mediapipe::api2::builder::Source;
|
||||
using ::mediapipe::tasks::components::containers::Bound;
|
||||
using ::mediapipe::tasks::vision::utils::CalculateIOU;
|
||||
using ::mediapipe::tasks::vision::utils::DuplicatesFinder;
|
||||
|
||||
float Distance(const NormalizedLandmark& lm_a, const NormalizedLandmark& lm_b,
|
||||
int width, int height) {
|
||||
return std::sqrt(std::pow((lm_a.x() - lm_b.x()) * width, 2) +
|
||||
std::pow((lm_a.y() - lm_b.y()) * height, 2));
|
||||
}
|
||||
|
||||
// Computes the distance between every pair of same-index landmarks of `a` and
// `b`, using `width` and `height` to scale the normalized coordinates.
//
// Returns one distance per landmark, in landmark order. Fails if the two lists
// do not contain the same number of landmarks.
absl::StatusOr<std::vector<float>> Distances(const NormalizedLandmarkList& a,
                                             const NormalizedLandmarkList& b,
                                             int width, int height) {
  const int num = a.landmark_size();
  RET_CHECK_EQ(b.landmark_size(), num);

  // Size the result up front and fill it slot by slot.
  std::vector<float> per_landmark(num);
  for (int idx = 0; idx != num; ++idx) {
    per_landmark[idx] =
        Distance(a.landmark(idx), b.landmark(idx), width, height);
  }
  return per_landmark;
}
|
||||
|
||||
// Calculates a baseline distance of a hand that can be used as a relative
|
||||
// measure when calculating hand to hand similarity.
|
||||
//
|
||||
// Calculated as maximum of distances: 0->5, 5->17, 17->0, where 0, 5, 17 key
|
||||
// points are depicted below:
|
||||
//
|
||||
// /Middle/
|
||||
// |
|
||||
// /Index/ | /Ring/
|
||||
// | | | /Pinky/
|
||||
// V V V |
|
||||
// V
|
||||
// [8] [12] [16]
|
||||
// | | | [20]
|
||||
// | | | |
|
||||
// /Thumb/ | | | |
|
||||
// | [7] [11] [15] [19]
|
||||
// V | | | |
|
||||
// | | | |
|
||||
// [4] | | | |
|
||||
// | [6] [10] [14] [18]
|
||||
// | | | | |
|
||||
// | | | | |
|
||||
// [3] | | | |
|
||||
// | [5]----[9]---[13]---[17]
|
||||
// . | |
|
||||
// \ . |
|
||||
// \ / |
|
||||
// [2] |
|
||||
// \ |
|
||||
// \ |
|
||||
// \ |
|
||||
// [1] .
|
||||
// \ /
|
||||
// \ /
|
||||
// ._____[0]_____.
|
||||
//
|
||||
// ^
|
||||
// |
|
||||
// /Wrist/
|
||||
// Returns the longest side of the triangle formed by the wrist (0), the index
// finger MCP (5) and the pinky MCP (17) key points, with coordinates scaled by
// `width` and `height`. Fails unless `landmarks` holds exactly 21 entries.
absl::StatusOr<float> HandBaselineDistance(
    const NormalizedLandmarkList& landmarks, int width, int height) {
  RET_CHECK_EQ(landmarks.landmark_size(), 21);  // Num of hand landmarks.

  constexpr int kWrist = 0;
  constexpr int kIndexFingerMcp = 5;
  constexpr int kPinkyMcp = 17;

  const NormalizedLandmark& wrist = landmarks.landmark(kWrist);
  const NormalizedLandmark& index_mcp = landmarks.landmark(kIndexFingerMcp);
  const NormalizedLandmark& pinky_mcp = landmarks.landmark(kPinkyMcp);

  // The three triangle sides: 0->5, 5->17 and 17->0.
  const float wrist_to_index = Distance(wrist, index_mcp, width, height);
  const float index_to_pinky = Distance(index_mcp, pinky_mcp, width, height);
  const float pinky_to_wrist = Distance(pinky_mcp, wrist, width, height);

  return std::max({wrist_to_index, index_to_pinky, pinky_to_wrist});
}
|
||||
|
||||
// Returns the smallest axis-aligned (non rotated) box, in normalized
// coordinates, that encloses the x/y positions of every landmark in `list`.
//
// For an empty list the sentinels are returned untouched: left/top are the
// largest float and right/bottom are the lowest float.
Bound CalculateBound(const NormalizedLandmarkList& list) {
  constexpr float kLargest = std::numeric_limits<float>::max();
  constexpr float kSmallest = std::numeric_limits<float>::lowest();

  // Start from an "inverted" box so the first landmark sets every edge.
  Bound box;
  box.left = kLargest;
  box.top = kLargest;
  box.right = kSmallest;
  box.bottom = kSmallest;

  for (const auto& point : list.landmark()) {
    box.left = std::min(box.left, point.x());
    box.right = std::max(box.right, point.x());
    box.top = std::min(box.top, point.y());
    box.bottom = std::max(box.bottom, point.y());
  }
  return box;
}
|
||||
|
||||
// Uses IoU and distance of some corresponding hand landmarks to detect
|
||||
// duplicate / similar hands. IoU, distance thresholds, number of landmarks to
|
||||
// match are found experimentally. Evaluated:
|
||||
// - manually comparing side by side, before and after deduplication applied
|
||||
// - generating gesture dataset, and checking select frames in baseline and
|
||||
// "deduplicated" dataset
|
||||
// - by confirming gesture training is better with use of deduplication using
|
||||
// selected thresholds
|
||||
class HandDuplicatesFinder : public DuplicatesFinder {
|
||||
public:
|
||||
explicit HandDuplicatesFinder(bool start_from_the_end)
|
||||
: start_from_the_end_(start_from_the_end) {}
|
||||
|
||||
absl::StatusOr<absl::flat_hash_set<int>> FindDuplicates(
|
||||
const std::vector<NormalizedLandmarkList>& multi_landmarks,
|
||||
int input_width, int input_height) override {
|
||||
absl::flat_hash_set<int> retained_indices;
|
||||
absl::flat_hash_set<int> suppressed_indices;
|
||||
|
||||
const int num = multi_landmarks.size();
|
||||
std::vector<float> baseline_distances;
|
||||
baseline_distances.reserve(num);
|
||||
std::vector<Bound> bounds;
|
||||
bounds.reserve(num);
|
||||
for (const NormalizedLandmarkList& list : multi_landmarks) {
|
||||
ASSIGN_OR_RETURN(const float baseline_distance,
|
||||
HandBaselineDistance(list, input_width, input_height));
|
||||
baseline_distances.push_back(baseline_distance);
|
||||
bounds.push_back(CalculateBound(list));
|
||||
}
|
||||
|
||||
for (int index = 0; index < num; ++index) {
|
||||
const int i = start_from_the_end_ ? num - index - 1 : index;
|
||||
const float stable_distance_i = baseline_distances[i];
|
||||
bool suppressed = false;
|
||||
for (int j : retained_indices) {
|
||||
const float stable_distance_j = baseline_distances[j];
|
||||
|
||||
constexpr float kAllowedBaselineDistanceRatio = 0.2f;
|
||||
const float distance_threshold =
|
||||
std::max(stable_distance_i, stable_distance_j) *
|
||||
kAllowedBaselineDistanceRatio;
|
||||
|
||||
ASSIGN_OR_RETURN(const std::vector<float> distances,
|
||||
Distances(multi_landmarks[i], multi_landmarks[j],
|
||||
input_width, input_height));
|
||||
const int num_matched_landmarks = absl::c_count_if(
|
||||
distances,
|
||||
[&](float distance) { return distance < distance_threshold; });
|
||||
|
||||
const float iou = CalculateIOU(bounds[i], bounds[j]);
|
||||
|
||||
constexpr int kNumMatchedLandmarksToSuppressHand = 10; // out of 21
|
||||
constexpr float kMinIouThresholdToSuppressHand = 0.2f;
|
||||
if (num_matched_landmarks >= kNumMatchedLandmarksToSuppressHand &&
|
||||
iou > kMinIouThresholdToSuppressHand) {
|
||||
suppressed = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (suppressed) {
|
||||
suppressed_indices.insert(i);
|
||||
} else {
|
||||
retained_indices.insert(i);
|
||||
}
|
||||
}
|
||||
return suppressed_indices;
|
||||
}
|
||||
|
||||
private:
|
||||
const bool start_from_the_end_;
|
||||
};
|
||||
|
||||
template <typename InputPortT>
|
||||
absl::StatusOr<absl::optional<typename InputPortT::PayloadT>>
|
||||
VerifyNumAndMaybeInitOutput(const InputPortT& port, CalculatorContext* cc,
|
||||
int num_expected_size) {
|
||||
absl::optional<typename InputPortT::PayloadT> output;
|
||||
if (port(cc).IsConnected() && !port(cc).IsEmpty()) {
|
||||
RET_CHECK_EQ(port(cc).Get().size(), num_expected_size);
|
||||
typename InputPortT::PayloadT result;
|
||||
return {{result}};
|
||||
}
|
||||
return {absl::nullopt};
|
||||
}
|
||||
} // namespace
|
||||
|
||||
std::unique_ptr<DuplicatesFinder> CreateHandDuplicatesFinder(
|
||||
bool start_from_the_end) {
|
||||
return absl::make_unique<HandDuplicatesFinder>(start_from_the_end);
|
||||
}
|
||||
|
||||
// Drops duplicate hands from the landmark stream and keeps the optional ROI,
// world-landmark and classification streams aligned with it. Nothing is
// emitted when the landmarks or the image size packet is missing.
absl::Status HandLandmarksDeduplicationCalculator::Process(
    mediapipe::CalculatorContext* cc) {
  if (kInLandmarks(cc).IsEmpty() || kInSize(cc).IsEmpty()) {
    return absl::OkStatus();
  }

  const std::vector<NormalizedLandmarkList>& in_landmarks = *kInLandmarks(cc);
  const std::pair<int, int>& image_size = *kInSize(cc);

  const std::unique_ptr<DuplicatesFinder> finder =
      CreateHandDuplicatesFinder(/*start_from_the_end=*/false);
  ASSIGN_OR_RETURN(const absl::flat_hash_set<int> duplicates,
                   finder->FindDuplicates(in_landmarks, image_size.first,
                                          image_size.second));

  // Fast path: nothing to remove, so forward every input as is.
  if (duplicates.empty()) {
    kOutLandmarks(cc).Send(kInLandmarks(cc));
    kOutRois(cc).Send(kInRois(cc));
    kOutWorldLandmarks(cc).Send(kInWorldLandmarks(cc));
    kOutClassifications(cc).Send(kInClassifications(cc));
    return absl::OkStatus();
  }

  const int num = in_landmarks.size();

  // Each optional stream gets an output container only if it is present, and
  // must hold exactly one entry per hand.
  ASSIGN_OR_RETURN(absl::optional<std::vector<NormalizedRect>> kept_rois,
                   VerifyNumAndMaybeInitOutput(kInRois, cc, num));
  ASSIGN_OR_RETURN(
      absl::optional<std::vector<LandmarkList>> kept_world_landmarks,
      VerifyNumAndMaybeInitOutput(kInWorldLandmarks, cc, num));
  ASSIGN_OR_RETURN(
      absl::optional<std::vector<ClassificationList>> kept_classifications,
      VerifyNumAndMaybeInitOutput(kInClassifications, cc, num));

  std::vector<NormalizedLandmarkList> kept_landmarks;
  for (int i = 0; i < num; ++i) {
    if (duplicates.contains(i)) continue;

    kept_landmarks.push_back(in_landmarks[i]);
    if (kept_rois) {
      kept_rois->push_back(kInRois(cc).Get()[i]);
    }
    if (kept_world_landmarks) {
      kept_world_landmarks->push_back(kInWorldLandmarks(cc).Get()[i]);
    }
    if (kept_classifications) {
      kept_classifications->push_back(kInClassifications(cc).Get()[i]);
    }
  }

  // Only non-empty collections are sent downstream.
  if (!kept_landmarks.empty()) {
    kOutLandmarks(cc).Send(std::move(kept_landmarks));
  }
  if (kept_rois && !kept_rois->empty()) {
    kOutRois(cc).Send(std::move(*kept_rois));
  }
  if (kept_world_landmarks && !kept_world_landmarks->empty()) {
    kOutWorldLandmarks(cc).Send(std::move(*kept_world_landmarks));
  }
  if (kept_classifications && !kept_classifications->empty()) {
    kOutClassifications(cc).Send(std::move(*kept_classifications));
  }
  return absl::OkStatus();
}
|
||||
MEDIAPIPE_REGISTER_NODE(HandLandmarksDeduplicationCalculator);
|
||||
|
||||
} // namespace mediapipe::api2
|
|
@ -0,0 +1,97 @@
|
|||
/* Copyright 2022 The MediaPipe Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#ifndef MEDIAPIPE_TASKS_CC_VISION_HAND_LANDMARKER_CALCULATORS_HAND_LANDMARKS_DEDUPLICATION_CALCULATOR_H_
|
||||
#define MEDIAPIPE_TASKS_CC_VISION_HAND_LANDMARKER_CALCULATORS_HAND_LANDMARKS_DEDUPLICATION_CALCULATOR_H_
|
||||
|
||||
#include "mediapipe/framework/api2/builder.h"
|
||||
#include "mediapipe/framework/api2/node.h"
|
||||
#include "mediapipe/framework/api2/port.h"
|
||||
#include "mediapipe/framework/formats/classification.pb.h"
|
||||
#include "mediapipe/framework/formats/landmark.pb.h"
|
||||
#include "mediapipe/framework/formats/rect.pb.h"
|
||||
#include "mediapipe/tasks/cc/vision/utils/landmarks_duplicates_finder.h"
|
||||
|
||||
namespace mediapipe::api2 {
|
||||
|
||||
// Create a DuplicatesFinder dedicated for finding hand duplications.
|
||||
std::unique_ptr<tasks::vision::utils::DuplicatesFinder>
|
||||
CreateHandDuplicatesFinder(bool start_from_the_end = false);
|
||||
|
||||
// Filter duplicate hand landmarks by finding the overlapped hands.
|
||||
// Inputs:
|
||||
// MULTI_LANDMARKS - std::vector<NormalizedLandmarkList>
|
||||
// The hand landmarks to be filtered.
|
||||
// MULTI_ROIS - std::vector<NormalizedRect>
|
||||
// The regions where each encloses the landmarks of a single hand.
|
||||
// MULTI_WORLD_LANDMARKS - std::vector<LandmarkList>
|
||||
// The hand landmarks to be filtered in world coordinates.
|
||||
// MULTI_CLASSIFICATIONS - std::vector<ClassificationList>
|
||||
// The handedness of hands.
|
||||
// IMAGE_SIZE - std::pair<int, int>
|
||||
// The size of the image which the hand landmarks are detected on.
|
||||
//
|
||||
// Outputs:
|
||||
// MULTI_LANDMARKS - std::vector<NormalizedLandmarkList>
|
||||
// The hand landmarks with duplication removed.
|
||||
// MULTI_ROIS - std::vector<NormalizedRect>
|
||||
// The regions where each encloses the landmarks of a single hand with
|
||||
// duplicate hands removed.
|
||||
// MULTI_WORLD_LANDMARKS - std::vector<LandmarkList>
|
||||
// The hand landmarks with duplication removed in world coordinates.
|
||||
// MULTI_CLASSIFICATIONS - std::vector<ClassificationList>
|
||||
// The handedness of hands with duplicate hands removed.
|
||||
//
|
||||
// Example:
|
||||
// node {
|
||||
// calculator: "HandLandmarksDeduplicationCalculator"
|
||||
// input_stream: "MULTI_LANDMARKS:landmarks_in"
|
||||
// input_stream: "MULTI_ROIS:rois_in"
|
||||
// input_stream: "MULTI_WORLD_LANDMARKS:world_landmarks_in"
|
||||
// input_stream: "MULTI_CLASSIFICATIONS:handedness_in"
|
||||
// input_stream: "IMAGE_SIZE:image_size"
|
||||
// output_stream: "MULTI_LANDMARKS:landmarks_out"
|
||||
// output_stream: "MULTI_ROIS:rois_out"
|
||||
// output_stream: "MULTI_WORLD_LANDMARKS:world_landmarks_out"
|
||||
// output_stream: "MULTI_CLASSIFICATIONS:handedness_out"
|
||||
// }
|
||||
class HandLandmarksDeduplicationCalculator : public Node {
|
||||
public:
|
||||
constexpr static Input<std::vector<mediapipe::NormalizedLandmarkList>>
|
||||
kInLandmarks{"MULTI_LANDMARKS"};
|
||||
constexpr static Input<std::vector<mediapipe::NormalizedRect>>::Optional
|
||||
kInRois{"MULTI_ROIS"};
|
||||
constexpr static Input<std::vector<mediapipe::LandmarkList>>::Optional
|
||||
kInWorldLandmarks{"MULTI_WORLD_LANDMARKS"};
|
||||
constexpr static Input<std::vector<mediapipe::ClassificationList>>::Optional
|
||||
kInClassifications{"MULTI_CLASSIFICATIONS"};
|
||||
constexpr static Input<std::pair<int, int>> kInSize{"IMAGE_SIZE"};
|
||||
|
||||
constexpr static Output<std::vector<mediapipe::NormalizedLandmarkList>>
|
||||
kOutLandmarks{"MULTI_LANDMARKS"};
|
||||
constexpr static Output<std::vector<mediapipe::NormalizedRect>>::Optional
|
||||
kOutRois{"MULTI_ROIS"};
|
||||
constexpr static Output<std::vector<mediapipe::LandmarkList>>::Optional
|
||||
kOutWorldLandmarks{"MULTI_WORLD_LANDMARKS"};
|
||||
constexpr static Output<std::vector<mediapipe::ClassificationList>>::Optional
|
||||
kOutClassifications{"MULTI_CLASSIFICATIONS"};
|
||||
MEDIAPIPE_NODE_CONTRACT(kInLandmarks, kInRois, kInWorldLandmarks,
|
||||
kInClassifications, kInSize, kOutLandmarks, kOutRois,
|
||||
kOutWorldLandmarks, kOutClassifications);
|
||||
absl::Status Process(mediapipe::CalculatorContext* cc) override;
|
||||
};
|
||||
|
||||
} // namespace mediapipe::api2
|
||||
|
||||
#endif // MEDIAPIPE_TASKS_CC_VISION_HAND_LANDMARKER_CALCULATORS_HAND_LANDMARKS_DEDUPLICATION_CALCULATOR_H_
|
|
@ -247,11 +247,37 @@ class HandLandmarkerGraph : public core::ModelTaskGraph {
|
|||
image_in >> hand_landmarks_detector_graph.In("IMAGE");
|
||||
clipped_hand_rects >> hand_landmarks_detector_graph.In("HAND_RECT");
|
||||
|
||||
auto landmarks = hand_landmarks_detector_graph.Out(kLandmarksTag);
|
||||
auto world_landmarks =
|
||||
hand_landmarks_detector_graph.Out(kWorldLandmarksTag);
|
||||
auto hand_rects_for_next_frame =
|
||||
hand_landmarks_detector_graph[Output<std::vector<NormalizedRect>>(
|
||||
kHandRectNextFrameTag)];
|
||||
hand_landmarks_detector_graph.Out(kHandRectNextFrameTag);
|
||||
auto handedness = hand_landmarks_detector_graph.Out(kHandednessTag);
|
||||
|
||||
auto& image_property = graph.AddNode("ImagePropertiesCalculator");
|
||||
image_in >> image_property.In("IMAGE");
|
||||
auto image_size = image_property.Out("SIZE");
|
||||
|
||||
auto& deduplicate = graph.AddNode("HandLandmarksDeduplicationCalculator");
|
||||
landmarks >> deduplicate.In("MULTI_LANDMARKS");
|
||||
world_landmarks >> deduplicate.In("MULTI_WORLD_LANDMARKS");
|
||||
hand_rects_for_next_frame >> deduplicate.In("MULTI_ROIS");
|
||||
handedness >> deduplicate.In("MULTI_CLASSIFICATIONS");
|
||||
image_size >> deduplicate.In("IMAGE_SIZE");
|
||||
|
||||
auto filtered_landmarks =
|
||||
deduplicate[Output<std::vector<NormalizedLandmarkList>>(
|
||||
"MULTI_LANDMARKS")];
|
||||
auto filtered_world_landmarks =
|
||||
deduplicate[Output<std::vector<LandmarkList>>("MULTI_WORLD_LANDMARKS")];
|
||||
auto filtered_hand_rects_for_next_frame =
|
||||
deduplicate[Output<std::vector<NormalizedRect>>("MULTI_ROIS")];
|
||||
auto filtered_handedness =
|
||||
deduplicate[Output<std::vector<ClassificationList>>(
|
||||
"MULTI_CLASSIFICATIONS")];
|
||||
|
||||
// Back edge.
|
||||
hand_rects_for_next_frame >> previous_loopback.In("LOOP");
|
||||
filtered_hand_rects_for_next_frame >> previous_loopback.In("LOOP");
|
||||
|
||||
// TODO: Replace PassThroughCalculator with a calculator that
|
||||
// converts the pixel data to be stored on the target storage (CPU vs GPU).
|
||||
|
@ -259,14 +285,10 @@ class HandLandmarkerGraph : public core::ModelTaskGraph {
|
|||
image_in >> pass_through.In("");
|
||||
|
||||
return {{
|
||||
/* landmark_lists= */ hand_landmarks_detector_graph
|
||||
[Output<std::vector<NormalizedLandmarkList>>(kLandmarksTag)],
|
||||
/* world_landmark_lists= */
|
||||
hand_landmarks_detector_graph[Output<std::vector<LandmarkList>>(
|
||||
kWorldLandmarksTag)],
|
||||
/* hand_rects_next_frame= */ hand_rects_for_next_frame,
|
||||
hand_landmarks_detector_graph[Output<std::vector<ClassificationList>>(
|
||||
kHandednessTag)],
|
||||
/* landmark_lists= */ filtered_landmarks,
|
||||
/* world_landmark_lists= */ filtered_world_landmarks,
|
||||
/* hand_rects_next_frame= */ filtered_hand_rects_for_next_frame,
|
||||
/* handedness= */ filtered_handedness,
|
||||
/* palm_rects= */
|
||||
hand_detector[Output<std::vector<NormalizedRect>>(kPalmRectsTag)],
|
||||
/* palm_detections */
|
||||
|
|
|
@ -79,3 +79,30 @@ cc_library(
|
|||
"@stblib//:stb_image",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "landmarks_duplicates_finder",
|
||||
hdrs = ["landmarks_duplicates_finder.h"],
|
||||
deps = [
|
||||
"//mediapipe/framework/formats:landmark_cc_proto",
|
||||
"@com_google_absl//absl/container:flat_hash_set",
|
||||
"@com_google_absl//absl/status:statusor",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "landmarks_utils",
|
||||
srcs = ["landmarks_utils.cc"],
|
||||
hdrs = ["landmarks_utils.h"],
|
||||
deps = ["//mediapipe/tasks/cc/components/containers:landmarks_detection"],
|
||||
)
|
||||
|
||||
cc_test(
|
||||
name = "landmarks_utils_test",
|
||||
srcs = ["landmarks_utils_test.cc"],
|
||||
deps = [
|
||||
":landmarks_utils",
|
||||
"//mediapipe/framework/port:gtest_main",
|
||||
"//mediapipe/tasks/cc/components/containers:landmarks_detection",
|
||||
],
|
||||
)
|
||||
|
|
|
@ -0,0 +1,40 @@
|
|||
/* Copyright 2022 The MediaPipe Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef MEDIAPIPE_TASKS_CC_VISION_UTILS_LANDMARKS_DUPLICATES_FINDER_H_
|
||||
#define MEDIAPIPE_TASKS_CC_VISION_UTILS_LANDMARKS_DUPLICATES_FINDER_H_
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "absl/container/flat_hash_set.h"
|
||||
#include "absl/status/statusor.h"
|
||||
#include "mediapipe/framework/formats/landmark.pb.h"
|
||||
|
||||
namespace mediapipe::tasks::vision::utils {
|
||||
|
||||
class DuplicatesFinder {
|
||||
public:
|
||||
virtual ~DuplicatesFinder() = default;
|
||||
// Returns indices of landmark lists to remove to make @multi_landmarks
|
||||
// contain different enough (depending on the implementation) landmark lists
|
||||
// only.
|
||||
virtual absl::StatusOr<absl::flat_hash_set<int>> FindDuplicates(
|
||||
const std::vector<mediapipe::NormalizedLandmarkList>& multi_landmarks,
|
||||
int input_width, int input_height) = 0;
|
||||
};
|
||||
|
||||
} // namespace mediapipe::tasks::vision::utils
|
||||
|
||||
#endif // MEDIAPIPE_TASKS_CC_VISION_UTILS_LANDMARKS_DUPLICATES_FINDER_H_
|
48
mediapipe/tasks/cc/vision/utils/landmarks_utils.cc
Normal file
48
mediapipe/tasks/cc/vision/utils/landmarks_utils.cc
Normal file
|
@ -0,0 +1,48 @@
|
|||
/* Copyright 2022 The MediaPipe Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "mediapipe/tasks/cc/vision/utils/landmarks_utils.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
|
||||
namespace mediapipe::tasks::vision::utils {
|
||||
|
||||
using ::mediapipe::tasks::components::containers::Bound;
|
||||
|
||||
float CalculateArea(const Bound& bound) {
|
||||
return (bound.right - bound.left) * (bound.bottom - bound.top);
|
||||
}
|
||||
|
||||
float CalculateIntersectionArea(const Bound& a, const Bound& b) {
|
||||
const float intersection_left = std::max<float>(a.left, b.left);
|
||||
const float intersection_top = std::max<float>(a.top, b.top);
|
||||
const float intersection_right = std::min<float>(a.right, b.right);
|
||||
const float intersection_bottom = std::min<float>(a.bottom, b.bottom);
|
||||
|
||||
return std::max<float>(intersection_bottom - intersection_top, 0.0) *
|
||||
std::max<float>(intersection_right - intersection_left, 0.0);
|
||||
}
|
||||
|
||||
float CalculateIOU(const Bound& a, const Bound& b) {
|
||||
const float area_a = CalculateArea(a);
|
||||
const float area_b = CalculateArea(b);
|
||||
if (area_a <= 0 || area_b <= 0) return 0.0;
|
||||
|
||||
const float intersection_area = CalculateIntersectionArea(a, b);
|
||||
return intersection_area / (area_a + area_b - intersection_area);
|
||||
}
|
||||
|
||||
} // namespace mediapipe::tasks::vision::utils
|
41
mediapipe/tasks/cc/vision/utils/landmarks_utils.h
Normal file
41
mediapipe/tasks/cc/vision/utils/landmarks_utils.h
Normal file
|
@ -0,0 +1,41 @@
|
|||
/* Copyright 2022 The MediaPipe Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef MEDIAPIPE_TASKS_CC_VISION_UTILS_LANDMARKS_UTILS_H_
|
||||
#define MEDIAPIPE_TASKS_CC_VISION_UTILS_LANDMARKS_UTILS_H_
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cmath>
|
||||
#include <limits>
|
||||
#include <vector>
|
||||
|
||||
#include "mediapipe/tasks/cc/components/containers/landmarks_detection.h"
|
||||
|
||||
namespace mediapipe::tasks::vision::utils {
|
||||
|
||||
// Calculates intersection over union for two bounds.
|
||||
float CalculateIOU(const components::containers::Bound& a,
|
||||
const components::containers::Bound& b);
|
||||
|
||||
// Calculates the area of a bound.
|
||||
float CalculateArea(const components::containers::Bound& bound);
|
||||
|
||||
// Calculates the intersection area of two bounds.
|
||||
float CalculateIntersectionArea(const components::containers::Bound& a,
|
||||
const components::containers::Bound& b);
|
||||
} // namespace mediapipe::tasks::vision::utils
|
||||
|
||||
#endif // MEDIAPIPE_TASKS_CC_VISION_UTILS_LANDMARKS_UTILS_H_
|
41
mediapipe/tasks/cc/vision/utils/landmarks_utils_test.cc
Normal file
41
mediapipe/tasks/cc/vision/utils/landmarks_utils_test.cc
Normal file
|
@ -0,0 +1,41 @@
|
|||
/* Copyright 2022 The MediaPipe Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "mediapipe/tasks/cc/vision/utils/landmarks_utils.h"
|
||||
|
||||
#include "mediapipe/framework/port/gmock.h"
|
||||
#include "mediapipe/framework/port/gtest.h"
|
||||
|
||||
namespace mediapipe::tasks::vision::utils {
|
||||
namespace {
|
||||
|
||||
// Exercises CalculateIOU on disjoint, identical, partially overlapping and
// degenerate bounds. Bounds are written as {left, top, right, bottom}.
// Floating-point results are compared with EXPECT_FLOAT_EQ rather than exact
// equality.
TEST(LandmarkUtilsTest, CalculateIOU) {
  // Do not intersect
  EXPECT_FLOAT_EQ(0.0f, CalculateIOU({0, 0, 1, 1}, {2, 2, 3, 3}));
  // No x intersection
  EXPECT_FLOAT_EQ(0.0f, CalculateIOU({0, 0, 1, 1}, {2, 0, 3, 1}));
  // No y intersection
  EXPECT_FLOAT_EQ(0.0f, CalculateIOU({0, 0, 1, 1}, {0, 2, 1, 3}));
  // Full intersection
  EXPECT_FLOAT_EQ(1.0f, CalculateIOU({0, 0, 2, 2}, {0, 0, 2, 2}));

  // Union is 4 intersection is 1
  EXPECT_FLOAT_EQ(0.25f, CalculateIOU({0, 0, 3, 1}, {2, 0, 4, 1}));

  // Same, but along y
  EXPECT_FLOAT_EQ(0.25f, CalculateIOU({0, 0, 1, 3}, {0, 2, 1, 4}));

  // A zero-area bound yields an IOU of 0
  EXPECT_FLOAT_EQ(0.0f, CalculateIOU({0, 0, 0, 0}, {0, 0, 1, 1}));
}
|
||||
} // namespace
|
||||
} // namespace mediapipe::tasks::vision::utils
|
Loading…
Reference in New Issue
Block a user