Migrate landmarks deduplication to mediapipe tasks.

PiperOrigin-RevId: 479681836
This commit is contained in:
MediaPipe Team 2022-10-07 15:31:34 -07:00 committed by Copybara-Service
parent d90daa859f
commit b616bc4427
12 changed files with 727 additions and 13 deletions

View File

@ -0,0 +1,22 @@
# Copyright 2022 The MediaPipe Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
package(default_visibility = ["//mediapipe/tasks:internal"])
licenses(["notice"])
# Header-only library exposing the plain landmark / bound container structs
# declared in landmarks_detection.h.
cc_library(
name = "landmarks_detection",
hdrs = ["landmarks_detection.h"],
)

View File

@ -0,0 +1,43 @@
/* Copyright 2022 The MediaPipe Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef MEDIAPIPE_TASKS_CC_COMPONENTS_CONTAINERS_LANDMARKS_DETECTION_H_
#define MEDIAPIPE_TASKS_CC_COMPONENTS_CONTAINERS_LANDMARKS_DETECTION_H_
#include <vector>
// Structs holding landmark-related data for the hand landmarker, pose
// detector, face mesher, etc.
namespace mediapipe::tasks::components::containers {
// A single landmark point.
//
// x and y are in the [0, 1] range, with the origin at the top left of the
// input image space. If the model provides z, z is on the same scale as x,
// with its origin at the center of the face.
// NOTE(review): the "center of the face" wording looks face-specific although
// this struct is shared with hand / pose landmarks -- confirm.
//
// Members default to zero so that a default-constructed Landmark never exposes
// indeterminate values. Aggregate initialization (Landmark{x, y, z}) still
// works.
struct Landmark {
  float x = 0.0f;
  float y = 0.0f;
  float z = 0.0f;
};
// An axis-aligned rectangle given by its four edges, each in the [0, 1] range
// in input image space.
//
// Members default to zero so that a default-constructed Bound never exposes
// indeterminate values. Aggregate initialization in the order
// {left, top, right, bottom} still works.
struct Bound {
  float left = 0.0f;
  float top = 0.0f;
  float right = 0.0f;
  float bottom = 0.0f;
};
} // namespace mediapipe::tasks::components::containers
#endif // MEDIAPIPE_TASKS_CC_COMPONENTS_CONTAINERS_LANDMARKS_DETECTION_H_

View File

@ -80,6 +80,7 @@ cc_library(
"//mediapipe/calculators/core:gate_calculator_cc_proto",
"//mediapipe/calculators/core:pass_through_calculator",
"//mediapipe/calculators/core:previous_loopback_calculator",
"//mediapipe/calculators/image:image_properties_calculator",
"//mediapipe/calculators/util:collection_has_min_size_calculator",
"//mediapipe/calculators/util:collection_has_min_size_calculator_cc_proto",
"//mediapipe/framework/api2:builder",
@ -98,6 +99,7 @@ cc_library(
"//mediapipe/tasks/cc/vision/hand_detector/proto:hand_detector_graph_options_cc_proto",
"//mediapipe/tasks/cc/vision/hand_landmarker/calculators:hand_association_calculator",
"//mediapipe/tasks/cc/vision/hand_landmarker/calculators:hand_association_calculator_cc_proto",
"//mediapipe/tasks/cc/vision/hand_landmarker/calculators:hand_landmarks_deduplication_calculator",
"//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarker_graph_options_cc_proto",
"//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarks_detector_graph_options_cc_proto",
],

View File

@ -15,7 +15,6 @@
load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library")
package(default_visibility = [
"//mediapipe/app/xeno:__subpackages__",
"//mediapipe/tasks:internal",
])
@ -46,4 +45,26 @@ cc_library(
alwayslink = 1,
)
# TODO: Enable this test
# Calculator that removes duplicate hand landmark detections; see
# hand_landmarks_deduplication_calculator.h for its stream contract.
cc_library(
name = "hand_landmarks_deduplication_calculator",
srcs = ["hand_landmarks_deduplication_calculator.cc"],
hdrs = ["hand_landmarks_deduplication_calculator.h"],
deps = [
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/api2:builder",
"//mediapipe/framework/api2:node",
"//mediapipe/framework/api2:port",
"//mediapipe/framework/formats:classification_cc_proto",
"//mediapipe/framework/formats:landmark_cc_proto",
"//mediapipe/framework/formats:rect_cc_proto",
"//mediapipe/tasks/cc/components/containers:landmarks_detection",
"//mediapipe/tasks/cc/vision/utils:landmarks_duplicates_finder",
"//mediapipe/tasks/cc/vision/utils:landmarks_utils",
"@com_google_absl//absl/algorithm:container",
"@com_google_absl//absl/container:flat_hash_set",
"@com_google_absl//absl/memory",
"@com_google_absl//absl/status:statusor",
"@com_google_absl//absl/types:optional",
],
# alwayslink keeps this library linked in even when none of its symbols are
# referenced directly; presumably required because the calculator is only
# reached through its MEDIAPIPE_REGISTER_NODE registration -- confirm.
alwayslink = 1,
)

View File

@ -0,0 +1,310 @@
/* Copyright 2022 The MediaPipe Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "mediapipe/tasks/cc/vision/hand_landmarker/calculators/hand_landmarks_deduplication_calculator.h"
#include <algorithm>
#include <cmath>
#include <limits>
#include <memory>
#include <optional>
#include <utility>
#include <vector>
#include "absl/algorithm/container.h"
#include "absl/container/flat_hash_set.h"
#include "absl/memory/memory.h"
#include "absl/status/statusor.h"
#include "absl/types/optional.h"
#include "mediapipe/framework/api2/builder.h"
#include "mediapipe/framework/api2/node.h"
#include "mediapipe/framework/api2/port.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/classification.pb.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/formats/rect.pb.h"
#include "mediapipe/tasks/cc/components/containers/landmarks_detection.h"
#include "mediapipe/tasks/cc/vision/utils/landmarks_duplicates_finder.h"
#include "mediapipe/tasks/cc/vision/utils/landmarks_utils.h"
namespace mediapipe::api2 {
namespace {
using ::mediapipe::api2::Input;
using ::mediapipe::api2::Output;
using ::mediapipe::api2::builder::Source;
using ::mediapipe::tasks::components::containers::Bound;
using ::mediapipe::tasks::vision::utils::CalculateIOU;
using ::mediapipe::tasks::vision::utils::DuplicatesFinder;
// Returns the 2D Euclidean distance between two normalized landmarks after
// scaling the x difference by `width` and the y difference by `height` (image
// pixels when the caller passes the image size). z is ignored.
float Distance(const NormalizedLandmark& lm_a, const NormalizedLandmark& lm_b,
               int width, int height) {
  // Square by multiplication instead of std::pow, which is a general-purpose
  // (and comparatively slow) routine. The sum is accumulated in double and
  // narrowed to float only once, at the end.
  const double dx = (lm_a.x() - lm_b.x()) * width;
  const double dy = (lm_a.y() - lm_b.y()) * height;
  return static_cast<float>(std::sqrt(dx * dx + dy * dy));
}
// Computes, index by index, the scaled distance between the corresponding
// landmarks of `a` and `b` (see Distance()). Fails if the two lists do not
// hold the same number of landmarks.
absl::StatusOr<std::vector<float>> Distances(const NormalizedLandmarkList& a,
                                             const NormalizedLandmarkList& b,
                                             int width, int height) {
  const int landmark_count = a.landmark_size();
  RET_CHECK_EQ(b.landmark_size(), landmark_count);
  // Pre-size the result and fill it in place.
  std::vector<float> result(landmark_count);
  for (int idx = 0; idx < landmark_count; ++idx) {
    result[idx] = Distance(a.landmark(idx), b.landmark(idx), width, height);
  }
  return result;
}
// Calculates a baseline distance of a hand that can be used as a relative
// measure when calculating hand to hand similarity.
//
// Calculated as maximum of distances: 0->5, 5->17, 17->0, where 0, 5, 17 key
// points are depicted below:
//
//               /Middle/
//                  |
//        /Index/   |    /Ring/
//           |      |      |   /Pinky/
//           V      V      V      |
//                                V
//          [8]   [12]   [16]
//           |      |      |    [20]
//           |      |      |      |
// /Thumb/   |      |      |      |
//    |     [7]   [11]   [15]   [19]
//    V      |      |      |      |
//           |      |      |      |
//   [4]     |      |      |      |
//    |     [6]   [10]   [14]   [18]
//    |      |      |      |      |
//    |      |      |      |      |
//   [3]     |      |      |      |
//    |     [5]----[9]---[13]---[17]
//    .      |                    |
//     \     .                    |
//      \   /                     |
//       [2]                      |
//         \                      |
//          \                     |
//           \                    |
//           [1]                  .
//             \                 /
//              \               /
//               ._____[0]_____.
//
//                      ^
//                      |
//                   /Wrist/
absl::StatusOr<float> HandBaselineDistance(
    const NormalizedLandmarkList& landmarks, int width, int height) {
  RET_CHECK_EQ(landmarks.landmark_size(), 21);  // Num of hand landmarks.
  constexpr int kWrist = 0;
  constexpr int kIndexFingerMcp = 5;
  constexpr int kPinkyMcp = 17;

  const NormalizedLandmark& wrist = landmarks.landmark(kWrist);
  const NormalizedLandmark& index_mcp = landmarks.landmark(kIndexFingerMcp);
  const NormalizedLandmark& pinky_mcp = landmarks.landmark(kPinkyMcp);

  // The three sides of the wrist / index-MCP / pinky-MCP triangle.
  const float wrist_to_index = Distance(wrist, index_mcp, width, height);
  const float index_to_pinky = Distance(index_mcp, pinky_mcp, width, height);
  const float pinky_to_wrist = Distance(pinky_mcp, wrist, width, height);

  // The baseline is the longest side.
  return std::max({wrist_to_index, index_to_pinky, pinky_to_wrist});
}
// Returns the axis-aligned, non-rotated bounding box that encloses the x/y
// coordinates of every landmark in `list`, in the landmarks' own normalized
// coordinates. For an empty list the initial sentinel extremes are returned
// untouched.
Bound CalculateBound(const NormalizedLandmarkList& list) {
  // Start from inverted extremes so that the first landmark replaces them.
  Bound box{/*left=*/std::numeric_limits<float>::max(),
            /*top=*/std::numeric_limits<float>::max(),
            /*right=*/std::numeric_limits<float>::lowest(),
            /*bottom=*/std::numeric_limits<float>::lowest()};
  for (const auto& point : list.landmark()) {
    box.left = std::min(box.left, point.x());
    box.top = std::min(box.top, point.y());
    box.right = std::max(box.right, point.x());
    box.bottom = std::max(box.bottom, point.y());
  }
  return box;
}
// Detects duplicate / similar hands by combining the IoU of their bounding
// boxes with the number of corresponding landmarks that lie close to each
// other. The IoU threshold, the distance threshold and the number of landmarks
// that must match were found experimentally and evaluated by:
//   - manually comparing side by side, before and after deduplication applied
//   - generating gesture dataset, and checking select frames in baseline and
//     "deduplicated" dataset
//   - confirming gesture training is better with use of deduplication using
//     selected thresholds
class HandDuplicatesFinder : public DuplicatesFinder {
 public:
  explicit HandDuplicatesFinder(bool start_from_the_end)
      : start_from_the_end_(start_from_the_end) {}

  // Returns the indices of the hands in `multi_landmarks` that duplicate a
  // hand visited earlier. Hands are visited front-to-back, or back-to-front
  // when `start_from_the_end` was set, so the first visited member of a group
  // of duplicates is the one that is kept.
  absl::StatusOr<absl::flat_hash_set<int>> FindDuplicates(
      const std::vector<NormalizedLandmarkList>& multi_landmarks,
      int input_width, int input_height) override {
    // Two landmarks "match" when they are closer than this fraction of the
    // larger of the two hands' baseline distances.
    constexpr float kAllowedBaselineDistanceRatio = 0.2f;
    constexpr int kNumMatchedLandmarksToSuppressHand = 10;  // out of 21
    constexpr float kMinIouThresholdToSuppressHand = 0.2f;

    // Per-hand quantities, computed once up front.
    const int hand_count = multi_landmarks.size();
    std::vector<float> baselines;
    std::vector<Bound> boxes;
    baselines.reserve(hand_count);
    boxes.reserve(hand_count);
    for (const NormalizedLandmarkList& hand : multi_landmarks) {
      ASSIGN_OR_RETURN(const float baseline,
                       HandBaselineDistance(hand, input_width, input_height));
      baselines.push_back(baseline);
      boxes.push_back(CalculateBound(hand));
    }

    std::vector<int> kept;
    absl::flat_hash_set<int> duplicates;
    for (int step = 0; step < hand_count; ++step) {
      const int candidate = start_from_the_end_ ? hand_count - step - 1 : step;
      bool is_duplicate = false;
      // A candidate is a duplicate as soon as it matches any kept hand.
      for (const int other : kept) {
        const float max_distance =
            std::max(baselines[candidate], baselines[other]) *
            kAllowedBaselineDistanceRatio;
        ASSIGN_OR_RETURN(
            const std::vector<float> distances,
            Distances(multi_landmarks[candidate], multi_landmarks[other],
                      input_width, input_height));
        const int close_landmarks = absl::c_count_if(
            distances, [max_distance](float d) { return d < max_distance; });
        const float iou = CalculateIOU(boxes[candidate], boxes[other]);
        if (close_landmarks >= kNumMatchedLandmarksToSuppressHand &&
            iou > kMinIouThresholdToSuppressHand) {
          is_duplicate = true;
          break;
        }
      }
      if (is_duplicate) {
        duplicates.insert(candidate);
      } else {
        kept.push_back(candidate);
      }
    }
    return duplicates;
  }

 private:
  const bool start_from_the_end_;
};
// Validates an optional multi-element input and prepares the matching output
// container.
//
// If `port` is connected and not empty, checks that its payload holds exactly
// `num_expected_size` elements (an error status is returned otherwise) and
// returns an engaged optional holding an empty payload for the caller to fill.
// In every other case returns absl::nullopt so the caller can skip the stream.
template <typename InputPortT>
absl::StatusOr<absl::optional<typename InputPortT::PayloadT>>
VerifyNumAndMaybeInitOutput(const InputPortT& port, CalculatorContext* cc,
                            int num_expected_size) {
  absl::optional<typename InputPortT::PayloadT> output;
  if (port(cc).IsConnected() && !port(cc).IsEmpty()) {
    RET_CHECK_EQ(port(cc).Get().size(), num_expected_size);
    // Engage the optional with a default-constructed (empty) payload.
    output.emplace();
  }
  return output;
}
} // namespace
std::unique_ptr<DuplicatesFinder> CreateHandDuplicatesFinder(
bool start_from_the_end) {
return absl::make_unique<HandDuplicatesFinder>(start_from_the_end);
}
// Drops duplicate hands from the landmark stream and, when they are present,
// from the parallel ROI / world-landmark / handedness streams.
//
// Nothing is sent when either the landmarks or the image-size input is empty.
// When no duplicates are found the input packets are forwarded unchanged.
absl::Status HandLandmarksDeduplicationCalculator::Process(
    mediapipe::CalculatorContext* cc) {
  if (kInLandmarks(cc).IsEmpty() || kInSize(cc).IsEmpty()) {
    return absl::OkStatus();
  }

  const std::vector<NormalizedLandmarkList>& hands = *kInLandmarks(cc);
  const std::pair<int, int>& size = *kInSize(cc);

  // size.first is used as the width and size.second as the height.
  std::unique_ptr<DuplicatesFinder> finder =
      CreateHandDuplicatesFinder(/*start_from_the_end=*/false);
  ASSIGN_OR_RETURN(
      absl::flat_hash_set<int> duplicates,
      finder->FindDuplicates(hands, size.first, size.second));

  // Fast path: nothing to remove, so pass every input through as-is.
  if (duplicates.empty()) {
    kOutLandmarks(cc).Send(kInLandmarks(cc));
    kOutRois(cc).Send(kInRois(cc));
    kOutWorldLandmarks(cc).Send(kInWorldLandmarks(cc));
    kOutClassifications(cc).Send(kInClassifications(cc));
    return absl::OkStatus();
  }

  // Each optional stream, when available, must be parallel to the landmarks.
  const int hand_count = hands.size();
  ASSIGN_OR_RETURN(absl::optional<std::vector<NormalizedRect>> kept_rois,
                   VerifyNumAndMaybeInitOutput(kInRois, cc, hand_count));
  ASSIGN_OR_RETURN(
      absl::optional<std::vector<LandmarkList>> kept_world_landmarks,
      VerifyNumAndMaybeInitOutput(kInWorldLandmarks, cc, hand_count));
  ASSIGN_OR_RETURN(
      absl::optional<std::vector<ClassificationList>> kept_classifications,
      VerifyNumAndMaybeInitOutput(kInClassifications, cc, hand_count));

  std::vector<NormalizedLandmarkList> kept_landmarks;
  for (int i = 0; i < hand_count; ++i) {
    if (duplicates.contains(i)) continue;
    kept_landmarks.push_back(hands[i]);
    if (kept_rois) {
      kept_rois->push_back(kInRois(cc).Get()[i]);
    }
    if (kept_world_landmarks) {
      kept_world_landmarks->push_back(kInWorldLandmarks(cc).Get()[i]);
    }
    if (kept_classifications) {
      kept_classifications->push_back(kInClassifications(cc).Get()[i]);
    }
  }

  // Only non-empty collections are sent downstream.
  if (!kept_landmarks.empty()) {
    kOutLandmarks(cc).Send(std::move(kept_landmarks));
  }
  if (kept_rois && !kept_rois->empty()) {
    kOutRois(cc).Send(std::move(kept_rois.value()));
  }
  if (kept_world_landmarks && !kept_world_landmarks->empty()) {
    kOutWorldLandmarks(cc).Send(std::move(kept_world_landmarks.value()));
  }
  if (kept_classifications && !kept_classifications->empty()) {
    kOutClassifications(cc).Send(std::move(kept_classifications.value()));
  }
  return absl::OkStatus();
}
MEDIAPIPE_REGISTER_NODE(HandLandmarksDeduplicationCalculator);
} // namespace mediapipe::api2

View File

@ -0,0 +1,97 @@
/* Copyright 2022 The MediaPipe Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef MEDIAPIPE_TASKS_CC_VISION_HAND_LANDMARKER_CALCULATORS_HAND_LANDMARKS_DEDUPLICATION_CALCULATOR_H_
#define MEDIAPIPE_TASKS_CC_VISION_HAND_LANDMARKER_CALCULATORS_HAND_LANDMARKS_DEDUPLICATION_CALCULATOR_H_
#include "mediapipe/framework/api2/builder.h"
#include "mediapipe/framework/api2/node.h"
#include "mediapipe/framework/api2/port.h"
#include "mediapipe/framework/formats/classification.pb.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/formats/rect.pb.h"
#include "mediapipe/tasks/cc/vision/utils/landmarks_duplicates_finder.h"
namespace mediapipe::api2 {
// Creates a DuplicatesFinder dedicated to finding duplicate hands.
// `start_from_the_end` is forwarded to the finder implementation and defaults
// to false.
std::unique_ptr<tasks::vision::utils::DuplicatesFinder>
CreateHandDuplicatesFinder(bool start_from_the_end = false);
// Filter duplicate hand landmarks by finding the overlapped hands.
// Inputs:
// MULTI_LANDMARKS - std::vector<NormalizedLandmarkList>
// The hand landmarks to be filtered.
// MULTI_ROIS - std::vector<NormalizedRect>
// The regions where each encloses the landmarks of a single hand.
// MULTI_WORLD_LANDMARKS - std::vector<LandmarkList>
// The hand landmarks to be filtered in world coordinates.
// MULTI_CLASSIFICATIONS - std::vector<ClassificationList>
// The handedness of hands.
// IMAGE_SIZE - std::pair<int, int>
// The size of the image which the hand landmarks are detected on.
//
// Outputs:
// MULTI_LANDMARKS - std::vector<NormalizedLandmarkList>
// The hand landmarks with duplication removed.
// MULTI_ROIS - std::vector<NormalizedRect>
// The regions where each encloses the landmarks of a single hand with
// duplicate hands removed.
// MULTI_WORLD_LANDMARKS - std::vector<LandmarkList>
// The hand landmarks with duplication removed in world coordinates.
// MULTI_CLASSIFICATIONS - std::vector<ClassificationList>
// The handedness of hands with duplicate hands removed.
//
// Example:
// node {
// calculator: "HandLandmarksDeduplicationCalculator"
// input_stream: "MULTI_LANDMARKS:landmarks_in"
// input_stream: "MULTI_ROIS:rois_in"
// input_stream: "MULTI_WORLD_LANDMARKS:world_landmarks_in"
// input_stream: "MULTI_CLASSIFICATIONS:handedness_in"
// input_stream: "IMAGE_SIZE:image_size"
// output_stream: "MULTI_LANDMARKS:landmarks_out"
// output_stream: "MULTI_ROIS:rois_out"
// output_stream: "MULTI_WORLD_LANDMARKS:world_landmarks_out"
// output_stream: "MULTI_CLASSIFICATIONS:handedness_out"
// }
// api2 node declaring the ports of the deduplication calculator. Input and
// output ports deliberately share tag names.
class HandLandmarksDeduplicationCalculator : public Node {
public:
// Required input: the hand landmarks to deduplicate.
constexpr static Input<std::vector<mediapipe::NormalizedLandmarkList>>
kInLandmarks{"MULTI_LANDMARKS"};
// Optional inputs that are filtered alongside the landmarks.
constexpr static Input<std::vector<mediapipe::NormalizedRect>>::Optional
kInRois{"MULTI_ROIS"};
constexpr static Input<std::vector<mediapipe::LandmarkList>>::Optional
kInWorldLandmarks{"MULTI_WORLD_LANDMARKS"};
constexpr static Input<std::vector<mediapipe::ClassificationList>>::Optional
kInClassifications{"MULTI_CLASSIFICATIONS"};
// Required input: size of the image the landmarks were detected on.
constexpr static Input<std::pair<int, int>> kInSize{"IMAGE_SIZE"};
// Required output: the landmarks with duplicates removed.
constexpr static Output<std::vector<mediapipe::NormalizedLandmarkList>>
kOutLandmarks{"MULTI_LANDMARKS"};
// Optional outputs mirroring the optional inputs.
constexpr static Output<std::vector<mediapipe::NormalizedRect>>::Optional
kOutRois{"MULTI_ROIS"};
constexpr static Output<std::vector<mediapipe::LandmarkList>>::Optional
kOutWorldLandmarks{"MULTI_WORLD_LANDMARKS"};
constexpr static Output<std::vector<mediapipe::ClassificationList>>::Optional
kOutClassifications{"MULTI_CLASSIFICATIONS"};
// Registers every port above with the node contract.
MEDIAPIPE_NODE_CONTRACT(kInLandmarks, kInRois, kInWorldLandmarks,
kInClassifications, kInSize, kOutLandmarks, kOutRois,
kOutWorldLandmarks, kOutClassifications);
// Called by the framework to process the inputs available in `cc`.
absl::Status Process(mediapipe::CalculatorContext* cc) override;
};
} // namespace mediapipe::api2
#endif // MEDIAPIPE_TASKS_CC_VISION_HAND_LANDMARKER_CALCULATORS_HAND_LANDMARKS_DEDUPLICATION_CALCULATOR_H_

View File

@ -247,11 +247,37 @@ class HandLandmarkerGraph : public core::ModelTaskGraph {
image_in >> hand_landmarks_detector_graph.In("IMAGE");
clipped_hand_rects >> hand_landmarks_detector_graph.In("HAND_RECT");
auto landmarks = hand_landmarks_detector_graph.Out(kLandmarksTag);
auto world_landmarks =
hand_landmarks_detector_graph.Out(kWorldLandmarksTag);
auto hand_rects_for_next_frame =
hand_landmarks_detector_graph[Output<std::vector<NormalizedRect>>(
kHandRectNextFrameTag)];
hand_landmarks_detector_graph.Out(kHandRectNextFrameTag);
auto handedness = hand_landmarks_detector_graph.Out(kHandednessTag);
auto& image_property = graph.AddNode("ImagePropertiesCalculator");
image_in >> image_property.In("IMAGE");
auto image_size = image_property.Out("SIZE");
auto& deduplicate = graph.AddNode("HandLandmarksDeduplicationCalculator");
landmarks >> deduplicate.In("MULTI_LANDMARKS");
world_landmarks >> deduplicate.In("MULTI_WORLD_LANDMARKS");
hand_rects_for_next_frame >> deduplicate.In("MULTI_ROIS");
handedness >> deduplicate.In("MULTI_CLASSIFICATIONS");
image_size >> deduplicate.In("IMAGE_SIZE");
auto filtered_landmarks =
deduplicate[Output<std::vector<NormalizedLandmarkList>>(
"MULTI_LANDMARKS")];
auto filtered_world_landmarks =
deduplicate[Output<std::vector<LandmarkList>>("MULTI_WORLD_LANDMARKS")];
auto filtered_hand_rects_for_next_frame =
deduplicate[Output<std::vector<NormalizedRect>>("MULTI_ROIS")];
auto filtered_handedness =
deduplicate[Output<std::vector<ClassificationList>>(
"MULTI_CLASSIFICATIONS")];
// Back edge.
hand_rects_for_next_frame >> previous_loopback.In("LOOP");
filtered_hand_rects_for_next_frame >> previous_loopback.In("LOOP");
// TODO: Replace PassThroughCalculator with a calculator that
// converts the pixel data to be stored on the target storage (CPU vs GPU).
@ -259,14 +285,10 @@ class HandLandmarkerGraph : public core::ModelTaskGraph {
image_in >> pass_through.In("");
return {{
/* landmark_lists= */ hand_landmarks_detector_graph
[Output<std::vector<NormalizedLandmarkList>>(kLandmarksTag)],
/* world_landmark_lists= */
hand_landmarks_detector_graph[Output<std::vector<LandmarkList>>(
kWorldLandmarksTag)],
/* hand_rects_next_frame= */ hand_rects_for_next_frame,
hand_landmarks_detector_graph[Output<std::vector<ClassificationList>>(
kHandednessTag)],
/* landmark_lists= */ filtered_landmarks,
/* world_landmark_lists= */ filtered_world_landmarks,
/* hand_rects_next_frame= */ filtered_hand_rects_for_next_frame,
/* handedness= */ filtered_handedness,
/* palm_rects= */
hand_detector[Output<std::vector<NormalizedRect>>(kPalmRectsTag)],
/* palm_detections */

View File

@ -79,3 +79,30 @@ cc_library(
"@stblib//:stb_image",
],
)
# Header-only library declaring the DuplicatesFinder interface.
cc_library(
name = "landmarks_duplicates_finder",
hdrs = ["landmarks_duplicates_finder.h"],
deps = [
"//mediapipe/framework/formats:landmark_cc_proto",
"@com_google_absl//absl/container:flat_hash_set",
"@com_google_absl//absl/status:statusor",
],
)
# Geometry helpers (area, intersection area, IoU) operating on Bound.
cc_library(
name = "landmarks_utils",
srcs = ["landmarks_utils.cc"],
hdrs = ["landmarks_utils.h"],
deps = ["//mediapipe/tasks/cc/components/containers:landmarks_detection"],
)
# Unit tests for the helpers in landmarks_utils.
cc_test(
name = "landmarks_utils_test",
srcs = ["landmarks_utils_test.cc"],
deps = [
":landmarks_utils",
"//mediapipe/framework/port:gtest_main",
"//mediapipe/tasks/cc/components/containers:landmarks_detection",
],
)

View File

@ -0,0 +1,40 @@
/* Copyright 2022 The MediaPipe Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef MEDIAPIPE_TASKS_CC_VISION_UTILS_LANDMARKS_DUPLICATES_FINDER_H_
#define MEDIAPIPE_TASKS_CC_VISION_UTILS_LANDMARKS_DUPLICATES_FINDER_H_
#include <vector>
#include "absl/container/flat_hash_set.h"
#include "absl/status/statusor.h"
#include "mediapipe/framework/formats/landmark.pb.h"
namespace mediapipe::tasks::vision::utils {
// Interface for strategies that detect near-duplicate landmark lists within a
// single collection.
class DuplicatesFinder {
public:
// Virtual so that implementations can be destroyed through this interface.
virtual ~DuplicatesFinder() = default;
// Returns indices of landmark lists to remove to make @multi_landmarks
// contain different enough (depending on the implementation) landmark lists
// only.
// @input_width and @input_height describe the input the landmarks refer to;
// how they are used is up to the implementation.
virtual absl::StatusOr<absl::flat_hash_set<int>> FindDuplicates(
const std::vector<mediapipe::NormalizedLandmarkList>& multi_landmarks,
int input_width, int input_height) = 0;
};
} // namespace mediapipe::tasks::vision::utils
#endif // MEDIAPIPE_TASKS_CC_VISION_UTILS_LANDMARKS_DUPLICATES_FINDER_H_

View File

@ -0,0 +1,48 @@
/* Copyright 2022 The MediaPipe Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "mediapipe/tasks/cc/vision/utils/landmarks_utils.h"
#include <algorithm>
#include <vector>
namespace mediapipe::tasks::vision::utils {
using ::mediapipe::tasks::components::containers::Bound;
// Returns width * height of `bound`. No validation is performed, so a bound
// with right < left or bottom < top can yield a negative value.
float CalculateArea(const Bound& bound) {
  const float width = bound.right - bound.left;
  const float height = bound.bottom - bound.top;
  return width * height;
}
// Returns the area of the region shared by `a` and `b`, or 0 when they do not
// overlap along at least one axis.
float CalculateIntersectionArea(const Bound& a, const Bound& b) {
  // Signed extent of the overlap on each axis; negative means a gap.
  const float overlap_width =
      std::min<float>(a.right, b.right) - std::max<float>(a.left, b.left);
  const float overlap_height =
      std::min<float>(a.bottom, b.bottom) - std::max<float>(a.top, b.top);
  // Clamp gaps to zero before multiplying.
  return std::max<float>(overlap_height, 0.0f) *
         std::max<float>(overlap_width, 0.0f);
}
// Returns intersection-over-union of `a` and `b`. A bound whose area is zero
// or negative makes the result 0.
float CalculateIOU(const Bound& a, const Bound& b) {
  const float area_a = CalculateArea(a);
  const float area_b = CalculateArea(b);
  if (area_a <= 0 || area_b <= 0) {
    return 0.0f;
  }
  const float overlap = CalculateIntersectionArea(a, b);
  // Union = sum of both areas minus the part counted twice.
  const float union_area = area_a + area_b - overlap;
  return overlap / union_area;
}
} // namespace mediapipe::tasks::vision::utils

View File

@ -0,0 +1,41 @@
/* Copyright 2022 The MediaPipe Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef MEDIAPIPE_TASKS_CC_VISION_UTILS_LANDMARKS_UTILS_H_
#define MEDIAPIPE_TASKS_CC_VISION_UTILS_LANDMARKS_UTILS_H_
#include <algorithm>
#include <array>
#include <cmath>
#include <limits>
#include <vector>
#include "mediapipe/tasks/cc/components/containers/landmarks_detection.h"
namespace mediapipe::tasks::vision::utils {
// Calculates intersection over union for two bounds. Returns 0 when either
// bound has a non-positive area.
float CalculateIOU(const components::containers::Bound& a,
const components::containers::Bound& b);
// Calculates the area of a bound as (right - left) * (bottom - top).
float CalculateArea(const components::containers::Bound& bound);
// Calculates the intersection area of two bounds; 0 when they do not overlap.
float CalculateIntersectionArea(const components::containers::Bound& a,
const components::containers::Bound& b);
} // namespace mediapipe::tasks::vision::utils
#endif // MEDIAPIPE_TASKS_CC_VISION_UTILS_LANDMARKS_UTILS_H_

View File

@ -0,0 +1,41 @@
/* Copyright 2022 The MediaPipe Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "mediapipe/tasks/cc/vision/utils/landmarks_utils.h"
#include "mediapipe/framework/port/gmock.h"
#include "mediapipe/framework/port/gtest.h"
namespace mediapipe::tasks::vision::utils {
namespace {
// Checks CalculateIOU for disjoint, identical, partially overlapping and
// degenerate bounds. Bounds are written as {left, top, right, bottom}.
// Floating-point results are compared with EXPECT_FLOAT_EQ rather than exact
// equality.
TEST(LandmarkUtilsTest, CalculateIOU) {
  // Do not intersect.
  EXPECT_FLOAT_EQ(0.0f, CalculateIOU({0, 0, 1, 1}, {2, 2, 3, 3}));
  // No x intersection.
  EXPECT_FLOAT_EQ(0.0f, CalculateIOU({0, 0, 1, 1}, {2, 0, 3, 1}));
  // No y intersection.
  EXPECT_FLOAT_EQ(0.0f, CalculateIOU({0, 0, 1, 1}, {0, 2, 1, 3}));
  // Full intersection.
  EXPECT_FLOAT_EQ(1.0f, CalculateIOU({0, 0, 2, 2}, {0, 0, 2, 2}));
  // Union is 4, intersection is 1.
  EXPECT_FLOAT_EQ(0.25f, CalculateIOU({0, 0, 3, 1}, {2, 0, 4, 1}));
  // Same, but along y.
  EXPECT_FLOAT_EQ(0.25f, CalculateIOU({0, 0, 1, 3}, {0, 2, 1, 4}));
  // A zero-area bound always yields 0.
  EXPECT_FLOAT_EQ(0.0f, CalculateIOU({0, 0, 0, 0}, {0, 0, 1, 1}));
  // Two zero-area bounds yield 0 instead of evaluating 0 / 0.
  EXPECT_FLOAT_EQ(0.0f, CalculateIOU({0, 0, 0, 0}, {0, 0, 0, 0}));
}
} // namespace
} // namespace mediapipe::tasks::vision::utils