From b616bc4427c6317af66ae06f5f7c50af8f16d0c6 Mon Sep 17 00:00:00 2001 From: MediaPipe Team Date: Fri, 7 Oct 2022 15:31:34 -0700 Subject: [PATCH] Migrate landmarks deduplication to mediapipe tasks. PiperOrigin-RevId: 479681836 --- .../tasks/cc/components/containers/BUILD | 22 ++ .../containers/landmarks_detection.h | 43 +++ .../tasks/cc/vision/hand_landmarker/BUILD | 2 + .../vision/hand_landmarker/calculators/BUILD | 25 +- ...hand_landmarks_deduplication_calculator.cc | 310 ++++++++++++++++++ .../hand_landmarks_deduplication_calculator.h | 97 ++++++ .../hand_landmarker/hand_landmarker_graph.cc | 44 ++- mediapipe/tasks/cc/vision/utils/BUILD | 27 ++ .../utils/landmarks_duplicates_finder.h | 40 +++ .../tasks/cc/vision/utils/landmarks_utils.cc | 48 +++ .../tasks/cc/vision/utils/landmarks_utils.h | 41 +++ .../cc/vision/utils/landmarks_utils_test.cc | 41 +++ 12 files changed, 727 insertions(+), 13 deletions(-) create mode 100644 mediapipe/tasks/cc/components/containers/BUILD create mode 100644 mediapipe/tasks/cc/components/containers/landmarks_detection.h create mode 100644 mediapipe/tasks/cc/vision/hand_landmarker/calculators/hand_landmarks_deduplication_calculator.cc create mode 100644 mediapipe/tasks/cc/vision/hand_landmarker/calculators/hand_landmarks_deduplication_calculator.h create mode 100644 mediapipe/tasks/cc/vision/utils/landmarks_duplicates_finder.h create mode 100644 mediapipe/tasks/cc/vision/utils/landmarks_utils.cc create mode 100644 mediapipe/tasks/cc/vision/utils/landmarks_utils.h create mode 100644 mediapipe/tasks/cc/vision/utils/landmarks_utils_test.cc diff --git a/mediapipe/tasks/cc/components/containers/BUILD b/mediapipe/tasks/cc/components/containers/BUILD new file mode 100644 index 000000000..ca688caf7 --- /dev/null +++ b/mediapipe/tasks/cc/components/containers/BUILD @@ -0,0 +1,22 @@ +# Copyright 2022 The MediaPipe Authors. All Rights Reserved. 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+package(default_visibility = ["//mediapipe/tasks:internal"])
+
+licenses(["notice"])
+
+cc_library(
+    name = "landmarks_detection",
+    hdrs = ["landmarks_detection.h"],
+)
diff --git a/mediapipe/tasks/cc/components/containers/landmarks_detection.h b/mediapipe/tasks/cc/components/containers/landmarks_detection.h
new file mode 100644
index 000000000..7339954d8
--- /dev/null
+++ b/mediapipe/tasks/cc/components/containers/landmarks_detection.h
@@ -0,0 +1,43 @@
+/* Copyright 2022 The MediaPipe Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef MEDIAPIPE_TASKS_CC_COMPONENTS_CONTAINERS_LANDMARKS_DETECTION_H_
+#define MEDIAPIPE_TASKS_CC_COMPONENTS_CONTAINERS_LANDMARKS_DETECTION_H_
+
+#include
+
+// Structs holding landmark-related data for the hand landmarker, pose
+// detector, face mesher, etc.
+namespace mediapipe::tasks::components::containers {
+
+// A landmark. x and y are in [0, 1] with the origin at the top left of the
+// input image. If the model provides z, it is in the same scale as x, with
+// its origin at the center of the face (NOTE(review): confirm for hands).
+struct Landmark {
+  float x;
+  float y;
+  float z;
+};
+
+// Axis-aligned box; edges are in [0, 1] range in input image space.
+struct Bound {
+  float left;    // Minimum x.
+  float top;     // Minimum y.
+  float right;   // Maximum x.
+  float bottom;  // Maximum y.
+};
+
+}  // namespace mediapipe::tasks::components::containers
+#endif  // MEDIAPIPE_TASKS_CC_COMPONENTS_CONTAINERS_LANDMARKS_DETECTION_H_
diff --git a/mediapipe/tasks/cc/vision/hand_landmarker/BUILD b/mediapipe/tasks/cc/vision/hand_landmarker/BUILD
index a2bb458db..e8a832bbc 100644
--- a/mediapipe/tasks/cc/vision/hand_landmarker/BUILD
+++ b/mediapipe/tasks/cc/vision/hand_landmarker/BUILD
@@ -80,6 +80,7 @@ cc_library(
         "//mediapipe/calculators/core:gate_calculator_cc_proto",
         "//mediapipe/calculators/core:pass_through_calculator",
         "//mediapipe/calculators/core:previous_loopback_calculator",
+        "//mediapipe/calculators/image:image_properties_calculator",
         "//mediapipe/calculators/util:collection_has_min_size_calculator",
         "//mediapipe/calculators/util:collection_has_min_size_calculator_cc_proto",
         "//mediapipe/framework/api2:builder",
@@ -98,6 +99,7 @@ cc_library(
         "//mediapipe/tasks/cc/vision/hand_detector/proto:hand_detector_graph_options_cc_proto",
         "//mediapipe/tasks/cc/vision/hand_landmarker/calculators:hand_association_calculator",
         "//mediapipe/tasks/cc/vision/hand_landmarker/calculators:hand_association_calculator_cc_proto",
+        "//mediapipe/tasks/cc/vision/hand_landmarker/calculators:hand_landmarks_deduplication_calculator",
         "//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarker_graph_options_cc_proto",
         "//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarks_detector_graph_options_cc_proto",
     ],
diff --git a/mediapipe/tasks/cc/vision/hand_landmarker/calculators/BUILD b/mediapipe/tasks/cc/vision/hand_landmarker/calculators/BUILD
index dea81bae3..3b82153eb
100644 --- a/mediapipe/tasks/cc/vision/hand_landmarker/calculators/BUILD +++ b/mediapipe/tasks/cc/vision/hand_landmarker/calculators/BUILD @@ -15,7 +15,6 @@ load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library") package(default_visibility = [ - "//mediapipe/app/xeno:__subpackages__", "//mediapipe/tasks:internal", ]) @@ -46,4 +45,26 @@ cc_library( alwayslink = 1, ) -# TODO: Enable this test +cc_library( + name = "hand_landmarks_deduplication_calculator", + srcs = ["hand_landmarks_deduplication_calculator.cc"], + hdrs = ["hand_landmarks_deduplication_calculator.h"], + deps = [ + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework/api2:builder", + "//mediapipe/framework/api2:node", + "//mediapipe/framework/api2:port", + "//mediapipe/framework/formats:classification_cc_proto", + "//mediapipe/framework/formats:landmark_cc_proto", + "//mediapipe/framework/formats:rect_cc_proto", + "//mediapipe/tasks/cc/components/containers:landmarks_detection", + "//mediapipe/tasks/cc/vision/utils:landmarks_duplicates_finder", + "//mediapipe/tasks/cc/vision/utils:landmarks_utils", + "@com_google_absl//absl/algorithm:container", + "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/status:statusor", + "@com_google_absl//absl/types:optional", + ], + alwayslink = 1, +) diff --git a/mediapipe/tasks/cc/vision/hand_landmarker/calculators/hand_landmarks_deduplication_calculator.cc b/mediapipe/tasks/cc/vision/hand_landmarker/calculators/hand_landmarks_deduplication_calculator.cc new file mode 100644 index 000000000..8920ea0cb --- /dev/null +++ b/mediapipe/tasks/cc/vision/hand_landmarker/calculators/hand_landmarks_deduplication_calculator.cc @@ -0,0 +1,310 @@ +/* Copyright 2022 The MediaPipe Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "mediapipe/tasks/cc/vision/hand_landmarker/calculators/hand_landmarks_deduplication_calculator.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "absl/algorithm/container.h" +#include "absl/container/flat_hash_set.h" +#include "absl/memory/memory.h" +#include "absl/status/statusor.h" +#include "absl/types/optional.h" +#include "mediapipe/framework/api2/builder.h" +#include "mediapipe/framework/api2/node.h" +#include "mediapipe/framework/api2/port.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/formats/classification.pb.h" +#include "mediapipe/framework/formats/landmark.pb.h" +#include "mediapipe/framework/formats/rect.pb.h" +#include "mediapipe/tasks/cc/components/containers/landmarks_detection.h" +#include "mediapipe/tasks/cc/vision/utils/landmarks_duplicates_finder.h" +#include "mediapipe/tasks/cc/vision/utils/landmarks_utils.h" + +namespace mediapipe::api2 { +namespace { + +using ::mediapipe::api2::Input; +using ::mediapipe::api2::Output; +using ::mediapipe::api2::builder::Source; +using ::mediapipe::tasks::components::containers::Bound; +using ::mediapipe::tasks::vision::utils::CalculateIOU; +using ::mediapipe::tasks::vision::utils::DuplicatesFinder; + +float Distance(const NormalizedLandmark& lm_a, const NormalizedLandmark& lm_b, + int width, int height) { + return std::sqrt(std::pow((lm_a.x() - lm_b.x()) * width, 2) + + std::pow((lm_a.y() - lm_b.y()) * height, 2)); +} + +absl::StatusOr> 
Distances(const NormalizedLandmarkList& a, + const NormalizedLandmarkList& b, + int width, int height) { + const int num = a.landmark_size(); + RET_CHECK_EQ(b.landmark_size(), num); + std::vector distances; + distances.reserve(num); + for (int i = 0; i < num; ++i) { + const NormalizedLandmark& lm_a = a.landmark(i); + const NormalizedLandmark& lm_b = b.landmark(i); + distances.push_back(Distance(lm_a, lm_b, width, height)); + } + return distances; +} + +// Calculates a baseline distance of a hand that can be used as a relative +// measure when calculating hand to hand similarity. +// +// Calculated as maximum of distances: 0->5, 5->17, 17->0, where 0, 5, 17 key +// points are depicted below: +// +// /Middle/ +// | +// /Index/ | /Ring/ +// | | | /Pinky/ +// V V V | +// V +// [8] [12] [16] +// | | | [20] +// | | | | +// /Thumb/ | | | | +// | [7] [11] [15] [19] +// V | | | | +// | | | | +// [4] | | | | +// | [6] [10] [14] [18] +// | | | | | +// | | | | | +// [3] | | | | +// | [5]----[9]---[13]---[17] +// . | | +// \ . | +// \ / | +// [2] | +// \ | +// \ | +// \ | +// [1] . +// \ / +// \ / +// ._____[0]_____. +// +// ^ +// | +// /Wrist/ +absl::StatusOr HandBaselineDistance( + const NormalizedLandmarkList& landmarks, int width, int height) { + RET_CHECK_EQ(landmarks.landmark_size(), 21); // Num of hand landmarks. 
+ constexpr int kWrist = 0; + constexpr int kIndexFingerMcp = 5; + constexpr int kPinkyMcp = 17; + float distance = Distance(landmarks.landmark(kWrist), + landmarks.landmark(kIndexFingerMcp), width, height); + distance = std::max(distance, + Distance(landmarks.landmark(kIndexFingerMcp), + landmarks.landmark(kPinkyMcp), width, height)); + distance = + std::max(distance, Distance(landmarks.landmark(kPinkyMcp), + landmarks.landmark(kWrist), width, height)); + return distance; +} + +Bound CalculateBound(const NormalizedLandmarkList& list) { + constexpr float kMinInitialValue = std::numeric_limits::max(); + constexpr float kMaxInitialValue = std::numeric_limits::lowest(); + + // Compute min and max values on landmarks (they will form + // bounding box) + float bounding_box_left = kMinInitialValue; + float bounding_box_top = kMinInitialValue; + float bounding_box_right = kMaxInitialValue; + float bounding_box_bottom = kMaxInitialValue; + for (const auto& landmark : list.landmark()) { + bounding_box_left = std::min(bounding_box_left, landmark.x()); + bounding_box_top = std::min(bounding_box_top, landmark.y()); + bounding_box_right = std::max(bounding_box_right, landmark.x()); + bounding_box_bottom = std::max(bounding_box_bottom, landmark.y()); + } + + // Populate normalized non rotated face bounding box + return {.left = bounding_box_left, + .top = bounding_box_top, + .right = bounding_box_right, + .bottom = bounding_box_bottom}; +} + +// Uses IoU and distance of some corresponding hand landmarks to detect +// duplicate / similar hands. IoU, distance thresholds, number of landmarks to +// match are found experimentally. 
Evaluated: +// - manually comparing side by side, before and after deduplication applied +// - generating gesture dataset, and checking select frames in baseline and +// "deduplicated" dataset +// - by confirming gesture training is better with use of deduplication using +// selected thresholds +class HandDuplicatesFinder : public DuplicatesFinder { + public: + explicit HandDuplicatesFinder(bool start_from_the_end) + : start_from_the_end_(start_from_the_end) {} + + absl::StatusOr> FindDuplicates( + const std::vector& multi_landmarks, + int input_width, int input_height) override { + absl::flat_hash_set retained_indices; + absl::flat_hash_set suppressed_indices; + + const int num = multi_landmarks.size(); + std::vector baseline_distances; + baseline_distances.reserve(num); + std::vector bounds; + bounds.reserve(num); + for (const NormalizedLandmarkList& list : multi_landmarks) { + ASSIGN_OR_RETURN(const float baseline_distance, + HandBaselineDistance(list, input_width, input_height)); + baseline_distances.push_back(baseline_distance); + bounds.push_back(CalculateBound(list)); + } + + for (int index = 0; index < num; ++index) { + const int i = start_from_the_end_ ? 
num - index - 1 : index; + const float stable_distance_i = baseline_distances[i]; + bool suppressed = false; + for (int j : retained_indices) { + const float stable_distance_j = baseline_distances[j]; + + constexpr float kAllowedBaselineDistanceRatio = 0.2f; + const float distance_threshold = + std::max(stable_distance_i, stable_distance_j) * + kAllowedBaselineDistanceRatio; + + ASSIGN_OR_RETURN(const std::vector distances, + Distances(multi_landmarks[i], multi_landmarks[j], + input_width, input_height)); + const int num_matched_landmarks = absl::c_count_if( + distances, + [&](float distance) { return distance < distance_threshold; }); + + const float iou = CalculateIOU(bounds[i], bounds[j]); + + constexpr int kNumMatchedLandmarksToSuppressHand = 10; // out of 21 + constexpr float kMinIouThresholdToSuppressHand = 0.2f; + if (num_matched_landmarks >= kNumMatchedLandmarksToSuppressHand && + iou > kMinIouThresholdToSuppressHand) { + suppressed = true; + break; + } + } + + if (suppressed) { + suppressed_indices.insert(i); + } else { + retained_indices.insert(i); + } + } + return suppressed_indices; + } + + private: + const bool start_from_the_end_; +}; + +template +absl::StatusOr> +VerifyNumAndMaybeInitOutput(const InputPortT& port, CalculatorContext* cc, + int num_expected_size) { + absl::optional output; + if (port(cc).IsConnected() && !port(cc).IsEmpty()) { + RET_CHECK_EQ(port(cc).Get().size(), num_expected_size); + typename InputPortT::PayloadT result; + return {{result}}; + } + return {absl::nullopt}; +} +} // namespace + +std::unique_ptr CreateHandDuplicatesFinder( + bool start_from_the_end) { + return absl::make_unique(start_from_the_end); +} + +absl::Status HandLandmarksDeduplicationCalculator::Process( + mediapipe::CalculatorContext* cc) { + if (kInLandmarks(cc).IsEmpty()) return absl::OkStatus(); + if (kInSize(cc).IsEmpty()) return absl::OkStatus(); + + const std::vector& in_landmarks = *kInLandmarks(cc); + const std::pair& image_size = *kInSize(cc); + + 
std::unique_ptr duplicates_finder = + CreateHandDuplicatesFinder(/*start_from_the_end=*/false); + ASSIGN_OR_RETURN(absl::flat_hash_set indices_to_remove, + duplicates_finder->FindDuplicates( + in_landmarks, image_size.first, image_size.second)); + + if (indices_to_remove.empty()) { + kOutLandmarks(cc).Send(kInLandmarks(cc)); + kOutRois(cc).Send(kInRois(cc)); + kOutWorldLandmarks(cc).Send(kInWorldLandmarks(cc)); + kOutClassifications(cc).Send(kInClassifications(cc)); + } else { + std::vector out_landmarks; + const int num = in_landmarks.size(); + + ASSIGN_OR_RETURN(absl::optional> out_rois, + VerifyNumAndMaybeInitOutput(kInRois, cc, num)); + ASSIGN_OR_RETURN( + absl::optional> out_world_landmarks, + VerifyNumAndMaybeInitOutput(kInWorldLandmarks, cc, num)); + ASSIGN_OR_RETURN( + absl::optional> out_classifications, + VerifyNumAndMaybeInitOutput(kInClassifications, cc, num)); + + for (int i = 0; i < num; ++i) { + if (indices_to_remove.find(i) != indices_to_remove.end()) continue; + + out_landmarks.push_back(in_landmarks[i]); + if (out_rois) { + out_rois->push_back(kInRois(cc).Get()[i]); + } + if (out_world_landmarks) { + out_world_landmarks->push_back(kInWorldLandmarks(cc).Get()[i]); + } + if (out_classifications) { + out_classifications->push_back(kInClassifications(cc).Get()[i]); + } + } + + if (!out_landmarks.empty()) { + kOutLandmarks(cc).Send(std::move(out_landmarks)); + } + if (out_rois && !out_rois->empty()) { + kOutRois(cc).Send(std::move(out_rois.value())); + } + if (out_world_landmarks && !out_world_landmarks->empty()) { + kOutWorldLandmarks(cc).Send(std::move(out_world_landmarks.value())); + } + if (out_classifications && !out_classifications->empty()) { + kOutClassifications(cc).Send(std::move(out_classifications.value())); + } + } + return absl::OkStatus(); +} +MEDIAPIPE_REGISTER_NODE(HandLandmarksDeduplicationCalculator); + +} // namespace mediapipe::api2 diff --git 
a/mediapipe/tasks/cc/vision/hand_landmarker/calculators/hand_landmarks_deduplication_calculator.h b/mediapipe/tasks/cc/vision/hand_landmarker/calculators/hand_landmarks_deduplication_calculator.h new file mode 100644 index 000000000..d7b435487 --- /dev/null +++ b/mediapipe/tasks/cc/vision/hand_landmarker/calculators/hand_landmarks_deduplication_calculator.h @@ -0,0 +1,97 @@ +/* Copyright 2022 The MediaPipe Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef MEDIAPIPE_TASKS_CC_VISION_HAND_LANDMARKER_CALCULATORS_HAND_LANDMARKS_DEDUPLICATION_CALCULATOR_H_ +#define MEDIAPIPE_TASKS_CC_VISION_HAND_LANDMARKER_CALCULATORS_HAND_LANDMARKS_DEDUPLICATION_CALCULATOR_H_ + +#include "mediapipe/framework/api2/builder.h" +#include "mediapipe/framework/api2/node.h" +#include "mediapipe/framework/api2/port.h" +#include "mediapipe/framework/formats/classification.pb.h" +#include "mediapipe/framework/formats/landmark.pb.h" +#include "mediapipe/framework/formats/rect.pb.h" +#include "mediapipe/tasks/cc/vision/utils/landmarks_duplicates_finder.h" + +namespace mediapipe::api2 { + +// Create a DuplicatesFinder dedicated for finding hand duplications. +std::unique_ptr +CreateHandDuplicatesFinder(bool start_from_the_end = false); + +// Filter duplicate hand landmarks by finding the overlapped hands. +// Inputs: +// MULTI_LANDMARKS - std::vector +// The hand landmarks to be filtered. 
+// MULTI_ROIS - std::vector +// The regions where each encloses the landmarks of a single hand. +// MULTI_WORLD_LANDMARKS - std::vector +// The hand landmarks to be filtered in world coordinates. +// MULTI_CLASSIFICATIONS - std::vector +// The handedness of hands. +// IMAGE_SIZE - std::pair +// The size of the image which the hand landmarks are detected on. +// +// Outputs: +// MULTI_LANDMARKS - std::vector +// The hand landmarks with duplication removed. +// MULTI_ROIS - std::vector +// The regions where each encloses the landmarks of a single hand with +// duplicate hands removed. +// MULTI_WORLD_LANDMARKS - std::vector +// The hand landmarks with duplication removed in world coordinates. +// MULTI_CLASSIFICATIONS - std::vector +// The handedness of hands with duplicate hands removed. +// +// Example: +// node { +// calculator: "HandLandmarksDeduplicationCalculator" +// input_stream: "MULTI_LANDMARKS:landmarks_in" +// input_stream: "MULTI_ROIS:rois_in" +// input_stream: "MULTI_WORLD_LANDMARKS:world_landmarks_in" +// input_stream: "MULTI_CLASSIFICATIONS:handedness_in" +// input_stream: "IMAGE_SIZE:image_size" +// output_stream: "MULTI_LANDMARKS:landmarks_out" +// output_stream: "MULTI_ROIS:rois_out" +// output_stream: "MULTI_WORLD_LANDMARKS:world_landmarks_out" +// output_stream: "MULTI_CLASSIFICATIONS:handedness_out" +// } +class HandLandmarksDeduplicationCalculator : public Node { + public: + constexpr static Input> + kInLandmarks{"MULTI_LANDMARKS"}; + constexpr static Input>::Optional + kInRois{"MULTI_ROIS"}; + constexpr static Input>::Optional + kInWorldLandmarks{"MULTI_WORLD_LANDMARKS"}; + constexpr static Input>::Optional + kInClassifications{"MULTI_CLASSIFICATIONS"}; + constexpr static Input> kInSize{"IMAGE_SIZE"}; + + constexpr static Output> + kOutLandmarks{"MULTI_LANDMARKS"}; + constexpr static Output>::Optional + kOutRois{"MULTI_ROIS"}; + constexpr static Output>::Optional + kOutWorldLandmarks{"MULTI_WORLD_LANDMARKS"}; + constexpr static 
Output>::Optional + kOutClassifications{"MULTI_CLASSIFICATIONS"}; + MEDIAPIPE_NODE_CONTRACT(kInLandmarks, kInRois, kInWorldLandmarks, + kInClassifications, kInSize, kOutLandmarks, kOutRois, + kOutWorldLandmarks, kOutClassifications); + absl::Status Process(mediapipe::CalculatorContext* cc) override; +}; + +} // namespace mediapipe::api2 + +#endif // MEDIAPIPE_TASKS_CC_VISION_HAND_LANDMARKER_CALCULATORS_HAND_LANDMARKS_DEDUPLICATION_CALCULATOR_H_ diff --git a/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_graph.cc b/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_graph.cc index 949c06520..ab5a453c5 100644 --- a/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_graph.cc +++ b/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_graph.cc @@ -247,11 +247,37 @@ class HandLandmarkerGraph : public core::ModelTaskGraph { image_in >> hand_landmarks_detector_graph.In("IMAGE"); clipped_hand_rects >> hand_landmarks_detector_graph.In("HAND_RECT"); + auto landmarks = hand_landmarks_detector_graph.Out(kLandmarksTag); + auto world_landmarks = + hand_landmarks_detector_graph.Out(kWorldLandmarksTag); auto hand_rects_for_next_frame = - hand_landmarks_detector_graph[Output>( - kHandRectNextFrameTag)]; + hand_landmarks_detector_graph.Out(kHandRectNextFrameTag); + auto handedness = hand_landmarks_detector_graph.Out(kHandednessTag); + + auto& image_property = graph.AddNode("ImagePropertiesCalculator"); + image_in >> image_property.In("IMAGE"); + auto image_size = image_property.Out("SIZE"); + + auto& deduplicate = graph.AddNode("HandLandmarksDeduplicationCalculator"); + landmarks >> deduplicate.In("MULTI_LANDMARKS"); + world_landmarks >> deduplicate.In("MULTI_WORLD_LANDMARKS"); + hand_rects_for_next_frame >> deduplicate.In("MULTI_ROIS"); + handedness >> deduplicate.In("MULTI_CLASSIFICATIONS"); + image_size >> deduplicate.In("IMAGE_SIZE"); + + auto filtered_landmarks = + deduplicate[Output>( + "MULTI_LANDMARKS")]; + auto filtered_world_landmarks = + 
deduplicate[Output>("MULTI_WORLD_LANDMARKS")]; + auto filtered_hand_rects_for_next_frame = + deduplicate[Output>("MULTI_ROIS")]; + auto filtered_handedness = + deduplicate[Output>( + "MULTI_CLASSIFICATIONS")]; + // Back edge. - hand_rects_for_next_frame >> previous_loopback.In("LOOP"); + filtered_hand_rects_for_next_frame >> previous_loopback.In("LOOP"); // TODO: Replace PassThroughCalculator with a calculator that // converts the pixel data to be stored on the target storage (CPU vs GPU). @@ -259,14 +285,10 @@ class HandLandmarkerGraph : public core::ModelTaskGraph { image_in >> pass_through.In(""); return {{ - /* landmark_lists= */ hand_landmarks_detector_graph - [Output>(kLandmarksTag)], - /* world_landmark_lists= */ - hand_landmarks_detector_graph[Output>( - kWorldLandmarksTag)], - /* hand_rects_next_frame= */ hand_rects_for_next_frame, - hand_landmarks_detector_graph[Output>( - kHandednessTag)], + /* landmark_lists= */ filtered_landmarks, + /* world_landmark_lists= */ filtered_world_landmarks, + /* hand_rects_next_frame= */ filtered_hand_rects_for_next_frame, + /* handedness= */ filtered_handedness, /* palm_rects= */ hand_detector[Output>(kPalmRectsTag)], /* palm_detections */ diff --git a/mediapipe/tasks/cc/vision/utils/BUILD b/mediapipe/tasks/cc/vision/utils/BUILD index 3e5cfd2e9..c796798df 100644 --- a/mediapipe/tasks/cc/vision/utils/BUILD +++ b/mediapipe/tasks/cc/vision/utils/BUILD @@ -79,3 +79,30 @@ cc_library( "@stblib//:stb_image", ], ) + +cc_library( + name = "landmarks_duplicates_finder", + hdrs = ["landmarks_duplicates_finder.h"], + deps = [ + "//mediapipe/framework/formats:landmark_cc_proto", + "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/status:statusor", + ], +) + +cc_library( + name = "landmarks_utils", + srcs = ["landmarks_utils.cc"], + hdrs = ["landmarks_utils.h"], + deps = ["//mediapipe/tasks/cc/components/containers:landmarks_detection"], +) + +cc_test( + name = "landmarks_utils_test", + srcs = 
["landmarks_utils_test.cc"], + deps = [ + ":landmarks_utils", + "//mediapipe/framework/port:gtest_main", + "//mediapipe/tasks/cc/components/containers:landmarks_detection", + ], +) diff --git a/mediapipe/tasks/cc/vision/utils/landmarks_duplicates_finder.h b/mediapipe/tasks/cc/vision/utils/landmarks_duplicates_finder.h new file mode 100644 index 000000000..e1632e6f0 --- /dev/null +++ b/mediapipe/tasks/cc/vision/utils/landmarks_duplicates_finder.h @@ -0,0 +1,40 @@ +/* Copyright 2022 The MediaPipe Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef MEDIAPIPE_TASKS_CC_VISION_UTILS_LANDMARKS_DUPLICATES_FINDER_H_ +#define MEDIAPIPE_TASKS_CC_VISION_UTILS_LANDMARKS_DUPLICATES_FINDER_H_ + +#include + +#include "absl/container/flat_hash_set.h" +#include "absl/status/statusor.h" +#include "mediapipe/framework/formats/landmark.pb.h" + +namespace mediapipe::tasks::vision::utils { + +class DuplicatesFinder { + public: + virtual ~DuplicatesFinder() = default; + // Returns indices of landmark lists to remove to make @multi_landmarks + // contain different enough (depending on the implementation) landmark lists + // only. 
+ virtual absl::StatusOr> FindDuplicates( + const std::vector& multi_landmarks, + int input_width, int input_height) = 0; +}; + +} // namespace mediapipe::tasks::vision::utils + +#endif // MEDIAPIPE_TASKS_CC_VISION_UTILS_LANDMARKS_DUPLICATES_FINDER_H_ diff --git a/mediapipe/tasks/cc/vision/utils/landmarks_utils.cc b/mediapipe/tasks/cc/vision/utils/landmarks_utils.cc new file mode 100644 index 000000000..5ec898f15 --- /dev/null +++ b/mediapipe/tasks/cc/vision/utils/landmarks_utils.cc @@ -0,0 +1,48 @@ +/* Copyright 2022 The MediaPipe Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/
+
+#include "mediapipe/tasks/cc/vision/utils/landmarks_utils.h"
+
+#include
+#include
+
+namespace mediapipe::tasks::vision::utils {
+
+using ::mediapipe::tasks::components::containers::Bound;
+// Signed area of `bound`: (right - left) * (bottom - top).
+float CalculateArea(const Bound& bound) {
+  return (bound.right - bound.left) * (bound.bottom - bound.top);
+}
+// Overlap area of `a` and `b`; 0 when the bounds do not overlap.
+float CalculateIntersectionArea(const Bound& a, const Bound& b) {
+  const float intersection_left = std::max(a.left, b.left);
+  const float intersection_top = std::max(a.top, b.top);
+  const float intersection_right = std::min(a.right, b.right);
+  const float intersection_bottom = std::min(a.bottom, b.bottom);
+  // Clamp extents at 0 for disjoint bounds; 0.0f keeps std::max args float.
+  return std::max(intersection_bottom - intersection_top, 0.0f) *
+         std::max(intersection_right - intersection_left, 0.0f);
+}
+// Intersection over union of `a` and `b`; 0 if either area is non-positive.
+float CalculateIOU(const Bound& a, const Bound& b) {
+  const float area_a = CalculateArea(a);
+  const float area_b = CalculateArea(b);
+  if (area_a <= 0 || area_b <= 0) return 0.0f;  // Degenerate bound.
+
+  const float intersection_area = CalculateIntersectionArea(a, b);
+  return intersection_area / (area_a + area_b - intersection_area);
+}
+
+}  // namespace mediapipe::tasks::vision::utils
diff --git a/mediapipe/tasks/cc/vision/utils/landmarks_utils.h b/mediapipe/tasks/cc/vision/utils/landmarks_utils.h
new file mode 100644
index 000000000..b42eae0b6
--- /dev/null
+++ b/mediapipe/tasks/cc/vision/utils/landmarks_utils.h
@@ -0,0 +1,41 @@
+/* Copyright 2022 The MediaPipe Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef MEDIAPIPE_TASKS_CC_VISION_UTILS_LANDMARKS_UTILS_H_
+#define MEDIAPIPE_TASKS_CC_VISION_UTILS_LANDMARKS_UTILS_H_
+
+#include
+#include
+#include
+#include
+#include
+
+#include "mediapipe/tasks/cc/components/containers/landmarks_detection.h"
+
+namespace mediapipe::tasks::vision::utils {
+
+// Calculates IoU of two bounds; 0 if either bound has non-positive area.
+float CalculateIOU(const components::containers::Bound& a,
+                   const components::containers::Bound& b);
+
+// Calculates the area of a bound: (right - left) * (bottom - top).
+float CalculateArea(const components::containers::Bound& bound);
+
+// Calculates the intersection area of two bounds; 0 if they do not overlap.
+float CalculateIntersectionArea(const components::containers::Bound& a,
+                                const components::containers::Bound& b);
+}  // namespace mediapipe::tasks::vision::utils
+
+#endif  // MEDIAPIPE_TASKS_CC_VISION_UTILS_LANDMARKS_UTILS_H_
diff --git a/mediapipe/tasks/cc/vision/utils/landmarks_utils_test.cc b/mediapipe/tasks/cc/vision/utils/landmarks_utils_test.cc
new file mode 100644
index 000000000..c30a5225b
--- /dev/null
+++ b/mediapipe/tasks/cc/vision/utils/landmarks_utils_test.cc
@@ -0,0 +1,41 @@
+/* Copyright 2022 The MediaPipe Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "mediapipe/tasks/cc/vision/utils/landmarks_utils.h"
+
+#include "mediapipe/framework/port/gmock.h"
+#include "mediapipe/framework/port/gtest.h"
+
+namespace mediapipe::tasks::vision::utils {
+namespace {
+
+TEST(LandmarkUtilsTest, CalculateIOU) {
+  // Bounds are {left, top, right, bottom}. Disjoint in both x and y.
+  EXPECT_FLOAT_EQ(0.0f, CalculateIOU({0, 0, 1, 1}, {2, 2, 3, 3}));
+  // No x intersection
+  EXPECT_FLOAT_EQ(0.0f, CalculateIOU({0, 0, 1, 1}, {2, 0, 3, 1}));
+  // No y intersection
+  EXPECT_FLOAT_EQ(0.0f, CalculateIOU({0, 0, 1, 1}, {0, 2, 1, 3}));
+  // Full intersection
+  EXPECT_FLOAT_EQ(1.0f, CalculateIOU({0, 0, 2, 2}, {0, 0, 2, 2}));
+  // Union is 4, intersection is 1 (overlap along x)
+  EXPECT_FLOAT_EQ(0.25f, CalculateIOU({0, 0, 3, 1}, {2, 0, 4, 1}));
+  // Same, with the overlap along y
+  EXPECT_FLOAT_EQ(0.25f, CalculateIOU({0, 0, 1, 3}, {0, 2, 1, 4}));
+
+  // A zero-area bound short-circuits to 0 instead of dividing
+  EXPECT_FLOAT_EQ(0.0f, CalculateIOU({0, 0, 0, 0}, {0, 0, 1, 1}));
+}
+}  // namespace
+}  // namespace mediapipe::tasks::vision::utils