diff --git a/mediapipe/calculators/util/BUILD b/mediapipe/calculators/util/BUILD index b6f50b840..7f2d7305a 100644 --- a/mediapipe/calculators/util/BUILD +++ b/mediapipe/calculators/util/BUILD @@ -899,16 +899,32 @@ mediapipe_proto_library( cc_library( name = "landmarks_smoothing_calculator", srcs = ["landmarks_smoothing_calculator.cc"], + hdrs = ["landmarks_smoothing_calculator.h"], deps = [ ":landmarks_smoothing_calculator_cc_proto", + ":landmarks_smoothing_calculator_utils", "//mediapipe/framework:calculator_framework", "//mediapipe/framework:timestamp", + "//mediapipe/framework/api2:node", + "//mediapipe/framework/formats:landmark_cc_proto", + "//mediapipe/framework/formats:rect_cc_proto", + "//mediapipe/framework/port:ret_check", + ], + alwayslink = 1, +) + +cc_library( + name = "landmarks_smoothing_calculator_utils", + srcs = ["landmarks_smoothing_calculator_utils.cc"], + hdrs = ["landmarks_smoothing_calculator_utils.h"], + deps = [ + ":landmarks_smoothing_calculator_cc_proto", + "//mediapipe/framework:calculator_context", "//mediapipe/framework/formats:landmark_cc_proto", "//mediapipe/framework/formats:rect_cc_proto", "//mediapipe/framework/port:ret_check", "//mediapipe/util/filtering:one_euro_filter", "//mediapipe/util/filtering:relative_velocity_filter", - "@com_google_absl//absl/algorithm:container", ], alwayslink = 1, ) diff --git a/mediapipe/calculators/util/landmarks_smoothing_calculator.cc b/mediapipe/calculators/util/landmarks_smoothing_calculator.cc index 7a92cfb7e..bc7504485 100644 --- a/mediapipe/calculators/util/landmarks_smoothing_calculator.cc +++ b/mediapipe/calculators/util/landmarks_smoothing_calculator.cc @@ -1,4 +1,4 @@ -// Copyright 2020 The MediaPipe Authors. +// Copyright 2023 The MediaPipe Authors. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -12,471 +12,105 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "mediapipe/calculators/util/landmarks_smoothing_calculator.h" + #include -#include "absl/algorithm/container.h" #include "mediapipe/calculators/util/landmarks_smoothing_calculator.pb.h" +#include "mediapipe/calculators/util/landmarks_smoothing_calculator_utils.h" +#include "mediapipe/framework/api2/node.h" #include "mediapipe/framework/calculator_framework.h" #include "mediapipe/framework/formats/landmark.pb.h" #include "mediapipe/framework/formats/rect.pb.h" -#include "mediapipe/framework/port/ret_check.h" #include "mediapipe/framework/timestamp.h" -#include "mediapipe/util/filtering/one_euro_filter.h" -#include "mediapipe/util/filtering/relative_velocity_filter.h" namespace mediapipe { +namespace api2 { namespace { -constexpr char kNormalizedLandmarksTag[] = "NORM_LANDMARKS"; -constexpr char kLandmarksTag[] = "LANDMARKS"; -constexpr char kImageSizeTag[] = "IMAGE_SIZE"; -constexpr char kObjectScaleRoiTag[] = "OBJECT_SCALE_ROI"; -constexpr char kNormalizedFilteredLandmarksTag[] = "NORM_FILTERED_LANDMARKS"; -constexpr char kFilteredLandmarksTag[] = "FILTERED_LANDMARKS"; - using ::mediapipe::NormalizedRect; -using mediapipe::OneEuroFilter; using ::mediapipe::Rect; -using mediapipe::RelativeVelocityFilter; - -void NormalizedLandmarksToLandmarks( - const NormalizedLandmarkList& norm_landmarks, const int image_width, - const int image_height, LandmarkList* landmarks) { - for (int i = 0; i < norm_landmarks.landmark_size(); ++i) { - const auto& norm_landmark = norm_landmarks.landmark(i); - - auto* landmark = landmarks->add_landmark(); - landmark->set_x(norm_landmark.x() * image_width); - landmark->set_y(norm_landmark.y() * image_height); - // Scale Z the same way as X (using image width). 
- landmark->set_z(norm_landmark.z() * image_width); - landmark->set_visibility(norm_landmark.visibility()); - landmark->set_presence(norm_landmark.presence()); - } -} - -void LandmarksToNormalizedLandmarks(const LandmarkList& landmarks, - const int image_width, - const int image_height, - NormalizedLandmarkList* norm_landmarks) { - for (int i = 0; i < landmarks.landmark_size(); ++i) { - const auto& landmark = landmarks.landmark(i); - - auto* norm_landmark = norm_landmarks->add_landmark(); - norm_landmark->set_x(landmark.x() / image_width); - norm_landmark->set_y(landmark.y() / image_height); - // Scale Z the same way as X (using image width). - norm_landmark->set_z(landmark.z() / image_width); - norm_landmark->set_visibility(landmark.visibility()); - norm_landmark->set_presence(landmark.presence()); - } -} - -// Estimate object scale to use its inverse value as velocity scale for -// RelativeVelocityFilter. If value will be too small (less than -// `options_.min_allowed_object_scale`) smoothing will be disabled and -// landmarks will be returned as is. -// Object scale is calculated as average between bounding box width and height -// with sides parallel to axis. 
-float GetObjectScale(const LandmarkList& landmarks) { - const auto& lm_minmax_x = absl::c_minmax_element( - landmarks.landmark(), - [](const auto& a, const auto& b) { return a.x() < b.x(); }); - const float x_min = lm_minmax_x.first->x(); - const float x_max = lm_minmax_x.second->x(); - - const auto& lm_minmax_y = absl::c_minmax_element( - landmarks.landmark(), - [](const auto& a, const auto& b) { return a.y() < b.y(); }); - const float y_min = lm_minmax_y.first->y(); - const float y_max = lm_minmax_y.second->y(); - - const float object_width = x_max - x_min; - const float object_height = y_max - y_min; - - return (object_width + object_height) / 2.0f; -} - -float GetObjectScale(const NormalizedRect& roi, const int image_width, - const int image_height) { - const float object_width = roi.width() * image_width; - const float object_height = roi.height() * image_height; - - return (object_width + object_height) / 2.0f; -} - -float GetObjectScale(const Rect& roi) { - return (roi.width() + roi.height()) / 2.0f; -} - -// Abstract class for various landmarks filters. -class LandmarksFilter { - public: - virtual ~LandmarksFilter() = default; - - virtual absl::Status Reset() { return absl::OkStatus(); } - - virtual absl::Status Apply(const LandmarkList& in_landmarks, - const absl::Duration& timestamp, - const absl::optional object_scale_opt, - LandmarkList* out_landmarks) = 0; -}; - -// Returns landmarks as is without smoothing. -class NoFilter : public LandmarksFilter { - public: - absl::Status Apply(const LandmarkList& in_landmarks, - const absl::Duration& timestamp, - const absl::optional object_scale_opt, - LandmarkList* out_landmarks) override { - *out_landmarks = in_landmarks; - return absl::OkStatus(); - } -}; - -// Please check RelativeVelocityFilter documentation for details. 
-class VelocityFilter : public LandmarksFilter { - public: - VelocityFilter(int window_size, float velocity_scale, - float min_allowed_object_scale, bool disable_value_scaling) - : window_size_(window_size), - velocity_scale_(velocity_scale), - min_allowed_object_scale_(min_allowed_object_scale), - disable_value_scaling_(disable_value_scaling) {} - - absl::Status Reset() override { - x_filters_.clear(); - y_filters_.clear(); - z_filters_.clear(); - return absl::OkStatus(); - } - - absl::Status Apply(const LandmarkList& in_landmarks, - const absl::Duration& timestamp, - const absl::optional object_scale_opt, - LandmarkList* out_landmarks) override { - // Get value scale as inverse value of the object scale. - // If value is too small smoothing will be disabled and landmarks will be - // returned as is. - float value_scale = 1.0f; - if (!disable_value_scaling_) { - const float object_scale = - object_scale_opt ? *object_scale_opt : GetObjectScale(in_landmarks); - if (object_scale < min_allowed_object_scale_) { - *out_landmarks = in_landmarks; - return absl::OkStatus(); - } - value_scale = 1.0f / object_scale; - } - - // Initialize filters once. - MP_RETURN_IF_ERROR(InitializeFiltersIfEmpty(in_landmarks.landmark_size())); - - // Filter landmarks. Every axis of every landmark is filtered separately. - for (int i = 0; i < in_landmarks.landmark_size(); ++i) { - const auto& in_landmark = in_landmarks.landmark(i); - - auto* out_landmark = out_landmarks->add_landmark(); - *out_landmark = in_landmark; - out_landmark->set_x( - x_filters_[i].Apply(timestamp, value_scale, in_landmark.x())); - out_landmark->set_y( - y_filters_[i].Apply(timestamp, value_scale, in_landmark.y())); - out_landmark->set_z( - z_filters_[i].Apply(timestamp, value_scale, in_landmark.z())); - } - - return absl::OkStatus(); - } - - private: - // Initializes filters for the first time or after Reset. If initialized then - // check the size. 
- absl::Status InitializeFiltersIfEmpty(const int n_landmarks) { - if (!x_filters_.empty()) { - RET_CHECK_EQ(x_filters_.size(), n_landmarks); - RET_CHECK_EQ(y_filters_.size(), n_landmarks); - RET_CHECK_EQ(z_filters_.size(), n_landmarks); - return absl::OkStatus(); - } - - x_filters_.resize(n_landmarks, - RelativeVelocityFilter(window_size_, velocity_scale_)); - y_filters_.resize(n_landmarks, - RelativeVelocityFilter(window_size_, velocity_scale_)); - z_filters_.resize(n_landmarks, - RelativeVelocityFilter(window_size_, velocity_scale_)); - - return absl::OkStatus(); - } - - int window_size_; - float velocity_scale_; - float min_allowed_object_scale_; - bool disable_value_scaling_; - - std::vector x_filters_; - std::vector y_filters_; - std::vector z_filters_; -}; - -// Please check OneEuroFilter documentation for details. -class OneEuroFilterImpl : public LandmarksFilter { - public: - OneEuroFilterImpl(double frequency, double min_cutoff, double beta, - double derivate_cutoff, float min_allowed_object_scale, - bool disable_value_scaling) - : frequency_(frequency), - min_cutoff_(min_cutoff), - beta_(beta), - derivate_cutoff_(derivate_cutoff), - min_allowed_object_scale_(min_allowed_object_scale), - disable_value_scaling_(disable_value_scaling) {} - - absl::Status Reset() override { - x_filters_.clear(); - y_filters_.clear(); - z_filters_.clear(); - return absl::OkStatus(); - } - - absl::Status Apply(const LandmarkList& in_landmarks, - const absl::Duration& timestamp, - const absl::optional object_scale_opt, - LandmarkList* out_landmarks) override { - // Initialize filters once. - MP_RETURN_IF_ERROR(InitializeFiltersIfEmpty(in_landmarks.landmark_size())); - - // Get value scale as inverse value of the object scale. - // If value is too small smoothing will be disabled and landmarks will be - // returned as is. - float value_scale = 1.0f; - if (!disable_value_scaling_) { - const float object_scale = - object_scale_opt ? 
*object_scale_opt : GetObjectScale(in_landmarks); - if (object_scale < min_allowed_object_scale_) { - *out_landmarks = in_landmarks; - return absl::OkStatus(); - } - value_scale = 1.0f / object_scale; - } - - // Filter landmarks. Every axis of every landmark is filtered separately. - for (int i = 0; i < in_landmarks.landmark_size(); ++i) { - const auto& in_landmark = in_landmarks.landmark(i); - - auto* out_landmark = out_landmarks->add_landmark(); - *out_landmark = in_landmark; - out_landmark->set_x( - x_filters_[i].Apply(timestamp, value_scale, in_landmark.x())); - out_landmark->set_y( - y_filters_[i].Apply(timestamp, value_scale, in_landmark.y())); - out_landmark->set_z( - z_filters_[i].Apply(timestamp, value_scale, in_landmark.z())); - } - - return absl::OkStatus(); - } - - private: - // Initializes filters for the first time or after Reset. If initialized then - // check the size. - absl::Status InitializeFiltersIfEmpty(const int n_landmarks) { - if (!x_filters_.empty()) { - RET_CHECK_EQ(x_filters_.size(), n_landmarks); - RET_CHECK_EQ(y_filters_.size(), n_landmarks); - RET_CHECK_EQ(z_filters_.size(), n_landmarks); - return absl::OkStatus(); - } - - for (int i = 0; i < n_landmarks; ++i) { - x_filters_.push_back( - OneEuroFilter(frequency_, min_cutoff_, beta_, derivate_cutoff_)); - y_filters_.push_back( - OneEuroFilter(frequency_, min_cutoff_, beta_, derivate_cutoff_)); - z_filters_.push_back( - OneEuroFilter(frequency_, min_cutoff_, beta_, derivate_cutoff_)); - } - - return absl::OkStatus(); - } - - double frequency_; - double min_cutoff_; - double beta_; - double derivate_cutoff_; - double min_allowed_object_scale_; - bool disable_value_scaling_; - - std::vector x_filters_; - std::vector y_filters_; - std::vector z_filters_; -}; +using ::mediapipe::landmarks_smoothing::GetObjectScale; +using ::mediapipe::landmarks_smoothing::InitializeLandmarksFilter; +using ::mediapipe::landmarks_smoothing::LandmarksFilter; +using 
::mediapipe::landmarks_smoothing::LandmarksToNormalizedLandmarks; +using ::mediapipe::landmarks_smoothing::NormalizedLandmarksToLandmarks; } // namespace -// A calculator to smooth landmarks over time. -// -// Inputs: -// NORM_LANDMARKS: A NormalizedLandmarkList of landmarks you want to smooth. -// IMAGE_SIZE: A std::pair represention of image width and height. -// Required to perform all computations in absolute coordinates to avoid any -// influence of normalized values. -// OBJECT_SCALE_ROI (optional): A NormRect or Rect (depending on the format of -// input landmarks) used to determine the object scale for some of the -// filters. If not provided - object scale will be calculated from -// landmarks. -// -// Outputs: -// NORM_FILTERED_LANDMARKS: A NormalizedLandmarkList of smoothed landmarks. -// -// Example config: -// node { -// calculator: "LandmarksSmoothingCalculator" -// input_stream: "NORM_LANDMARKS:pose_landmarks" -// input_stream: "IMAGE_SIZE:image_size" -// input_stream: "OBJECT_SCALE_ROI:roi" -// output_stream: "NORM_FILTERED_LANDMARKS:pose_landmarks_filtered" -// options: { -// [mediapipe.LandmarksSmoothingCalculatorOptions.ext] { -// velocity_filter: { -// window_size: 5 -// velocity_scale: 10.0 -// } -// } -// } -// } -// -class LandmarksSmoothingCalculator : public CalculatorBase { +class LandmarksSmoothingCalculatorImpl + : public NodeImpl { public: - static absl::Status GetContract(CalculatorContract* cc); - absl::Status Open(CalculatorContext* cc) override; - absl::Status Process(CalculatorContext* cc) override; + absl::Status Open(CalculatorContext* cc) override { + ASSIGN_OR_RETURN(landmarks_filter_, + InitializeLandmarksFilter( + cc->Options())); + return absl::OkStatus(); + } + + absl::Status Process(CalculatorContext* cc) override { + // Check that landmarks are not empty and reset the filter if so. + // Don't emit an empty packet for this timestamp. 
+ if ((kInNormLandmarks(cc).IsConnected() && + kInNormLandmarks(cc).IsEmpty()) || + (kInLandmarks(cc).IsConnected() && kInLandmarks(cc).IsEmpty())) { + MP_RETURN_IF_ERROR(landmarks_filter_->Reset()); + return absl::OkStatus(); + } + + const auto& timestamp = + absl::Microseconds(cc->InputTimestamp().Microseconds()); + + if (kInNormLandmarks(cc).IsConnected()) { + const auto& in_norm_landmarks = kInNormLandmarks(cc).Get(); + + int image_width; + int image_height; + std::tie(image_width, image_height) = kImageSize(cc).Get(); + + absl::optional object_scale; + if (kObjectScaleRoi(cc).IsConnected() && !kObjectScaleRoi(cc).IsEmpty()) { + auto& roi = kObjectScaleRoi(cc).Get(); + object_scale = GetObjectScale(roi, image_width, image_height); + } + + auto in_landmarks = absl::make_unique(); + NormalizedLandmarksToLandmarks(in_norm_landmarks, image_width, + image_height, *in_landmarks.get()); + + auto out_landmarks = absl::make_unique(); + MP_RETURN_IF_ERROR(landmarks_filter_->Apply( + *in_landmarks, timestamp, object_scale, *out_landmarks)); + + auto out_norm_landmarks = absl::make_unique(); + LandmarksToNormalizedLandmarks(*out_landmarks, image_width, image_height, + *out_norm_landmarks.get()); + + kOutNormLandmarks(cc).Send(std::move(out_norm_landmarks)); + } else { + const auto& in_landmarks = kInLandmarks(cc).Get(); + + absl::optional object_scale; + if (kObjectScaleRoi(cc).IsConnected() && !kObjectScaleRoi(cc).IsEmpty()) { + auto& roi = kObjectScaleRoi(cc).Get(); + object_scale = GetObjectScale(roi); + } + + auto out_landmarks = absl::make_unique(); + MP_RETURN_IF_ERROR(landmarks_filter_->Apply( + in_landmarks, timestamp, object_scale, *out_landmarks)); + + kOutLandmarks(cc).Send(std::move(out_landmarks)); + } + + return absl::OkStatus(); + } private: std::unique_ptr landmarks_filter_; }; -REGISTER_CALCULATOR(LandmarksSmoothingCalculator); - -absl::Status LandmarksSmoothingCalculator::GetContract(CalculatorContract* cc) { - if 
(cc->Inputs().HasTag(kNormalizedLandmarksTag)) { - cc->Inputs().Tag(kNormalizedLandmarksTag).Set(); - cc->Inputs().Tag(kImageSizeTag).Set>(); - cc->Outputs() - .Tag(kNormalizedFilteredLandmarksTag) - .Set(); - - if (cc->Inputs().HasTag(kObjectScaleRoiTag)) { - cc->Inputs().Tag(kObjectScaleRoiTag).Set(); - } - } else { - cc->Inputs().Tag(kLandmarksTag).Set(); - cc->Outputs().Tag(kFilteredLandmarksTag).Set(); - - if (cc->Inputs().HasTag(kObjectScaleRoiTag)) { - cc->Inputs().Tag(kObjectScaleRoiTag).Set(); - } - } - - return absl::OkStatus(); -} - -absl::Status LandmarksSmoothingCalculator::Open(CalculatorContext* cc) { - cc->SetOffset(TimestampDiff(0)); - - // Pick landmarks filter. - const auto& options = cc->Options(); - if (options.has_no_filter()) { - landmarks_filter_ = absl::make_unique(); - } else if (options.has_velocity_filter()) { - landmarks_filter_ = absl::make_unique( - options.velocity_filter().window_size(), - options.velocity_filter().velocity_scale(), - options.velocity_filter().min_allowed_object_scale(), - options.velocity_filter().disable_value_scaling()); - } else if (options.has_one_euro_filter()) { - landmarks_filter_ = absl::make_unique( - options.one_euro_filter().frequency(), - options.one_euro_filter().min_cutoff(), - options.one_euro_filter().beta(), - options.one_euro_filter().derivate_cutoff(), - options.one_euro_filter().min_allowed_object_scale(), - options.one_euro_filter().disable_value_scaling()); - } else { - RET_CHECK_FAIL() - << "Landmarks filter is either not specified or not supported"; - } - - return absl::OkStatus(); -} - -absl::Status LandmarksSmoothingCalculator::Process(CalculatorContext* cc) { - // Check that landmarks are not empty and reset the filter if so. - // Don't emit an empty packet for this timestamp. 
- if ((cc->Inputs().HasTag(kNormalizedLandmarksTag) && - cc->Inputs().Tag(kNormalizedLandmarksTag).IsEmpty()) || - (cc->Inputs().HasTag(kLandmarksTag) && - cc->Inputs().Tag(kLandmarksTag).IsEmpty())) { - MP_RETURN_IF_ERROR(landmarks_filter_->Reset()); - return absl::OkStatus(); - } - - const auto& timestamp = - absl::Microseconds(cc->InputTimestamp().Microseconds()); - - if (cc->Inputs().HasTag(kNormalizedLandmarksTag)) { - const auto& in_norm_landmarks = - cc->Inputs().Tag(kNormalizedLandmarksTag).Get(); - - int image_width; - int image_height; - std::tie(image_width, image_height) = - cc->Inputs().Tag(kImageSizeTag).Get>(); - - absl::optional object_scale; - if (cc->Inputs().HasTag(kObjectScaleRoiTag) && - !cc->Inputs().Tag(kObjectScaleRoiTag).IsEmpty()) { - auto& roi = cc->Inputs().Tag(kObjectScaleRoiTag).Get(); - object_scale = GetObjectScale(roi, image_width, image_height); - } - - auto in_landmarks = absl::make_unique(); - NormalizedLandmarksToLandmarks(in_norm_landmarks, image_width, image_height, - in_landmarks.get()); - - auto out_landmarks = absl::make_unique(); - MP_RETURN_IF_ERROR(landmarks_filter_->Apply( - *in_landmarks, timestamp, object_scale, out_landmarks.get())); - - auto out_norm_landmarks = absl::make_unique(); - LandmarksToNormalizedLandmarks(*out_landmarks, image_width, image_height, - out_norm_landmarks.get()); - - cc->Outputs() - .Tag(kNormalizedFilteredLandmarksTag) - .Add(out_norm_landmarks.release(), cc->InputTimestamp()); - } else { - const auto& in_landmarks = - cc->Inputs().Tag(kLandmarksTag).Get(); - - absl::optional object_scale; - if (cc->Inputs().HasTag(kObjectScaleRoiTag) && - !cc->Inputs().Tag(kObjectScaleRoiTag).IsEmpty()) { - auto& roi = cc->Inputs().Tag(kObjectScaleRoiTag).Get(); - object_scale = GetObjectScale(roi); - } - - auto out_landmarks = absl::make_unique(); - MP_RETURN_IF_ERROR(landmarks_filter_->Apply( - in_landmarks, timestamp, object_scale, out_landmarks.get())); - - cc->Outputs() - .Tag(kFilteredLandmarksTag) - 
.Add(out_landmarks.release(), cc->InputTimestamp()); - } - - return absl::OkStatus(); -} +MEDIAPIPE_NODE_IMPLEMENTATION(LandmarksSmoothingCalculatorImpl); +} // namespace api2 } // namespace mediapipe diff --git a/mediapipe/calculators/util/landmarks_smoothing_calculator.h b/mediapipe/calculators/util/landmarks_smoothing_calculator.h new file mode 100644 index 000000000..a64286c15 --- /dev/null +++ b/mediapipe/calculators/util/landmarks_smoothing_calculator.h @@ -0,0 +1,106 @@ +// Copyright 2023 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MEDIAPIPE_CALCULATORS_UTIL_LANDMARKS_SMOOTHING_CALCULATOR_H_ +#define MEDIAPIPE_CALCULATORS_UTIL_LANDMARKS_SMOOTHING_CALCULATOR_H_ + +#include "mediapipe/framework/api2/node.h" +#include "mediapipe/framework/formats/landmark.pb.h" +#include "mediapipe/framework/formats/rect.pb.h" +#include "mediapipe/framework/port/ret_check.h" + +namespace mediapipe { +namespace api2 { + +// A calculator to smooth landmarks over time. +// +// Inputs: +// NORM_LANDMARKS (optional): A NormalizedLandmarkList of landmarks you want +// to smooth. +// LANDMARKS (optional): A LandmarkList of landmarks you want to smooth. +// IMAGE_SIZE (optional): A std::pair representation of image width +// and height. Required to perform all computations in absolute coordinates +// when smoothing NORM_LANDMARKS to avoid any influence of normalized +// values. 
+// OBJECT_SCALE_ROI (optional): A NormRect or Rect (depending on the format of +// input landmarks) used to determine the object scale for some of the +// filters. If not provided - object scale will be calculated from +// landmarks. +// +// Outputs: +// NORM_FILTERED_LANDMARKS (optional): A NormalizedLandmarkList of smoothed +// landmarks. +// FILTERED_LANDMARKS (optional): A LandmarkList of smoothed landmarks. +// +// Example config: +// node { +// calculator: "LandmarksSmoothingCalculator" +// input_stream: "NORM_LANDMARKS:landmarks" +// input_stream: "IMAGE_SIZE:image_size" +// input_stream: "OBJECT_SCALE_ROI:roi" +// output_stream: "NORM_FILTERED_LANDMARKS:landmarks_filtered" +// options: { +// [mediapipe.LandmarksSmoothingCalculatorOptions.ext] { +// velocity_filter: { +// window_size: 5 +// velocity_scale: 10.0 +// } +// } +// } +// } +// +class LandmarksSmoothingCalculator : public NodeIntf { + public: + static constexpr Input::Optional + kInNormLandmarks{"NORM_LANDMARKS"}; + static constexpr Input::Optional kInLandmarks{ + "LANDMARKS"}; + static constexpr Input>::Optional kImageSize{ + "IMAGE_SIZE"}; + static constexpr Input>::Optional kObjectScaleRoi{ + "OBJECT_SCALE_ROI"}; + static constexpr Output::Optional + kOutNormLandmarks{"NORM_FILTERED_LANDMARKS"}; + static constexpr Output::Optional kOutLandmarks{ + "FILTERED_LANDMARKS"}; + MEDIAPIPE_NODE_INTERFACE(LandmarksSmoothingCalculator, kInNormLandmarks, + kInLandmarks, kImageSize, kObjectScaleRoi, + kOutNormLandmarks, kOutLandmarks); + + static absl::Status UpdateContract(CalculatorContract* cc) { + RET_CHECK(kInNormLandmarks(cc).IsConnected() ^ + kInLandmarks(cc).IsConnected()) + << "One and only one of NORM_LANDMARKS and LANDMARKS input is allowed"; + + // TODO: Verify scale ROI is of the same type as landmarks + // that are being smoothed. 
+ + if (kInNormLandmarks(cc).IsConnected()) { + RET_CHECK(kImageSize(cc).IsConnected()); + RET_CHECK(kOutNormLandmarks(cc).IsConnected()); + RET_CHECK(!kOutLandmarks(cc).IsConnected()); + } else { + RET_CHECK(!kImageSize(cc).IsConnected()); + RET_CHECK(kOutLandmarks(cc).IsConnected()); + RET_CHECK(!kOutNormLandmarks(cc).IsConnected()); + } + + return absl::OkStatus(); + } +}; + +} // namespace api2 +} // namespace mediapipe + +#endif // MEDIAPIPE_CALCULATORS_UTIL_LANDMARKS_SMOOTHING_CALCULATOR_H_ diff --git a/mediapipe/calculators/util/landmarks_smoothing_calculator_utils.cc b/mediapipe/calculators/util/landmarks_smoothing_calculator_utils.cc new file mode 100644 index 000000000..59e773ed9 --- /dev/null +++ b/mediapipe/calculators/util/landmarks_smoothing_calculator_utils.cc @@ -0,0 +1,322 @@ +// Copyright 2023 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "mediapipe/calculators/util/landmarks_smoothing_calculator_utils.h" + +#include "mediapipe/calculators/util/landmarks_smoothing_calculator.pb.h" +#include "mediapipe/framework/formats/landmark.pb.h" +#include "mediapipe/framework/formats/rect.pb.h" +#include "mediapipe/framework/port/ret_check.h" +#include "mediapipe/util/filtering/one_euro_filter.h" +#include "mediapipe/util/filtering/relative_velocity_filter.h" + +namespace mediapipe { +namespace landmarks_smoothing { + +namespace { + +using ::mediapipe::NormalizedRect; +using ::mediapipe::OneEuroFilter; +using ::mediapipe::Rect; +using ::mediapipe::RelativeVelocityFilter; + +// Estimate object scale to use its inverse value as velocity scale for +// RelativeVelocityFilter. If value will be too small (less than +// `options_.min_allowed_object_scale`) smoothing will be disabled and +// landmarks will be returned as is. +// Object scale is calculated as average between bounding box width and height +// with sides parallel to axis. +float GetObjectScale(const LandmarkList& landmarks) { + const auto& lm_minmax_x = absl::c_minmax_element( + landmarks.landmark(), + [](const auto& a, const auto& b) { return a.x() < b.x(); }); + const float x_min = lm_minmax_x.first->x(); + const float x_max = lm_minmax_x.second->x(); + + const auto& lm_minmax_y = absl::c_minmax_element( + landmarks.landmark(), + [](const auto& a, const auto& b) { return a.y() < b.y(); }); + const float y_min = lm_minmax_y.first->y(); + const float y_max = lm_minmax_y.second->y(); + + const float object_width = x_max - x_min; + const float object_height = y_max - y_min; + + return (object_width + object_height) / 2.0f; +} + +// Returns landmarks as is without smoothing. 
+class NoFilter : public LandmarksFilter { + public: + absl::Status Apply(const LandmarkList& in_landmarks, + const absl::Duration& timestamp, + const absl::optional object_scale_opt, + LandmarkList& out_landmarks) override { + out_landmarks = in_landmarks; + return absl::OkStatus(); + } +}; + +// Please check RelativeVelocityFilter documentation for details. +class VelocityFilter : public LandmarksFilter { + public: + VelocityFilter(int window_size, float velocity_scale, + float min_allowed_object_scale, bool disable_value_scaling) + : window_size_(window_size), + velocity_scale_(velocity_scale), + min_allowed_object_scale_(min_allowed_object_scale), + disable_value_scaling_(disable_value_scaling) {} + + absl::Status Reset() override { + x_filters_.clear(); + y_filters_.clear(); + z_filters_.clear(); + return absl::OkStatus(); + } + + absl::Status Apply(const LandmarkList& in_landmarks, + const absl::Duration& timestamp, + const absl::optional object_scale_opt, + LandmarkList& out_landmarks) override { + // Get value scale as inverse value of the object scale. + // If value is too small smoothing will be disabled and landmarks will be + // returned as is. + float value_scale = 1.0f; + if (!disable_value_scaling_) { + const float object_scale = + object_scale_opt ? *object_scale_opt : GetObjectScale(in_landmarks); + if (object_scale < min_allowed_object_scale_) { + out_landmarks = in_landmarks; + return absl::OkStatus(); + } + value_scale = 1.0f / object_scale; + } + + // Initialize filters once. + MP_RETURN_IF_ERROR(InitializeFiltersIfEmpty(in_landmarks.landmark_size())); + + // Filter landmarks. Every axis of every landmark is filtered separately. 
+ for (int i = 0; i < in_landmarks.landmark_size(); ++i) { + const auto& in_landmark = in_landmarks.landmark(i); + + auto* out_landmark = out_landmarks.add_landmark(); + *out_landmark = in_landmark; + out_landmark->set_x( + x_filters_[i].Apply(timestamp, value_scale, in_landmark.x())); + out_landmark->set_y( + y_filters_[i].Apply(timestamp, value_scale, in_landmark.y())); + out_landmark->set_z( + z_filters_[i].Apply(timestamp, value_scale, in_landmark.z())); + } + + return absl::OkStatus(); + } + + private: + // Initializes filters for the first time or after Reset. If initialized then + // check the size. + absl::Status InitializeFiltersIfEmpty(const int n_landmarks) { + if (!x_filters_.empty()) { + RET_CHECK_EQ(x_filters_.size(), n_landmarks); + RET_CHECK_EQ(y_filters_.size(), n_landmarks); + RET_CHECK_EQ(z_filters_.size(), n_landmarks); + return absl::OkStatus(); + } + + x_filters_.resize(n_landmarks, + RelativeVelocityFilter(window_size_, velocity_scale_)); + y_filters_.resize(n_landmarks, + RelativeVelocityFilter(window_size_, velocity_scale_)); + z_filters_.resize(n_landmarks, + RelativeVelocityFilter(window_size_, velocity_scale_)); + + return absl::OkStatus(); + } + + int window_size_; + float velocity_scale_; + float min_allowed_object_scale_; + bool disable_value_scaling_; + + std::vector x_filters_; + std::vector y_filters_; + std::vector z_filters_; +}; + +// Please check OneEuroFilter documentation for details. 
+// Smooths every landmark coordinate with its own OneEuroFilter: one filter
+// per landmark for each of the x, y and z axes. Filters are created lazily on
+// the first Apply() call (or the first call after Reset()).
+class OneEuroFilterImpl : public LandmarksFilter {
+ public:
+  // The first four arguments are forwarded unchanged to every OneEuroFilter
+  // that gets created. `min_allowed_object_scale` and `disable_value_scaling`
+  // control the value-scale handling in Apply().
+  OneEuroFilterImpl(double frequency, double min_cutoff, double beta,
+                    double derivate_cutoff, float min_allowed_object_scale,
+                    bool disable_value_scaling)
+      : frequency_(frequency),
+        min_cutoff_(min_cutoff),
+        beta_(beta),
+        derivate_cutoff_(derivate_cutoff),
+        min_allowed_object_scale_(min_allowed_object_scale),
+        disable_value_scaling_(disable_value_scaling) {}
+
+  // Drops all per-landmark filters; the next Apply() re-creates them.
+  absl::Status Reset() override {
+    x_filters_.clear();
+    y_filters_.clear();
+    z_filters_.clear();
+    return absl::OkStatus();
+  }
+
+  // Filters `in_landmarks` and appends the result to `out_landmarks`.
+  //
+  // Fails if filters already exist and the number of landmarks differs from
+  // the number the filters were created for.
+  //
+  // NOTE(review): results are appended with add_landmark(), so the caller is
+  // presumably expected to pass an empty `out_landmarks` - confirm.
+  absl::Status Apply(const LandmarkList& in_landmarks,
+                     const absl::Duration& timestamp,
+                     const absl::optional<float> object_scale_opt,
+                     LandmarkList& out_landmarks) override {
+    // Initialize filters once.
+    MP_RETURN_IF_ERROR(InitializeFiltersIfEmpty(in_landmarks.landmark_size()));
+
+    // Get value scale as inverse value of the object scale.
+    // If the object scale is too small, smoothing is skipped for this call
+    // and the landmarks are returned as is (the filters are not updated).
+    float value_scale = 1.0f;
+    if (!disable_value_scaling_) {
+      // Prefer the scale supplied by the caller; otherwise derive it from the
+      // landmarks themselves.
+      const float object_scale =
+          object_scale_opt ? *object_scale_opt : GetObjectScale(in_landmarks);
+      if (object_scale < min_allowed_object_scale_) {
+        out_landmarks = in_landmarks;
+        return absl::OkStatus();
+      }
+      value_scale = 1.0f / object_scale;
+    }
+
+    // Filter landmarks. Every axis of every landmark is filtered separately.
+    for (int i = 0; i < in_landmarks.landmark_size(); ++i) {
+      const auto& in_landmark = in_landmarks.landmark(i);
+
+      // Copy all fields first so that everything except x/y/z is passed
+      // through unchanged.
+      auto* out_landmark = out_landmarks.add_landmark();
+      *out_landmark = in_landmark;
+      out_landmark->set_x(
+          x_filters_[i].Apply(timestamp, value_scale, in_landmark.x()));
+      out_landmark->set_y(
+          y_filters_[i].Apply(timestamp, value_scale, in_landmark.y()));
+      out_landmark->set_z(
+          z_filters_[i].Apply(timestamp, value_scale, in_landmark.z()));
+    }
+
+    return absl::OkStatus();
+  }
+
+ private:
+  // Initializes filters for the first time or after Reset. If already
+  // initialized, only checks that the number of filters matches
+  // `n_landmarks`.
+  absl::Status InitializeFiltersIfEmpty(const int n_landmarks) {
+    if (!x_filters_.empty()) {
+      RET_CHECK_EQ(x_filters_.size(), n_landmarks);
+      RET_CHECK_EQ(y_filters_.size(), n_landmarks);
+      RET_CHECK_EQ(z_filters_.size(), n_landmarks);
+      return absl::OkStatus();
+    }
+
+    // Allocate once up front and construct the filters in place.
+    x_filters_.reserve(n_landmarks);
+    y_filters_.reserve(n_landmarks);
+    z_filters_.reserve(n_landmarks);
+    for (int i = 0; i < n_landmarks; ++i) {
+      x_filters_.emplace_back(frequency_, min_cutoff_, beta_,
+                              derivate_cutoff_);
+      y_filters_.emplace_back(frequency_, min_cutoff_, beta_,
+                              derivate_cutoff_);
+      z_filters_.emplace_back(frequency_, min_cutoff_, beta_,
+                              derivate_cutoff_);
+    }
+
+    return absl::OkStatus();
+  }
+
+  double frequency_;
+  double min_cutoff_;
+  double beta_;
+  double derivate_cutoff_;
+  // Stored as float to match the constructor argument and the velocity-based
+  // filter above.
+  float min_allowed_object_scale_;
+  bool disable_value_scaling_;
+
+  // One filter per landmark, per axis.
+  std::vector<OneEuroFilter> x_filters_;
+  std::vector<OneEuroFilter> y_filters_;
+  std::vector<OneEuroFilter> z_filters_;
+};
+
+}  // namespace
+
+// Appends to `landmarks` one landmark per entry of `norm_landmarks`, with
+// x and z multiplied by `image_width` and y multiplied by `image_height`.
+// Visibility and presence are copied as is.
+void NormalizedLandmarksToLandmarks(
+    const NormalizedLandmarkList& norm_landmarks, const int image_width,
+    const int image_height, LandmarkList& landmarks) {
+  for (const auto& norm_landmark : norm_landmarks.landmark()) {
+    auto* landmark = landmarks.add_landmark();
+    landmark->set_x(norm_landmark.x() * image_width);
+    landmark->set_y(norm_landmark.y() * image_height);
+    // Scale Z the same way as X (using image width).
+    landmark->set_z(norm_landmark.z() * image_width);
+    landmark->set_visibility(norm_landmark.visibility());
+    landmark->set_presence(norm_landmark.presence());
+  }
+}
+
+// Inverse of NormalizedLandmarksToLandmarks: appends to `norm_landmarks` one
+// landmark per entry of `landmarks`, with x and z divided by `image_width`
+// and y divided by `image_height`. Visibility and presence are copied as is.
+// The image dimensions are not validated here.
+void LandmarksToNormalizedLandmarks(const LandmarkList& landmarks,
+                                    const int image_width,
+                                    const int image_height,
+                                    NormalizedLandmarkList& norm_landmarks) {
+  for (const auto& landmark : landmarks.landmark()) {
+    auto* norm_landmark = norm_landmarks.add_landmark();
+    norm_landmark->set_x(landmark.x() / image_width);
+    norm_landmark->set_y(landmark.y() / image_height);
+    // Scale Z the same way as X (using image width).
+    norm_landmark->set_z(landmark.z() / image_width);
+    norm_landmark->set_visibility(landmark.visibility());
+    norm_landmark->set_presence(landmark.presence());
+  }
+}
+
+// Returns the mean of the ROI width and height after scaling the normalized
+// ROI dimensions by the image dimensions.
+float GetObjectScale(const NormalizedRect& roi, const int image_width,
+                     const int image_height) {
+  const float object_width = roi.width() * image_width;
+  const float object_height = roi.height() * image_height;
+
+  return (object_width + object_height) / 2.0f;
+}
+
+// Returns the mean of the ROI width and height.
+float GetObjectScale(const Rect& roi) {
+  return (roi.width() + roi.height()) / 2.0f;
+}
+
+// Creates the landmarks filter selected in `options`. The options are checked
+// in this order: no_filter, velocity_filter, one_euro_filter. Returns an error
+// if none of them is set.
+//
+// NOTE(review): NoFilter and VelocityFilterImpl are expected to be the filter
+// classes defined earlier in this file - confirm the names.
+absl::StatusOr<std::unique_ptr<LandmarksFilter>> InitializeLandmarksFilter(
+    const LandmarksSmoothingCalculatorOptions& options) {
+  if (options.has_no_filter()) {
+    return absl::make_unique<NoFilter>();
+  } else if (options.has_velocity_filter()) {
+    return absl::make_unique<VelocityFilterImpl>(
+        options.velocity_filter().window_size(),
+        options.velocity_filter().velocity_scale(),
+        options.velocity_filter().min_allowed_object_scale(),
+        options.velocity_filter().disable_value_scaling());
+  } else if (options.has_one_euro_filter()) {
+    return absl::make_unique<OneEuroFilterImpl>(
+        options.one_euro_filter().frequency(),
+        options.one_euro_filter().min_cutoff(),
+        options.one_euro_filter().beta(),
+        options.one_euro_filter().derivate_cutoff(),
+        options.one_euro_filter().min_allowed_object_scale(),
+        options.one_euro_filter().disable_value_scaling());
+  } else {
+    RET_CHECK_FAIL()
+        << "Landmarks filter is either not specified or not supported";
+  }
+}
+
+}  // namespace landmarks_smoothing
+}  // namespace mediapipe
diff --git a/mediapipe/calculators/util/landmarks_smoothing_calculator_utils.h b/mediapipe/calculators/util/landmarks_smoothing_calculator_utils.h
new file mode 100644
index 000000000..267053907
--- /dev/null
+++ b/mediapipe/calculators/util/landmarks_smoothing_calculator_utils.h
@@ -0,0 +1,61 @@
+// Copyright 2023 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef MEDIAPIPE_CALCULATORS_UTIL_LANDMARKS_SMOOTHING_CALCULATOR_UTILS_H_
+#define MEDIAPIPE_CALCULATORS_UTIL_LANDMARKS_SMOOTHING_CALCULATOR_UTILS_H_
+
+#include <memory>
+
+#include "mediapipe/calculators/util/landmarks_smoothing_calculator.pb.h"
+#include "mediapipe/framework/calculator_context.h"
+#include "mediapipe/framework/formats/landmark.pb.h"
+#include "mediapipe/framework/formats/rect.pb.h"
+#include "mediapipe/util/filtering/one_euro_filter.h"
+#include "mediapipe/util/filtering/relative_velocity_filter.h"
+
+// NOTE(review): absl::Status, absl::StatusOr, absl::Duration and
+// absl::optional are used below without a direct include; presumably they
+// arrive through the framework headers above. Consider including them
+// directly (together with the matching BUILD deps).
+
+namespace mediapipe {
+namespace landmarks_smoothing {
+
+// Appends to `landmarks` one landmark per entry of `norm_landmarks`, scaling
+// x and z by `image_width` and y by `image_height`.
+void NormalizedLandmarksToLandmarks(
+    const mediapipe::NormalizedLandmarkList& norm_landmarks,
+    const int image_width, const int image_height,
+    mediapipe::LandmarkList& landmarks);
+
+// Appends to `norm_landmarks` one landmark per entry of `landmarks`, dividing
+// x and z by `image_width` and y by `image_height`.
+void LandmarksToNormalizedLandmarks(
+    const mediapipe::LandmarkList& landmarks, const int image_width,
+    const int image_height, mediapipe::NormalizedLandmarkList& norm_landmarks);
+
+// Returns the mean of the ROI width and height, with the normalized ROI
+// dimensions scaled by the image dimensions.
+float GetObjectScale(const mediapipe::NormalizedRect& roi,
+                     const int image_width, const int image_height);
+
+// Returns the mean of the ROI width and height.
+float GetObjectScale(const mediapipe::Rect& roi);
+
+// Abstract class for various landmarks filters.
+class LandmarksFilter {
+ public:
+  virtual ~LandmarksFilter() = default;
+
+  // Resets any state kept by the filter. The default implementation keeps no
+  // state and simply returns OK.
+  virtual absl::Status Reset() { return absl::OkStatus(); }
+
+  // Filters `in_landmarks` and writes the result to `out_landmarks`.
+  // `timestamp` and the optional `object_scale_opt` are interpreted by the
+  // concrete filter implementation.
+  virtual absl::Status Apply(const mediapipe::LandmarkList& in_landmarks,
+                             const absl::Duration& timestamp,
+                             const absl::optional<float> object_scale_opt,
+                             mediapipe::LandmarkList& out_landmarks) = 0;
+};
+
+// Creates the landmarks filter selected in `options`, or returns an error
+// status if no supported filter is specified.
+absl::StatusOr<std::unique_ptr<LandmarksFilter>> InitializeLandmarksFilter(
+    const mediapipe::LandmarksSmoothingCalculatorOptions& options);
+
+}  // namespace landmarks_smoothing
+}  // namespace mediapipe
+
+#endif  // MEDIAPIPE_CALCULATORS_UTIL_LANDMARKS_SMOOTHING_CALCULATOR_UTILS_H_