From 77fcaa95973328d8dcf3990cb746904b70dcf246 Mon Sep 17 00:00:00 2001 From: Jiuqiang Tang Date: Mon, 6 Mar 2023 12:06:38 -0800 Subject: [PATCH] Allow TensorsToAudioCalculator to add and overlay adjacent windows. PiperOrigin-RevId: 514483756 --- .../tensor/tensors_to_audio_calculator.cc | 32 +++++++++++++++++-- .../tensor/tensors_to_audio_calculator.proto | 6 ++++ 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/mediapipe/calculators/tensor/tensors_to_audio_calculator.cc b/mediapipe/calculators/tensor/tensors_to_audio_calculator.cc index 8da29bb69..d1fdceef8 100644 --- a/mediapipe/calculators/tensor/tensors_to_audio_calculator.cc +++ b/mediapipe/calculators/tensor/tensors_to_audio_calculator.cc @@ -135,6 +135,9 @@ class TensorsToAudioCalculator : public Node { // pffft requires memory to work with to avoid using the stack. std::vector> fft_workplace_; std::vector> fft_output_; + std::vector> prev_fft_output_; + int overlapping_samples_ = -1; + int step_samples_ = -1; }; absl::Status TensorsToAudioCalculator::Open(CalculatorContext* cc) { @@ -153,6 +156,22 @@ absl::Status TensorsToAudioCalculator::Open(CalculatorContext* cc) { fft_input_buffer_.resize(fft_size_); fft_workplace_.resize(fft_size_); fft_output_.resize(fft_size_); + if (options.has_num_overlapping_samples()) { + RET_CHECK(options.has_num_samples() && options.num_samples() > 0) + << "When `num_overlapping_samples` is set, `num_samples` must also be " + "specified."; + if (options.num_samples() != fft_size_) { + return absl::UnimplementedError( + "`num_samples` and `fft_size` must be equivalent."); + } + RET_CHECK(options.num_overlapping_samples() > 0 && + options.num_overlapping_samples() < options.num_samples()) + << "`num_overlapping_samples` must be greater than 0 and less than " + "`num_samples.`"; + overlapping_samples_ = options.num_overlapping_samples(); + step_samples_ = options.num_samples() - options.num_overlapping_samples(); + prev_fft_output_.resize(fft_size_); + } return absl::OkStatus(); } @@ -179,8 +198,17 @@ absl::Status TensorsToAudioCalculator::Process(CalculatorContext* cc) { fft_output_.begin(), fft_output_.end(), inv_fft_window_.begin(), fft_output_.begin(), [this](float a, float b) { return a * b * inverse_fft_size_; }); - Matrix matrix = Eigen::Map(fft_output_.data(), 1, fft_output_.size()); - kAudioOut(cc).Send(std::move(matrix)); + if (step_samples_ > 0) { + Matrix matrix = Eigen::Map(fft_output_.data(), 1, step_samples_); + matrix.leftCols(overlapping_samples_) += Eigen::Map( + prev_fft_output_.data() + step_samples_, 1, overlapping_samples_); + prev_fft_output_.swap(fft_output_); + kAudioOut(cc).Send(std::move(matrix)); + } else { + Matrix matrix = + Eigen::Map(fft_output_.data(), 1, fft_output_.size()); + kAudioOut(cc).Send(std::move(matrix)); + } return absl::OkStatus(); } diff --git a/mediapipe/calculators/tensor/tensors_to_audio_calculator.proto b/mediapipe/calculators/tensor/tensors_to_audio_calculator.proto index 907627125..42c1ae35f 100644 --- a/mediapipe/calculators/tensor/tensors_to_audio_calculator.proto +++ b/mediapipe/calculators/tensor/tensors_to_audio_calculator.proto @@ -26,4 +26,10 @@ message TensorsToAudioCalculatorOptions { // Size of the fft in number of bins. If set, the calculator will do ifft // on the input tensor. optional int64 fft_size = 1; + + // The number of samples per channel the output audio has. + optional int64 num_samples = 2; + + // The number of overlapping samples between adjacent windows. + optional int64 num_overlapping_samples = 3 [default = 0]; }