Internal change

PiperOrigin-RevId: 489484898
2022-11-18 08:44:02 -08:00 · 2022-11-18 08:44:02 -08:00 · e046982a3c
commit e046982a3c
parent ae44012c0c
2 changed files with 54 additions and 8 deletions
--- a/mediapipe/calculators/tensor/audio_to_tensor_calculator.cc
+++ b/mediapipe/calculators/tensor/audio_to_tensor_calculator.cc
@ -43,6 +43,7 @@ namespace api2 {
 namespace {

 using Options = ::mediapipe::AudioToTensorCalculatorOptions;
+using DftTensorFormat = Options::DftTensorFormat;
 using FlushMode = Options::FlushMode;

 std::vector<float> HannWindow(int window_size, bool sqrt_hann) {
@ -188,6 +189,8 @@ class AudioToTensorCalculator : public Node {
  int padding_samples_before_;
  int padding_samples_after_;
  FlushMode flush_mode_;
+  DftTensorFormat dft_tensor_format_;
+
  Timestamp initial_timestamp_ = Timestamp::Unstarted();
  int64 cumulative_input_samples_ = 0;
  Timestamp next_output_timestamp_ = Timestamp::Unstarted();
@ -273,6 +276,7 @@ absl::Status AudioToTensorCalculator::Open(CalculatorContext* cc) {
  }
  padding_samples_before_ = options.padding_samples_before();
  padding_samples_after_ = options.padding_samples_after();
+  dft_tensor_format_ = options.dft_tensor_format();
  flush_mode_ = options.flush_mode();

  RET_CHECK(kAudioSampleRateIn(cc).IsConnected() ^
@ -492,14 +496,43 @@ absl::Status AudioToTensorCalculator::OutputTensor(const Matrix& block,
      kDcAndNyquistOut(cc).Send(std::make_pair(fft_output_[0], fft_output_[1]),
                                timestamp);
    }
-    Matrix fft_output_matrix =
-        Eigen::Map<const Matrix>(fft_output_.data() + 2, 1, fft_size_ - 2);
-    fft_output_matrix.conservativeResize(Eigen::NoChange, fft_size_);
-    // The last two elements are the DFT Nyquist values.
-    fft_output_matrix(fft_size_ - 2) = fft_output_[1];  // Nyquist real part
-    fft_output_matrix(fft_size_ - 1) = 0.0f;            // Nyquist imagery part
-    ASSIGN_OR_RETURN(output_tensor,
-                     ConvertToTensor(fft_output_matrix, {2, fft_size_ / 2}));
+    switch (dft_tensor_format_) {
+      case Options::WITH_NYQUIST: {
+        Matrix fft_output_matrix =
+            Eigen::Map<const Matrix>(fft_output_.data() + 2, 1, fft_size_ - 2);
+        fft_output_matrix.conservativeResize(Eigen::NoChange, fft_size_);
+        // The last two elements are the Nyquist component.
+        fft_output_matrix(fft_size_ - 2) = fft_output_[1];  // Nyquist real part
+        fft_output_matrix(fft_size_ - 1) = 0.0f;  // Nyquist imaginary part
+        ASSIGN_OR_RETURN(output_tensor, ConvertToTensor(fft_output_matrix,
+                                                        {2, fft_size_ / 2}));
+        break;
+      }
+      case Options::WITH_DC_AND_NYQUIST: {
+        Matrix fft_output_matrix =
+            Eigen::Map<const Matrix>(fft_output_.data(), 1, fft_size_);
+        fft_output_matrix.conservativeResize(Eigen::NoChange, fft_size_ + 2);
+        fft_output_matrix(1) = 0.0f;  // DC imaginary part.
+        // The last two elements are the Nyquist component.
+        fft_output_matrix(fft_size_) = fft_output_[1];  // Nyquist real part
+        fft_output_matrix(fft_size_ + 1) = 0.0f;  // Nyquist imaginary part
+        ASSIGN_OR_RETURN(
+            output_tensor,
+            ConvertToTensor(fft_output_matrix, {2, (fft_size_ + 2) / 2}));
+        break;
+      }
+      case Options::WITHOUT_DC_AND_NYQUIST: {
+        Matrix fft_output_matrix =
+            Eigen::Map<const Matrix>(fft_output_.data() + 2, 1, fft_size_ - 2);
+        ASSIGN_OR_RETURN(
+            output_tensor,
+            ConvertToTensor(fft_output_matrix, {2, (fft_size_ - 2) / 2}));
+        break;
+      }
+      default:
+        return absl::InvalidArgumentError("Unsupported dft tensor format.");
+    }
+
  } else {
    ASSIGN_OR_RETURN(output_tensor,
                     ConvertToTensor(block, {num_channels_, num_samples_}));
--- a/mediapipe/calculators/tensor/audio_to_tensor_calculator.proto
+++ b/mediapipe/calculators/tensor/audio_to_tensor_calculator.proto
@ -68,4 +68,17 @@ message AudioToTensorCalculatorOptions {
  }

  optional FlushMode flush_mode = 10 [default = ENTIRE_TAIL_AT_TIMESTAMP_MAX];
+
+  enum DftTensorFormat {
+    DFT_TENSOR_FORMAT_UNKNOWN = 0;  // Rejected as an unsupported format.
+    // The output DFT tensor excludes both the DC and the Nyquist components.
+    WITHOUT_DC_AND_NYQUIST = 1;
+    // The output DFT tensor ends with the Nyquist component as its last two
+    // values: the real part followed by a zero imaginary part.
+    WITH_NYQUIST = 2;
+    // As WITH_NYQUIST, and the tensor also starts with the DC component as
+    // its first two values: the real part followed by a zero imaginary part.
+    WITH_DC_AND_NYQUIST = 3;
+  }
+  optional DftTensorFormat dft_tensor_format = 11 [default = WITH_NYQUIST];
 }