Add a volume_gain_db option to AudioToTensorCalculator.

PiperOrigin-RevId: 507748012
This commit is contained in:
Jiuqiang Tang 2023-02-07 05:12:35 -08:00 committed by Copybara-Service
parent 9b040630a3
commit 01c6a8b49b
2 changed files with 12 additions and 1 deletions

View File

@ -203,6 +203,7 @@ class AudioToTensorCalculator : public Node {
std::unique_ptr<audio_dsp::QResampler<float>> resampler_; std::unique_ptr<audio_dsp::QResampler<float>> resampler_;
Matrix sample_buffer_; Matrix sample_buffer_;
int processed_buffer_cols_ = 0; int processed_buffer_cols_ = 0;
double gain_ = 1.0;
// The internal state of the FFT library. // The internal state of the FFT library.
PFFFT_Setup* fft_state_ = nullptr; PFFFT_Setup* fft_state_ = nullptr;
@ -278,7 +279,9 @@ absl::Status AudioToTensorCalculator::Open(CalculatorContext* cc) {
padding_samples_after_ = options.padding_samples_after(); padding_samples_after_ = options.padding_samples_after();
dft_tensor_format_ = options.dft_tensor_format(); dft_tensor_format_ = options.dft_tensor_format();
flush_mode_ = options.flush_mode(); flush_mode_ = options.flush_mode();
if (options.has_volume_gain_db()) {
gain_ = pow(10, options.volume_gain_db() / 20.0);
}
RET_CHECK(kAudioSampleRateIn(cc).IsConnected() ^ RET_CHECK(kAudioSampleRateIn(cc).IsConnected() ^
!kAudioIn(cc).Header().IsEmpty()) !kAudioIn(cc).Header().IsEmpty())
<< "Must either specify the time series header of the \"AUDIO\" stream " << "Must either specify the time series header of the \"AUDIO\" stream "
@ -344,6 +347,10 @@ absl::Status AudioToTensorCalculator::Process(CalculatorContext* cc) {
const Matrix& input = channels_match ? input_frame const Matrix& input = channels_match ? input_frame
// Mono mixdown. // Mono mixdown.
: input_frame.colwise().mean(); : input_frame.colwise().mean();
if (gain_ != 1.0) {
return stream_mode_ ? ProcessStreamingData(cc, input * gain_)
: ProcessNonStreamingData(cc, input * gain_);
}
return stream_mode_ ? ProcessStreamingData(cc, input) return stream_mode_ ? ProcessStreamingData(cc, input)
: ProcessNonStreamingData(cc, input); : ProcessNonStreamingData(cc, input);
} }

View File

@ -81,4 +81,8 @@ message AudioToTensorCalculatorOptions {
WITH_DC_AND_NYQUIST = 3; WITH_DC_AND_NYQUIST = 3;
} }
optional DftTensorFormat dft_tensor_format = 11 [default = WITH_NYQUIST]; optional DftTensorFormat dft_tensor_format = 11 [default = WITH_NYQUIST];
// The volume gain, measured in dB.
// The input audio amplitude is scaled by 10^(volume_gain_db / 20). When the
// field is unset, the audio is passed through unscaled.
optional double volume_gain_db = 12;
} }