mediapipe/mediapipe/calculators/audio/spectrogram_calculator.proto
MediaPipe Team c688862570 Project import generated by Copybara.
GitOrigin-RevId: 6e5aa035cd1f6a9333962df5d3ab97a05bd5744e
2022-06-28 12:11:05 +00:00

86 lines
3.4 KiB
Protocol Buffer

// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe;
import "mediapipe/framework/calculator.proto";
// Options for the spectrogram calculator, attached to CalculatorOptions as an
// extension.
message SpectrogramCalculatorOptions {
  extend CalculatorOptions {
    optional SpectrogramCalculatorOptions ext = 76186688;
  }

  // These options mirror those of TimeSeriesFramerCalculator.

  // Length of each analysis window, in seconds. Required; must be > 0.
  // (The spectrogram DFT length is the smallest power-of-2 sample count
  // able to hold this duration.)
  optional double frame_duration_seconds = 1;

  // Amount by which consecutive analysis windows overlap, in seconds, so
  // frame_rate = 1 / (frame_duration_seconds - frame_overlap_seconds).
  // A "frame" here is one Fourier-transform analysis window of the waveform,
  // not a MediaPipe packet: the output packet rate is the same as the input
  // packet rate, and when the frame rate is lower than the input packet rate
  // some output packets will be empty.
  // Requires 0 <= frame_overlap_seconds < frame_duration_seconds.
  optional double frame_overlap_seconds = 2 [default = 0.0];

  // Handling of a partial packet at the end of the stream. When true, the
  // final packet is zero-padded, which guarantees that all input samples are
  // output. When false, the partial packet is dropped.
  optional bool pad_final_packet = 3 [default = true];

  // Representation of the output values: squared magnitude, linear
  // magnitude, decibels (dB), or std::complex. They relate as follows:
  //   COMPLEX            c = Re + Im*i
  //   SQUARED_MAGNITUDE  = Re^2 + Im^2
  //   LINEAR_MAGNITUDE   = sqrt(SQUARED_MAGNITUDE)
  //   DECIBELS           = 20*log10(LINEAR_MAGNITUDE)
  //                      = 10*log10(SQUARED_MAGNITUDE)
  enum OutputType {
    SQUARED_MAGNITUDE = 0;
    LINEAR_MAGNITUDE = 1;
    DECIBELS = 2;
    COMPLEX = 3;
  }
  optional OutputType output_type = 4 [default = SQUARED_MAGNITUDE];

  // When true, the output is a vector of spectrograms, one per channel, and
  // the stream carries a MultiStreamTimeSeriesHeader.
  optional bool allow_multichannel_input = 5 [default = false];

  // Window function applied when computing the FFT.
  enum WindowType {
    HANN = 0;
    HAMMING = 1;
    COSINE = 2;
  }
  optional WindowType window_type = 6 [default = HANN];

  // Fixed multiplicative scale applied uniformly to the output whatever the
  // output type; decibel values are multiplied as well, not offset.
  optional double output_scale = 7 [default = 1.0];

  // Selects how output packet timestamps are derived. When true, the
  // timestamp is based on the last sample of the packet and is inferred from
  // the latest input packet's timestamp. When false, cumulative timestamping
  // is used, inferred from the initial input timestamp and the cumulative
  // number of samples.
  optional bool use_local_timestamp = 8 [default = false];
}