c688862570
GitOrigin-RevId: 6e5aa035cd1f6a9333962df5d3ab97a05bd5744e
86 lines
3.4 KiB
Protocol Buffer
86 lines
3.4 KiB
Protocol Buffer
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// proto2 syntax: this file relies on an `extend` block and on explicit
// `[default = ...]` field values, both of which are proto2 features.
syntax = "proto2";

package mediapipe;

// Sole import of this file; CalculatorOptions (extended below) is resolved
// through it.
import "mediapipe/framework/calculator.proto";

// Options for the spectrogram calculator, carried as an extension of
// CalculatorOptions.
message SpectrogramCalculatorOptions {
  extend CalculatorOptions {
    optional SpectrogramCalculatorOptions ext = 76186688;
  }

  // Options mirror those of TimeSeriesFramerCalculator.

  // Analysis window duration in seconds. Required. Must be greater than 0.
  // (Note: the spectrogram DFT length will be the smallest power-of-2
  // sample count that can hold this duration.)
  optional double frame_duration_seconds = 1;

  // Duration of overlap between adjacent windows, in seconds. Hence,
  //   frame_rate = 1 / (frame_duration_seconds - frame_overlap_seconds).
  // Note that frame_rate here is not the MediaPipe packet rate: a "frame" is
  // one Fourier-transform analysis frame of the waveform. The output MediaPipe
  // packet rate will be the same as the input packet rate, so a frame rate
  // lower than the input packet rate results in intermittent empty output
  // packets.
  // Requires 0 <= frame_overlap_seconds < frame_duration_seconds.
  optional double frame_overlap_seconds = 2 [default = 0.0];

  // Whether to pad the final packet with zeros. If true, guarantees that
  // all input samples will be output. If false, any partial packet at the
  // end of the stream will be dropped.
  optional bool pad_final_packet = 3 [default = true];

  // Output value type can be squared-magnitude, linear-magnitude,
  // deciBels (dB, = 20*log10(linear_magnitude)), or std::complex.
  // Their relationship:
  //   COMPLEX c = Re + Im*i;
  //   SQUARED_MAGNITUDE = Re^2 + Im^2;
  //   LINEAR_MAGNITUDE = sqrt(SQUARED_MAGNITUDE);
  //   DECIBELS = 20*log10(LINEAR_MAGNITUDE) = 10*log10(SQUARED_MAGNITUDE);
  enum OutputType {
    // Re^2 + Im^2. This is the default.
    SQUARED_MAGNITUDE = 0;
    // sqrt(SQUARED_MAGNITUDE).
    LINEAR_MAGNITUDE = 1;
    // 20*log10(LINEAR_MAGNITUDE).
    DECIBELS = 2;
    // Re + Im*i (std::complex).
    COMPLEX = 3;
  }
  optional OutputType output_type = 4 [default = SQUARED_MAGNITUDE];

  // If set to true then the output will be a vector of spectrograms, one for
  // each channel, and the stream will have a MultiStreamTimeSeriesHeader.
  optional bool allow_multichannel_input = 5 [default = false];

  // Which window to use when computing the FFT.
  enum WindowType {
    HANN = 0;
    HAMMING = 1;
    COSINE = 2;
  }
  optional WindowType window_type = 6 [default = HANN];

  // Support a fixed multiplicative scaling of the output. This is applied
  // uniformly regardless of output type (i.e., even dBs are multiplied, not
  // offset).
  optional double output_scale = 7 [default = 1.0];

  // If use_local_timestamp is true, the output packet's timestamp is based on
  // the last sample of the packet and is inferred from the latest input
  // packet's timestamp. If false, the output packet's timestamp is based on
  // cumulative timestamping, which is inferred from the initial input
  // timestamp and the cumulative number of samples.
  optional bool use_local_timestamp = 8 [default = false];
}