input_side_packet: "input_sequence_example"
|
|
input_side_packet: "inception3_pca_mean_matrix"
|
|
input_side_packet: "inception3_pca_projection_matrix"
|
|
input_side_packet: "vggish_pca_mean_matrix"
|
|
input_side_packet: "vggish_pca_projection_matrix"
|
|
output_side_packet: "sequence_example_to_serialize"
|
|
|
|
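# Convert the serialized input string into a SequenceExample carrying the
# video's metadata.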
node {
  calculator: "StringToSequenceExampleCalculator"
  input_side_packet: "STRING:input_sequence_example"
  output_side_packet: "SEQUENCE_EXAMPLE:parsed_sequence_example"
}

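# Unpack the data path, frame resampler options, and audio decoder options
# from the SequenceExample's metadata.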
node {
  calculator: "UnpackMediaSequenceCalculator"
  input_side_packet: "SEQUENCE_EXAMPLE:parsed_sequence_example"
  output_side_packet: "DATA_PATH:input_file"
  output_side_packet: "RESAMPLER_OPTIONS:packet_resampler_options"
  output_side_packet: "AUDIO_DECODER_OPTIONS:audio_decoder_options"
  node_options: {
    [type.googleapis.com/mediapipe.UnpackMediaSequenceCalculatorOptions]: {
      base_packet_resampler_options {
        frame_rate: 1.0
        base_timestamp: 0
      }
      base_audio_decoder_options {
        audio_stream { stream_index: 0 }
      }
    }
  }
}

# Decode the entire video.
node {
  calculator: "OpenCvVideoDecoderCalculator"
  input_side_packet: "INPUT_FILE_PATH:input_file"
  output_stream: "VIDEO:decoded_frames"
}

# Extract the subset of frames we want to keep.
node {
  calculator: "PacketResamplerCalculator"
  input_stream: "decoded_frames"
  output_stream: "sampled_decoded_frames"
  input_side_packet: "OPTIONS:packet_resampler_options"
}

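# Convert each sampled ImageFrame into a TensorFlow tensor.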
node {
  calculator: "ImageFrameToTensorCalculator"
  input_stream: "sampled_decoded_frames"
  output_stream: "tensor_frame"
}

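# Load the Inception3 frozen graph as a TensorFlow session side packet.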
node {
  calculator: "TensorFlowSessionFromFrozenGraphCalculator"
  output_side_packet: "SESSION:session"
  node_options: {
    [type.googleapis.com/mediapipe.TensorFlowSessionFromFrozenGraphCalculatorOptions]: {
      graph_proto_path: "/tmp/mediapipe/classify_image_graph_def.pb"
      tag_to_tensor_names {
        key: "IMG_UINT8"
        value: "DecodeJpeg:0"
      }
      tag_to_tensor_names {
        key: "INCEPTION_POOL3"
        value: "pool_3/_reshape:0"
      }
    }
  }
}

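# Run Inception3 inference on each frame to extract the pool3 hidden
# activations.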
node {
  calculator: "TensorFlowInferenceCalculator"
  input_side_packet: "SESSION:session"
  input_stream: "IMG_UINT8:tensor_frame"
  output_stream: "INCEPTION_POOL3:inception3_hidden_activation_single_element_batch"
  node_options: {
    [type.googleapis.com/mediapipe.TensorFlowInferenceCalculatorOptions]: {
      signature_name: ""
      batch_size: 1
      add_batch_dim_to_tensors: false
    }
  }
}

# Remove the batch dimension.
node: {
  calculator: "TensorSqueezeDimensionsCalculator"
  input_stream: "inception3_hidden_activation_single_element_batch"
  output_stream: "inception3_hidden_activation"
  node_options: {
    [type.googleapis.com/mediapipe.TensorSqueezeDimensionsCalculatorOptions]: {
      dim: 0
    }
  }
}

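# Convert the activation tensor into a Matrix for the PCA steps below.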
node {
  calculator: "TensorToMatrixCalculator"
  input_stream: "TENSOR:inception3_hidden_activation"
  output_stream: "MATRIX:inception3_hidden_activation_matrix"
}

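# Apply PCA to the Inception3 features: subtract the mean, project with the
# projection matrix, and emit the result as a vector of floats.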
node {
  calculator: "MatrixSubtractCalculator"
  input_stream: "MINUEND:inception3_hidden_activation_matrix"
  input_side_packet: "SUBTRAHEND:inception3_pca_mean_matrix"
  output_stream: "mean_subtracted_inception3_matrix"
}
node {
  calculator: "MatrixMultiplyCalculator"
  input_stream: "mean_subtracted_inception3_matrix"
  input_side_packet: "inception3_pca_projection_matrix"
  output_stream: "pca_inception3_matrix"
}
node {
  calculator: "MatrixToVectorCalculator"
  input_stream: "pca_inception3_matrix"
  output_stream: "pca_inception3_vf"
}

######################## END OF VISUAL ###########################

######################## BEGIN OF AUDIO ##########################
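# Decode the audio track of the input file.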
node {
  calculator: "AudioDecoderCalculator"
  input_side_packet: "INPUT_FILE_PATH:input_file"
  input_side_packet: "OPTIONS:audio_decoder_options"
  output_stream: "AUDIO:audio"
  output_stream: "AUDIO_HEADER:audio_header"
}

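# Attach the audio header to the audio packets so downstream time-series
# calculators know the sample rate and channel count.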
node {
  calculator: "AddHeaderCalculator"
  input_stream: "DATA:audio"
  input_stream: "HEADER:audio_header"
  output_stream: "media_audio"
}

# Always convert the audio to mono.
node {
  calculator: "AverageTimeSeriesAcrossChannelsCalculator"
  input_stream: "media_audio"
  output_stream: "mono_waveform"
}

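# Resample the mono waveform to the 16 kHz rate expected by VGGish.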
node {
  calculator: "RationalFactorResampleCalculator"
  input_stream: "mono_waveform"
  output_stream: "resampled_waveform"
  node_options: {
    [type.googleapis.com/mediapipe.RationalFactorResampleCalculatorOptions] {
      target_sample_rate: 16000.0
    }
  }
}
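# Compute a squared-magnitude spectrogram using 25 ms frames with 15 ms of
# overlap.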
node {
  calculator: "SpectrogramCalculator"
  input_stream: "resampled_waveform"
  output_stream: "spectrogram_squared_magnitude"
  node_options: {
    [type.googleapis.com/mediapipe.SpectrogramCalculatorOptions] {
      frame_duration_seconds: 0.025
      frame_overlap_seconds: 0.015
      output_type: SQUARED_MAGNITUDE
    }
  }
}
node {
  calculator: "MelSpectrumCalculator"
  # MelSpectrumCalculator expects SQUARED_MAGNITUDE input, but its output is in
  # linear magnitude units.
  input_stream: "spectrogram_squared_magnitude"
  output_stream: "mel_spectrum_magnitude"
  node_options: {
    [type.googleapis.com/mediapipe.MelSpectrumCalculatorOptions] {
      # Follow the 'wideband' or '16kHz' speech convention.
      channel_count: 64
      min_frequency_hertz: 125.0
      max_frequency_hertz: 7500.0
    }
  }
}
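# Take a stabilized logarithm of the mel spectrum to produce log-mel features.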
node {
  calculator: "StabilizedLogCalculator"
  input_stream: "mel_spectrum_magnitude"
  output_stream: "log_mel_spectrum_magnitude"
  node_options: {
    [type.googleapis.com/mediapipe.StabilizedLogCalculatorOptions] {
      stabilizer: 0.01
    }
  }
}
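# Frame the log-mel features into 0.96 s examples; the negative overlap leaves
# a 0.04 s gap between consecutive examples.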
node {
  calculator: "TimeSeriesFramerCalculator"
  input_stream: "log_mel_spectrum_magnitude"
  output_stream: "log_mel_spectrum_magnitude_with_context"
  node_options: {
    [type.googleapis.com/mediapipe.TimeSeriesFramerCalculatorOptions] {
      frame_duration_seconds: 0.96
      frame_overlap_seconds: -0.04
    }
  }
}
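# Convert each framed Matrix into a tensor, transposed to match the input
# layout VGGish expects.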
node {
  calculator: "MatrixToTensorCalculator"
  input_stream: "log_mel_spectrum_magnitude_with_context"
  output_stream: "log_mel_spectrum_magnitude_tensor"
  node_options: {
    [type.googleapis.com/mediapipe.MatrixToTensorCalculatorOptions] {
      transpose: true
    }
  }
}

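# Load the VGGish frozen graph as a TensorFlow session side packet.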
node {
  calculator: "TensorFlowSessionFromFrozenGraphCalculator"
  output_side_packet: "SESSION:vggish_session"
  node_options: {
    [type.googleapis.com/mediapipe.TensorFlowSessionFromFrozenGraphCalculatorOptions]: {
      graph_proto_path: "/tmp/mediapipe/vggish_new.pb"
      tag_to_tensor_names {
        key: "INPUT"
        value: "vggish/input_features:0"
      }
      tag_to_tensor_names {
        key: "VGGISH"
        value: "vggish/fc2/BiasAdd:0"
      }
    }
  }
}

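# Run VGGish inference on the log-mel tensors to produce 128-dimensional
# audio embeddings.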
node {
  calculator: "TensorFlowInferenceCalculator"
  input_side_packet: "SESSION:vggish_session"
  input_stream: "INPUT:log_mel_spectrum_magnitude_tensor"
  output_stream: "VGGISH:vggish_tensor"
  node_options: {
    [type.googleapis.com/mediapipe.TensorFlowInferenceCalculatorOptions]: {
      signature_name: ""
      batch_size: 128
    }
  }
}

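# Convert the VGGish embedding tensor back into a Matrix, using the framed
# log-mel stream as the timing reference.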
node {
  calculator: "TensorToMatrixCalculator"
  input_stream: "REFERENCE:log_mel_spectrum_magnitude_with_context"
  input_stream: "TENSOR:vggish_tensor"
  output_stream: "MATRIX:vggish_matrix"
  node_options: {
    [type.googleapis.com/mediapipe.TensorToMatrixCalculatorOptions] {
      time_series_header_overrides {
        num_channels: 128
        num_samples: 1
      }
    }
  }
}

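# Apply PCA to the VGGish embeddings: subtract the mean, project with the
# projection matrix, and emit the result as a vector of floats.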
node {
  calculator: "MatrixSubtractCalculator"
  input_stream: "MINUEND:vggish_matrix"
  input_side_packet: "SUBTRAHEND:vggish_pca_mean_matrix"
  output_stream: "mean_subtracted_vggish_matrix"
}
node {
  calculator: "MatrixMultiplyCalculator"
  input_stream: "mean_subtracted_vggish_matrix"
  input_side_packet: "vggish_pca_projection_matrix"
  output_stream: "pca_vggish_matrix"
}
node {
  calculator: "MatrixToVectorCalculator"
  input_stream: "pca_vggish_matrix"
  output_stream: "pca_vggish_vf"
}

# Store the features in the SequenceExample.
node {
  calculator: "PackMediaSequenceCalculator"
  input_side_packet: "SEQUENCE_EXAMPLE:parsed_sequence_example"
  output_side_packet: "SEQUENCE_EXAMPLE:sequence_example_to_serialize"
  input_stream: "FLOAT_FEATURE_RGB:pca_inception3_vf"
  input_stream: "FLOAT_FEATURE_AUDIO:pca_vggish_vf"
}

# Serialize the SequenceExample to a string for storage.
node {
  calculator: "StringToSequenceExampleCalculator"
  input_side_packet: "SEQUENCE_EXAMPLE:sequence_example_to_serialize"
  output_side_packet: "STRING:output_sequence_example"
}