diff --git a/gradio/demo.py b/gradio/demo.py
index a158423b1..d0a1967d4 100644
--- a/gradio/demo.py
+++ b/gradio/demo.py
@@ -1,5 +1,13 @@
import mediapipe as mp
import gradio as gr
+import cv2
+import torch
+
+
+# Images
+torch.hub.download_url_to_file('https://artbreeder.b-cdn.net/imgs/c789e54661bfb432c5522a36553f.jpeg', 'face1.jpg')
+torch.hub.download_url_to_file('https://artbreeder.b-cdn.net/imgs/c86622e8cb58d490e35b01cb9996.jpeg', 'face2.jpg')
+
mp_face_mesh = mp.solutions.face_mesh
# Prepare DrawingSpec for drawing the face landmarks later.
@@ -16,16 +24,28 @@ def inference(image):
# Convert the BGR image to RGB and process it with MediaPipe Face Mesh.
results = face_mesh.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
- # Draw face landmarks of each face.
- print(f'Face landmarks of {name}:')
- if not results.multi_face_landmarks:
- continue
annotated_image = image.copy()
for face_landmarks in results.multi_face_landmarks:
- mp_drawing.draw_landmarks(
- image=annotated_image,
- landmark_list=face_landmarks,
- connections=mp_face_mesh.FACE_CONNECTIONS,
- landmark_drawing_spec=drawing_spec,
- connection_drawing_spec=drawing_spec)
- return annotated_image
\ No newline at end of file
+ mp_drawing.draw_landmarks(
+ image=annotated_image,
+ landmark_list=face_landmarks,
+ connections=mp_face_mesh.FACE_CONNECTIONS,
+ landmark_drawing_spec=drawing_spec,
+ connection_drawing_spec=drawing_spec)
+ return annotated_image
+
+title = "Face Mesh"
+description = "demo for Face Mesh. To use it, simply upload your image, or click one of the examples to load them. Read more at the links below."
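+article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2006.10962'>Attention Mesh: High-fidelity Face Mesh Prediction in Real-time</a> | <a href='https://github.com/google/mediapipe'>Github Repo</a></p>"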
+
+gr.Interface(
+ inference,
+ [gr.inputs.Image(label="Input")],
+ gr.outputs.Image(type="pil", label="Output"),
+ title=title,
+ description=description,
+ article=article,
+ examples=[
+ ["face1.jpg"],
+ ["face2.jpg"]
+ ]).launch(debug=True)
\ No newline at end of file
diff --git a/mediapipe/BUILD b/mediapipe/BUILD
deleted file mode 100644
index 1171ea6f0..000000000
--- a/mediapipe/BUILD
+++ /dev/null
@@ -1,145 +0,0 @@
-# Copyright 2019 The MediaPipe Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-licenses(["notice"]) # Apache 2.0
-
-# Note: yes, these need to use "//external:android/crosstool", not
-# @androidndk//:default_crosstool.
-
-config_setting(
- name = "android",
- values = {"crosstool_top": "//external:android/crosstool"},
- visibility = ["//visibility:public"],
-)
-
-config_setting(
- name = "android_x86",
- values = {
- "crosstool_top": "//external:android/crosstool",
- "cpu": "x86",
- },
- visibility = ["//visibility:public"],
-)
-
-config_setting(
- name = "android_x86_64",
- values = {
- "crosstool_top": "//external:android/crosstool",
- "cpu": "x86_64",
- },
- visibility = ["//visibility:public"],
-)
-
-config_setting(
- name = "android_armeabi",
- values = {
- "crosstool_top": "//external:android/crosstool",
- "cpu": "armeabi",
- },
- visibility = ["//visibility:public"],
-)
-
-config_setting(
- name = "android_arm",
- values = {
- "crosstool_top": "//external:android/crosstool",
- "cpu": "armeabi-v7a",
- },
- visibility = ["//visibility:public"],
-)
-
-config_setting(
- name = "android_arm64",
- values = {
- "crosstool_top": "//external:android/crosstool",
- "cpu": "arm64-v8a",
- },
- visibility = ["//visibility:public"],
-)
-
-# Note: this cannot just match "apple_platform_type": "macos" because that option
-# defaults to "macos" even when building on Linux!
-alias(
- name = "macos",
- actual = select({
- ":macos_i386": ":macos_i386",
- ":macos_x86_64": ":macos_x86_64",
- "//conditions:default": ":macos_i386", # Arbitrarily chosen from above.
- }),
- visibility = ["//visibility:public"],
-)
-
-# Note: this also matches on crosstool_top so that it does not produce ambiguous
-# selectors when used together with "android".
-config_setting(
- name = "ios",
- values = {
- "crosstool_top": "@bazel_tools//tools/cpp:toolchain",
- "apple_platform_type": "ios",
- },
- visibility = ["//visibility:public"],
-)
-
-alias(
- name = "apple",
- actual = select({
- ":macos": ":macos",
- ":ios": ":ios",
- "//conditions:default": ":ios", # Arbitrarily chosen from above.
- }),
- visibility = ["//visibility:public"],
-)
-
-config_setting(
- name = "macos_i386",
- values = {
- "apple_platform_type": "macos",
- "cpu": "darwin",
- },
- visibility = ["//visibility:public"],
-)
-
-config_setting(
- name = "macos_x86_64",
- values = {
- "apple_platform_type": "macos",
- "cpu": "darwin_x86_64",
- },
- visibility = ["//visibility:public"],
-)
-
-[
- config_setting(
- name = arch,
- values = {"cpu": arch},
- visibility = ["//visibility:public"],
- )
- for arch in [
- "ios_i386",
- "ios_x86_64",
- "ios_armv7",
- "ios_arm64",
- "ios_arm64e",
- ]
-]
-
-config_setting(
- name = "windows",
- values = {"cpu": "x64_windows"},
-)
-
-exports_files(
- ["provisioning_profile.mobileprovision"],
- visibility = ["//visibility:public"],
-)
diff --git a/mediapipe/MediaPipe.tulsiproj/Configs/MediaPipe.tulsigen b/mediapipe/MediaPipe.tulsiproj/Configs/MediaPipe.tulsigen
deleted file mode 100644
index f3b74900c..000000000
--- a/mediapipe/MediaPipe.tulsiproj/Configs/MediaPipe.tulsigen
+++ /dev/null
@@ -1,137 +0,0 @@
-{
- "additionalFilePaths" : [
- "/BUILD",
- "mediapipe/BUILD",
- "mediapipe/examples/ios/common/BUILD",
- "mediapipe/examples/ios/facedetectioncpu/BUILD",
- "mediapipe/examples/ios/facedetectiongpu/BUILD",
- "mediapipe/examples/ios/faceeffect/BUILD",
- "mediapipe/examples/ios/facemeshgpu/BUILD",
- "mediapipe/examples/ios/handdetectiongpu/BUILD",
- "mediapipe/examples/ios/handtrackinggpu/BUILD",
- "mediapipe/examples/ios/helloworld/BUILD",
- "mediapipe/examples/ios/holistictrackinggpu/BUILD",
- "mediapipe/examples/ios/iristrackinggpu/BUILD",
- "mediapipe/examples/ios/objectdetectioncpu/BUILD",
- "mediapipe/examples/ios/objectdetectiongpu/BUILD",
- "mediapipe/examples/ios/objectdetectiontrackinggpu/BUILD",
- "mediapipe/examples/ios/posetrackinggpu/BUILD",
- "mediapipe/examples/ios/selfiesegmentationgpu/BUILD",
- "mediapipe/framework/BUILD",
- "mediapipe/gpu/BUILD",
- "mediapipe/objc/BUILD",
- "mediapipe/objc/testing/app/BUILD"
- ],
- "buildTargets" : [
- "//mediapipe/examples/ios/facedetectioncpu:FaceDetectionCpuApp",
- "//mediapipe/examples/ios/facedetectiongpu:FaceDetectionGpuApp",
- "//mediapipe/examples/ios/faceeffect:FaceEffectApp",
- "//mediapipe/examples/ios/facemeshgpu:FaceMeshGpuApp",
- "//mediapipe/examples/ios/handdetectiongpu:HandDetectionGpuApp",
- "//mediapipe/examples/ios/handtrackinggpu:HandTrackingGpuApp",
- "//mediapipe/examples/ios/helloworld:HelloWorldApp",
- "//mediapipe/examples/ios/holistictrackinggpu:HolisticTrackingGpuApp",
- "//mediapipe/examples/ios/iristrackinggpu:IrisTrackingGpuApp",
- "//mediapipe/examples/ios/objectdetectioncpu:ObjectDetectionCpuApp",
- "//mediapipe/examples/ios/objectdetectiongpu:ObjectDetectionGpuApp",
- "//mediapipe/examples/ios/objectdetectiontrackinggpu:ObjectDetectionTrackingGpuApp",
- "//mediapipe/examples/ios/posetrackinggpu:PoseTrackingGpuApp",
- "//mediapipe/examples/ios/selfiesegmentationgpu:SelfieSegmentationGpuApp",
- "//mediapipe/objc:mediapipe_framework_ios"
- ],
- "optionSet" : {
- "BazelBuildOptionsDebug" : {
- "p" : "$(inherited)"
- },
- "BazelBuildOptionsRelease" : {
- "p" : "$(inherited)"
- },
- "BazelBuildStartupOptionsDebug" : {
- "p" : "$(inherited)"
- },
- "BazelBuildStartupOptionsRelease" : {
- "p" : "$(inherited)"
- },
- "BuildActionPostActionScript" : {
- "p" : "$(inherited)"
- },
- "BuildActionPreActionScript" : {
- "p" : "$(inherited)"
- },
- "CommandlineArguments" : {
- "p" : "$(inherited)"
- },
- "EnvironmentVariables" : {
- "p" : "$(inherited)"
- },
- "LaunchActionPostActionScript" : {
- "p" : "$(inherited)"
- },
- "LaunchActionPreActionScript" : {
- "p" : "$(inherited)"
- },
- "ProjectGenerationBazelStartupOptions" : {
- "p" : "$(inherited)"
- },
- "TestActionPostActionScript" : {
- "p" : "$(inherited)"
- },
- "TestActionPreActionScript" : {
- "p" : "$(inherited)"
- }
- },
- "projectName" : "Mediapipe",
- "sourceFilters" : [
- "mediapipe",
- "mediapipe/calculators",
- "mediapipe/calculators/core",
- "mediapipe/calculators/image",
- "mediapipe/calculators/internal",
- "mediapipe/calculators/tflite",
- "mediapipe/calculators/util",
- "mediapipe/examples",
- "mediapipe/examples/ios",
- "mediapipe/examples/ios/common",
- "mediapipe/examples/ios/common/Base.lproj",
- "mediapipe/examples/ios/facedetectioncpu",
- "mediapipe/examples/ios/facedetectiongpu",
- "mediapipe/examples/ios/faceeffect",
- "mediapipe/examples/ios/faceeffect/Base.lproj",
- "mediapipe/examples/ios/handdetectiongpu",
- "mediapipe/examples/ios/handtrackinggpu",
- "mediapipe/examples/ios/helloworld",
- "mediapipe/examples/ios/holistictrackinggpu",
- "mediapipe/examples/ios/iristrackinggpu",
- "mediapipe/examples/ios/objectdetectioncpu",
- "mediapipe/examples/ios/objectdetectiongpu",
- "mediapipe/examples/ios/posetrackinggpu",
- "mediapipe/examples/ios/selfiesegmentationgpu",
- "mediapipe/framework",
- "mediapipe/framework/deps",
- "mediapipe/framework/formats",
- "mediapipe/framework/formats/annotation",
- "mediapipe/framework/formats/object_detection",
- "mediapipe/framework/port",
- "mediapipe/framework/profiler",
- "mediapipe/framework/stream_handler",
- "mediapipe/framework/tool",
- "mediapipe/gpu",
- "mediapipe/graphs",
- "mediapipe/graphs/edge_detection",
- "mediapipe/graphs/face_detection",
- "mediapipe/graphs/face_geometry",
- "mediapipe/graphs/hand_tracking",
- "mediapipe/graphs/object_detection",
- "mediapipe/graphs/pose_tracking",
- "mediapipe/graphs/selfie_segmentation",
- "mediapipe/models",
- "mediapipe/modules",
- "mediapipe/objc",
- "mediapipe/util",
- "mediapipe/util/android",
- "mediapipe/util/android/file",
- "mediapipe/util/android/file/base",
- "mediapipe/util/tflite",
- "mediapipe/util/tflite/operations"
- ]
-}
diff --git a/mediapipe/MediaPipe.tulsiproj/project.tulsiconf b/mediapipe/MediaPipe.tulsiproj/project.tulsiconf
deleted file mode 100644
index a2fe886cf..000000000
--- a/mediapipe/MediaPipe.tulsiproj/project.tulsiconf
+++ /dev/null
@@ -1,30 +0,0 @@
-{
- "configDefaults" : {
- "optionSet" : {
- "CLANG_CXX_LANGUAGE_STANDARD" : {
- "p" : "c++14"
- }
- }
- },
- "packages" : [
- "",
- "mediapipe",
- "mediapipe/examples/ios",
- "mediapipe/examples/ios/facedetectioncpu",
- "mediapipe/examples/ios/facedetectiongpu",
- "mediapipe/examples/ios/faceeffect",
- "mediapipe/examples/ios/facemeshgpu",
- "mediapipe/examples/ios/handdetectiongpu",
- "mediapipe/examples/ios/handtrackinggpu",
- "mediapipe/examples/ios/holistictrackinggpu",
- "mediapipe/examples/ios/iristrackinggpu",
- "mediapipe/examples/ios/objectdetectioncpu",
- "mediapipe/examples/ios/objectdetectiongpu",
- "mediapipe/examples/ios/objectdetectiontrackinggpu",
- "mediapipe/examples/ios/posetrackinggpu",
- "mediapipe/examples/ios/selfiesegmentationgpu",
- "mediapipe/objc"
- ],
- "projectName" : "Mediapipe",
- "workspaceRoot" : "../.."
-}
diff --git a/mediapipe/__init__.py b/mediapipe/__init__.py
deleted file mode 100644
index 69d7dfc6f..000000000
--- a/mediapipe/__init__.py
+++ /dev/null
@@ -1,14 +0,0 @@
-"""Copyright 2019 - 2020 The MediaPipe Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
diff --git a/mediapipe/calculators/audio/BUILD b/mediapipe/calculators/audio/BUILD
deleted file mode 100644
index ed6a509dc..000000000
--- a/mediapipe/calculators/audio/BUILD
+++ /dev/null
@@ -1,357 +0,0 @@
-# Copyright 2019, 2021 The MediaPipe Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-licenses(["notice"])
-
-package(default_visibility = ["//visibility:private"])
-
-load("//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library")
-
-proto_library(
- name = "mfcc_mel_calculators_proto",
- srcs = ["mfcc_mel_calculators.proto"],
- visibility = ["//visibility:public"],
- deps = [
- "//mediapipe/framework:calculator_proto",
- ],
-)
-
-mediapipe_cc_proto_library(
- name = "mfcc_mel_calculators_cc_proto",
- srcs = ["mfcc_mel_calculators.proto"],
- cc_deps = ["//mediapipe/framework:calculator_cc_proto"],
- visibility = ["//visibility:public"],
- deps = [":mfcc_mel_calculators_proto"],
-)
-
-proto_library(
- name = "rational_factor_resample_calculator_proto",
- srcs = ["rational_factor_resample_calculator.proto"],
- visibility = ["//visibility:public"],
- deps = [
- "//mediapipe/framework:calculator_proto",
- ],
-)
-
-mediapipe_cc_proto_library(
- name = "rational_factor_resample_calculator_cc_proto",
- srcs = ["rational_factor_resample_calculator.proto"],
- cc_deps = ["//mediapipe/framework:calculator_cc_proto"],
- visibility = ["//visibility:public"],
- deps = [":rational_factor_resample_calculator_proto"],
-)
-
-proto_library(
- name = "spectrogram_calculator_proto",
- srcs = ["spectrogram_calculator.proto"],
- visibility = ["//visibility:public"],
- deps = ["//mediapipe/framework:calculator_proto"],
-)
-
-mediapipe_cc_proto_library(
- name = "spectrogram_calculator_cc_proto",
- srcs = ["spectrogram_calculator.proto"],
- cc_deps = ["//mediapipe/framework:calculator_cc_proto"],
- visibility = ["//visibility:public"],
- deps = [":spectrogram_calculator_proto"],
-)
-
-proto_library(
- name = "stabilized_log_calculator_proto",
- srcs = ["stabilized_log_calculator.proto"],
- visibility = ["//visibility:public"],
- deps = [
- "//mediapipe/framework:calculator_proto",
- ],
-)
-
-mediapipe_cc_proto_library(
- name = "stabilized_log_calculator_cc_proto",
- srcs = ["stabilized_log_calculator.proto"],
- cc_deps = ["//mediapipe/framework:calculator_cc_proto"],
- visibility = ["//visibility:public"],
- deps = [":stabilized_log_calculator_proto"],
-)
-
-proto_library(
- name = "time_series_framer_calculator_proto",
- srcs = ["time_series_framer_calculator.proto"],
- visibility = ["//visibility:public"],
- deps = [
- "//mediapipe/framework:calculator_proto",
- ],
-)
-
-mediapipe_cc_proto_library(
- name = "time_series_framer_calculator_cc_proto",
- srcs = ["time_series_framer_calculator.proto"],
- cc_deps = ["//mediapipe/framework:calculator_cc_proto"],
- visibility = ["//visibility:public"],
- deps = [":time_series_framer_calculator_proto"],
-)
-
-cc_library(
- name = "audio_decoder_calculator",
- srcs = ["audio_decoder_calculator.cc"],
- visibility = ["//visibility:public"],
- deps = [
- "//mediapipe/framework:calculator_framework",
- "//mediapipe/framework/formats:matrix",
- "//mediapipe/framework/port:logging",
- "//mediapipe/framework/port:status",
- "//mediapipe/util:audio_decoder",
- "//mediapipe/util:audio_decoder_cc_proto",
- ],
- alwayslink = 1,
-)
-
-cc_library(
- name = "basic_time_series_calculators",
- srcs = ["basic_time_series_calculators.cc"],
- hdrs = ["basic_time_series_calculators.h"],
- visibility = ["//visibility:public"],
- deps = [
- "//mediapipe/framework:calculator_framework",
- "//mediapipe/framework/formats:matrix",
- "//mediapipe/framework/formats:time_series_header_cc_proto",
- "//mediapipe/framework/port:ret_check",
- "//mediapipe/util:time_series_util",
- "@com_google_absl//absl/strings",
- "@eigen_archive//:eigen3",
- ],
- alwayslink = 1,
-)
-
-cc_library(
- name = "mfcc_mel_calculators",
- srcs = ["mfcc_mel_calculators.cc"],
- visibility = ["//visibility:public"],
- deps = [
- ":mfcc_mel_calculators_cc_proto",
- "//mediapipe/framework:calculator_framework",
- "//mediapipe/framework/formats:matrix",
- "//mediapipe/framework/formats:time_series_header_cc_proto",
- "//mediapipe/framework/port:logging",
- "//mediapipe/framework/port:status",
- "//mediapipe/util:time_series_util",
- "@com_google_absl//absl/strings",
- "@com_google_audio_tools//audio/dsp/mfcc",
- "@eigen_archive//:eigen3",
- ],
- alwayslink = 1,
-)
-
-cc_library(
- name = "rational_factor_resample_calculator",
- srcs = ["rational_factor_resample_calculator.cc"],
- hdrs = ["rational_factor_resample_calculator.h"],
- visibility = ["//visibility:public"],
- deps = [
- ":rational_factor_resample_calculator_cc_proto",
- "//mediapipe/framework:calculator_framework",
- "//mediapipe/framework/formats:matrix",
- "//mediapipe/framework/formats:time_series_header_cc_proto",
- "//mediapipe/framework/port:integral_types",
- "//mediapipe/framework/port:logging",
- "//mediapipe/util:time_series_util",
- "@com_google_absl//absl/strings",
- "@com_google_audio_tools//audio/dsp:resampler",
- "@com_google_audio_tools//audio/dsp:resampler_q",
- "@eigen_archive//:eigen3",
- ],
- alwayslink = 1,
-)
-
-cc_library(
- name = "stabilized_log_calculator",
- srcs = ["stabilized_log_calculator.cc"],
- visibility = ["//visibility:public"],
- deps = [
- ":stabilized_log_calculator_cc_proto",
- "//mediapipe/framework:calculator_framework",
- "//mediapipe/framework/formats:matrix",
- "//mediapipe/framework/formats:time_series_header_cc_proto",
- "//mediapipe/framework/port:core_proto",
- "//mediapipe/framework/port:status",
- "//mediapipe/util:time_series_util",
- ],
- alwayslink = 1,
-)
-
-cc_library(
- name = "spectrogram_calculator",
- srcs = ["spectrogram_calculator.cc"],
- visibility = ["//visibility:public"],
- deps = [
- ":spectrogram_calculator_cc_proto",
- "//mediapipe/framework:calculator_framework",
- "//mediapipe/framework/formats:matrix",
- "//mediapipe/framework/formats:time_series_header_cc_proto",
- "//mediapipe/framework/port:core_proto",
- "//mediapipe/framework/port:integral_types",
- "//mediapipe/framework/port:logging",
- "//mediapipe/framework/port:ret_check",
- "//mediapipe/framework/port:source_location",
- "//mediapipe/framework/port:status",
- "//mediapipe/util:time_series_util",
- "@com_google_absl//absl/strings",
- "@com_google_audio_tools//audio/dsp:window_functions",
- "@com_google_audio_tools//audio/dsp/spectrogram",
- "@eigen_archive//:eigen3",
- ],
- alwayslink = 1,
-)
-
-cc_library(
- name = "time_series_framer_calculator",
- srcs = ["time_series_framer_calculator.cc"],
- visibility = ["//visibility:public"],
- deps = [
- ":time_series_framer_calculator_cc_proto",
- "//mediapipe/framework:calculator_framework",
- "//mediapipe/framework/formats:matrix",
- "//mediapipe/framework/formats:time_series_header_cc_proto",
- "//mediapipe/framework/port:integral_types",
- "//mediapipe/framework/port:logging",
- "//mediapipe/framework/port:ret_check",
- "//mediapipe/framework/port:status",
- "//mediapipe/util:time_series_util",
- "@com_google_audio_tools//audio/dsp:window_functions",
- "@eigen_archive//:eigen3",
- ],
- alwayslink = 1,
-)
-
-cc_test(
- name = "audio_decoder_calculator_test",
- srcs = ["audio_decoder_calculator_test.cc"],
- data = ["//mediapipe/calculators/audio/testdata:test_audios"],
- deps = [
- ":audio_decoder_calculator",
- "//mediapipe/framework:calculator_runner",
- "//mediapipe/framework/deps:file_path",
- "//mediapipe/framework/formats:time_series_header_cc_proto",
- "//mediapipe/framework/port:gtest_main",
- "//mediapipe/framework/port:parse_text_proto",
- "@com_google_absl//absl/flags:flag",
- ],
-)
-
-cc_test(
- name = "basic_time_series_calculators_test",
- srcs = ["basic_time_series_calculators_test.cc"],
- deps = [
- ":basic_time_series_calculators",
- "//mediapipe/framework:calculator_framework",
- "//mediapipe/framework:calculator_runner",
- "//mediapipe/framework/formats:matrix",
- "//mediapipe/framework/formats:time_series_header_cc_proto",
- "//mediapipe/framework/port:gtest_main",
- "//mediapipe/framework/port:integral_types",
- "//mediapipe/framework/port:parse_text_proto",
- "//mediapipe/util:time_series_test_util",
- "@eigen_archive//:eigen3",
- ],
-)
-
-cc_test(
- name = "mfcc_mel_calculators_test",
- srcs = ["mfcc_mel_calculators_test.cc"],
- deps = [
- ":mfcc_mel_calculators",
- ":mfcc_mel_calculators_cc_proto",
- "//mediapipe/framework:calculator_framework",
- "//mediapipe/framework/formats:matrix",
- "//mediapipe/framework/port:gtest_main",
- "//mediapipe/framework/port:status",
- "//mediapipe/util:time_series_test_util",
- "@eigen_archive//:eigen3",
- ],
-)
-
-cc_test(
- name = "spectrogram_calculator_test",
- srcs = ["spectrogram_calculator_test.cc"],
- deps = [
- ":spectrogram_calculator",
- ":spectrogram_calculator_cc_proto",
- "//mediapipe/framework:calculator_framework",
- "//mediapipe/framework:calculator_runner",
- "//mediapipe/framework/formats:matrix",
- "//mediapipe/framework/formats:time_series_header_cc_proto",
- "//mediapipe/framework/port:benchmark",
- "//mediapipe/framework/port:gtest_main",
- "//mediapipe/framework/port:integral_types",
- "//mediapipe/framework/port:status",
- "//mediapipe/util:time_series_test_util",
- "@com_google_audio_tools//audio/dsp:number_util",
- "@eigen_archive//:eigen3",
- ],
-)
-
-cc_test(
- name = "stabilized_log_calculator_test",
- srcs = ["stabilized_log_calculator_test.cc"],
- deps = [
- ":stabilized_log_calculator",
- ":stabilized_log_calculator_cc_proto",
- "//mediapipe/framework:calculator_framework",
- "//mediapipe/framework:calculator_runner",
- "//mediapipe/framework/formats:matrix",
- "//mediapipe/framework/formats:time_series_header_cc_proto",
- "//mediapipe/framework/port:gtest_main",
- "//mediapipe/framework/port:integral_types",
- "//mediapipe/framework/port:status",
- "//mediapipe/util:time_series_test_util",
- "@eigen_archive//:eigen3",
- ],
-)
-
-cc_test(
- name = "time_series_framer_calculator_test",
- srcs = ["time_series_framer_calculator_test.cc"],
- deps = [
- ":time_series_framer_calculator",
- ":time_series_framer_calculator_cc_proto",
- "//mediapipe/framework:calculator_framework",
- "//mediapipe/framework:calculator_runner",
- "//mediapipe/framework/formats:matrix",
- "//mediapipe/framework/formats:time_series_header_cc_proto",
- "//mediapipe/framework/port:gtest_main",
- "//mediapipe/framework/port:integral_types",
- "//mediapipe/framework/port:status",
- "//mediapipe/util:time_series_test_util",
- "@com_google_audio_tools//audio/dsp:window_functions",
- "@eigen_archive//:eigen3",
- ],
-)
-
-cc_test(
- name = "rational_factor_resample_calculator_test",
- srcs = ["rational_factor_resample_calculator_test.cc"],
- deps = [
- ":rational_factor_resample_calculator",
- ":rational_factor_resample_calculator_cc_proto",
- "//mediapipe/framework:calculator_framework",
- "//mediapipe/framework:calculator_runner",
- "//mediapipe/framework/formats:matrix",
- "//mediapipe/framework/formats:time_series_header_cc_proto",
- "//mediapipe/framework/port:gtest_main",
- "//mediapipe/framework/port:status",
- "//mediapipe/framework/tool:validate_type",
- "//mediapipe/util:time_series_test_util",
- "@com_google_audio_tools//audio/dsp:signal_vector_util",
- "@eigen_archive//:eigen3",
- ],
-)
diff --git a/mediapipe/calculators/audio/audio_decoder_calculator.cc b/mediapipe/calculators/audio/audio_decoder_calculator.cc
deleted file mode 100644
index 49c201b37..000000000
--- a/mediapipe/calculators/audio/audio_decoder_calculator.cc
+++ /dev/null
@@ -1,109 +0,0 @@
-// Copyright 2019 The MediaPipe Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "mediapipe/framework/calculator_framework.h"
-#include "mediapipe/framework/formats/matrix.h"
-#include "mediapipe/framework/port/logging.h"
-#include "mediapipe/framework/port/status.h"
-#include "mediapipe/util/audio_decoder.h"
-#include "mediapipe/util/audio_decoder.pb.h"
-
-namespace mediapipe {
-
-// The AudioDecoderCalculator decodes an audio stream of the media file. It
-// produces two output streams contain audio packets and the header infomation.
-//
-// Output Streams:
-// AUDIO: Output audio frames (Matrix).
-// AUDIO_HEADER:
-// Optional audio header information output
-// Input Side Packets:
-// INPUT_FILE_PATH: The input file path.
-//
-// Example config:
-// node {
-// calculator: "AudioDecoderCalculator"
-// input_side_packet: "INPUT_FILE_PATH:input_file_path"
-// output_stream: "AUDIO:audio"
-// output_stream: "AUDIO_HEADER:audio_header"
-// node_options {
-// [type.googleapis.com/mediapipe.AudioDecoderOptions]: {
-// audio_stream { stream_index: 0 }
-// start_time: 0
-// end_time: 1
-// }
-// }
-//
-// TODO: support decoding multiple streams.
-class AudioDecoderCalculator : public CalculatorBase {
- public:
- static absl::Status GetContract(CalculatorContract* cc);
-
- absl::Status Open(CalculatorContext* cc) override;
- absl::Status Process(CalculatorContext* cc) override;
- absl::Status Close(CalculatorContext* cc) override;
-
- private:
- std::unique_ptr decoder_;
-};
-
-absl::Status AudioDecoderCalculator::GetContract(CalculatorContract* cc) {
- cc->InputSidePackets().Tag("INPUT_FILE_PATH").Set();
- if (cc->InputSidePackets().HasTag("OPTIONS")) {
- cc->InputSidePackets().Tag("OPTIONS").Set();
- }
- cc->Outputs().Tag("AUDIO").Set();
- if (cc->Outputs().HasTag("AUDIO_HEADER")) {
- cc->Outputs().Tag("AUDIO_HEADER").SetNone();
- }
- return absl::OkStatus();
-}
-
-absl::Status AudioDecoderCalculator::Open(CalculatorContext* cc) {
- const std::string& input_file_path =
- cc->InputSidePackets().Tag("INPUT_FILE_PATH").Get();
- const auto& decoder_options =
- tool::RetrieveOptions(cc->Options(),
- cc->InputSidePackets(), "OPTIONS");
- decoder_ = absl::make_unique();
- MP_RETURN_IF_ERROR(decoder_->Initialize(input_file_path, decoder_options));
- std::unique_ptr header =
- absl::make_unique();
- if (decoder_->FillAudioHeader(decoder_options.audio_stream(0), header.get())
- .ok()) {
- // Only pass on a header if the decoder could actually produce one.
- // otherwise, the header will be empty.
- cc->Outputs().Tag("AUDIO_HEADER").SetHeader(Adopt(header.release()));
- }
- cc->Outputs().Tag("AUDIO_HEADER").Close();
- return absl::OkStatus();
-}
-
-absl::Status AudioDecoderCalculator::Process(CalculatorContext* cc) {
- Packet data;
- int options_index = -1;
- auto status = decoder_->GetData(&options_index, &data);
- if (status.ok()) {
- cc->Outputs().Tag("AUDIO").AddPacket(data);
- }
- return status;
-}
-
-absl::Status AudioDecoderCalculator::Close(CalculatorContext* cc) {
- return decoder_->Close();
-}
-
-REGISTER_CALCULATOR(AudioDecoderCalculator);
-
-} // namespace mediapipe
diff --git a/mediapipe/calculators/audio/audio_decoder_calculator_test.cc b/mediapipe/calculators/audio/audio_decoder_calculator_test.cc
deleted file mode 100644
index 8e3babeb0..000000000
--- a/mediapipe/calculators/audio/audio_decoder_calculator_test.cc
+++ /dev/null
@@ -1,150 +0,0 @@
-// Copyright 2019 The MediaPipe Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "absl/flags/flag.h"
-#include "mediapipe/framework/calculator_runner.h"
-#include "mediapipe/framework/deps/file_path.h"
-#include "mediapipe/framework/formats/time_series_header.pb.h"
-#include "mediapipe/framework/port/gmock.h"
-#include "mediapipe/framework/port/gtest.h"
-#include "mediapipe/framework/port/parse_text_proto.h"
-#include "mediapipe/framework/port/status_matchers.h"
-
-namespace mediapipe {
-
-TEST(AudioDecoderCalculatorTest, TestWAV) {
- CalculatorGraphConfig::Node node_config =
- ParseTextProtoOrDie(R"pb(
- calculator: "AudioDecoderCalculator"
- input_side_packet: "INPUT_FILE_PATH:input_file_path"
- output_stream: "AUDIO:audio"
- output_stream: "AUDIO_HEADER:audio_header"
- node_options {
- [type.googleapis.com/mediapipe.AudioDecoderOptions]: {
- audio_stream { stream_index: 0 }
- }
- })pb");
- CalculatorRunner runner(node_config);
- runner.MutableSidePackets()->Tag("INPUT_FILE_PATH") = MakePacket(
- file::JoinPath("./",
- "/mediapipe/calculators/audio/"
- "testdata/sine_wave_1k_44100_mono_2_sec_wav.audio"));
- MP_ASSERT_OK(runner.Run());
- MP_EXPECT_OK(runner.Outputs()
- .Tag("AUDIO_HEADER")
- .header.ValidateAsType());
- const mediapipe::TimeSeriesHeader& header =
- runner.Outputs()
- .Tag("AUDIO_HEADER")
- .header.Get();
- EXPECT_EQ(44100, header.sample_rate());
- EXPECT_EQ(1, header.num_channels());
- EXPECT_TRUE(runner.Outputs().Tag("AUDIO").packets.size() >=
- std::ceil(44100.0 * 2 / 2048));
-}
-
-TEST(AudioDecoderCalculatorTest, Test48KWAV) {
- CalculatorGraphConfig::Node node_config =
- ParseTextProtoOrDie(R"pb(
- calculator: "AudioDecoderCalculator"
- input_side_packet: "INPUT_FILE_PATH:input_file_path"
- output_stream: "AUDIO:audio"
- output_stream: "AUDIO_HEADER:audio_header"
- node_options {
- [type.googleapis.com/mediapipe.AudioDecoderOptions]: {
- audio_stream { stream_index: 0 }
- }
- })pb");
- CalculatorRunner runner(node_config);
- runner.MutableSidePackets()->Tag("INPUT_FILE_PATH") = MakePacket(
- file::JoinPath("./",
- "/mediapipe/calculators/audio/"
- "testdata/sine_wave_1k_48000_stereo_2_sec_wav.audio"));
- MP_ASSERT_OK(runner.Run());
- MP_EXPECT_OK(runner.Outputs()
- .Tag("AUDIO_HEADER")
- .header.ValidateAsType());
- const mediapipe::TimeSeriesHeader& header =
- runner.Outputs()
- .Tag("AUDIO_HEADER")
- .header.Get();
- EXPECT_EQ(48000, header.sample_rate());
- EXPECT_EQ(2, header.num_channels());
- EXPECT_TRUE(runner.Outputs().Tag("AUDIO").packets.size() >=
- std::ceil(48000.0 * 2 / 1024));
-}
-
-TEST(AudioDecoderCalculatorTest, TestMP3) {
- CalculatorGraphConfig::Node node_config =
- ParseTextProtoOrDie(R"pb(
- calculator: "AudioDecoderCalculator"
- input_side_packet: "INPUT_FILE_PATH:input_file_path"
- output_stream: "AUDIO:audio"
- output_stream: "AUDIO_HEADER:audio_header"
- node_options {
- [type.googleapis.com/mediapipe.AudioDecoderOptions]: {
- audio_stream { stream_index: 0 }
- }
- })pb");
- CalculatorRunner runner(node_config);
- runner.MutableSidePackets()->Tag("INPUT_FILE_PATH") = MakePacket(
- file::JoinPath("./",
- "/mediapipe/calculators/audio/"
- "testdata/sine_wave_1k_44100_stereo_2_sec_mp3.audio"));
- MP_ASSERT_OK(runner.Run());
- MP_EXPECT_OK(runner.Outputs()
- .Tag("AUDIO_HEADER")
- .header.ValidateAsType());
- const mediapipe::TimeSeriesHeader& header =
- runner.Outputs()
- .Tag("AUDIO_HEADER")
- .header.Get();
- EXPECT_EQ(44100, header.sample_rate());
- EXPECT_EQ(2, header.num_channels());
- EXPECT_TRUE(runner.Outputs().Tag("AUDIO").packets.size() >=
- std::ceil(44100.0 * 2 / 1152));
-}
-
-TEST(AudioDecoderCalculatorTest, TestAAC) {
- CalculatorGraphConfig::Node node_config =
- ParseTextProtoOrDie(R"pb(
- calculator: "AudioDecoderCalculator"
- input_side_packet: "INPUT_FILE_PATH:input_file_path"
- output_stream: "AUDIO:audio"
- output_stream: "AUDIO_HEADER:audio_header"
- node_options {
- [type.googleapis.com/mediapipe.AudioDecoderOptions]: {
- audio_stream { stream_index: 0 }
- }
- })pb");
- CalculatorRunner runner(node_config);
- runner.MutableSidePackets()->Tag("INPUT_FILE_PATH") = MakePacket(
- file::JoinPath("./",
- "/mediapipe/calculators/audio/"
- "testdata/sine_wave_1k_44100_stereo_2_sec_aac.audio"));
- MP_ASSERT_OK(runner.Run());
- MP_EXPECT_OK(runner.Outputs()
- .Tag("AUDIO_HEADER")
- .header.ValidateAsType());
- const mediapipe::TimeSeriesHeader& header =
- runner.Outputs()
- .Tag("AUDIO_HEADER")
- .header.Get();
- EXPECT_EQ(44100, header.sample_rate());
- EXPECT_EQ(2, header.num_channels());
- EXPECT_TRUE(runner.Outputs().Tag("AUDIO").packets.size() >=
- std::ceil(44100.0 * 2 / 1024));
-}
-
-} // namespace mediapipe
diff --git a/mediapipe/calculators/audio/basic_time_series_calculators.cc b/mediapipe/calculators/audio/basic_time_series_calculators.cc
deleted file mode 100644
index f7b24f6f6..000000000
--- a/mediapipe/calculators/audio/basic_time_series_calculators.cc
+++ /dev/null
@@ -1,405 +0,0 @@
-// Copyright 2019 The MediaPipe Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-// Basic Calculators that operate on TimeSeries streams.
-#include "mediapipe/calculators/audio/basic_time_series_calculators.h"
-
-#include
-#include
-
-#include "Eigen/Core"
-#include "absl/strings/str_cat.h"
-#include "mediapipe/framework/port/ret_check.h"
-#include "mediapipe/util/time_series_util.h"
-
-namespace mediapipe {
-namespace {
-static bool SafeMultiply(int x, int y, int* result) {
- static_assert(sizeof(int64) >= 2 * sizeof(int),
- "Unable to detect overflow after multiplication");
- const int64 big = static_cast(x) * static_cast(y);
- if (big > static_cast(INT_MIN) && big < static_cast(INT_MAX)) {
- if (result != nullptr) *result = static_cast(big);
- return true;
- } else {
- return false;
- }
-}
-} // namespace
-
-absl::Status BasicTimeSeriesCalculatorBase::GetContract(
- CalculatorContract* cc) {
- cc->Inputs().Index(0).Set(
- // Input stream with TimeSeriesHeader.
- );
- cc->Outputs().Index(0).Set(
- // Output stream with TimeSeriesHeader.
- );
- return absl::OkStatus();
-}
-
-absl::Status BasicTimeSeriesCalculatorBase::Open(CalculatorContext* cc) {
- TimeSeriesHeader input_header;
- MP_RETURN_IF_ERROR(time_series_util::FillTimeSeriesHeaderIfValid(
- cc->Inputs().Index(0).Header(), &input_header));
-
- auto output_header = new TimeSeriesHeader(input_header);
- MP_RETURN_IF_ERROR(MutateHeader(output_header));
- cc->Outputs().Index(0).SetHeader(Adopt(output_header));
-
- cc->SetOffset(0);
-
- return absl::OkStatus();
-}
-
-absl::Status BasicTimeSeriesCalculatorBase::Process(CalculatorContext* cc) {
- const Matrix& input = cc->Inputs().Index(0).Get();
- MP_RETURN_IF_ERROR(time_series_util::IsMatrixShapeConsistentWithHeader(
- input, cc->Inputs().Index(0).Header().Get()));
-
- std::unique_ptr output(new Matrix(ProcessMatrix(input)));
- MP_RETURN_IF_ERROR(time_series_util::IsMatrixShapeConsistentWithHeader(
- *output, cc->Outputs().Index(0).Header().Get()));
-
- cc->Outputs().Index(0).Add(output.release(), cc->InputTimestamp());
- return absl::OkStatus();
-}
-
-absl::Status BasicTimeSeriesCalculatorBase::MutateHeader(
- TimeSeriesHeader* output_header) {
- return absl::OkStatus();
-}
-
-// Calculator to sum an input time series across channels. This is
-// useful for e.g. computing 'summary SAI' pitchogram features.
-//
-// Options proto: None.
-class SumTimeSeriesAcrossChannelsCalculator
- : public BasicTimeSeriesCalculatorBase {
- protected:
- absl::Status MutateHeader(TimeSeriesHeader* output_header) final {
- output_header->set_num_channels(1);
- return absl::OkStatus();
- }
-
- Matrix ProcessMatrix(const Matrix& input_matrix) final {
- return input_matrix.colwise().sum();
- }
-};
-REGISTER_CALCULATOR(SumTimeSeriesAcrossChannelsCalculator);
-
-// Calculator to average an input time series across channels. This is
-// useful for e.g. converting stereo or multi-channel files to mono.
-//
-// Options proto: None.
-class AverageTimeSeriesAcrossChannelsCalculator
- : public BasicTimeSeriesCalculatorBase {
- protected:
- absl::Status MutateHeader(TimeSeriesHeader* output_header) final {
- output_header->set_num_channels(1);
- return absl::OkStatus();
- }
-
- Matrix ProcessMatrix(const Matrix& input_matrix) final {
- return input_matrix.colwise().mean();
- }
-};
-REGISTER_CALCULATOR(AverageTimeSeriesAcrossChannelsCalculator);
-
-// Calculator to convert a (temporal) summary SAI stream (a single-channel
-// stream output by SumTimeSeriesAcrossChannelsCalculator) into pitchogram
-// frames by transposing the input packets, swapping the time and channel axes.
-//
-// Options proto: None.
-class SummarySaiToPitchogramCalculator : public BasicTimeSeriesCalculatorBase {
- protected:
- absl::Status MutateHeader(TimeSeriesHeader* output_header) final {
- if (output_header->num_channels() != 1) {
- return tool::StatusInvalid(
- absl::StrCat("Expected single-channel input, got ",
- output_header->num_channels()));
- }
- output_header->set_num_channels(output_header->num_samples());
- output_header->set_num_samples(1);
- output_header->set_sample_rate(output_header->packet_rate());
- return absl::OkStatus();
- }
-
- Matrix ProcessMatrix(const Matrix& input_matrix) final {
- return input_matrix.transpose();
- }
-};
-REGISTER_CALCULATOR(SummarySaiToPitchogramCalculator);
-
-// Calculator to reverse the order of channels in TimeSeries packets.
-// This is useful for e.g. interfacing with the speech pipeline which uses the
-// opposite convention to the hearing filterbanks.
-//
-// Options proto: None.
-class ReverseChannelOrderCalculator : public BasicTimeSeriesCalculatorBase {
- protected:
- Matrix ProcessMatrix(const Matrix& input_matrix) final {
- return input_matrix.colwise().reverse();
- }
-};
-REGISTER_CALCULATOR(ReverseChannelOrderCalculator);
-
-// Calculator to flatten all samples in a TimeSeries packet down into
-// a single 'sample' vector. This is useful for e.g. stacking several
-// frames of features into a single feature vector.
-//
-// Options proto: None.
-class FlattenPacketCalculator : public BasicTimeSeriesCalculatorBase {
- protected:
- absl::Status MutateHeader(TimeSeriesHeader* output_header) final {
- const int num_input_channels = output_header->num_channels();
- const int num_input_samples = output_header->num_samples();
- RET_CHECK(num_input_channels >= 0)
- << "FlattenPacketCalculator: num_input_channels < 0";
- RET_CHECK(num_input_samples >= 0)
- << "FlattenPacketCalculator: num_input_samples < 0";
- int output_num_channels;
- RET_CHECK(SafeMultiply(num_input_channels, num_input_samples,
- &output_num_channels))
- << "FlattenPacketCalculator: Multiplication failed.";
- output_header->set_num_channels(output_num_channels);
- output_header->set_num_samples(1);
- output_header->set_sample_rate(output_header->packet_rate());
- return absl::OkStatus();
- }
-
- Matrix ProcessMatrix(const Matrix& input_matrix) final {
- // Flatten by interleaving channels so that full samples are
- // stacked on top of each other instead of interleaving samples
- // from the same channel.
- Matrix output(input_matrix.size(), 1);
- for (int sample = 0; sample < input_matrix.cols(); ++sample) {
- output.middleRows(sample * input_matrix.rows(), input_matrix.rows()) =
- input_matrix.col(sample);
- }
- return output;
- }
-};
-REGISTER_CALCULATOR(FlattenPacketCalculator);
-
-// Calculator to subtract the within-packet mean for each channel from each
-// corresponding channel.
-//
-// Options proto: None.
-class SubtractMeanCalculator : public BasicTimeSeriesCalculatorBase {
- protected:
- Matrix ProcessMatrix(const Matrix& input_matrix) final {
- Matrix mean = input_matrix.rowwise().mean();
- return input_matrix - mean.replicate(1, input_matrix.cols());
- }
-};
-REGISTER_CALCULATOR(SubtractMeanCalculator);
-
-// Calculator to subtract the mean over all values (across all times and
-// channels) in a Packet from the values in that Packet.
-//
-// Options proto: None.
-class SubtractMeanAcrossChannelsCalculator
- : public BasicTimeSeriesCalculatorBase {
- protected:
- Matrix ProcessMatrix(const Matrix& input_matrix) final {
- auto mean = input_matrix.mean();
- return (input_matrix.array() - mean).matrix();
- }
-};
-REGISTER_CALCULATOR(SubtractMeanAcrossChannelsCalculator);
-
-// Calculator to divide all values in a Packet by the average value across all
-// times and channels in the packet. This is useful for normalizing
-// nonnegative quantities like power, but might cause unexpected results if used
-// with Packets that can contain negative numbers.
-//
-// If mean is exactly zero, the output will be a matrix of all ones, because
-// that's what happens in other cases where all values are equal.
-//
-// Options proto: None.
-class DivideByMeanAcrossChannelsCalculator
- : public BasicTimeSeriesCalculatorBase {
- protected:
- Matrix ProcessMatrix(const Matrix& input_matrix) final {
- auto mean = input_matrix.mean();
-
- if (mean != 0) {
- return input_matrix / mean;
-
- // When used with nonnegative matrices, the mean will only be zero if the
- // entire matrix is exactly zero. If mean is exactly zero, the output will
- // be a matrix of all ones, because that's what happens in other cases
- // where
- // all values are equal.
- } else {
- return Matrix::Ones(input_matrix.rows(), input_matrix.cols());
- }
- }
-};
-REGISTER_CALCULATOR(DivideByMeanAcrossChannelsCalculator);
-
-// Calculator to calculate the mean for each channel.
-//
-// Options proto: None.
-class MeanCalculator : public BasicTimeSeriesCalculatorBase {
- protected:
- absl::Status MutateHeader(TimeSeriesHeader* output_header) final {
- output_header->set_num_samples(1);
- output_header->set_sample_rate(output_header->packet_rate());
- return absl::OkStatus();
- }
-
- Matrix ProcessMatrix(const Matrix& input_matrix) final {
- return input_matrix.rowwise().mean();
- }
-};
-REGISTER_CALCULATOR(MeanCalculator);
-
-// Calculator to calculate the uncorrected sample standard deviation in each
-// channel, independently for each Packet. I.e. divide by the number of samples
-// in the Packet, not ( - 1).
-//
-// Options proto: None.
-class StandardDeviationCalculator : public BasicTimeSeriesCalculatorBase {
- protected:
- absl::Status MutateHeader(TimeSeriesHeader* output_header) final {
- output_header->set_num_samples(1);
- output_header->set_sample_rate(output_header->packet_rate());
- return absl::OkStatus();
- }
-
- Matrix ProcessMatrix(const Matrix& input_matrix) final {
- Eigen::VectorXf mean = input_matrix.rowwise().mean();
- return (input_matrix.colwise() - mean).rowwise().norm() /
- sqrt(input_matrix.cols());
- }
-};
-REGISTER_CALCULATOR(StandardDeviationCalculator);
-
-// Calculator to calculate the covariance matrix. If the input matrix
-// has N channels, the output matrix will be an N by N symmetric
-// matrix.
-//
-// Options proto: None.
-class CovarianceCalculator : public BasicTimeSeriesCalculatorBase {
- protected:
- absl::Status MutateHeader(TimeSeriesHeader* output_header) final {
- output_header->set_num_samples(output_header->num_channels());
- return absl::OkStatus();
- }
-
- Matrix ProcessMatrix(const Matrix& input_matrix) final {
- auto mean = input_matrix.rowwise().mean();
- auto zero_mean_input =
- input_matrix - mean.replicate(1, input_matrix.cols());
- return (zero_mean_input * zero_mean_input.transpose()) /
- input_matrix.cols();
- }
-};
-REGISTER_CALCULATOR(CovarianceCalculator);
-
-// Calculator to get the per column L2 norm of an input time series.
-//
-// Options proto: None.
-class L2NormCalculator : public BasicTimeSeriesCalculatorBase {
- protected:
- absl::Status MutateHeader(TimeSeriesHeader* output_header) final {
- output_header->set_num_channels(1);
- return absl::OkStatus();
- }
-
- Matrix ProcessMatrix(const Matrix& input_matrix) final {
- return input_matrix.colwise().norm();
- }
-};
-REGISTER_CALCULATOR(L2NormCalculator);
-
-// Calculator to convert each column of a matrix to a unit vector.
-//
-// Options proto: None.
-class L2NormalizeColumnCalculator : public BasicTimeSeriesCalculatorBase {
- protected:
- Matrix ProcessMatrix(const Matrix& input_matrix) final {
- return input_matrix.colwise().normalized();
- }
-};
-REGISTER_CALCULATOR(L2NormalizeColumnCalculator);
-
-// Calculator to apply L2 normalization to the input matrix.
-//
-// Returns the matrix as is if the RMS is <= 1E-8.
-// Options proto: None.
-class L2NormalizeCalculator : public BasicTimeSeriesCalculatorBase {
- protected:
- Matrix ProcessMatrix(const Matrix& input_matrix) final {
- constexpr double kEpsilon = 1e-8;
- double rms = std::sqrt(input_matrix.array().square().mean());
- if (rms <= kEpsilon) {
- return input_matrix;
- }
- return input_matrix / rms;
- }
-};
-REGISTER_CALCULATOR(L2NormalizeCalculator);
-
-// Calculator to apply Peak normalization to the input matrix.
-//
-// Returns the matrix as is if the peak is <= 1E-8.
-// Options proto: None.
-class PeakNormalizeCalculator : public BasicTimeSeriesCalculatorBase {
- protected:
- Matrix ProcessMatrix(const Matrix& input_matrix) final {
- constexpr double kEpsilon = 1e-8;
- double max_pcm = input_matrix.cwiseAbs().maxCoeff();
- if (max_pcm <= kEpsilon) {
- return input_matrix;
- }
- return input_matrix / max_pcm;
- }
-};
-REGISTER_CALCULATOR(PeakNormalizeCalculator);
-
-// Calculator to compute the elementwise square of an input time series.
-//
-// Options proto: None.
-class ElementwiseSquareCalculator : public BasicTimeSeriesCalculatorBase {
- protected:
- Matrix ProcessMatrix(const Matrix& input_matrix) final {
- return input_matrix.array().square();
- }
-};
-REGISTER_CALCULATOR(ElementwiseSquareCalculator);
-
-// Calculator that outputs first floor(num_samples / 2) of the samples.
-//
-// Options proto: None.
-class FirstHalfSlicerCalculator : public BasicTimeSeriesCalculatorBase {
- protected:
- absl::Status MutateHeader(TimeSeriesHeader* output_header) final {
- const int num_input_samples = output_header->num_samples();
- RET_CHECK(num_input_samples >= 0)
- << "FirstHalfSlicerCalculator: num_input_samples < 0";
- output_header->set_num_samples(num_input_samples / 2);
- return absl::OkStatus();
- }
-
- Matrix ProcessMatrix(const Matrix& input_matrix) final {
- return input_matrix.block(0, 0, input_matrix.rows(),
- input_matrix.cols() / 2);
- }
-};
-REGISTER_CALCULATOR(FirstHalfSlicerCalculator);
-
-} // namespace mediapipe
diff --git a/mediapipe/calculators/audio/basic_time_series_calculators.h b/mediapipe/calculators/audio/basic_time_series_calculators.h
deleted file mode 100644
index ef31f3448..000000000
--- a/mediapipe/calculators/audio/basic_time_series_calculators.h
+++ /dev/null
@@ -1,48 +0,0 @@
-// Copyright 2019 The MediaPipe Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-// Abstract base class for basic MediaPipe calculators that operate on
-// TimeSeries streams and don't require any Options protos.
-// Subclasses must override ProcessMatrix, and optionally
-// MutateHeader.
-
-#ifndef MEDIAPIPE_CALCULATORS_AUDIO_BASIC_TIME_SERIES_CALCULATORS_H_
-#define MEDIAPIPE_CALCULATORS_AUDIO_BASIC_TIME_SERIES_CALCULATORS_H_
-
-#include "mediapipe/framework/calculator_framework.h"
-#include "mediapipe/framework/formats/matrix.h"
-#include "mediapipe/framework/formats/time_series_header.pb.h"
-
-namespace mediapipe {
-
-class BasicTimeSeriesCalculatorBase : public CalculatorBase {
- public:
- static absl::Status GetContract(CalculatorContract* cc);
- absl::Status Open(CalculatorContext* cc) final;
- absl::Status Process(CalculatorContext* cc) final;
-
- protected:
- // Open() calls this method to mutate the output stream header. The input
- // to this function will contain a copy of the input stream header, so
- // subclasses that do not need to mutate the header do not need to override
- // it.
- virtual absl::Status MutateHeader(TimeSeriesHeader* output_header);
-
- // Process() calls this method on each packet to compute the output matrix.
- virtual Matrix ProcessMatrix(const Matrix& input_matrix) = 0;
-};
-
-} // namespace mediapipe
-
-#endif // MEDIAPIPE_CALCULATORS_AUDIO_BASIC_TIME_SERIES_CALCULATORS_H_
diff --git a/mediapipe/calculators/audio/basic_time_series_calculators_test.cc b/mediapipe/calculators/audio/basic_time_series_calculators_test.cc
deleted file mode 100644
index 7211b83fe..000000000
--- a/mediapipe/calculators/audio/basic_time_series_calculators_test.cc
+++ /dev/null
@@ -1,515 +0,0 @@
-// Copyright 2019 The MediaPipe Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include
-#include
-#include
-
-#include "Eigen/Core"
-#include "mediapipe/framework/calculator_framework.h"
-#include "mediapipe/framework/calculator_runner.h"
-#include "mediapipe/framework/formats/matrix.h"
-#include "mediapipe/framework/formats/time_series_header.pb.h"
-#include "mediapipe/framework/port/gmock.h"
-#include "mediapipe/framework/port/gtest.h"
-#include "mediapipe/framework/port/integral_types.h"
-#include "mediapipe/framework/port/parse_text_proto.h"
-#include "mediapipe/util/time_series_test_util.h"
-
-namespace mediapipe {
-
-class SumTimeSeriesAcrossChannelsCalculatorTest
- : public BasicTimeSeriesCalculatorTestBase {
- protected:
- void SetUp() override {
- calculator_name_ = "SumTimeSeriesAcrossChannelsCalculator";
- }
-};
-
-TEST_F(SumTimeSeriesAcrossChannelsCalculatorTest, IsNoOpOnSingleChannelInputs) {
- const TimeSeriesHeader header = ParseTextProtoOrDie(
- "sample_rate: 8000.0 num_channels: 1 num_samples: 5");
- const Matrix input =
- Matrix::Random(header.num_channels(), header.num_samples());
-
- Test(header, {input}, header, {input});
-}
-
-TEST_F(SumTimeSeriesAcrossChannelsCalculatorTest, ConstantPacket) {
- const TimeSeriesHeader header = ParseTextProtoOrDie(
- "sample_rate: 8000.0 num_channels: 3 num_samples: 5");
- TimeSeriesHeader output_header(header);
- output_header.set_num_channels(1);
-
- Test(header,
- {Matrix::Constant(header.num_channels(), header.num_samples(), 1)},
- output_header,
- {Matrix::Constant(1, header.num_samples(), header.num_channels())});
-}
-
-TEST_F(SumTimeSeriesAcrossChannelsCalculatorTest, MultiplePackets) {
- const TimeSeriesHeader header = ParseTextProtoOrDie(
- "sample_rate: 8000.0 num_channels: 3 num_samples: 5");
- Matrix in(header.num_channels(), header.num_samples());
- in << 10, -1, -1, 0, 0, 20, -2, 0, 1, 0, 30, -3, 1, 0, 12;
-
- TimeSeriesHeader output_header(header);
- output_header.set_num_channels(1);
- Matrix out(1, header.num_samples());
- out << 60, -6, 0, 1, 12;
-
- Test(header, {in, 2 * in, in + Matrix::Constant(in.rows(), in.cols(), 3.5f)},
- output_header,
- {out, 2 * out,
- out + Matrix::Constant(out.rows(), out.cols(),
- 3.5 * header.num_channels())});
-}
-
-class AverageTimeSeriesAcrossChannelsCalculatorTest
- : public BasicTimeSeriesCalculatorTestBase {
- protected:
- void SetUp() override {
- calculator_name_ = "AverageTimeSeriesAcrossChannelsCalculator";
- }
-};
-
-TEST_F(AverageTimeSeriesAcrossChannelsCalculatorTest,
- IsNoOpOnSingleChannelInputs) {
- const TimeSeriesHeader header = ParseTextProtoOrDie(
- "sample_rate: 8000.0 num_channels: 1 num_samples: 5");
- const Matrix input =
- Matrix::Random(header.num_channels(), header.num_samples());
-
- Test(header, {input}, header, {input});
-}
-
-TEST_F(AverageTimeSeriesAcrossChannelsCalculatorTest, ConstantPacket) {
- const TimeSeriesHeader header = ParseTextProtoOrDie(
- "sample_rate: 8000.0 num_channels: 3 num_samples: 5");
- TimeSeriesHeader output_header(header);
- output_header.set_num_channels(1);
-
- Matrix input =
- Matrix::Constant(header.num_channels(), header.num_samples(), 0.0);
- input.row(0) = Matrix::Constant(1, header.num_samples(), 1.0);
-
- Test(
- header, {input}, output_header,
- {Matrix::Constant(1, header.num_samples(), 1.0 / header.num_channels())});
-}
-
-class SummarySaiToPitchogramCalculatorTest
- : public BasicTimeSeriesCalculatorTestBase {
- protected:
- void SetUp() override {
- calculator_name_ = "SummarySaiToPitchogramCalculator";
- }
-};
-
-TEST_F(SummarySaiToPitchogramCalculatorTest, SinglePacket) {
- const TimeSeriesHeader input_header = ParseTextProtoOrDie(
- "sample_rate: 8000.0 packet_rate: 5.0 num_channels: 1 num_samples: 3");
- Matrix input(1, input_header.num_samples());
- input << 3, -9, 4;
-
- const TimeSeriesHeader output_header = ParseTextProtoOrDie(
- "sample_rate: 5.0 packet_rate: 5.0 num_channels: 3 num_samples: 1");
- Matrix output(input_header.num_samples(), 1);
- output << 3, -9, 4;
-
- Test(input_header, {input}, output_header, {output});
-}
-
-class ReverseChannelOrderCalculatorTest
- : public BasicTimeSeriesCalculatorTestBase {
- protected:
- void SetUp() override { calculator_name_ = "ReverseChannelOrderCalculator"; }
-};
-
-TEST_F(ReverseChannelOrderCalculatorTest, IsNoOpOnSingleChannelInputs) {
- const TimeSeriesHeader header = ParseTextProtoOrDie(
- "sample_rate: 8000.0 num_channels: 1 num_samples: 5");
- const Matrix input =
- Matrix::Random(header.num_channels(), header.num_samples());
-
- Test(header, {input}, header, {input});
-}
-
-TEST_F(ReverseChannelOrderCalculatorTest, SinglePacket) {
- const TimeSeriesHeader header = ParseTextProtoOrDie(
- "sample_rate: 8000.0 num_channels: 5 num_samples: 2");
- Matrix input(header.num_channels(), header.num_samples());
- input.transpose() << 1, 2, 3, 4, 5, -1, -2, -3, -4, -5;
- Matrix output(header.num_channels(), header.num_samples());
- output.transpose() << 5, 4, 3, 2, 1, -5, -4, -3, -2, -1;
-
- Test(header, {input}, header, {output});
-}
-
-class FlattenPacketCalculatorTest : public BasicTimeSeriesCalculatorTestBase {
- protected:
- void SetUp() override { calculator_name_ = "FlattenPacketCalculator"; }
-};
-
-TEST_F(FlattenPacketCalculatorTest, SinglePacket) {
- const TimeSeriesHeader input_header = ParseTextProtoOrDie(
- "sample_rate: 20.0 packet_rate: 10.0 num_channels: 5 num_samples: 2");
- Matrix input(input_header.num_channels(), input_header.num_samples());
- input.transpose() << 1, 2, 3, 4, 5, -1, -2, -3, -4, -5;
- Matrix output(10, 1);
- output << 1, 2, 3, 4, 5, -1, -2, -3, -4, -5;
-
- const TimeSeriesHeader output_header = ParseTextProtoOrDie(
- "sample_rate: 10.0 packet_rate: 10.0 num_channels: 10 num_samples: 1");
- Test(input_header, {input}, output_header, {output});
-}
-
-class SubtractMeanCalculatorTest : public BasicTimeSeriesCalculatorTestBase {
- protected:
- void SetUp() override { calculator_name_ = "SubtractMeanCalculator"; }
-};
-
-TEST_F(SubtractMeanCalculatorTest, SinglePacket) {
- const TimeSeriesHeader input_header = ParseTextProtoOrDie(
- "sample_rate: 20.0 packet_rate: 10.0 num_channels: 5 num_samples: 2");
- Matrix input(input_header.num_channels(), input_header.num_samples());
- Matrix output(input_header.num_channels(), input_header.num_samples());
-
- // clang-format off
- input.transpose() << 1, 0, 3, 0, 1,
- -1, -2, -3, 4, 7;
- output.transpose() << 1, 1, 3, -2, -3,
- -1, -1, -3, 2, 3;
- // clang-format on
-
- const TimeSeriesHeader output_header = input_header;
- Test(input_header, {input}, output_header, {output});
-}
-
-class SubtractMeanAcrossChannelsCalculatorTest
- : public BasicTimeSeriesCalculatorTestBase {
- protected:
- void SetUp() override {
- calculator_name_ = "SubtractMeanAcrossChannelsCalculator";
- }
-};
-
-TEST_F(SubtractMeanAcrossChannelsCalculatorTest, SinglePacket) {
- const TimeSeriesHeader input_header = ParseTextProtoOrDie(
- "sample_rate: 20.0 packet_rate: 10.0 num_channels: 3 num_samples: 2");
- TimeSeriesHeader output_header(input_header);
- output_header.set_num_samples(2);
-
- Matrix input(input_header.num_channels(), input_header.num_samples());
- Matrix output(output_header.num_channels(), output_header.num_samples());
-
- // clang-format off
- input.transpose() << 1.0, 2.0, 3.0,
- 4.0, 5.0, 6.0;
- output.transpose() << 1.0 - 3.5, 2.0 - 3.5, 3.0 - 3.5,
- 4.0 - 3.5, 5.0 - 3.5, 6.0 - 3.5;
- // clang-format on
-
- Test(input_header, {input}, output_header, {output});
-}
-
-class DivideByMeanAcrossChannelsCalculatorTest
- : public BasicTimeSeriesCalculatorTestBase {
- protected:
- void SetUp() override {
- calculator_name_ = "DivideByMeanAcrossChannelsCalculator";
- }
-};
-
-TEST_F(DivideByMeanAcrossChannelsCalculatorTest, SinglePacket) {
- const TimeSeriesHeader input_header = ParseTextProtoOrDie(
- "sample_rate: 20.0 packet_rate: 10.0 num_channels: 3 num_samples: 2");
- Matrix input(input_header.num_channels(), input_header.num_samples());
- input.transpose() << 1.0, 2.0, 3.0, 4.0, 5.0, 6.0;
-
- TimeSeriesHeader output_header(input_header);
- output_header.set_num_samples(2);
- Matrix output(output_header.num_channels(), output_header.num_samples());
- output.transpose() << 1.0 / 3.5, 2.0 / 3.5, 3.0 / 3.5, 4.0 / 3.5, 5.0 / 3.5,
- 6.0 / 3.5;
-
- Test(input_header, {input}, output_header, {output});
-}
-
-TEST_F(DivideByMeanAcrossChannelsCalculatorTest, ReturnsOneForZeroMean) {
- const TimeSeriesHeader input_header = ParseTextProtoOrDie(
- "sample_rate: 20.0 packet_rate: 10.0 num_channels: 3 num_samples: 2");
- Matrix input(input_header.num_channels(), input_header.num_samples());
- input.transpose() << -3.0, -2.0, -1.0, 1.0, 2.0, 3.0;
-
- TimeSeriesHeader output_header(input_header);
- output_header.set_num_samples(2);
- Matrix output(output_header.num_channels(), output_header.num_samples());
- output.transpose() << 1.0, 1.0, 1.0, 1.0, 1.0, 1.0;
-
- Test(input_header, {input}, output_header, {output});
-}
-
-class MeanCalculatorTest : public BasicTimeSeriesCalculatorTestBase {
- protected:
- void SetUp() override { calculator_name_ = "MeanCalculator"; }
-};
-
-TEST_F(MeanCalculatorTest, SinglePacket) {
- const TimeSeriesHeader input_header = ParseTextProtoOrDie(
- "sample_rate: 20.0 packet_rate: 10.0 num_channels: 3 num_samples: 2");
- Matrix input(input_header.num_channels(), input_header.num_samples());
- input.transpose() << 1.0, 2.0, 3.0, 4.0, 5.0, 6.0;
-
- TimeSeriesHeader output_header(input_header);
- output_header.set_num_samples(1);
- output_header.set_sample_rate(10.0);
- Matrix output(output_header.num_channels(), output_header.num_samples());
- output << (1.0 + 4.0) / 2, (2.0 + 5.0) / 2, (3.0 + 6.0) / 2;
-
- Test(input_header, {input}, output_header, {output});
-}
-
-class StandardDeviationCalculatorTest
- : public BasicTimeSeriesCalculatorTestBase {
- protected:
- void SetUp() override { calculator_name_ = "StandardDeviationCalculator"; }
-};
-
-TEST_F(StandardDeviationCalculatorTest, SinglePacket) {
- const TimeSeriesHeader input_header = ParseTextProtoOrDie(
- "sample_rate: 20.0 packet_rate: 10.0 num_channels: 3 num_samples: 2");
- Matrix input(input_header.num_channels(), input_header.num_samples());
- input.transpose() << 0.0, 2.0, 3.0, 4.0, 5.0, 8.0;
-
- TimeSeriesHeader output_header(input_header);
- output_header.set_sample_rate(10.0);
- output_header.set_num_samples(1);
- Matrix output(output_header.num_channels(), output_header.num_samples());
- output << sqrt((pow(0.0 - 2.0, 2) + pow(4.0 - 2.0, 2)) / 2),
- sqrt((pow(2.0 - 3.5, 2) + pow(5.0 - 3.5, 2)) / 2),
- sqrt((pow(3.0 - 5.5, 2) + pow(8.0 - 5.5, 2)) / 2);
-
- Test(input_header, {input}, output_header, {output});
-}
-
-class CovarianceCalculatorTest : public BasicTimeSeriesCalculatorTestBase {
- protected:
- void SetUp() override { calculator_name_ = "CovarianceCalculator"; }
-};
-
-TEST_F(CovarianceCalculatorTest, SinglePacket) {
- const TimeSeriesHeader input_header = ParseTextProtoOrDie(
- "sample_rate: 20.0 packet_rate: 10.0 num_channels: 3 num_samples: 2");
- Matrix input(input_header.num_channels(), input_header.num_samples());
-
- // We'll specify in transposed form so we can write one channel at a time.
- input << 1.0, 3.0, 5.0, 9.0, -1.0, -3.0;
-
- TimeSeriesHeader output_header(input_header);
- output_header.set_num_samples(output_header.num_channels());
- Matrix output(output_header.num_channels(), output_header.num_samples());
- output << 1, 2, -1, 2, 4, -2, -1, -2, 1;
- Test(input_header, {input}, output_header, {output});
-}
-
-class L2NormCalculatorTest : public BasicTimeSeriesCalculatorTestBase {
- protected:
- void SetUp() override { calculator_name_ = "L2NormCalculator"; }
-};
-
-TEST_F(L2NormCalculatorTest, SinglePacket) {
- const TimeSeriesHeader input_header = ParseTextProtoOrDie(
- "sample_rate: 8000.0 packet_rate: 5.0 num_channels: 2 num_samples: 3");
- Matrix input(input_header.num_channels(), input_header.num_samples());
- input << 3, 5, 8, 4, 12, -15;
-
- const TimeSeriesHeader output_header = ParseTextProtoOrDie(
- "sample_rate: 8000.0 packet_rate: 5.0 num_channels: 1 num_samples: 3");
- Matrix output(output_header.num_channels(), output_header.num_samples());
- output << 5, 13, 17;
-
- Test(input_header, {input}, output_header, {output});
-}
-
-class L2NormalizeColumnCalculatorTest
- : public BasicTimeSeriesCalculatorTestBase {
- protected:
- void SetUp() override { calculator_name_ = "L2NormalizeColumnCalculator"; }
-};
-
-TEST_F(L2NormalizeColumnCalculatorTest, SinglePacket) {
- const TimeSeriesHeader input_header = ParseTextProtoOrDie(
- "sample_rate: 8000.0 packet_rate: 5.0 num_channels: 2 num_samples: 3");
- Matrix input(input_header.num_channels(), input_header.num_samples());
- input << 0.3, 0.4, 0.8, 0.5, 0.9, 0.8;
-
- const TimeSeriesHeader output_header = ParseTextProtoOrDie(
- "sample_rate: 8000.0 packet_rate: 5.0 num_channels: 2 num_samples: 3");
- Matrix output(output_header.num_channels(), output_header.num_samples());
-
- // The values in output are column-wise L2 normalized
- // e.g.
- // |a| -> |a/sqrt(a^2 + b^2)|
- // |b| |b/sqrt(a^2 + b^2)|
- output << 0.51449579000473022, 0.40613847970962524, 0.70710676908493042,
- 0.85749292373657227, 0.91381156444549561, 0.70710676908493042;
-
- Test(input_header, {input}, output_header, {output});
-}
-
-class L2NormalizeCalculatorTest : public BasicTimeSeriesCalculatorTestBase {
- protected:
- void SetUp() override { calculator_name_ = "L2NormalizeCalculator"; }
-};
-
-TEST_F(L2NormalizeCalculatorTest, SinglePacket) {
- const TimeSeriesHeader input_header = ParseTextProtoOrDie(
- "sample_rate: 8000.0 packet_rate: 5.0 num_channels: 2 num_samples: 3");
- Matrix input(input_header.num_channels(), input_header.num_samples());
- input << 0.3, 0.4, 0.8, 0.5, 0.9, 0.8;
-
- const TimeSeriesHeader output_header = ParseTextProtoOrDie(
- "sample_rate: 8000.0 packet_rate: 5.0 num_channels: 2 num_samples: 3");
- Matrix output(output_header.num_channels(), output_header.num_samples());
-
- // The values in output are L2 normalized
- // a -> a/sqrt(a^2 + b^2 + c^2 + ...) * sqrt(matrix.cols()*matrix.rows())
- output << 0.45661166, 0.60881555, 1.21763109, 0.76101943, 1.36983498,
- 1.21763109;
-
- Test(input_header, {input}, output_header, {output});
-}
-
-TEST_F(L2NormalizeCalculatorTest, UnitMatrixStaysUnchanged) {
- const TimeSeriesHeader input_header = ParseTextProtoOrDie(
- "sample_rate: 8000.0 packet_rate: 5.0 num_channels: 3 num_samples: 5");
- Matrix input(input_header.num_channels(), input_header.num_samples());
- input << 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
- 1.0, -1.0, 1.0;
-
- Test(input_header, {input}, input_header, {input});
-}
-
-class PeakNormalizeCalculatorTest : public BasicTimeSeriesCalculatorTestBase {
- protected:
- void SetUp() override { calculator_name_ = "PeakNormalizeCalculator"; }
-};
-
-TEST_F(PeakNormalizeCalculatorTest, SinglePacket) {
- const TimeSeriesHeader input_header = ParseTextProtoOrDie(
- "sample_rate: 8000.0 packet_rate: 5.0 num_channels: 2 num_samples: 3");
- Matrix input(input_header.num_channels(), input_header.num_samples());
- input << 0.3, 0.4, 0.8, 0.5, 0.9, 0.8;
-
- const TimeSeriesHeader output_header = ParseTextProtoOrDie(
- "sample_rate: 8000.0 packet_rate: 5.0 num_channels: 2 num_samples: 3");
- Matrix output(output_header.num_channels(), output_header.num_samples());
- output << 0.33333333, 0.44444444, 0.88888889, 0.55555556, 1.0, 0.88888889;
-
- Test(input_header, {input}, output_header, {output});
-}
-
-TEST_F(PeakNormalizeCalculatorTest, UnitMatrixStaysUnchanged) {
- const TimeSeriesHeader input_header = ParseTextProtoOrDie(
- "sample_rate: 8000.0 packet_rate: 5.0 num_channels: 3 num_samples: 5");
- Matrix input(input_header.num_channels(), input_header.num_samples());
- input << 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
- 1.0, -1.0, 1.0;
-
- Test(input_header, {input}, input_header, {input});
-}
-
-class ElementwiseSquareCalculatorTest
- : public BasicTimeSeriesCalculatorTestBase {
- protected:
- void SetUp() override { calculator_name_ = "ElementwiseSquareCalculator"; }
-};
-
-TEST_F(ElementwiseSquareCalculatorTest, SinglePacket) {
- const TimeSeriesHeader input_header = ParseTextProtoOrDie(
- "sample_rate: 8000.0 packet_rate: 5.0 num_channels: 2 num_samples: 3");
- Matrix input(input_header.num_channels(), input_header.num_samples());
- input << 3, 5, 8, 4, 12, -15;
-
- const TimeSeriesHeader output_header = ParseTextProtoOrDie(
- "sample_rate: 8000.0 packet_rate: 5.0 num_channels: 2 num_samples: 3");
- Matrix output(output_header.num_channels(), output_header.num_samples());
- output << 9, 25, 64, 16, 144, 225;
-
- Test(input_header, {input}, output_header, {output});
-}
-
-class FirstHalfSlicerCalculatorTest : public BasicTimeSeriesCalculatorTestBase {
- protected:
- void SetUp() override { calculator_name_ = "FirstHalfSlicerCalculator"; }
-};
-
-TEST_F(FirstHalfSlicerCalculatorTest, SinglePacketEvenNumSamples) {
- const TimeSeriesHeader input_header = ParseTextProtoOrDie(
- "sample_rate: 20.0 packet_rate: 10.0 num_channels: 5 num_samples: 2");
- Matrix input(input_header.num_channels(), input_header.num_samples());
- // clang-format off
- input.transpose() << 0, 1, 2, 3, 4,
- 5, 6, 7, 8, 9;
- // clang-format on
-
- const TimeSeriesHeader output_header = ParseTextProtoOrDie(
- "sample_rate: 20.0 packet_rate: 10.0 num_channels: 5 num_samples: 1");
- Matrix output(output_header.num_channels(), output_header.num_samples());
- output.transpose() << 0, 1, 2, 3, 4;
-
- Test(input_header, {input}, output_header, {output});
-}
-
-TEST_F(FirstHalfSlicerCalculatorTest, SinglePacketOddNumSamples) {
- const TimeSeriesHeader input_header = ParseTextProtoOrDie(
- "sample_rate: 20.0 packet_rate: 10.0 num_channels: 5 num_samples: 3");
- Matrix input(input_header.num_channels(), input_header.num_samples());
- // clang-format off
- input.transpose() << 0, 1, 2, 3, 4,
- 5, 6, 7, 8, 9,
- 0, 0, 0, 0, 0;
- // clang-format on
-
- const TimeSeriesHeader output_header = ParseTextProtoOrDie(
- "sample_rate: 20.0 packet_rate: 10.0 num_channels: 5 num_samples: 1");
- Matrix output(output_header.num_channels(), output_header.num_samples());
- output.transpose() << 0, 1, 2, 3, 4;
-
- Test(input_header, {input}, output_header, {output});
-}
-
-TEST_F(FirstHalfSlicerCalculatorTest, MultiplePackets) {
- const TimeSeriesHeader input_header = ParseTextProtoOrDie(
- "sample_rate: 20.0 packet_rate: 10.0 num_channels: 5 num_samples: 2");
- Matrix input(input_header.num_channels(), input_header.num_samples());
- // clang-format off
- input.transpose() << 0, 1, 2, 3, 4,
- 5, 6, 7, 8, 9;
- // clang-format on
- const TimeSeriesHeader output_header = ParseTextProtoOrDie(
- "sample_rate: 20.0 packet_rate: 10.0 num_channels: 5 num_samples: 1");
- Matrix output(output_header.num_channels(), output_header.num_samples());
- output.transpose() << 0, 1, 2, 3, 4;
-
- Test(input_header,
- {input, 2 * input,
- input + Matrix::Constant(input.rows(), input.cols(), 3.5f)},
- output_header,
- {output, 2 * output,
- output + Matrix::Constant(output.rows(), output.cols(), 3.5f)});
-}
-
-} // namespace mediapipe
diff --git a/mediapipe/calculators/audio/mfcc_mel_calculators.cc b/mediapipe/calculators/audio/mfcc_mel_calculators.cc
deleted file mode 100644
index a63b9d6ea..000000000
--- a/mediapipe/calculators/audio/mfcc_mel_calculators.cc
+++ /dev/null
@@ -1,275 +0,0 @@
-// Copyright 2019 The MediaPipe Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-// MediaPipe Calculator wrapper around audio/dsp/mfcc/
-// classes MelFilterbank (magnitude spectrograms warped to the Mel
-// approximation of the auditory frequency scale) and Mfcc (Mel Frequency
-// Cepstral Coefficients, the decorrelated transform of log-Mel-spectrum
-// commonly used as acoustic features in speech and other audio tasks.
-// Both calculators expect as input the SQUARED_MAGNITUDE-domain outputs
-// from the MediaPipe SpectrogramCalculator object.
-#include
-#include
-
-#include "Eigen/Core"
-#include "absl/strings/str_cat.h"
-#include "absl/strings/string_view.h"
-#include "absl/strings/substitute.h"
-#include "audio/dsp/mfcc/mel_filterbank.h"
-#include "audio/dsp/mfcc/mfcc.h"
-#include "mediapipe/calculators/audio/mfcc_mel_calculators.pb.h"
-#include "mediapipe/framework/calculator_framework.h"
-#include "mediapipe/framework/formats/matrix.h"
-#include "mediapipe/framework/formats/time_series_header.pb.h"
-#include "mediapipe/framework/port/logging.h"
-#include "mediapipe/framework/port/status.h"
-#include "mediapipe/util/time_series_util.h"
-
-namespace mediapipe {
-
-namespace {
-
-// Portable version of TimeSeriesHeader's DebugString.
-std::string PortableDebugString(const TimeSeriesHeader& header) {
- std::string unsubstituted_header_debug_str = R"(
- sample_rate: $0
- num_channels: $1
- num_samples: $2
- packet_rate: $3
- audio_sample_rate: $4
- )";
- return absl::Substitute(unsubstituted_header_debug_str, header.sample_rate(),
- header.num_channels(), header.num_samples(),
- header.packet_rate(), header.audio_sample_rate());
-}
-
-} // namespace
-
-// Abstract base class for Calculators that transform feature vectors on a
-// frame-by-frame basis.
-// Subclasses must override pure virtual methods ConfigureTransform and
-// TransformFrame.
-// Input and output MediaPipe packets are matrices with one column per frame,
-// and one row per feature dimension. Each input packet results in an
-// output packet with the same number of columns (but differing numbers of
-// rows corresponding to the new feature space).
-class FramewiseTransformCalculatorBase : public CalculatorBase {
- public:
- static absl::Status GetContract(CalculatorContract* cc) {
- cc->Inputs().Index(0).Set(
- // Sequence of Matrices, each column describing a particular time frame,
- // each row a feature dimension, with TimeSeriesHeader.
- );
- cc->Outputs().Index(0).Set(
- // Sequence of Matrices, each column describing a particular time frame,
- // each row a feature dimension, with TimeSeriesHeader.
- );
- return absl::OkStatus();
- }
-
- absl::Status Open(CalculatorContext* cc) final;
- absl::Status Process(CalculatorContext* cc) final;
-
- int num_output_channels(void) { return num_output_channels_; }
-
- void set_num_output_channels(int num_output_channels) {
- num_output_channels_ = num_output_channels;
- }
-
- private:
- // Takes header and options, and sets up state including calling
- // set_num_output_channels() on the base object.
- virtual absl::Status ConfigureTransform(const TimeSeriesHeader& header,
- CalculatorContext* cc) = 0;
-
- // Takes a vector corresponding to an input frame, and
- // perform the specific transformation to produce an output frame.
- virtual void TransformFrame(const std::vector& input,
- std::vector* output) const = 0;
-
- private:
- int num_output_channels_;
-};
-
-absl::Status FramewiseTransformCalculatorBase::Open(CalculatorContext* cc) {
- TimeSeriesHeader input_header;
- MP_RETURN_IF_ERROR(time_series_util::FillTimeSeriesHeaderIfValid(
- cc->Inputs().Index(0).Header(), &input_header));
-
- absl::Status status = ConfigureTransform(input_header, cc);
-
- auto output_header = new TimeSeriesHeader(input_header);
- output_header->set_num_channels(num_output_channels_);
- cc->Outputs().Index(0).SetHeader(Adopt(output_header));
-
- cc->SetOffset(0);
-
- return status;
-}
-
-absl::Status FramewiseTransformCalculatorBase::Process(CalculatorContext* cc) {
- const Matrix& input = cc->Inputs().Index(0).Get();
- const int num_frames = input.cols();
- std::unique_ptr output(new Matrix(num_output_channels_, num_frames));
- // The main work here is converting each column of the float Matrix
- // into a vector of doubles, which is what our target functions from
- // dsp_core consume, and doing the reverse with their output.
- std::vector input_frame(input.rows());
- std::vector output_frame(num_output_channels_);
-
- for (int frame = 0; frame < num_frames; ++frame) {
- // Copy input from Eigen::Matrix column to vector.
- Eigen::Map input_frame_map(&input_frame[0],
- input_frame.size(), 1);
- input_frame_map = input.col(frame).cast();
-
- // Perform the actual transformation.
- TransformFrame(input_frame, &output_frame);
-
- // Copy output from vector to Eigen::Vector.
- CHECK_EQ(output_frame.size(), num_output_channels_);
- Eigen::Map output_frame_map(&output_frame[0],
- output_frame.size(), 1);
- output->col(frame) = output_frame_map.cast();
- }
- cc->Outputs().Index(0).Add(output.release(), cc->InputTimestamp());
-
- return absl::OkStatus();
-}
-
-// Calculator wrapper around the dsp/mfcc/mfcc.cc routine.
-// Take frames of squared-magnitude spectra from the SpectrogramCalculator
-// and convert them into Mel Frequency Cepstral Coefficients.
-//
-// Example config:
-// node {
-// calculator: "MfccCalculator"
-// input_stream: "spectrogram_frames_stream"
-// output_stream: "mfcc_frames_stream"
-// options {
-// [mediapipe.MfccCalculatorOptions.ext] {
-// mel_spectrum_params {
-// channel_count: 20
-// min_frequency_hertz: 125.0
-// max_frequency_hertz: 3800.0
-// }
-// mfcc_count: 13
-// }
-// }
-// }
-class MfccCalculator : public FramewiseTransformCalculatorBase {
- public:
- static absl::Status GetContract(CalculatorContract* cc) {
- return FramewiseTransformCalculatorBase::GetContract(cc);
- }
-
- private:
- absl::Status ConfigureTransform(const TimeSeriesHeader& header,
- CalculatorContext* cc) override {
- MfccCalculatorOptions mfcc_options = cc->Options();
- mfcc_.reset(new audio_dsp::Mfcc());
- int input_length = header.num_channels();
- // Set up the parameters to the Mfcc object.
- set_num_output_channels(mfcc_options.mfcc_count());
- mfcc_->set_dct_coefficient_count(num_output_channels());
- mfcc_->set_upper_frequency_limit(
- mfcc_options.mel_spectrum_params().max_frequency_hertz());
- mfcc_->set_lower_frequency_limit(
- mfcc_options.mel_spectrum_params().min_frequency_hertz());
- mfcc_->set_filterbank_channel_count(
- mfcc_options.mel_spectrum_params().channel_count());
- // An upstream calculator (such as SpectrogramCalculator) must store
- // the sample rate of its input audio waveform in the TimeSeries Header.
- // audio_dsp::MelFilterBank needs to know this to
- // correctly interpret the spectrogram bins.
- if (!header.has_audio_sample_rate()) {
- return absl::InvalidArgumentError(
- absl::StrCat("No audio_sample_rate in input TimeSeriesHeader ",
- PortableDebugString(header)));
- }
- // Now we can initialize the Mfcc object.
- bool initialized =
- mfcc_->Initialize(input_length, header.audio_sample_rate());
-
- if (initialized) {
- return absl::OkStatus();
- } else {
- return absl::Status(absl::StatusCode::kInternal,
- "Mfcc::Initialize returned uninitialized");
- }
- }
-
- void TransformFrame(const std::vector& input,
- std::vector* output) const override {
- mfcc_->Compute(input, output);
- }
-
- private:
- std::unique_ptr mfcc_;
-};
-REGISTER_CALCULATOR(MfccCalculator);
-
-// Calculator wrapper around the dsp/mfcc/mel_filterbank.cc routine.
-// Take frames of squared-magnitude spectra from the SpectrogramCalculator
-// and convert them into Mel-warped (linear-magnitude) spectra.
-// Note: This code computes a mel-frequency filterbank, using a simple
-// algorithm that gives bad results (some mel channels that are always zero)
-// if you ask for too many channels.
-class MelSpectrumCalculator : public FramewiseTransformCalculatorBase {
- public:
- static absl::Status GetContract(CalculatorContract* cc) {
- return FramewiseTransformCalculatorBase::GetContract(cc);
- }
-
- private:
- absl::Status ConfigureTransform(const TimeSeriesHeader& header,
- CalculatorContext* cc) override {
- MelSpectrumCalculatorOptions mel_spectrum_options =
- cc->Options();
- mel_filterbank_.reset(new audio_dsp::MelFilterbank());
- int input_length = header.num_channels();
- set_num_output_channels(mel_spectrum_options.channel_count());
- // An upstream calculator (such as SpectrogramCalculator) must store
- // the sample rate of its input audio waveform in the TimeSeries Header.
- // audio_dsp::MelFilterBank needs to know this to
- // correctly interpret the spectrogram bins.
- if (!header.has_audio_sample_rate()) {
- return absl::InvalidArgumentError(
- absl::StrCat("No audio_sample_rate in input TimeSeriesHeader ",
- PortableDebugString(header)));
- }
- bool initialized = mel_filterbank_->Initialize(
- input_length, header.audio_sample_rate(), num_output_channels(),
- mel_spectrum_options.min_frequency_hertz(),
- mel_spectrum_options.max_frequency_hertz());
-
- if (initialized) {
- return absl::OkStatus();
- } else {
- return absl::Status(absl::StatusCode::kInternal,
- "mfcc::Initialize returned uninitialized");
- }
- }
-
- void TransformFrame(const std::vector& input,
- std::vector* output) const override {
- mel_filterbank_->Compute(input, output);
- }
-
- private:
- std::unique_ptr mel_filterbank_;
-};
-REGISTER_CALCULATOR(MelSpectrumCalculator);
-
-} // namespace mediapipe
diff --git a/mediapipe/calculators/audio/mfcc_mel_calculators.proto b/mediapipe/calculators/audio/mfcc_mel_calculators.proto
deleted file mode 100644
index 89af5eb41..000000000
--- a/mediapipe/calculators/audio/mfcc_mel_calculators.proto
+++ /dev/null
@@ -1,50 +0,0 @@
-// Copyright 2019 The MediaPipe Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-syntax = "proto2";
-
-package mediapipe;
-
-import "mediapipe/framework/calculator.proto";
-
-message MelSpectrumCalculatorOptions {
- extend CalculatorOptions {
- optional MelSpectrumCalculatorOptions ext = 78581812;
- }
- // The fields are to populate the config parameters in
- // audio/dsp/mfcc/mel_filterbank.h
- // but the names are chose to mirror
- // audio/hearing/filterbanks/cochlea_gammatone_filterbank.proto
- // and the default values match those in
- // speech/greco3/frontend/filter_bank.proto .
-
- // Total number of frequency bands to use.
- optional int32 channel_count = 1 [default = 20];
- // Lower edge of lowest triangular Mel band.
- optional float min_frequency_hertz = 2 [default = 125.0];
- // Upper edge of highest triangular Mel band.
- optional float max_frequency_hertz = 3 [default = 3800.0];
-}
-
-message MfccCalculatorOptions {
- extend CalculatorOptions {
- optional MfccCalculatorOptions ext = 78450441;
- }
-
- // Specification of the underlying mel filterbank.
- optional MelSpectrumCalculatorOptions mel_spectrum_params = 1;
-
- // How many MFCC coefficients to emit.
- optional uint32 mfcc_count = 2 [default = 13];
-}
diff --git a/mediapipe/calculators/audio/mfcc_mel_calculators_test.cc b/mediapipe/calculators/audio/mfcc_mel_calculators_test.cc
deleted file mode 100644
index e7e312db9..000000000
--- a/mediapipe/calculators/audio/mfcc_mel_calculators_test.cc
+++ /dev/null
@@ -1,149 +0,0 @@
-// Copyright 2019 The MediaPipe Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include
-
-#include "Eigen/Core"
-#include "mediapipe/calculators/audio/mfcc_mel_calculators.pb.h"
-#include "mediapipe/framework/calculator_framework.h"
-#include "mediapipe/framework/formats/matrix.h"
-#include "mediapipe/framework/port/gmock.h"
-#include "mediapipe/framework/port/gtest.h"
-#include "mediapipe/util/time_series_test_util.h"
-
-namespace mediapipe {
-
-// Use a sample rate that is unlikely to be a default somewhere.
-const float kAudioSampleRate = 8800.0;
-
-template
-class FramewiseTransformCalculatorTest
- : public TimeSeriesCalculatorTest {
- protected:
- void SetUp() override {
- this->calculator_name_ = CalculatorName;
- this->num_input_channels_ = 129;
- // This is the frame rate coming out of the SpectrogramCalculator.
- this->input_sample_rate_ = 100.0;
- }
-
- // Returns the number of samples per packet.
- int GenerateRandomNonnegInputStream(int num_packets) {
- const double kSecondsPerPacket = 0.2;
- const int num_samples_per_packet =
- kSecondsPerPacket * this->input_sample_rate_;
- for (int i = 0; i < num_packets; ++i) {
- const int timestamp =
- i * kSecondsPerPacket * Timestamp::kTimestampUnitsPerSecond;
- // Mfcc, MelSpectrum expect squared-magnitude inputs, so make
- // sure the input data has no negative values.
- Matrix* sqdata = this->NewRandomMatrix(this->num_input_channels_,
- num_samples_per_packet);
- *sqdata = sqdata->array().square();
- this->AppendInputPacket(sqdata, timestamp);
- }
- return num_samples_per_packet;
- }
-
- void CheckOutputPacketMetadata(int expected_num_channels,
- int expected_num_samples_per_packet) {
- int expected_timestamp = 0;
- for (const auto& packet : this->output().packets) {
- EXPECT_EQ(expected_timestamp, packet.Timestamp().Value());
- expected_timestamp += expected_num_samples_per_packet /
- this->input_sample_rate_ *
- Timestamp::kTimestampUnitsPerSecond;
-
- const Matrix& output_matrix = packet.template Get();
-
- EXPECT_EQ(output_matrix.rows(), expected_num_channels);
- EXPECT_EQ(output_matrix.cols(), expected_num_samples_per_packet);
- }
- }
-
- void SetupGraphAndHeader() {
- this->InitializeGraph();
- this->FillInputHeader();
- }
-
- // Argument is the expected number of dimensions (channels, columns) in
- // the output data from the Calculator under test, which the test should
- // know.
- void SetupRandomInputPackets() {
- constexpr int kNumPackets = 5;
- num_samples_per_packet_ = GenerateRandomNonnegInputStream(kNumPackets);
- }
-
- absl::Status Run() { return this->RunGraph(); }
-
- void CheckResults(int expected_num_channels) {
- const auto& output_header =
- this->output().header.template Get();
- EXPECT_EQ(this->input_sample_rate_, output_header.sample_rate());
- CheckOutputPacketMetadata(expected_num_channels, num_samples_per_packet_);
-
- // Sanity check that output packets have non-zero energy.
- for (const auto& packet : this->output().packets) {
- const Matrix& data = packet.template Get();
- EXPECT_GT(data.squaredNorm(), 0);
- }
- }
-
- // Allows SetupRandomInputPackets() to inform CheckResults() about how
- // big the packets are supposed to be.
- int num_samples_per_packet_;
-};
-
-constexpr char kMfccCalculator[] = "MfccCalculator";
-typedef FramewiseTransformCalculatorTest
- MfccCalculatorTest;
-TEST_F(MfccCalculatorTest, AudioSampleRateFromInputHeader) {
- audio_sample_rate_ = kAudioSampleRate;
- SetupGraphAndHeader();
- SetupRandomInputPackets();
-
- MP_EXPECT_OK(Run());
-
- CheckResults(options_.mfcc_count());
-}
-TEST_F(MfccCalculatorTest, NoAudioSampleRate) {
- // Leave audio_sample_rate_ == kUnset, so it is not present in the
- // input TimeSeriesHeader; expect failure.
- SetupGraphAndHeader();
- SetupRandomInputPackets();
-
- EXPECT_FALSE(Run().ok());
-}
-
-constexpr char kMelSpectrumCalculator[] = "MelSpectrumCalculator";
-typedef FramewiseTransformCalculatorTest
- MelSpectrumCalculatorTest;
-TEST_F(MelSpectrumCalculatorTest, AudioSampleRateFromInputHeader) {
- audio_sample_rate_ = kAudioSampleRate;
- SetupGraphAndHeader();
- SetupRandomInputPackets();
-
- MP_EXPECT_OK(Run());
-
- CheckResults(options_.channel_count());
-}
-TEST_F(MelSpectrumCalculatorTest, NoAudioSampleRate) {
- // Leave audio_sample_rate_ == kUnset, so it is not present in the
- // input TimeSeriesHeader; expect failure.
- SetupGraphAndHeader();
- SetupRandomInputPackets();
-
- EXPECT_FALSE(Run().ok());
-}
-} // namespace mediapipe
diff --git a/mediapipe/calculators/audio/rational_factor_resample_calculator.cc b/mediapipe/calculators/audio/rational_factor_resample_calculator.cc
deleted file mode 100644
index 1a4210c30..000000000
--- a/mediapipe/calculators/audio/rational_factor_resample_calculator.cc
+++ /dev/null
@@ -1,190 +0,0 @@
-// Copyright 2019, 2021 The MediaPipe Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-// Defines RationalFactorResampleCalculator.
-
-#include "mediapipe/calculators/audio/rational_factor_resample_calculator.h"
-
-#include "audio/dsp/resampler_q.h"
-
-using audio_dsp::Resampler;
-
-namespace mediapipe {
-absl::Status RationalFactorResampleCalculator::Process(CalculatorContext* cc) {
- return ProcessInternal(cc->Inputs().Index(0).Get(), false, cc);
-}
-
-absl::Status RationalFactorResampleCalculator::Close(CalculatorContext* cc) {
- if (initial_timestamp_ == Timestamp::Unstarted()) {
- return absl::OkStatus();
- }
- Matrix empty_input_frame(num_channels_, 0);
- return ProcessInternal(empty_input_frame, true, cc);
-}
-
-namespace {
-void CopyChannelToVector(const Matrix& matrix, int channel,
- std::vector* vec) {
- vec->resize(matrix.cols());
- Eigen::Map(vec->data(), vec->size()) = matrix.row(channel);
-}
-
-void CopyVectorToChannel(const std::vector& vec, Matrix* matrix,
- int channel) {
- if (matrix->cols() == 0) {
- matrix->resize(matrix->rows(), vec.size());
- } else {
- CHECK_EQ(vec.size(), matrix->cols());
- }
- CHECK_LT(channel, matrix->rows());
- matrix->row(channel) =
- Eigen::Map(vec.data(), vec.size());
-}
-} // namespace
-
-absl::Status RationalFactorResampleCalculator::Open(CalculatorContext* cc) {
- RationalFactorResampleCalculatorOptions resample_options =
- cc->Options();
-
- if (!resample_options.has_target_sample_rate()) {
- return tool::StatusInvalid(
- "resample_options doesn't have target_sample_rate.");
- }
- target_sample_rate_ = resample_options.target_sample_rate();
-
- TimeSeriesHeader input_header;
- MP_RETURN_IF_ERROR(time_series_util::FillTimeSeriesHeaderIfValid(
- cc->Inputs().Index(0).Header(), &input_header));
-
- source_sample_rate_ = input_header.sample_rate();
- num_channels_ = input_header.num_channels();
-
- // Don't create resamplers for pass-thru (sample rates are equal).
- if (source_sample_rate_ != target_sample_rate_) {
- resampler_.resize(num_channels_);
- for (auto& r : resampler_) {
- r = ResamplerFromOptions(source_sample_rate_, target_sample_rate_,
- resample_options);
- if (!r) {
- LOG(ERROR) << "Failed to initialize resampler.";
- return absl::UnknownError("Failed to initialize resampler.");
- }
- }
- }
-
- TimeSeriesHeader* output_header = new TimeSeriesHeader(input_header);
- output_header->set_sample_rate(target_sample_rate_);
- // The resampler doesn't make guarantees about how many samples will
- // be in each packet.
- output_header->clear_packet_rate();
- output_header->clear_num_samples();
-
- cc->Outputs().Index(0).SetHeader(Adopt(output_header));
- cumulative_output_samples_ = 0;
- cumulative_input_samples_ = 0;
- initial_timestamp_ = Timestamp::Unstarted();
- check_inconsistent_timestamps_ =
- resample_options.check_inconsistent_timestamps();
- return absl::OkStatus();
-}
-
-absl::Status RationalFactorResampleCalculator::ProcessInternal(
- const Matrix& input_frame, bool should_flush, CalculatorContext* cc) {
- if (initial_timestamp_ == Timestamp::Unstarted()) {
- initial_timestamp_ = cc->InputTimestamp();
- }
-
- if (check_inconsistent_timestamps_) {
- time_series_util::LogWarningIfTimestampIsInconsistent(
- cc->InputTimestamp(), initial_timestamp_, cumulative_input_samples_,
- source_sample_rate_);
- }
- Timestamp output_timestamp =
- initial_timestamp_ + ((cumulative_output_samples_ / target_sample_rate_) *
- Timestamp::kTimestampUnitsPerSecond);
-
- cumulative_input_samples_ += input_frame.cols();
- std::unique_ptr output_frame(new Matrix(num_channels_, 0));
- if (resampler_.empty()) {
- // Sample rates were same for input and output; pass-thru.
- *output_frame = input_frame;
- } else {
- if (!Resample(input_frame, output_frame.get(), should_flush)) {
- return absl::UnknownError("Resample() failed.");
- }
- }
- cumulative_output_samples_ += output_frame->cols();
-
- if (output_frame->cols() > 0) {
- cc->Outputs().Index(0).Add(output_frame.release(), output_timestamp);
- }
- return absl::OkStatus();
-}
-
-bool RationalFactorResampleCalculator::Resample(const Matrix& input_frame,
- Matrix* output_frame,
- bool should_flush) {
- std::vector input_vector;
- std::vector output_vector;
- for (int i = 0; i < input_frame.rows(); ++i) {
- CopyChannelToVector(input_frame, i, &input_vector);
- if (should_flush) {
- resampler_[i]->Flush(&output_vector);
- } else {
- resampler_[i]->ProcessSamples(input_vector, &output_vector);
- }
- CopyVectorToChannel(output_vector, output_frame, i);
- }
- return true;
-}
-
-// static
-std::unique_ptr>
-RationalFactorResampleCalculator::ResamplerFromOptions(
- const double source_sample_rate, const double target_sample_rate,
- const RationalFactorResampleCalculatorOptions& options) {
- std::unique_ptr> resampler;
- const auto& rational_factor_options =
- options.resampler_rational_factor_options();
- audio_dsp::QResamplerParams params;
- if (rational_factor_options.has_radius() &&
- rational_factor_options.has_cutoff() &&
- rational_factor_options.has_kaiser_beta()) {
- // Convert RationalFactorResampler kernel parameters to QResampler
- // settings.
- params.filter_radius_factor =
- rational_factor_options.radius() *
- std::min(1.0, target_sample_rate / source_sample_rate);
- params.cutoff_proportion = 2 * rational_factor_options.cutoff() /
- std::min(source_sample_rate, target_sample_rate);
- params.kaiser_beta = rational_factor_options.kaiser_beta();
- }
- // Set large enough so that the resampling factor between common sample
- // rates (e.g. 8kHz, 16kHz, 22.05kHz, 32kHz, 44.1kHz, 48kHz) is exact, and
- // that any factor is represented with error less than 0.025%.
- params.max_denominator = 2000;
-
- // NOTE: QResampler supports multichannel resampling, so the code might be
- // simplified using a single instance rather than one per channel.
- resampler = absl::make_unique>(
- source_sample_rate, target_sample_rate, /*num_channels=*/1, params);
- if (resampler != nullptr && !resampler->Valid()) {
- resampler = std::unique_ptr>();
- }
- return resampler;
-}
-
-REGISTER_CALCULATOR(RationalFactorResampleCalculator);
-
-} // namespace mediapipe
diff --git a/mediapipe/calculators/audio/rational_factor_resample_calculator.h b/mediapipe/calculators/audio/rational_factor_resample_calculator.h
deleted file mode 100644
index 325886dc7..000000000
--- a/mediapipe/calculators/audio/rational_factor_resample_calculator.h
+++ /dev/null
@@ -1,109 +0,0 @@
-// Copyright 2019, 2021 The MediaPipe Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef MEDIAPIPE_CALCULATORS_AUDIO_RATIONAL_FACTOR_RESAMPLE_CALCULATOR_H_
-#define MEDIAPIPE_CALCULATORS_AUDIO_RATIONAL_FACTOR_RESAMPLE_CALCULATOR_H_
-
-#include
-#include
-#include
-
-#include "Eigen/Core"
-#include "absl/strings/str_cat.h"
-#include "audio/dsp/resampler.h"
-#include "mediapipe/calculators/audio/rational_factor_resample_calculator.pb.h"
-#include "mediapipe/framework/calculator_framework.h"
-#include "mediapipe/framework/formats/matrix.h"
-#include "mediapipe/framework/formats/time_series_header.pb.h"
-#include "mediapipe/framework/port/integral_types.h"
-#include "mediapipe/framework/port/logging.h"
-#include "mediapipe/util/time_series_util.h"
-
-namespace mediapipe {
-// MediaPipe Calculator for resampling a (vector-valued)
-// input time series with a uniform sample rate. The output
-// stream's sampling rate is specified by target_sample_rate in the
-// RationalFactorResampleCalculatorOptions. The output time series may have
-// a varying number of samples per frame.
-//
-// NOTE: This calculator uses QResampler, despite the name, which supersedes
-// RationalFactorResampler.
-class RationalFactorResampleCalculator : public CalculatorBase {
- public:
- struct TestAccess;
-
- static absl::Status GetContract(CalculatorContract* cc) {
- cc->Inputs().Index(0).Set(
- // Single input stream with TimeSeriesHeader.
- );
- cc->Outputs().Index(0).Set(
- // Resampled stream with TimeSeriesHeader.
- );
- return absl::OkStatus();
- }
- // Returns FAIL if the input stream header is invalid or if the
- // resampler cannot be initialized.
- absl::Status Open(CalculatorContext* cc) override;
- // Resamples a packet of TimeSeries data. Returns FAIL if the
- // resampler state becomes inconsistent.
- absl::Status Process(CalculatorContext* cc) override;
- // Flushes any remaining state. Returns FAIL if the resampler state
- // becomes inconsistent.
- absl::Status Close(CalculatorContext* cc) override;
-
- protected:
- typedef audio_dsp::Resampler ResamplerType;
-
- // Returns a Resampler implementation specified by the
- // RationalFactorResampleCalculatorOptions proto. Returns null if the options
- // specify an invalid resampler.
- static std::unique_ptr ResamplerFromOptions(
- const double source_sample_rate, const double target_sample_rate,
- const RationalFactorResampleCalculatorOptions& options);
-
- // Does Timestamp bookkeeping and resampling common to Process() and
- // Close(). Returns FAIL if the resampler state becomes
- // inconsistent.
- absl::Status ProcessInternal(const Matrix& input_frame, bool should_flush,
- CalculatorContext* cc);
-
- // Uses the internal resampler_ objects to actually resample each
- // row of the input TimeSeries. Returns false if the resampler
- // state becomes inconsistent.
- bool Resample(const Matrix& input_frame, Matrix* output_frame,
- bool should_flush);
-
- double source_sample_rate_;
- double target_sample_rate_;
- int64 cumulative_input_samples_;
- int64 cumulative_output_samples_;
- Timestamp initial_timestamp_;
- bool check_inconsistent_timestamps_;
- int num_channels_;
- std::vector> resampler_;
-};
-
-// Test-only access to RationalFactorResampleCalculator methods.
-struct RationalFactorResampleCalculator::TestAccess {
- static std::unique_ptr ResamplerFromOptions(
- const double source_sample_rate, const double target_sample_rate,
- const RationalFactorResampleCalculatorOptions& options) {
- return RationalFactorResampleCalculator::ResamplerFromOptions(
- source_sample_rate, target_sample_rate, options);
- }
-};
-
-} // namespace mediapipe
-
-#endif // MEDIAPIPE_CALCULATORS_AUDIO_RATIONAL_FACTOR_RESAMPLE_CALCULATOR_H_
diff --git a/mediapipe/calculators/audio/rational_factor_resample_calculator.proto b/mediapipe/calculators/audio/rational_factor_resample_calculator.proto
deleted file mode 100644
index 97d7f202c..000000000
--- a/mediapipe/calculators/audio/rational_factor_resample_calculator.proto
+++ /dev/null
@@ -1,47 +0,0 @@
-// Copyright 2019, 2021 The MediaPipe Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-syntax = "proto2";
-
-package mediapipe;
-
-import "mediapipe/framework/calculator.proto";
-
-// NOTE: This calculator uses QResampler, despite the name, which supersedes
-// RationalFactorResampler.
-message RationalFactorResampleCalculatorOptions {
- extend CalculatorOptions {
- optional RationalFactorResampleCalculatorOptions ext = 259760074;
- }
-
- // target_sample_rate is the sample rate, in Hertz, of the output
- // stream. Required. Must be greater than 0.
- optional double target_sample_rate = 1;
-
- // Parameters for initializing QResampler. See QResampler for more details.
- message ResamplerRationalFactorOptions {
- // Kernel radius in units of input samples.
- optional double radius = 1;
- // Anti-aliasing cutoff frequency in Hertz. A reasonable setting is
- // 0.45 * min(input_sample_rate, output_sample_rate).
- optional double cutoff = 2;
- // The Kaiser beta parameter for the kernel window.
- optional double kaiser_beta = 3 [default = 6.0];
- }
- optional ResamplerRationalFactorOptions resampler_rational_factor_options = 2;
-
- // Set to false to disable checks for jitter in timestamp values. Useful with
- // live audio input.
- optional bool check_inconsistent_timestamps = 3 [default = true];
-}
diff --git a/mediapipe/calculators/audio/rational_factor_resample_calculator_test.cc b/mediapipe/calculators/audio/rational_factor_resample_calculator_test.cc
deleted file mode 100644
index 6ae360303..000000000
--- a/mediapipe/calculators/audio/rational_factor_resample_calculator_test.cc
+++ /dev/null
@@ -1,246 +0,0 @@
-// Copyright 2019 The MediaPipe Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "mediapipe/calculators/audio/rational_factor_resample_calculator.h"
-
-#include
-
-#include
-#include
-#include
-
-#include "Eigen/Core"
-#include "audio/dsp/signal_vector_util.h"
-#include "mediapipe/calculators/audio/rational_factor_resample_calculator.pb.h"
-#include "mediapipe/framework//tool/validate_type.h"
-#include "mediapipe/framework/calculator_framework.h"
-#include "mediapipe/framework/calculator_runner.h"
-#include "mediapipe/framework/formats/matrix.h"
-#include "mediapipe/framework/formats/time_series_header.pb.h"
-#include "mediapipe/framework/port/gmock.h"
-#include "mediapipe/framework/port/gtest.h"
-#include "mediapipe/framework/port/status.h"
-#include "mediapipe/util/time_series_test_util.h"
-
-namespace mediapipe {
-namespace {
-
-const int kInitialTimestampOffsetMilliseconds = 4;
-
-class RationalFactorResampleCalculatorTest
- : public TimeSeriesCalculatorTest {
- protected:
- void SetUp() override {
- calculator_name_ = "RationalFactorResampleCalculator";
- input_sample_rate_ = 4000.0;
- num_input_channels_ = 3;
- }
-
- // Expects two vectors whose lengths are almost the same and whose
- // elements are equal (for indices that are present in both).
- //
- // This is useful because the resampler doesn't make precise
- // guarantees about its output size.
- void ExpectVectorMostlyFloatEq(const std::vector& expected,
- const std::vector& actual) {
- // Lengths should be close, but don't have to be equal.
- ASSERT_NEAR(expected.size(), actual.size(), 1);
- for (int i = 0; i < std::min(expected.size(), actual.size()); ++i) {
- EXPECT_FLOAT_EQ(expected[i], actual[i]) << " where i=" << i << ".";
- }
- }
-
- // Returns a float value with the sample, channel, and timestamp
- // separated by a few orders of magnitude, for easy parsing by
- // humans.
- double TestValue(int sample, int channel, int timestamp_in_microseconds) {
- return timestamp_in_microseconds * 100.0 + sample + channel / 10.0;
- }
-
- // Caller takes ownership of the returned value.
- Matrix* NewTestFrame(int num_channels, int num_samples, int timestamp) {
- auto matrix = new Matrix(num_channels, num_samples);
- for (int c = 0; c < num_channels; ++c) {
- for (int i = 0; i < num_samples; ++i) {
- (*matrix)(c, i) = TestValue(i, c, timestamp);
- }
- }
- return matrix;
- }
-
- // Initializes and runs the test graph.
- absl::Status Run(double output_sample_rate) {
- options_.set_target_sample_rate(output_sample_rate);
- InitializeGraph();
-
- FillInputHeader();
- concatenated_input_samples_.resize(num_input_channels_, 0);
- num_input_samples_ = 0;
- for (int i = 0; i < 5; ++i) {
- int packet_size = (i + 1) * 10;
- int timestamp = kInitialTimestampOffsetMilliseconds +
- num_input_samples_ / input_sample_rate_ *
- Timestamp::kTimestampUnitsPerSecond;
- Matrix* data_frame =
- NewTestFrame(num_input_channels_, packet_size, timestamp);
-
- // Keep a reference copy of the input.
- //
- // conservativeResize() is needed here to preserve the existing
- // data. Eigen's resize() resizes without preserving data.
- concatenated_input_samples_.conservativeResize(
- num_input_channels_, num_input_samples_ + packet_size);
- concatenated_input_samples_.rightCols(packet_size) = *data_frame;
- num_input_samples_ += packet_size;
-
- AppendInputPacket(data_frame, timestamp);
- }
-
- return RunGraph();
- }
-
- void CheckOutputLength(double output_sample_rate) {
- double factor = output_sample_rate / input_sample_rate_;
-
- int num_output_samples = 0;
- for (const Packet& packet : output().packets) {
- num_output_samples += packet.Get().cols();
- }
-
- // The exact number of expected samples may vary based on the implementation
- // of the resampler since the exact value is not an integer.
- const double expected_num_output_samples = num_input_samples_ * factor;
- EXPECT_LE(ceil(expected_num_output_samples), num_output_samples);
- EXPECT_GE(ceil(expected_num_output_samples) + 11, num_output_samples);
- }
-
- // Checks that output timestamps are consistent with the
- // output_sample_rate and output packet sizes.
- void CheckOutputPacketTimestamps(double output_sample_rate) {
- int num_output_samples = 0;
- for (const Packet& packet : output().packets) {
- const int expected_timestamp = kInitialTimestampOffsetMilliseconds +
- num_output_samples / output_sample_rate *
- Timestamp::kTimestampUnitsPerSecond;
- EXPECT_NEAR(expected_timestamp, packet.Timestamp().Value(), 1);
- num_output_samples += packet.Get().cols();
- }
- }
-
- // Checks that output values from the calculator (which resamples
- // packet-by-packet) are consistent with resampling the entire
- // signal at once.
- void CheckOutputValues(double output_sample_rate) {
- for (int i = 0; i < num_input_channels_; ++i) {
- auto verification_resampler =
- RationalFactorResampleCalculator::TestAccess::ResamplerFromOptions(
- input_sample_rate_, output_sample_rate, options_);
-
- std::vector input_data;
- for (int j = 0; j < num_input_samples_; ++j) {
- input_data.push_back(concatenated_input_samples_(i, j));
- }
- std::vector expected_resampled_data;
- std::vector temp;
- verification_resampler->ProcessSamples(input_data, &temp);
- audio_dsp::VectorAppend(&expected_resampled_data, temp);
- verification_resampler->Flush(&temp);
- audio_dsp::VectorAppend(&expected_resampled_data, temp);
- std::vector actual_resampled_data;
- for (const Packet& packet : output().packets) {
- Matrix output_frame_row = packet.Get().row(i);
- actual_resampled_data.insert(
- actual_resampled_data.end(), &output_frame_row(0),
- &output_frame_row(0) + output_frame_row.cols());
- }
-
- ExpectVectorMostlyFloatEq(expected_resampled_data, actual_resampled_data);
- }
- }
-
- void CheckOutputHeaders(double output_sample_rate) {
- const TimeSeriesHeader& output_header =
- output().header.Get();
- TimeSeriesHeader expected_header;
- expected_header.set_sample_rate(output_sample_rate);
- expected_header.set_num_channels(num_input_channels_);
- EXPECT_THAT(output_header, mediapipe::EqualsProto(expected_header));
- }
-
- void CheckOutput(double output_sample_rate) {
- CheckOutputLength(output_sample_rate);
- CheckOutputPacketTimestamps(output_sample_rate);
- CheckOutputValues(output_sample_rate);
- CheckOutputHeaders(output_sample_rate);
- }
-
- void CheckOutputUnchanged() {
- for (int i = 0; i < num_input_channels_; ++i) {
- std::vector expected_resampled_data;
- for (int j = 0; j < num_input_samples_; ++j) {
- expected_resampled_data.push_back(concatenated_input_samples_(i, j));
- }
- std::vector actual_resampled_data;
- for (const Packet& packet : output().packets) {
- Matrix output_frame_row = packet.Get().row(i);
- actual_resampled_data.insert(
- actual_resampled_data.end(), &output_frame_row(0),
- &output_frame_row(0) + output_frame_row.cols());
- }
- ExpectVectorMostlyFloatEq(expected_resampled_data, actual_resampled_data);
- }
- }
-
- int num_input_samples_;
- Matrix concatenated_input_samples_;
-};
-
-TEST_F(RationalFactorResampleCalculatorTest, Upsample) {
- const double kUpsampleRate = input_sample_rate_ * 1.9;
- MP_ASSERT_OK(Run(kUpsampleRate));
- CheckOutput(kUpsampleRate);
-}
-
-TEST_F(RationalFactorResampleCalculatorTest, Downsample) {
- const double kDownsampleRate = input_sample_rate_ / 1.9;
- MP_ASSERT_OK(Run(kDownsampleRate));
- CheckOutput(kDownsampleRate);
-}
-
-TEST_F(RationalFactorResampleCalculatorTest, UsesRationalFactorResampler) {
- const double kUpsampleRate = input_sample_rate_ * 2;
- MP_ASSERT_OK(Run(kUpsampleRate));
- CheckOutput(kUpsampleRate);
-}
-
-TEST_F(RationalFactorResampleCalculatorTest, PassthroughIfSampleRateUnchanged) {
- const double kUpsampleRate = input_sample_rate_;
- MP_ASSERT_OK(Run(kUpsampleRate));
- CheckOutputUnchanged();
-}
-
-TEST_F(RationalFactorResampleCalculatorTest, FailsOnBadTargetRate) {
- ASSERT_FALSE(Run(-999.9).ok()); // Invalid output sample rate.
-}
-
-TEST_F(RationalFactorResampleCalculatorTest, DoesNotDieOnEmptyInput) {
- options_.set_target_sample_rate(input_sample_rate_);
- InitializeGraph();
- FillInputHeader();
- MP_ASSERT_OK(RunGraph());
- EXPECT_TRUE(output().packets.empty());
-}
-
-} // anonymous namespace
-} // namespace mediapipe
diff --git a/mediapipe/calculators/audio/spectrogram_calculator.cc b/mediapipe/calculators/audio/spectrogram_calculator.cc
deleted file mode 100644
index bd2234f86..000000000
--- a/mediapipe/calculators/audio/spectrogram_calculator.cc
+++ /dev/null
@@ -1,452 +0,0 @@
-// Copyright 2019 The MediaPipe Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-// Defines SpectrogramCalculator.
-#include
-
-#include
-#include
-#include
-#include
-
-#include "Eigen/Core"
-#include "absl/strings/string_view.h"
-#include "audio/dsp/spectrogram/spectrogram.h"
-#include "audio/dsp/window_functions.h"
-#include "mediapipe/calculators/audio/spectrogram_calculator.pb.h"
-#include "mediapipe/framework/calculator_framework.h"
-#include "mediapipe/framework/formats/matrix.h"
-#include "mediapipe/framework/formats/time_series_header.pb.h"
-#include "mediapipe/framework/port/core_proto_inc.h"
-#include "mediapipe/framework/port/integral_types.h"
-#include "mediapipe/framework/port/logging.h"
-#include "mediapipe/framework/port/ret_check.h"
-#include "mediapipe/framework/port/source_location.h"
-#include "mediapipe/framework/port/status_builder.h"
-#include "mediapipe/util/time_series_util.h"
-
-namespace mediapipe {
-
-// MediaPipe Calculator for computing the "spectrogram" (short-time Fourier
-// transform squared-magnitude, by default) of a multichannel input
-// time series, including optionally overlapping frames. Options are
-// specified in SpectrogramCalculatorOptions proto (where names are chosen
-// to mirror TimeSeriesFramerCalculator):
-//
-// Result is a MatrixData record (for single channel input and when the
-// allow_multichannel_input flag is false), or a vector of MatrixData records,
-// one for each channel (when the allow_multichannel_input flag is set). The
-// rows of each spectrogram matrix correspond to the n_fft/2+1 unique complex
-// values, or squared/linear/dB magnitudes, depending on the output_type option.
-// Each input packet will result in zero or one output packets, each containing
-// one Matrix for each channel of the input, where each Matrix has one or more
-// columns of spectral values, one for each complete frame of input samples. If
-// the input packet contains too few samples to trigger a new output frame, no
-// output packet is generated (since zero-length packets are not legal since
-// they would result in timestamps that were equal, not strictly increasing).
-//
-// Output packet Timestamps are set to the beginning of each frame. This is to
-// allow calculators downstream from SpectrogramCalculator to have aligned
-// Timestamps regardless of a packet's signal length.
-//
-// Both frame_duration_seconds and frame_overlap_seconds will be
-// rounded to the nearest integer number of samples. Conseqently, all output
-// frames will be based on the same number of input samples, and each
-// analysis frame will advance from its predecessor by the same time step.
-class SpectrogramCalculator : public CalculatorBase {
- public:
- static absl::Status GetContract(CalculatorContract* cc) {
- cc->Inputs().Index(0).Set(
- // Input stream with TimeSeriesHeader.
- );
-
- SpectrogramCalculatorOptions spectrogram_options =
- cc->Options();
- if (!spectrogram_options.allow_multichannel_input()) {
- if (spectrogram_options.output_type() ==
- SpectrogramCalculatorOptions::COMPLEX) {
- cc->Outputs().Index(0).Set(
- // Complex spectrogram frames with TimeSeriesHeader.
- );
- } else {
- cc->Outputs().Index(0).Set(
- // Spectrogram frames with TimeSeriesHeader.
- );
- }
- } else {
- if (spectrogram_options.output_type() ==
- SpectrogramCalculatorOptions::COMPLEX) {
- cc->Outputs().Index(0).Set>(
- // Complex spectrogram frames with MultiStreamTimeSeriesHeader.
- );
- } else {
- cc->Outputs().Index(0).Set>(
- // Spectrogram frames with MultiStreamTimeSeriesHeader.
- );
- }
- }
- return absl::OkStatus();
- }
-
- // Returns FAIL if the input stream header is invalid.
- absl::Status Open(CalculatorContext* cc) override;
-
- // Outputs at most one packet consisting of a single Matrix with one or
- // more columns containing the spectral values from as many input frames
- // as are completed by the input samples. Always returns OK.
- absl::Status Process(CalculatorContext* cc) override;
-
- // Performs zero-padding and processing of any remaining samples
- // if pad_final_packet is set.
- // Returns OK.
- absl::Status Close(CalculatorContext* cc) override;
-
- private:
- Timestamp CurrentOutputTimestamp(CalculatorContext* cc) {
- if (use_local_timestamp_) {
- const Timestamp now = cc->InputTimestamp();
- if (now == Timestamp::Done()) {
- // During Close the timestamp is not available, send an estimate.
- return last_local_output_timestamp_ +
- round(last_completed_frames_ * frame_step_samples() *
- Timestamp::kTimestampUnitsPerSecond / input_sample_rate_);
- }
- last_local_output_timestamp_ = now;
- return now;
- }
- return CumulativeOutputTimestamp();
- }
-
- Timestamp CumulativeOutputTimestamp() {
- // Cumulative output timestamp is the *center* of the next frame to be
- // emitted, hence delayed by half a window duration compared to relevant
- // input timestamp.
- return initial_input_timestamp_ +
- round(cumulative_completed_frames_ * frame_step_samples() *
- Timestamp::kTimestampUnitsPerSecond / input_sample_rate_);
- }
-
- int frame_step_samples() const {
- return frame_duration_samples_ - frame_overlap_samples_;
- }
-
- // Take the next set of input samples, already translated into a
- // vector and pass them to the spectrogram object.
- // Convert the output of the spectrogram object into a Matrix (or an
- // Eigen::MatrixXcf if complex-valued output is requested) and pass to
- // MediaPipe output.
- absl::Status ProcessVector(const Matrix& input_stream, CalculatorContext* cc);
-
- // Templated function to process either real- or complex-output spectrogram.
- template
- absl::Status ProcessVectorToOutput(
- const Matrix& input_stream,
- const OutputMatrixType postprocess_output_fn(const OutputMatrixType&),
- CalculatorContext* cc);
-
- // Use the MediaPipe timestamp instead of the estimated one. Useful when the
- // data is intermittent.
- bool use_local_timestamp_;
- Timestamp last_local_output_timestamp_;
-
- double input_sample_rate_;
- bool pad_final_packet_;
- int frame_duration_samples_;
- int frame_overlap_samples_;
- // How many samples we've been passed, used for checking input time stamps.
- int64 cumulative_input_samples_;
- // How many frames we've emitted, used for calculating output time stamps.
- int64 cumulative_completed_frames_;
- // How many frames were emitted last, used for estimating the timestamp on
- // Close when use_local_timestamp_ is true;
- int64 last_completed_frames_;
- Timestamp initial_input_timestamp_;
- int num_input_channels_;
- // How many frequency bins we emit (=N_FFT/2 + 1).
- int num_output_channels_;
- // Which output type?
- int output_type_;
- // Output type: mono or multichannel.
- bool allow_multichannel_input_;
- // Vector of Spectrogram objects, one for each channel.
- std::vector> spectrogram_generators_;
- // Fixed scale factor applied to output values (regardless of type).
- double output_scale_;
-
- static const float kLnPowerToDb;
-};
-REGISTER_CALCULATOR(SpectrogramCalculator);
-
-// Factor to convert ln(magnitude_squared) to deciBels = 10.0/ln(10.0).
-const float SpectrogramCalculator::kLnPowerToDb = 4.342944819032518;
-
-absl::Status SpectrogramCalculator::Open(CalculatorContext* cc) {
- SpectrogramCalculatorOptions spectrogram_options =
- cc->Options();
-
- use_local_timestamp_ = spectrogram_options.use_local_timestamp();
-
- if (spectrogram_options.frame_duration_seconds() <= 0.0) {
- // TODO: return an error.
- }
- if (spectrogram_options.frame_overlap_seconds() >=
- spectrogram_options.frame_duration_seconds()) {
- // TODO: return an error.
- }
- if (spectrogram_options.frame_overlap_seconds() < 0.0) {
- // TODO: return an error.
- }
-
- TimeSeriesHeader input_header;
- MP_RETURN_IF_ERROR(time_series_util::FillTimeSeriesHeaderIfValid(
- cc->Inputs().Index(0).Header(), &input_header));
-
- input_sample_rate_ = input_header.sample_rate();
- num_input_channels_ = input_header.num_channels();
-
- if (!spectrogram_options.allow_multichannel_input() &&
- num_input_channels_ != 1) {
- // TODO: return an error.
- }
-
- frame_duration_samples_ =
- round(spectrogram_options.frame_duration_seconds() * input_sample_rate_);
- frame_overlap_samples_ =
- round(spectrogram_options.frame_overlap_seconds() * input_sample_rate_);
-
- pad_final_packet_ = spectrogram_options.pad_final_packet();
- output_type_ = spectrogram_options.output_type();
- allow_multichannel_input_ = spectrogram_options.allow_multichannel_input();
-
- output_scale_ = spectrogram_options.output_scale();
-
- std::vector window;
- switch (spectrogram_options.window_type()) {
- case SpectrogramCalculatorOptions::COSINE:
- audio_dsp::CosineWindow().GetPeriodicSamples(frame_duration_samples_,
- &window);
- break;
- case SpectrogramCalculatorOptions::HANN:
- audio_dsp::HannWindow().GetPeriodicSamples(frame_duration_samples_,
- &window);
- break;
- case SpectrogramCalculatorOptions::HAMMING:
- audio_dsp::HammingWindow().GetPeriodicSamples(frame_duration_samples_,
- &window);
- break;
- }
-
- // Propagate settings down to the actual Spectrogram object.
- spectrogram_generators_.clear();
- for (int i = 0; i < num_input_channels_; i++) {
- spectrogram_generators_.push_back(
- std::unique_ptr(new audio_dsp::Spectrogram()));
- spectrogram_generators_[i]->Initialize(window, frame_step_samples());
- }
-
- num_output_channels_ =
- spectrogram_generators_[0]->output_frequency_channels();
- std::unique_ptr output_header(
- new TimeSeriesHeader(input_header));
- // Store the actual sample rate of the input audio in the TimeSeriesHeader
- // so that subsequent calculators can figure out the frequency scale of
- // our output.
- output_header->set_audio_sample_rate(input_sample_rate_);
- // Setup rest of output header.
- output_header->set_num_channels(num_output_channels_);
- output_header->set_sample_rate(input_sample_rate_ / frame_step_samples());
- // Although we usually generate one output packet for each input
- // packet, this might not be true for input packets whose size is smaller
- // than the analysis window length. So we clear output_header.packet_rate
- // because we can't guarantee a constant packet rate. Similarly, the number
- // of output frames per packet depends on the input packet, so we also clear
- // output_header.num_samples.
- output_header->clear_packet_rate();
- output_header->clear_num_samples();
- if (!spectrogram_options.allow_multichannel_input()) {
- cc->Outputs().Index(0).SetHeader(Adopt(output_header.release()));
- } else {
- std::unique_ptr multichannel_output_header(
- new MultiStreamTimeSeriesHeader());
- *multichannel_output_header->mutable_time_series_header() = *output_header;
- multichannel_output_header->set_num_streams(num_input_channels_);
- cc->Outputs().Index(0).SetHeader(
- Adopt(multichannel_output_header.release()));
- }
- cumulative_completed_frames_ = 0;
- last_completed_frames_ = 0;
- initial_input_timestamp_ = Timestamp::Unstarted();
- if (use_local_timestamp_) {
- // Inform the framework that the calculator will output packets at the same
- // timestamps as input packets to enable packet queueing optimizations. The
- // final packet (emitted from Close()) does not follow this rule but it's
- // sufficient that its timestamp is strictly greater than the timestamp of
- // the previous packet.
- cc->SetOffset(0);
- }
- return absl::OkStatus();
-}
-
-absl::Status SpectrogramCalculator::Process(CalculatorContext* cc) {
- if (initial_input_timestamp_ == Timestamp::Unstarted()) {
- initial_input_timestamp_ = cc->InputTimestamp();
- }
-
- const Matrix& input_stream = cc->Inputs().Index(0).Get();
- if (input_stream.rows() != num_input_channels_) {
- // TODO: return an error.
- }
-
- cumulative_input_samples_ += input_stream.cols();
-
- return ProcessVector(input_stream, cc);
-}
-
-template
-absl::Status SpectrogramCalculator::ProcessVectorToOutput(
- const Matrix& input_stream,
- const OutputMatrixType postprocess_output_fn(const OutputMatrixType&),
- CalculatorContext* cc) {
- std::unique_ptr> spectrogram_matrices(
- new std::vector());
- std::vector> output_vectors;
-
- // Compute a spectrogram for each channel.
- int num_output_time_frames;
- for (int channel = 0; channel < input_stream.rows(); ++channel) {
- output_vectors.clear();
-
- // Copy one row (channel) of the input matrix into the std::vector.
- std::vector input_vector(input_stream.cols());
- Eigen::Map(&input_vector[0], 1, input_vector.size()) =
- input_stream.row(channel);
-
- if (!spectrogram_generators_[channel]->ComputeSpectrogram(
- input_vector, &output_vectors)) {
- return absl::Status(absl::StatusCode::kInternal,
- "Spectrogram returned failure");
- }
- if (channel == 0) {
- // Record the number of time frames we expect from each channel.
- num_output_time_frames = output_vectors.size();
- } else {
- RET_CHECK_EQ(output_vectors.size(), num_output_time_frames)
- << "Inconsistent spectrogram time frames for channel " << channel;
- }
- // Skip remaining processing if there are too few input samples to trigger
- // any output frames.
- if (!output_vectors.empty()) {
- // Translate the returned values into a matrix of output frames.
- OutputMatrixType output_frames(num_output_channels_,
- output_vectors.size());
- for (int frame = 0; frame < output_vectors.size(); ++frame) {
- Eigen::Map frame_map(
- &output_vectors[frame][0], output_vectors[frame].size(), 1);
- // The underlying dsp object returns squared magnitudes; here
- // we optionally translate to linear magnitude or dB.
- output_frames.col(frame) =
- output_scale_ * postprocess_output_fn(frame_map);
- }
- spectrogram_matrices->push_back(output_frames);
- }
- }
- // If the input is very short, there may not be enough accumulated,
- // unprocessed samples to cause any new frames to be generated by
- // the spectrogram object. If so, we don't want to emit
- // a packet at all.
- if (!spectrogram_matrices->empty()) {
- RET_CHECK_EQ(spectrogram_matrices->size(), input_stream.rows())
- << "Inconsistent number of spectrogram channels.";
- if (allow_multichannel_input_) {
- cc->Outputs().Index(0).Add(spectrogram_matrices.release(),
- CurrentOutputTimestamp(cc));
- } else {
- cc->Outputs().Index(0).Add(
- new OutputMatrixType(spectrogram_matrices->at(0)),
- CurrentOutputTimestamp(cc));
- }
- cumulative_completed_frames_ += output_vectors.size();
- last_completed_frames_ = output_vectors.size();
- if (!use_local_timestamp_) {
- // In non-local timestamp mode the timestamp of the next packet will be
- // equal to CumulativeOutputTimestamp(). Inform the framework about this
- // fact to enable packet queueing optimizations.
- cc->Outputs().Index(0).SetNextTimestampBound(CumulativeOutputTimestamp());
- }
- }
- return absl::OkStatus();
-}
-
-absl::Status SpectrogramCalculator::ProcessVector(const Matrix& input_stream,
- CalculatorContext* cc) {
- switch (output_type_) {
- // These blocks deliberately ignore clang-format to preserve the
- // "silhouette" of the different cases.
- // clang-format off
- case SpectrogramCalculatorOptions::COMPLEX: {
- return ProcessVectorToOutput(
- input_stream,
- +[](const Eigen::MatrixXcf& col) -> const Eigen::MatrixXcf {
- return col;
- }, cc);
- }
- case SpectrogramCalculatorOptions::SQUARED_MAGNITUDE: {
- return ProcessVectorToOutput(
- input_stream,
- +[](const Matrix& col) -> const Matrix {
- return col;
- }, cc);
- }
- case SpectrogramCalculatorOptions::LINEAR_MAGNITUDE: {
- return ProcessVectorToOutput(
- input_stream,
- +[](const Matrix& col) -> const Matrix {
- return col.array().sqrt().matrix();
- }, cc);
- }
- case SpectrogramCalculatorOptions::DECIBELS: {
- return ProcessVectorToOutput(
- input_stream,
- +[](const Matrix& col) -> const Matrix {
- return kLnPowerToDb * col.array().log().matrix();
- }, cc);
- }
- // clang-format on
- default: {
- return absl::Status(absl::StatusCode::kInvalidArgument,
- "Unrecognized spectrogram output type.");
- }
- }
-}
-
-absl::Status SpectrogramCalculator::Close(CalculatorContext* cc) {
- if (cumulative_input_samples_ > 0 && pad_final_packet_) {
- // We can flush any remaining samples by sending frame_step_samples - 1
- // zeros to the Process method, and letting it do its thing,
- // UNLESS we have fewer than one window's worth of samples, in which case
- // we pad to exactly one frame_duration_samples.
- // Release the memory for the Spectrogram objects.
- int required_padding_samples = frame_step_samples() - 1;
- if (cumulative_input_samples_ < frame_duration_samples_) {
- required_padding_samples =
- frame_duration_samples_ - cumulative_input_samples_;
- }
- return ProcessVector(
- Matrix::Zero(num_input_channels_, required_padding_samples), cc);
- }
-
- return absl::OkStatus();
-}
-
-} // namespace mediapipe
diff --git a/mediapipe/calculators/audio/spectrogram_calculator.proto b/mediapipe/calculators/audio/spectrogram_calculator.proto
deleted file mode 100644
index b721117d4..000000000
--- a/mediapipe/calculators/audio/spectrogram_calculator.proto
+++ /dev/null
@@ -1,76 +0,0 @@
-// Copyright 2019 The MediaPipe Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-syntax = "proto2";
-
-package mediapipe;
-
-import "mediapipe/framework/calculator.proto";
-
-message SpectrogramCalculatorOptions {
- extend CalculatorOptions {
- optional SpectrogramCalculatorOptions ext = 76186688;
- }
-
- // Options mirror those of TimeSeriesFramerCalculator.
-
- // Analysis window duration in seconds. Required. Must be greater than 0.
- // (Note: the spectrogram DFT length will be the smallest power-of-2
- // sample count that can hold this duration.)
- optional double frame_duration_seconds = 1;
-
- // Duration of overlap between adjacent windows.
- // Hence, frame_rate = 1/(frame_duration_seconds - frame_overlap_seconds).
- // Required that 0 <= frame_overlap_seconds < frame_duration_seconds.
- optional double frame_overlap_seconds = 2 [default = 0.0];
-
- // Whether to pad the final packet with zeros. If true, guarantees that
- // all input samples will output. If set to false, any partial packet
- // at the end of the stream will be dropped.
- optional bool pad_final_packet = 3 [default = true];
-
- // Output value type can be squared-magnitude, linear-magnitude,
- // deciBels (dB, = 20*log10(linear_magnitude)), or std::complex.
- enum OutputType {
- SQUARED_MAGNITUDE = 0;
- LINEAR_MAGNITUDE = 1;
- DECIBELS = 2;
- COMPLEX = 3;
- }
- optional OutputType output_type = 4 [default = SQUARED_MAGNITUDE];
-
- // If set to true then the output will be a vector of spectrograms, one for
- // each channel and the stream will have a MultiStreamTimeSeriesHeader.
- optional bool allow_multichannel_input = 5 [default = false];
-
- // Which window to use when computing the FFT.
- enum WindowType {
- HANN = 0;
- HAMMING = 1;
- COSINE = 2;
- }
- optional WindowType window_type = 6 [default = HANN];
-
- // Support a fixed multiplicative scaling of the output. This is applied
- // uniformly regardless of output type (i.e., even dBs are multiplied, not
- // offset).
- optional double output_scale = 7 [default = 1.0];
-
- // If use_local_timestamp is true, the output packet's timestamp is based on
- // the last sample of the packet and it's inferred from the latest input
- // packet's timestamp. If false, the output packet's timestamp is based on
- // the cumulative timestamping, which is inferred from the intial input
- // timestamp and the cumulative number of samples.
- optional bool use_local_timestamp = 8 [default = false];
-}
diff --git a/mediapipe/calculators/audio/spectrogram_calculator_test.cc b/mediapipe/calculators/audio/spectrogram_calculator_test.cc
deleted file mode 100644
index 3c2b8435d..000000000
--- a/mediapipe/calculators/audio/spectrogram_calculator_test.cc
+++ /dev/null
@@ -1,895 +0,0 @@
-// Copyright 2019 The MediaPipe Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include
-
-#include
-#include
-#include
-#include
-#include
-#include
-
-#include "Eigen/Core"
-#include "audio/dsp/number_util.h"
-#include "mediapipe/calculators/audio/spectrogram_calculator.pb.h"
-#include "mediapipe/framework/calculator_framework.h"
-#include "mediapipe/framework/calculator_runner.h"
-#include "mediapipe/framework/formats/matrix.h"
-#include "mediapipe/framework/formats/time_series_header.pb.h"
-#include "mediapipe/framework/port/benchmark.h"
-#include "mediapipe/framework/port/gmock.h"
-#include "mediapipe/framework/port/gtest.h"
-#include "mediapipe/framework/port/integral_types.h"
-#include "mediapipe/framework/port/status.h"
-#include "mediapipe/util/time_series_test_util.h"
-
-namespace mediapipe {
-namespace {
-
-const int kInitialTimestampOffsetMicroseconds = 4;
-
-class SpectrogramCalculatorTest
- : public TimeSeriesCalculatorTest {
- protected:
- void SetUp() override {
- calculator_name_ = "SpectrogramCalculator";
- input_sample_rate_ = 4000.0;
- num_input_channels_ = 1;
- }
-
- // Initializes and runs the test graph.
- absl::Status Run() {
- // Now that options are set, we can set up some internal constants.
- frame_duration_samples_ =
- round(options_.frame_duration_seconds() * input_sample_rate_);
- frame_step_samples_ =
- frame_duration_samples_ -
- round(options_.frame_overlap_seconds() * input_sample_rate_);
- // The magnitude of the 0th FFT bin (DC) should be sum(input.*window);
- // for an input identically 1.0, this is just sum(window). The average
- // value of our Hann window is 0.5, hence this is the expected squared-
- // magnitude output value in the DC bin for constant input of 1.0.
- expected_dc_squared_magnitude_ =
- pow((static_cast(frame_duration_samples_) * 0.5), 2.0);
-
- return RunGraph();
- }
-
- // Creates test multichannel input with specified packet sizes and containing
- // a constant-frequency sinusoid that maintains phase between adjacent
- // packets.
- void SetupCosineInputPackets(const std::vector& packet_sizes_samples,
- float cosine_frequency_hz) {
- int total_num_input_samples = 0;
- for (int packet_size_samples : packet_sizes_samples) {
- double packet_start_time_seconds =
- kInitialTimestampOffsetMicroseconds * 1e-6 +
- total_num_input_samples / input_sample_rate_;
- double packet_end_time_seconds =
- packet_start_time_seconds + packet_size_samples / input_sample_rate_;
- double angular_freq = 2 * M_PI * cosine_frequency_hz;
- Matrix* packet_data =
- new Matrix(num_input_channels_, packet_size_samples);
- // Use Eigen's vectorized cos() function to fill the vector with a
- // sinusoid of appropriate frequency & phase.
- for (int i = 0; i < num_input_channels_; i++) {
- packet_data->row(i) =
- Eigen::ArrayXf::LinSpaced(packet_size_samples,
- packet_start_time_seconds * angular_freq,
- packet_end_time_seconds * angular_freq)
- .cos()
- .transpose();
- }
- int64 input_timestamp = round(packet_start_time_seconds *
- Timestamp::kTimestampUnitsPerSecond);
- AppendInputPacket(packet_data, input_timestamp);
- total_num_input_samples += packet_size_samples;
- }
- }
-
- // Setup a sequence of input packets of specified sizes, each filled
- // with samples of 1.0.
- void SetupConstantInputPackets(const std::vector& packet_sizes_samples) {
- // A 0 Hz cosine is identically 1.0 for all samples.
- SetupCosineInputPackets(packet_sizes_samples, 0.0);
- }
-
- // Setup a sequence of input packets of specified sizes, each containing a
- // single sample of 1.0 at a specified offset.
- void SetupImpulseInputPackets(
- const std::vector& packet_sizes_samples,
- const std::vector& impulse_offsets_samples) {
- int total_num_input_samples = 0;
- for (int i = 0; i < packet_sizes_samples.size(); ++i) {
- double packet_start_time_seconds =
- kInitialTimestampOffsetMicroseconds * 1e-6 +
- total_num_input_samples / input_sample_rate_;
- int64 input_timestamp = round(packet_start_time_seconds *
- Timestamp::kTimestampUnitsPerSecond);
- std::unique_ptr impulse(
- new Matrix(Matrix::Zero(1, packet_sizes_samples[i])));
- (*impulse)(0, impulse_offsets_samples[i]) = 1.0;
- AppendInputPacket(impulse.release(), input_timestamp);
- total_num_input_samples += packet_sizes_samples[i];
- }
- }
-
- // Creates test multichannel input with specified packet sizes and containing
- // constant input packets for the even channels and constant-frequency
- // sinusoid that maintains phase between adjacent packets for the odd
- // channels.
- void SetupMultichannelInputPackets(
- const std::vector& packet_sizes_samples, float cosine_frequency_hz) {
- int total_num_input_samples = 0;
- for (int packet_size_samples : packet_sizes_samples) {
- double packet_start_time_seconds =
- kInitialTimestampOffsetMicroseconds * 1e-6 +
- total_num_input_samples / input_sample_rate_;
- double packet_end_time_seconds =
- packet_start_time_seconds + packet_size_samples / input_sample_rate_;
- double angular_freq;
- Matrix* packet_data =
- new Matrix(num_input_channels_, packet_size_samples);
- // Use Eigen's vectorized cos() function to fill the vector with a
- // sinusoid of appropriate frequency & phase.
- for (int i = 0; i < num_input_channels_; i++) {
- if (i % 2 == 0) {
- angular_freq = 0;
- } else {
- angular_freq = 2 * M_PI * cosine_frequency_hz;
- }
- packet_data->row(i) =
- Eigen::ArrayXf::LinSpaced(packet_size_samples,
- packet_start_time_seconds * angular_freq,
- packet_end_time_seconds * angular_freq)
- .cos()
- .transpose();
- }
- int64 input_timestamp = round(packet_start_time_seconds *
- Timestamp::kTimestampUnitsPerSecond);
- AppendInputPacket(packet_data, input_timestamp);
- total_num_input_samples += packet_size_samples;
- }
- }
-
- // Return vector of the numbers of frames in each output packet.
- std::vector OutputFramesPerPacket() {
- std::vector frame_counts;
- for (const Packet& packet : output().packets) {
- const Matrix& matrix = packet.Get();
- frame_counts.push_back(matrix.cols());
- }
- return frame_counts;
- }
-
- // Checks output headers and Timestamps.
- void CheckOutputHeadersAndTimestamps() {
- const int fft_size = audio_dsp::NextPowerOfTwo(frame_duration_samples_);
-
- TimeSeriesHeader expected_header = input().header.Get();
- expected_header.set_num_channels(fft_size / 2 + 1);
- // The output header sample rate should depend on the output frame step.
- expected_header.set_sample_rate(input_sample_rate_ / frame_step_samples_);
- // SpectrogramCalculator stores the sample rate of the input in
- // the TimeSeriesHeader.
- expected_header.set_audio_sample_rate(input_sample_rate_);
- // We expect the output header to have num_samples and packet_rate unset.
- expected_header.clear_num_samples();
- expected_header.clear_packet_rate();
- if (!options_.allow_multichannel_input()) {
- ExpectOutputHeaderEquals(expected_header);
- } else {
- EXPECT_THAT(output()
- .header.template Get()
- .time_series_header(),
- mediapipe::EqualsProto(expected_header));
- EXPECT_THAT(output()
- .header.template Get()
- .num_streams(),
- num_input_channels_);
- }
-
- int cumulative_output_frames = 0;
- // The timestamps coming out of the spectrogram correspond to the
- // middle of the first frame's window, hence frame_duration_samples_/2
- // term. We use frame_duration_samples_ because that is how it is
- // actually quantized inside spectrogram.
- const double packet_timestamp_offset_seconds =
- kInitialTimestampOffsetMicroseconds * 1e-6;
- const double frame_step_seconds = frame_step_samples_ / input_sample_rate_;
-
- Timestamp initial_timestamp = Timestamp::Unstarted();
-
- for (const Packet& packet : output().packets) {
- // This is the timestamp we expect based on how the spectrogram should
- // behave (advancing by one step's worth of input samples each frame).
- const double expected_timestamp_seconds =
- packet_timestamp_offset_seconds +
- cumulative_output_frames * frame_step_seconds;
- const int64 expected_timestamp_ticks =
- expected_timestamp_seconds * Timestamp::kTimestampUnitsPerSecond;
- EXPECT_EQ(expected_timestamp_ticks, packet.Timestamp().Value());
- // Accept the timestamp of the first packet as the baseline for checking
- // the remainder.
- if (initial_timestamp == Timestamp::Unstarted()) {
- initial_timestamp = packet.Timestamp();
- }
- // Also check that the timestamp is consistent with the sample_rate
- // in the output stream's TimeSeriesHeader.
- EXPECT_TRUE(time_series_util::LogWarningIfTimestampIsInconsistent(
- packet.Timestamp(), initial_timestamp, cumulative_output_frames,
- expected_header.sample_rate()));
- if (!options_.allow_multichannel_input()) {
- if (options_.output_type() == SpectrogramCalculatorOptions::COMPLEX) {
- const Eigen::MatrixXcf& matrix = packet.Get();
- cumulative_output_frames += matrix.cols();
- } else {
- const Matrix& matrix = packet.Get();
- cumulative_output_frames += matrix.cols();
- }
- } else {
- if (options_.output_type() == SpectrogramCalculatorOptions::COMPLEX) {
- const Eigen::MatrixXcf& matrix =
- packet.Get>().at(0);
- cumulative_output_frames += matrix.cols();
- } else {
- const Matrix& matrix = packet.Get>().at(0);
- cumulative_output_frames += matrix.cols();
- }
- }
- }
- }
-
- // Verify that the bin corresponding to the specified frequency
- // is the largest one in one particular frame of a single packet.
- void CheckPeakFrequencyInPacketFrame(const Packet& packet, int frame,
- float frequency) {
- const int fft_size = audio_dsp::NextPowerOfTwo(frame_duration_samples_);
- const int target_bin =
- round((frequency / input_sample_rate_) * static_cast(fft_size));
-
- const Matrix& matrix = packet.Get();
- // Stop here if the requested frame is not in this packet.
- ASSERT_GT(matrix.cols(), frame);
-
- int actual_largest_bin;
- matrix.col(frame).maxCoeff(&actual_largest_bin);
- EXPECT_EQ(actual_largest_bin, target_bin);
- }
-
- // Verify that the bin corresponding to the specified frequency
- // is the largest one in one particular frame of a single spectrogram Matrix.
- void CheckPeakFrequencyInMatrix(const Matrix& matrix, int frame,
- float frequency) {
- const int fft_size = audio_dsp::NextPowerOfTwo(frame_duration_samples_);
- const int target_bin =
- round((frequency / input_sample_rate_) * static_cast(fft_size));
-
- // Stop here if the requested frame is not in this packet.
- ASSERT_GT(matrix.cols(), frame);
-
- int actual_largest_bin;
- matrix.col(frame).maxCoeff(&actual_largest_bin);
- EXPECT_EQ(actual_largest_bin, target_bin);
- }
-
- int frame_duration_samples_;
- int frame_step_samples_;
- // Expected DC output for a window of pure 1.0, set when window length
- // is set.
- float expected_dc_squared_magnitude_;
-};
-
-TEST_F(SpectrogramCalculatorTest, IntegerFrameDurationNoOverlap) {
- options_.set_frame_duration_seconds(100.0 / input_sample_rate_);
- options_.set_frame_overlap_seconds(0.0 / input_sample_rate_);
- options_.set_pad_final_packet(false);
- const std::vector input_packet_sizes = {500, 200};
- const std::vector expected_output_packet_sizes = {5, 2};
-
- InitializeGraph();
- FillInputHeader();
- SetupConstantInputPackets(input_packet_sizes);
-
- MP_ASSERT_OK(Run());
-
- CheckOutputHeadersAndTimestamps();
- EXPECT_EQ(OutputFramesPerPacket(), expected_output_packet_sizes);
-}
-
-TEST_F(SpectrogramCalculatorTest, IntegerFrameDurationSomeOverlap) {
- options_.set_frame_duration_seconds(100.0 / input_sample_rate_);
- options_.set_frame_overlap_seconds(60.0 / input_sample_rate_);
- options_.set_pad_final_packet(false);
- const std::vector input_packet_sizes = {500, 200};
- // complete_output_frames = 1 + floor((input_samples - window_length)/step)
- // = 1 + floor((500 - 100)/40) = 1 + 10 = 11 for the first packet
- // = 1 + floor((700 - 100)/40) = 1 + 15 = 16 for the whole stream
- // so expect 16 - 11 = 5 in the second packet.
- const std::vector expected_output_packet_sizes = {11, 5};
-
- InitializeGraph();
- FillInputHeader();
- SetupConstantInputPackets(input_packet_sizes);
-
- MP_ASSERT_OK(Run());
-
- CheckOutputHeadersAndTimestamps();
- EXPECT_EQ(OutputFramesPerPacket(), expected_output_packet_sizes);
-}
-
-TEST_F(SpectrogramCalculatorTest, NonintegerFrameDurationAndOverlap) {
- options_.set_frame_duration_seconds(98.5 / input_sample_rate_);
- options_.set_frame_overlap_seconds(58.4 / input_sample_rate_);
- options_.set_pad_final_packet(false);
- const std::vector input_packet_sizes = {500, 200};
- // now frame_duration_samples will be 99 (rounded), and frame_step_samples
- // will be (99-58) = 41, so the first packet of 500 samples will generate
- // 1 + floor(500-99)/41 = 10 samples.
- const std::vector expected_output_packet_sizes = {10, 5};
-
- InitializeGraph();
- FillInputHeader();
- SetupConstantInputPackets(input_packet_sizes);
-
- MP_ASSERT_OK(Run());
-
- CheckOutputHeadersAndTimestamps();
- EXPECT_EQ(OutputFramesPerPacket(), expected_output_packet_sizes);
-}
-
-TEST_F(SpectrogramCalculatorTest, ShortInitialPacketNoOverlap) {
- options_.set_frame_duration_seconds(100.0 / input_sample_rate_);
- options_.set_frame_overlap_seconds(0.0 / input_sample_rate_);
- options_.set_pad_final_packet(false);
- const std::vector input_packet_sizes = {90, 100, 110};
- // The first input packet is too small to generate any frames,
- // but zero-length packets would result in a timestamp monotonicity
- // violation, so they are suppressed. Thus, only the second and third
- // input packets generate output packets.
- const std::vector expected_output_packet_sizes = {1, 2};
-
- InitializeGraph();
- FillInputHeader();
- SetupConstantInputPackets(input_packet_sizes);
-
- MP_ASSERT_OK(Run());
-
- CheckOutputHeadersAndTimestamps();
- EXPECT_EQ(OutputFramesPerPacket(), expected_output_packet_sizes);
-}
-
-TEST_F(SpectrogramCalculatorTest, TrailingSamplesNoPad) {
- options_.set_frame_duration_seconds(100.0 / input_sample_rate_);
- options_.set_frame_overlap_seconds(60.0 / input_sample_rate_);
- options_.set_pad_final_packet(false);
- const std::vector input_packet_sizes = {140, 90};
- const std::vector expected_output_packet_sizes = {2, 2};
-
- InitializeGraph();
- FillInputHeader();
- SetupConstantInputPackets(input_packet_sizes);
-
- MP_ASSERT_OK(Run());
-
- CheckOutputHeadersAndTimestamps();
- EXPECT_EQ(OutputFramesPerPacket(), expected_output_packet_sizes);
-}
-
-TEST_F(SpectrogramCalculatorTest, NoTrailingSamplesWithPad) {
- options_.set_frame_duration_seconds(100.0 / input_sample_rate_);
- options_.set_frame_overlap_seconds(60.0 / input_sample_rate_);
- options_.set_pad_final_packet(true);
- const std::vector input_packet_sizes = {140, 80};
- const std::vector expected_output_packet_sizes = {2, 2};
-
- InitializeGraph();
- FillInputHeader();
- SetupConstantInputPackets(input_packet_sizes);
-
- MP_ASSERT_OK(Run());
-
- CheckOutputHeadersAndTimestamps();
- EXPECT_EQ(OutputFramesPerPacket(), expected_output_packet_sizes);
-}
-
-TEST_F(SpectrogramCalculatorTest, TrailingSamplesWithPad) {
- options_.set_frame_duration_seconds(100.0 / input_sample_rate_);
- options_.set_frame_overlap_seconds(60.0 / input_sample_rate_);
- options_.set_pad_final_packet(true);
- const std::vector input_packet_sizes = {140, 90};
- // In contrast to NoTrailingSamplesWithPad and TrailingSamplesNoPad,
- // this time we get an extra frame in an extra final packet.
- const std::vector expected_output_packet_sizes = {2, 2, 1};
-
- InitializeGraph();
- FillInputHeader();
- SetupConstantInputPackets(input_packet_sizes);
-
- MP_ASSERT_OK(Run());
-
- CheckOutputHeadersAndTimestamps();
- EXPECT_EQ(OutputFramesPerPacket(), expected_output_packet_sizes);
-}
-
-TEST_F(SpectrogramCalculatorTest, VeryShortInputWillPad) {
- options_.set_frame_duration_seconds(100.0 / input_sample_rate_);
- options_.set_frame_overlap_seconds(60.0 / input_sample_rate_);
- options_.set_pad_final_packet(true);
- const std::vector input_packet_sizes = {30};
- const std::vector expected_output_packet_sizes = {1};
-
- InitializeGraph();
- FillInputHeader();
- SetupConstantInputPackets(input_packet_sizes);
-
- MP_ASSERT_OK(Run());
-
- CheckOutputHeadersAndTimestamps();
- EXPECT_EQ(OutputFramesPerPacket(), expected_output_packet_sizes);
-}
-
-TEST_F(SpectrogramCalculatorTest, VeryShortInputZeroOutputFramesIfNoPad) {
- options_.set_frame_duration_seconds(100.0 / input_sample_rate_);
- options_.set_frame_overlap_seconds(60.0 / input_sample_rate_);
- options_.set_pad_final_packet(false);
- const std::vector input_packet_sizes = {90};
- const std::vector expected_output_packet_sizes = {};
-
- InitializeGraph();
- FillInputHeader();
- SetupConstantInputPackets(input_packet_sizes);
-
- MP_ASSERT_OK(Run());
-
- CheckOutputHeadersAndTimestamps();
- EXPECT_EQ(OutputFramesPerPacket(), expected_output_packet_sizes);
-}
-
-TEST_F(SpectrogramCalculatorTest, DCSignalIsPeakBin) {
- options_.set_frame_duration_seconds(100.0 / input_sample_rate_);
- options_.set_frame_overlap_seconds(60.0 / input_sample_rate_);
- const std::vector input_packet_sizes = {140}; // Gives 2 output frames.
-
- InitializeGraph();
- FillInputHeader();
- // Setup packets with DC input (non-zero constant value).
- SetupConstantInputPackets(input_packet_sizes);
-
- MP_ASSERT_OK(Run());
-
- CheckOutputHeadersAndTimestamps();
- const float dc_frequency_hz = 0.0;
- CheckPeakFrequencyInPacketFrame(output().packets[0], 0, dc_frequency_hz);
- CheckPeakFrequencyInPacketFrame(output().packets[0], 1, dc_frequency_hz);
-}
-
-TEST_F(SpectrogramCalculatorTest, A440ToneIsPeakBin) {
- const std::vector input_packet_sizes = {
- 460}; // 100 + 9*40 for 10 frames.
- options_.set_frame_duration_seconds(100.0 / input_sample_rate_);
- options_.set_frame_overlap_seconds(60.0 / input_sample_rate_);
- InitializeGraph();
- FillInputHeader();
- const float tone_frequency_hz = 440.0;
- SetupCosineInputPackets(input_packet_sizes, tone_frequency_hz);
-
- MP_ASSERT_OK(Run());
-
- CheckOutputHeadersAndTimestamps();
- int num_output_frames = output().packets[0].Get().cols();
- for (int frame = 0; frame < num_output_frames; ++frame) {
- CheckPeakFrequencyInPacketFrame(output().packets[0], frame,
- tone_frequency_hz);
- }
-}
-
-TEST_F(SpectrogramCalculatorTest, SquaredMagnitudeOutputLooksRight) {
- options_.set_frame_duration_seconds(100.0 / input_sample_rate_);
- options_.set_frame_overlap_seconds(60.0 / input_sample_rate_);
- options_.set_output_type(SpectrogramCalculatorOptions::SQUARED_MAGNITUDE);
- const std::vector input_packet_sizes = {140};
-
- InitializeGraph();
- FillInputHeader();
- // Setup packets with DC input (non-zero constant value).
- SetupConstantInputPackets(input_packet_sizes);
-
- MP_ASSERT_OK(Run());
-
- CheckOutputHeadersAndTimestamps();
- EXPECT_FLOAT_EQ(output().packets[0].Get()(0, 0),
- expected_dc_squared_magnitude_);
-}
-
-TEST_F(SpectrogramCalculatorTest, DefaultOutputIsSquaredMagnitude) {
- options_.set_frame_duration_seconds(100.0 / input_sample_rate_);
- options_.set_frame_overlap_seconds(60.0 / input_sample_rate_);
- // Let the output_type be its default
- const std::vector input_packet_sizes = {140};
-
- InitializeGraph();
- FillInputHeader();
- // Setup packets with DC input (non-zero constant value).
- SetupConstantInputPackets(input_packet_sizes);
-
- MP_ASSERT_OK(Run());
-
- CheckOutputHeadersAndTimestamps();
- EXPECT_FLOAT_EQ(output().packets[0].Get()(0, 0),
- expected_dc_squared_magnitude_);
-}
-
-TEST_F(SpectrogramCalculatorTest, LinearMagnitudeOutputLooksRight) {
- options_.set_frame_duration_seconds(100.0 / input_sample_rate_);
- options_.set_frame_overlap_seconds(60.0 / input_sample_rate_);
- options_.set_output_type(SpectrogramCalculatorOptions::LINEAR_MAGNITUDE);
- const std::vector input_packet_sizes = {140};
-
- InitializeGraph();
- FillInputHeader();
- // Setup packets with DC input (non-zero constant value).
- SetupConstantInputPackets(input_packet_sizes);
-
- MP_ASSERT_OK(Run());
-
- CheckOutputHeadersAndTimestamps();
- EXPECT_FLOAT_EQ(output().packets[0].Get()(0, 0),
- std::sqrt(expected_dc_squared_magnitude_));
-}
-
-TEST_F(SpectrogramCalculatorTest, DbMagnitudeOutputLooksRight) {
- options_.set_frame_duration_seconds(100.0 / input_sample_rate_);
- options_.set_frame_overlap_seconds(60.0 / input_sample_rate_);
- options_.set_output_type(SpectrogramCalculatorOptions::DECIBELS);
- const std::vector input_packet_sizes = {140};
-
- InitializeGraph();
- FillInputHeader();
- // Setup packets with DC input (non-zero constant value).
- SetupConstantInputPackets(input_packet_sizes);
-
- MP_ASSERT_OK(Run());
-
- CheckOutputHeadersAndTimestamps();
- EXPECT_FLOAT_EQ(output().packets[0].Get()(0, 0),
- 10.0 * std::log10(expected_dc_squared_magnitude_));
-}
-
-TEST_F(SpectrogramCalculatorTest, OutputScalingLooksRight) {
- options_.set_frame_duration_seconds(100.0 / input_sample_rate_);
- options_.set_frame_overlap_seconds(60.0 / input_sample_rate_);
- options_.set_output_type(SpectrogramCalculatorOptions::DECIBELS);
- double output_scale = 2.5;
- options_.set_output_scale(output_scale);
- const std::vector input_packet_sizes = {140};
-
- InitializeGraph();
- FillInputHeader();
- // Setup packets with DC input (non-zero constant value).
- SetupConstantInputPackets(input_packet_sizes);
-
- MP_ASSERT_OK(Run());
-
- CheckOutputHeadersAndTimestamps();
- EXPECT_FLOAT_EQ(
- output().packets[0].Get()(0, 0),
- output_scale * 10.0 * std::log10(expected_dc_squared_magnitude_));
-}
-
-TEST_F(SpectrogramCalculatorTest, ComplexOutputLooksRight) {
- options_.set_frame_duration_seconds(100.0 / input_sample_rate_);
- options_.set_frame_overlap_seconds(60.0 / input_sample_rate_);
- options_.set_output_type(SpectrogramCalculatorOptions::COMPLEX);
- const std::vector input_packet_sizes = {140};
-
- InitializeGraph();
- FillInputHeader();
- // Setup packets with DC input (non-zero constant value).
- SetupConstantInputPackets(input_packet_sizes);
-
- MP_ASSERT_OK(Run());
-
- CheckOutputHeadersAndTimestamps();
- EXPECT_FLOAT_EQ(std::norm(output().packets[0].Get()(0, 0)),
- expected_dc_squared_magnitude_);
-}
-
-TEST_F(SpectrogramCalculatorTest, ComplexOutputLooksRightForImpulses) {
- const int frame_size_samples = 100;
- options_.set_frame_duration_seconds(frame_size_samples / input_sample_rate_);
- options_.set_frame_overlap_seconds(0.0 / input_sample_rate_);
- options_.set_pad_final_packet(false);
- options_.set_output_type(SpectrogramCalculatorOptions::COMPLEX);
- const std::vector input_packet_sizes = {frame_size_samples,
- frame_size_samples};
- const std::vector input_packet_impulse_offsets = {49, 50};
-
- InitializeGraph();
- FillInputHeader();
-
- // Make two impulse packets offset one sample from each other
- SetupImpulseInputPackets(input_packet_sizes, input_packet_impulse_offsets);
-
- MP_ASSERT_OK(Run());
-
- CheckOutputHeadersAndTimestamps();
- const int num_buckets =
- (audio_dsp::NextPowerOfTwo(frame_size_samples) / 2) + 1;
- const float precision = 0.01f;
- auto norm_fn = [](const std::complex& cf) { return std::norm(cf); };
-
- // Both impulses should have (approximately) constant power across all
- // frequency bins
- EXPECT_TRUE(output()
- .packets[0]
- .Get()
- .unaryExpr(norm_fn)
- .isApproxToConstant(1.0f, precision));
- EXPECT_TRUE(output()
- .packets[1]
- .Get()
- .unaryExpr(norm_fn)
- .isApproxToConstant(1.0f, precision));
-
- // Because the second Packet's impulse is delayed by exactly one sample with
- // respect to the first Packet's impulse, the second impulse should have
- // greater phase, and in the highest frequency bin, the real part should
- // (approximately) flip sign from the first Packet to the second
- EXPECT_LT(std::arg(output().packets[0].Get()(1, 0)),
- std::arg(output().packets[1].Get()(1, 0)));
- const float highest_bucket_real_ratio =
- output().packets[0].Get()(num_buckets - 1, 0).real() /
- output().packets[1].Get()(num_buckets - 1, 0).real();
- EXPECT_NEAR(highest_bucket_real_ratio, -1.0f, precision);
-}
-
-TEST_F(SpectrogramCalculatorTest, SquaredMagnitudeOutputLooksRightForNonDC) {
- const int frame_size_samples = 100;
- options_.set_frame_duration_seconds(frame_size_samples / input_sample_rate_);
- options_.set_frame_overlap_seconds(60.0 / input_sample_rate_);
- options_.set_output_type(SpectrogramCalculatorOptions::SQUARED_MAGNITUDE);
- const std::vector input_packet_sizes = {140};
-
- InitializeGraph();
- FillInputHeader();
- // Make the tone have an integral number of cycles within the window
- const int target_bin = 16;
- const int fft_size = audio_dsp::NextPowerOfTwo(frame_size_samples);
- const float tone_frequency_hz = target_bin * (input_sample_rate_ / fft_size);
- SetupCosineInputPackets(input_packet_sizes, tone_frequency_hz);
-
- MP_ASSERT_OK(Run());
-
- CheckOutputHeadersAndTimestamps();
- // For a non-DC bin, the magnitude will be split between positive and
- // negative frequency bins, so it should about be half-magnitude
- // = quarter-power.
- // It's not quite exact because of the interference from the hann(100)
- // spread from the negative-frequency half.
- EXPECT_GT(output().packets[0].Get()(target_bin, 0),
- 0.98 * expected_dc_squared_magnitude_ / 4.0);
- EXPECT_LT(output().packets[0].Get()(target_bin, 0),
- 1.02 * expected_dc_squared_magnitude_ / 4.0);
-}
-
-TEST_F(SpectrogramCalculatorTest, ZeroOutputsForZeroInputsWithPaddingEnabled) {
- options_.set_frame_duration_seconds(100.0 / input_sample_rate_);
- options_.set_frame_overlap_seconds(60.0 / input_sample_rate_);
- options_.set_pad_final_packet(true);
- const std::vector input_packet_sizes = {};
- const std::vector