Create AudioTaskRunner

PiperOrigin-RevId: 489613573
2022-11-18 19:53:21 -08:00 · 2022-11-18 19:53:21 -08:00 · e853f04b79
commit e853f04b79
parent eb8ef1ace0
6 changed files with 98 additions and 53 deletions
--- a/mediapipe/tasks/java/com/google/mediapipe/tasks/audio/core/BaseAudioTaskApi.java
+++ b/mediapipe/tasks/java/com/google/mediapipe/tasks/audio/core/BaseAudioTaskApi.java
@ -116,6 +116,7 @@ public class BaseAudioTaskApi implements AutoCloseable {
      defaultSampleRate = sampleRate;
    }
  }
+
  /**
   * An asynchronous method to send audio stream data to the {@link TaskRunner}. The results will be
   * available in the user-defined result listener.
--- a/mediapipe/tasks/web/audio/audio_classifier/BUILD
+++ b/mediapipe/tasks/web/audio/audio_classifier/BUILD
@ -17,14 +17,14 @@ mediapipe_ts_library(
        "//mediapipe/framework:calculator_options_jspb_proto",
        "//mediapipe/tasks/cc/audio/audio_classifier/proto:audio_classifier_graph_options_jspb_proto",
        "//mediapipe/tasks/cc/components/containers/proto:classifications_jspb_proto",
+        "//mediapipe/tasks/cc/core/proto:base_options_jspb_proto",
+        "//mediapipe/tasks/web/audio/core:audio_task_runner",
        "//mediapipe/tasks/web/components/containers:category",
        "//mediapipe/tasks/web/components/containers:classification_result",
-        "//mediapipe/tasks/web/components/processors:base_options",
        "//mediapipe/tasks/web/components/processors:classifier_options",
        "//mediapipe/tasks/web/components/processors:classifier_result",
        "//mediapipe/tasks/web/core",
        "//mediapipe/tasks/web/core:classifier_options",
-        "//mediapipe/tasks/web/core:task_runner",
        "//mediapipe/web/graph_runner:graph_runner_ts",
    ],
 )
--- a/mediapipe/tasks/web/audio/audio_classifier/audio_classifier.ts
+++ b/mediapipe/tasks/web/audio/audio_classifier/audio_classifier.ts
@ -18,10 +18,10 @@ import {CalculatorGraphConfig} from '../../../../framework/calculator_pb';
 import {CalculatorOptions} from '../../../../framework/calculator_options_pb';
 import {AudioClassifierGraphOptions} from '../../../../tasks/cc/audio/audio_classifier/proto/audio_classifier_graph_options_pb';
 import {ClassificationResult} from '../../../../tasks/cc/components/containers/proto/classifications_pb';
-import {convertBaseOptionsToProto} from '../../../../tasks/web/components/processors/base_options';
+import {BaseOptions as BaseOptionsProto} from '../../../../tasks/cc/core/proto/base_options_pb';
+import {AudioTaskRunner} from '../../../../tasks/web/audio/core/audio_task_runner';
 import {convertClassifierOptionsToProto} from '../../../../tasks/web/components/processors/classifier_options';
 import {convertFromClassificationResultProto} from '../../../../tasks/web/components/processors/classifier_result';
-import {TaskRunner} from '../../../../tasks/web/core/task_runner';
 import {WasmLoaderOptions} from '../../../../tasks/web/core/wasm_loader_options';
 import {createMediaPipeLib, FileLocator} from '../../../../web/graph_runner/graph_runner';
 // Placeholder for internal dependency on trusted resource url
@ -47,9 +47,8 @@ const TIMESTAMPED_CLASSIFICATIONS_STREAM = 'timestamped_classifications';
 // tslint:disable:jspb-use-builder-pattern

 /** Performs audio classification. */
-export class AudioClassifier extends TaskRunner {
+export class AudioClassifier extends AudioTaskRunner<AudioClassifierResult[]> {
  private classificationResults: AudioClassifierResult[] = [];
-  private defaultSampleRate = 48000;
  private readonly options = new AudioClassifierGraphOptions();

  /**
@ -111,6 +110,14 @@ export class AudioClassifier extends TaskRunner {
        wasmLoaderOptions, new Uint8Array(graphData));
  }

+  protected override get baseOptions(): BaseOptionsProto|undefined {
+    return this.options.getBaseOptions();
+  }
+
+  protected override set baseOptions(proto: BaseOptionsProto|undefined) {
+    this.options.setBaseOptions(proto);
+  }
+
  /**
   * Sets new options for the audio classifier.
   *
@ -120,34 +127,19 @@ export class AudioClassifier extends TaskRunner {
   *
   * @param options The options for the audio classifier.
   */
-  async setOptions(options: AudioClassifierOptions): Promise<void> {
-    if (options.baseOptions) {
-      const baseOptionsProto = await convertBaseOptionsToProto(
-          options.baseOptions, this.options.getBaseOptions());
-      this.options.setBaseOptions(baseOptionsProto);
-    }
-
+  override async setOptions(options: AudioClassifierOptions): Promise<void> {
+    await super.setOptions(options);
    this.options.setClassifierOptions(convertClassifierOptionsToProto(
        options, this.options.getClassifierOptions()));
    this.refreshGraph();
  }

  /**
-   * Sets the sample rate for all calls to `classify()` that omit an explicit
-   * sample rate. `48000` is used as a default if this method is not called.
-   *
-   * @param sampleRate A sample rate (e.g. `44100`).
-   */
-  setDefaultSampleRate(sampleRate: number) {
-    this.defaultSampleRate = sampleRate;
-  }
-
-  /**
-   * Performs audio classification on the provided audio data and waits
+   * Performs audio classification on the provided audio clip and waits
   * synchronously for the response.
   *
-   * @param audioData An array of raw audio capture data, like
-   *     from a call to getChannelData on an AudioBuffer.
+   * @param audioData An array of raw audio capture data, like from a call to
+   *     `getChannelData()` on an AudioBuffer.
   * @param sampleRate The sample rate in Hz of the provided audio data. If not
   *     set, defaults to the sample rate set via `setDefaultSampleRate()` or
   *     `48000` if no custom default was set.
@ -155,18 +147,21 @@ export class AudioClassifier extends TaskRunner {
   */
  classify(audioData: Float32Array, sampleRate?: number):
      AudioClassifierResult[] {
-    sampleRate = sampleRate ?? this.defaultSampleRate;
+    return this.processAudioClip(audioData, sampleRate);
+  }

+  /** Sends an audio package to the graph and returns the classifications. */
+  protected override process(
+      audioData: Float32Array, sampleRate: number,
+      timestampMs: number): AudioClassifierResult[] {
    // Configures the number of samples in the WASM layer. We re-configure the
    // number of samples and the sample rate for every frame, but ignore other
    // side effects of this function (such as sending the input side packet and
    // the input stream header).
    this.configureAudio(
        /* numChannels= */ 1, /* numSamples= */ audioData.length, sampleRate);
-
-    const timestamp = performance.now();
-    this.addDoubleToStream(sampleRate, SAMPLE_RATE_STREAM, timestamp);
-    this.addAudioToStream(audioData, timestamp);
+    this.addDoubleToStream(sampleRate, SAMPLE_RATE_STREAM, timestampMs);
+    this.addAudioToStream(audioData, timestampMs);

    this.classificationResults = [];
    this.finishProcessing();
--- a/mediapipe/tasks/web/audio/core/BUILD
+++ b/mediapipe/tasks/web/audio/core/BUILD
@ -1,6 +1,6 @@
 # This package contains options shared by all MediaPipe Audio Tasks for Web.

-load("//mediapipe/framework/port:build_config.bzl", "mediapipe_ts_declaration")
+load("//mediapipe/framework/port:build_config.bzl", "mediapipe_ts_declaration", "mediapipe_ts_library")

 package(default_visibility = ["//mediapipe/tasks:internal"])

@ -11,3 +11,15 @@ mediapipe_ts_declaration(
        "//mediapipe/tasks/web/core",
    ],
 )
+
+mediapipe_ts_library(
+    name = "audio_task_runner",
+    srcs = ["audio_task_runner.ts"],
+    deps = [
+        ":audio_task_options",
+        "//mediapipe/tasks/cc/core/proto:base_options_jspb_proto",
+        "//mediapipe/tasks/web/components/processors:base_options",
+        "//mediapipe/tasks/web/core",
+        "//mediapipe/tasks/web/core:task_runner",
+    ],
+)
--- a/mediapipe/tasks/web/audio/core/audio_task_options.d.ts
+++ b/mediapipe/tasks/web/audio/core/audio_task_options.d.ts
@ -16,29 +16,8 @@

 import {BaseOptions} from '../../../../tasks/web/core/base_options';

-/**
- * MediaPipe audio task running mode. A MediaPipe audio task can be run with
- * two different modes:
- * - audio_clips:  The mode for running a mediapipe audio task on independent
- *                 audio clips.
- * - audio_stream: The mode for running a mediapipe audio task on an audio
- *                 stream, such as from a microphone.
- * </ul>
- */
-export type RunningMode = 'audio_clips'|'audio_stream';
-
 /** The options for configuring a MediaPipe Audio Task. */
 export declare interface AudioTaskOptions {
  /** Options to configure the loading of the model assets. */
  baseOptions?: BaseOptions;
-
-  /**
-   * The running mode of the task. Default to the audio_clips mode.
-   * Audio tasks have two running modes:
-   * 1) The mode for running a mediapipe audio task on independent
-   *    audio clips.
-   * 2) The mode for running a mediapipe audio task on an audio
-   *    stream, such as from a microphone.
-   */
-  runningMode?: RunningMode;
 }
--- a/mediapipe/tasks/web/audio/core/audio_task_runner.ts
+++ b/mediapipe/tasks/web/audio/core/audio_task_runner.ts
@ -0,0 +1,58 @@
+/**
+ * Copyright 2022 The MediaPipe Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import {BaseOptions as BaseOptionsProto} from '../../../../tasks/cc/core/proto/base_options_pb';
+import {convertBaseOptionsToProto} from '../../../../tasks/web/components/processors/base_options';
+import {TaskRunner} from '../../../../tasks/web/core/task_runner';
+
+import {AudioTaskOptions} from './audio_task_options';
+
+/** Base class for all MediaPipe Audio Tasks. */
+export abstract class AudioTaskRunner<T> extends TaskRunner {
+  protected abstract baseOptions?: BaseOptionsProto|undefined;
+  private defaultSampleRate = 48000;
+
+  /** Configures the shared options of an audio task. */
+  async setOptions(options: AudioTaskOptions): Promise<void> {
+    this.baseOptions = this.baseOptions ?? new BaseOptionsProto();
+    if (options.baseOptions) {
+      this.baseOptions = await convertBaseOptionsToProto(
+          options.baseOptions, this.baseOptions);
+    }
+  }
+
+  /**
+   * Sets the sample rate for API calls that omit an explicit sample rate.
+   * `48000` is used as a default if this method is not called.
+   *
+   * @param sampleRate A sample rate (e.g. `44100`).
+   */
+  setDefaultSampleRate(sampleRate: number) {
+    this.defaultSampleRate = sampleRate;
+  }
+
+  /** Sends an audio packet to the graph and awaits results. */
+  protected abstract process(
+      audioData: Float32Array, sampleRate: number, timestampMs: number): T;
+
+  /** Sends a single audio clip to the graph and awaits results. */
+  protected processAudioClip(audioData: Float32Array, sampleRate?: number): T {
+    return this.process(
+        audioData, sampleRate ?? this.defaultSampleRate, performance.now());
+  }
+}
+
+