Use synthetic timestamps in Web when none provided

PiperOrigin-RevId: 500327275
This commit is contained in:
Sebastian Schmidt 2023-01-06 21:34:46 -08:00 committed by Copybara-Service
parent ed0054836a
commit c9ebc6fa60
12 changed files with 84 additions and 34 deletions

View File

@ -126,6 +126,8 @@ export class AudioClassifier extends AudioTaskRunner<AudioClassifierResult[]> {
return this.applyOptions(options); return this.applyOptions(options);
} }
// TODO: Add a classifyStream() that takes a timestamp
/** /**
* Performs audio classification on the provided audio clip and waits * Performs audio classification on the provided audio clip and waits
* synchronously for the response. * synchronously for the response.
@ -194,8 +196,9 @@ export class AudioClassifier extends AudioTaskRunner<AudioClassifierResult[]> {
graphConfig.addNode(classifierNode); graphConfig.addNode(classifierNode);
this.graphRunner.attachProtoVectorListener( this.graphRunner.attachProtoVectorListener(
TIMESTAMPED_CLASSIFICATIONS_STREAM, binaryProtos => { TIMESTAMPED_CLASSIFICATIONS_STREAM, (binaryProtos, timestamp) => {
this.addJsAudioClassificationResults(binaryProtos); this.addJsAudioClassificationResults(binaryProtos);
this.setLatestOutputTimestamp(timestamp);
}); });
const binaryGraph = graphConfig.serializeBinary(); const binaryGraph = graphConfig.serializeBinary();

View File

@ -128,6 +128,8 @@ export class AudioEmbedder extends AudioTaskRunner<AudioEmbedderResult[]> {
return this.applyOptions(options); return this.applyOptions(options);
} }
// TODO: Add an embedStream() that takes a timestamp
/** /**
* Performs embedding extraction on the provided audio clip and waits * Performs embedding extraction on the provided audio clip and waits
* synchronously for the response. * synchronously for the response.
@ -193,20 +195,24 @@ export class AudioEmbedder extends AudioTaskRunner<AudioEmbedderResult[]> {
graphConfig.addNode(embedderNode); graphConfig.addNode(embedderNode);
this.graphRunner.attachProtoListener(EMBEDDINGS_STREAM, binaryProto => { this.graphRunner.attachProtoListener(
const embeddingResult = EmbeddingResult.deserializeBinary(binaryProto); EMBEDDINGS_STREAM, (binaryProto, timestamp) => {
const embeddingResult =
EmbeddingResult.deserializeBinary(binaryProto);
this.embeddingResults.push( this.embeddingResults.push(
convertFromEmbeddingResultProto(embeddingResult)); convertFromEmbeddingResultProto(embeddingResult));
this.setLatestOutputTimestamp(timestamp);
}); });
this.graphRunner.attachProtoVectorListener( this.graphRunner.attachProtoVectorListener(
TIMESTAMPED_EMBEDDINGS_STREAM, data => { TIMESTAMPED_EMBEDDINGS_STREAM, (data, timestamp) => {
for (const binaryProto of data) { for (const binaryProto of data) {
const embeddingResult = const embeddingResult =
EmbeddingResult.deserializeBinary(binaryProto); EmbeddingResult.deserializeBinary(binaryProto);
this.embeddingResults.push( this.embeddingResults.push(
convertFromEmbeddingResultProto(embeddingResult)); convertFromEmbeddingResultProto(embeddingResult));
} }
this.setLatestOutputTimestamp(timestamp);
}); });
const binaryGraph = graphConfig.serializeBinary(); const binaryGraph = graphConfig.serializeBinary();

View File

@ -36,8 +36,11 @@ export abstract class AudioTaskRunner<T> extends TaskRunner {
/** Sends a single audio clip to the graph and awaits results. */ /** Sends a single audio clip to the graph and awaits results. */
protected processAudioClip(audioData: Float32Array, sampleRate?: number): T { protected processAudioClip(audioData: Float32Array, sampleRate?: number): T {
// Increment the timestamp by 1 millisecond to guarantee that we send
// monotonically increasing timestamps to the graph.
const syntheticTimestamp = this.getLatestOutputTimestamp() + 1;
return this.process( return this.process(
audioData, sampleRate ?? this.defaultSampleRate, performance.now()); audioData, sampleRate ?? this.defaultSampleRate, syntheticTimestamp);
} }
} }

View File

@ -50,7 +50,7 @@ export async function createTaskRunner<T extends TaskRunner>(
} }
}; };
// Initialize a canvas if requested. If OffscreenCanvas is availble, we // Initialize a canvas if requested. If OffscreenCanvas is available, we
// let the graph runner initialize it by passing `undefined`. // let the graph runner initialize it by passing `undefined`.
const canvas = initializeCanvas ? (typeof OffscreenCanvas === 'undefined' ? const canvas = initializeCanvas ? (typeof OffscreenCanvas === 'undefined' ?
document.createElement('canvas') : document.createElement('canvas') :
@ -66,6 +66,7 @@ export async function createTaskRunner<T extends TaskRunner>(
export abstract class TaskRunner { export abstract class TaskRunner {
protected abstract baseOptions: BaseOptionsProto; protected abstract baseOptions: BaseOptionsProto;
private processingErrors: Error[] = []; private processingErrors: Error[] = [];
private latestOutputTimestamp = 0;
/** /**
* Creates a new instance of a Mediapipe Task. Determines if SIMD is * Creates a new instance of a Mediapipe Task. Determines if SIMD is
@ -162,6 +163,21 @@ export abstract class TaskRunner {
this.handleErrors(); this.handleErrors();
} }
/**
 * Records the latest output timestamp received from the graph (in ms).
 *
 * The stored value never decreases: timestamps that are not strictly greater
 * than the currently stored timestamp are ignored. `NaN` is ignored as well,
 * so a single invalid timestamp cannot corrupt the stored value.
 *
 * @param timestamp The output timestamp reported by the graph, in
 *     milliseconds.
 */
protected setLatestOutputTimestamp(timestamp: number): void {
  // Deliberately a comparison instead of `Math.max()`: `Math.max()` returns
  // NaN as soon as one argument is NaN, which would make every timestamp
  // derived from the stored value NaN from then on.
  if (timestamp > this.latestOutputTimestamp) {
    this.latestOutputTimestamp = timestamp;
  }
}
/**
 * Returns the latest output timestamp recorded for this task.
 *
 * @return The largest timestamp stored so far, or the field's initial value
 *     if no output timestamp has been recorded yet.
 */
protected getLatestOutputTimestamp(): number {
  return this.latestOutputTimestamp;
}
/** Throws the error from the error listener if an error was raised. */ /** Throws the error from the error listener if an error was raised. */
private handleErrors() { private handleErrors() {
try { try {

View File

@ -131,10 +131,11 @@ export class TextClassifier extends TaskRunner {
* @return The classification result of the text * @return The classification result of the text
*/ */
classify(text: string): TextClassifierResult { classify(text: string): TextClassifierResult {
// Get classification result by running our MediaPipe graph. // Increment the timestamp by 1 millisecond to guarantee that we send
// monotonically increasing timestamps to the graph.
const syntheticTimestamp = this.getLatestOutputTimestamp() + 1;
this.classificationResult = {classifications: []}; this.classificationResult = {classifications: []};
this.graphRunner.addStringToStream( this.graphRunner.addStringToStream(text, INPUT_STREAM, syntheticTimestamp);
text, INPUT_STREAM, /* timestamp= */ performance.now());
this.finishProcessing(); this.finishProcessing();
return this.classificationResult; return this.classificationResult;
} }
@ -158,9 +159,10 @@ export class TextClassifier extends TaskRunner {
graphConfig.addNode(classifierNode); graphConfig.addNode(classifierNode);
this.graphRunner.attachProtoListener( this.graphRunner.attachProtoListener(
CLASSIFICATIONS_STREAM, binaryProto => { CLASSIFICATIONS_STREAM, (binaryProto, timestamp) => {
this.classificationResult = convertFromClassificationResultProto( this.classificationResult = convertFromClassificationResultProto(
ClassificationResult.deserializeBinary(binaryProto)); ClassificationResult.deserializeBinary(binaryProto));
this.setLatestOutputTimestamp(timestamp);
}); });
const binaryGraph = graphConfig.serializeBinary(); const binaryGraph = graphConfig.serializeBinary();

View File

@ -135,9 +135,10 @@ export class TextEmbedder extends TaskRunner {
* @return The embedding results of the text * @return The embedding results of the text
*/ */
embed(text: string): TextEmbedderResult { embed(text: string): TextEmbedderResult {
// Get text embeddings by running our MediaPipe graph. // Increment the timestamp by 1 millisecond to guarantee that we send
this.graphRunner.addStringToStream( // monotonically increasing timestamps to the graph.
text, INPUT_STREAM, /* timestamp= */ performance.now()); const syntheticTimestamp = this.getLatestOutputTimestamp() + 1;
this.graphRunner.addStringToStream(text, INPUT_STREAM, syntheticTimestamp);
this.finishProcessing(); this.finishProcessing();
return this.embeddingResult; return this.embeddingResult;
} }
@ -173,9 +174,13 @@ export class TextEmbedder extends TaskRunner {
graphConfig.addNode(embedderNode); graphConfig.addNode(embedderNode);
this.graphRunner.attachProtoListener(EMBEDDINGS_STREAM, binaryProto => { this.graphRunner.attachProtoListener(
const embeddingResult = EmbeddingResult.deserializeBinary(binaryProto); EMBEDDINGS_STREAM, (binaryProto, timestamp) => {
this.embeddingResult = convertFromEmbeddingResultProto(embeddingResult); const embeddingResult =
EmbeddingResult.deserializeBinary(binaryProto);
this.embeddingResult =
convertFromEmbeddingResultProto(embeddingResult);
this.setLatestOutputTimestamp(timestamp);
}); });
const binaryGraph = graphConfig.serializeBinary(); const binaryGraph = graphConfig.serializeBinary();

View File

@ -71,7 +71,11 @@ export abstract class VisionTaskRunner extends TaskRunner {
'Task is not initialized with image mode. ' + 'Task is not initialized with image mode. ' +
'\'runningMode\' must be set to \'image\'.'); '\'runningMode\' must be set to \'image\'.');
} }
this.process(image, imageProcessingOptions, performance.now());
// Increment the timestamp by 1 millisecond to guarantee that we send
// monotonically increasing timestamps to the graph.
const syntheticTimestamp = this.getLatestOutputTimestamp() + 1;
this.process(image, imageProcessingOptions, syntheticTimestamp);
} }
/** Sends a single video frame to the graph and awaits results. */ /** Sends a single video frame to the graph and awaits results. */

View File

@ -380,23 +380,27 @@ export class GestureRecognizer extends VisionTaskRunner {
graphConfig.addNode(recognizerNode); graphConfig.addNode(recognizerNode);
this.graphRunner.attachProtoVectorListener( this.graphRunner.attachProtoVectorListener(
LANDMARKS_STREAM, binaryProto => { LANDMARKS_STREAM, (binaryProto, timestamp) => {
this.addJsLandmarks(binaryProto); this.addJsLandmarks(binaryProto);
this.setLatestOutputTimestamp(timestamp);
}); });
this.graphRunner.attachProtoVectorListener( this.graphRunner.attachProtoVectorListener(
WORLD_LANDMARKS_STREAM, binaryProto => { WORLD_LANDMARKS_STREAM, (binaryProto, timestamp) => {
this.adddJsWorldLandmarks(binaryProto); this.adddJsWorldLandmarks(binaryProto);
this.setLatestOutputTimestamp(timestamp);
}); });
this.graphRunner.attachProtoVectorListener( this.graphRunner.attachProtoVectorListener(
HAND_GESTURES_STREAM, binaryProto => { HAND_GESTURES_STREAM, (binaryProto, timestamp) => {
// Gesture index is not used, because the final gesture result comes // Gesture index is not used, because the final gesture result comes
// from multiple classifiers. // from multiple classifiers.
this.gestures.push( this.gestures.push(
...this.toJsCategories(binaryProto, /* populateIndex= */ false)); ...this.toJsCategories(binaryProto, /* populateIndex= */ false));
this.setLatestOutputTimestamp(timestamp);
}); });
this.graphRunner.attachProtoVectorListener( this.graphRunner.attachProtoVectorListener(
HANDEDNESS_STREAM, binaryProto => { HANDEDNESS_STREAM, (binaryProto, timestamp) => {
this.handednesses.push(...this.toJsCategories(binaryProto)); this.handednesses.push(...this.toJsCategories(binaryProto));
this.setLatestOutputTimestamp(timestamp);
}); });
const binaryGraph = graphConfig.serializeBinary(); const binaryGraph = graphConfig.serializeBinary();

View File

@ -313,16 +313,19 @@ export class HandLandmarker extends VisionTaskRunner {
graphConfig.addNode(landmarkerNode); graphConfig.addNode(landmarkerNode);
this.graphRunner.attachProtoVectorListener( this.graphRunner.attachProtoVectorListener(
LANDMARKS_STREAM, binaryProto => { LANDMARKS_STREAM, (binaryProto, timestamp) => {
this.addJsLandmarks(binaryProto); this.addJsLandmarks(binaryProto);
this.setLatestOutputTimestamp(timestamp);
}); });
this.graphRunner.attachProtoVectorListener( this.graphRunner.attachProtoVectorListener(
WORLD_LANDMARKS_STREAM, binaryProto => { WORLD_LANDMARKS_STREAM, (binaryProto, timestamp) => {
this.adddJsWorldLandmarks(binaryProto); this.adddJsWorldLandmarks(binaryProto);
this.setLatestOutputTimestamp(timestamp);
}); });
this.graphRunner.attachProtoVectorListener( this.graphRunner.attachProtoVectorListener(
HANDEDNESS_STREAM, binaryProto => { HANDEDNESS_STREAM, (binaryProto, timestamp) => {
this.handednesses.push(...this.toJsCategories(binaryProto)); this.handednesses.push(...this.toJsCategories(binaryProto));
this.setLatestOutputTimestamp(timestamp);
}); });
const binaryGraph = graphConfig.serializeBinary(); const binaryGraph = graphConfig.serializeBinary();

View File

@ -187,9 +187,10 @@ export class ImageClassifier extends VisionTaskRunner {
graphConfig.addNode(classifierNode); graphConfig.addNode(classifierNode);
this.graphRunner.attachProtoListener( this.graphRunner.attachProtoListener(
CLASSIFICATIONS_STREAM, binaryProto => { CLASSIFICATIONS_STREAM, (binaryProto, timestamp) => {
this.classificationResult = convertFromClassificationResultProto( this.classificationResult = convertFromClassificationResultProto(
ClassificationResult.deserializeBinary(binaryProto)); ClassificationResult.deserializeBinary(binaryProto));
this.setLatestOutputTimestamp(timestamp);
}); });
const binaryGraph = graphConfig.serializeBinary(); const binaryGraph = graphConfig.serializeBinary();

View File

@ -206,8 +206,10 @@ export class ImageEmbedder extends VisionTaskRunner {
graphConfig.addNode(embedderNode); graphConfig.addNode(embedderNode);
this.graphRunner.attachProtoListener(EMBEDDINGS_STREAM, binaryProto => { this.graphRunner.attachProtoListener(
EMBEDDINGS_STREAM, (binaryProto, timestamp) => {
this.addJsImageEmdedding(binaryProto); this.addJsImageEmdedding(binaryProto);
this.setLatestOutputTimestamp(timestamp);
}); });
const binaryGraph = graphConfig.serializeBinary(); const binaryGraph = graphConfig.serializeBinary();

View File

@ -176,7 +176,7 @@ export class ObjectDetector extends VisionTaskRunner {
} }
/** /**
* Performs object detection on the provided vidoe frame and waits * Performs object detection on the provided video frame and waits
* synchronously for the response. Only use this method when the * synchronously for the response. Only use this method when the
* ObjectDetector is created with running mode `video`. * ObjectDetector is created with running mode `video`.
* *
@ -248,8 +248,9 @@ export class ObjectDetector extends VisionTaskRunner {
graphConfig.addNode(detectorNode); graphConfig.addNode(detectorNode);
this.graphRunner.attachProtoVectorListener( this.graphRunner.attachProtoVectorListener(
DETECTIONS_STREAM, binaryProto => { DETECTIONS_STREAM, (binaryProto, timestamp) => {
this.addJsObjectDetections(binaryProto); this.addJsObjectDetections(binaryProto);
this.setLatestOutputTimestamp(timestamp);
}); });
const binaryGraph = graphConfig.serializeBinary(); const binaryGraph = graphConfig.serializeBinary();