Use synthetic timestamps in Web when none provided

PiperOrigin-RevId: 500327275
Sebastian Schmidt 2023-01-06 21:34:46 -08:00 committed by Copybara-Service
parent ed0054836a
commit c9ebc6fa60
12 changed files with 84 additions and 34 deletions

View File

@@ -126,6 +126,8 @@ export class AudioClassifier extends AudioTaskRunner<AudioClassifierResult[]> {
return this.applyOptions(options);
}
// TODO: Add a classifyStream() that takes a timestamp
/**
* Performs audio classification on the provided audio clip and waits
* synchronously for the response.
@@ -194,8 +196,9 @@ export class AudioClassifier extends AudioTaskRunner<AudioClassifierResult[]> {
graphConfig.addNode(classifierNode);
this.graphRunner.attachProtoVectorListener(
TIMESTAMPED_CLASSIFICATIONS_STREAM, binaryProtos => {
TIMESTAMPED_CLASSIFICATIONS_STREAM, (binaryProtos, timestamp) => {
this.addJsAudioClassificationResults(binaryProtos);
this.setLatestOutputTimestamp(timestamp);
});
const binaryGraph = graphConfig.serializeBinary();

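The listener change above is the pattern this commit repeats across every task: output-stream callbacks now receive the packet's timestamp alongside the payload and forward it to the task runner. A minimal sketch of that callback shape; the type alias and factory names below are illustrative, not part of the MediaPipe API:

```ts
// Sketch of a timestamp-aware listener mirroring the diff above.
// `ProtoVectorListener` and `makeListener` are illustrative names only.
type ProtoVectorListener =
    (binaryProtos: Uint8Array[], timestampMs: number) => void;

function makeListener(
    storeResults: (binaryProtos: Uint8Array[]) => void,
    recordTimestamp: (timestampMs: number) => void): ProtoVectorListener {
  return (binaryProtos, timestampMs) => {
    storeResults(binaryProtos);
    // Remember when the graph produced this output so the next synchronous
    // call can derive a later synthetic input timestamp from it.
    recordTimestamp(timestampMs);
  };
}
```
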
View File

@@ -128,6 +128,8 @@ export class AudioEmbedder extends AudioTaskRunner<AudioEmbedderResult[]> {
return this.applyOptions(options);
}
// TODO: Add a classifyStream() that takes a timestamp
/**
* Performs embedding extraction on the provided audio clip and waits
* synchronously for the response.
@@ -193,20 +195,24 @@ export class AudioEmbedder extends AudioTaskRunner<AudioEmbedderResult[]> {
graphConfig.addNode(embedderNode);
this.graphRunner.attachProtoListener(EMBEDDINGS_STREAM, binaryProto => {
const embeddingResult = EmbeddingResult.deserializeBinary(binaryProto);
this.graphRunner.attachProtoListener(
EMBEDDINGS_STREAM, (binaryProto, timestamp) => {
const embeddingResult =
EmbeddingResult.deserializeBinary(binaryProto);
this.embeddingResults.push(
convertFromEmbeddingResultProto(embeddingResult));
this.setLatestOutputTimestamp(timestamp);
});
this.graphRunner.attachProtoVectorListener(
TIMESTAMPED_EMBEDDINGS_STREAM, data => {
TIMESTAMPED_EMBEDDINGS_STREAM, (data, timestamp) => {
for (const binaryProto of data) {
const embeddingResult =
EmbeddingResult.deserializeBinary(binaryProto);
this.embeddingResults.push(
convertFromEmbeddingResultProto(embeddingResult));
}
this.setLatestOutputTimestamp(timestamp);
});
const binaryGraph = graphConfig.serializeBinary();

View File

@@ -36,8 +36,11 @@ export abstract class AudioTaskRunner<T> extends TaskRunner {
/** Sends a single audio clip to the graph and awaits results. */
protected processAudioClip(audioData: Float32Array, sampleRate?: number): T {
// Increment the timestamp by 1 millisecond to guarantee that we send
// monotonically increasing timestamps to the graph.
const syntheticTimestamp = this.getLatestOutputTimestamp() + 1;
return this.process(
audioData, sampleRate ?? this.defaultSampleRate, performance.now());
audioData, sampleRate ?? this.defaultSampleRate, syntheticTimestamp);
}
}

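To make the arithmetic in the hunk above concrete: `latestOutputTimestamp` starts at 0, so the first clip is sent at timestamp 1, and every later clip goes out one millisecond past the newest output the graph has produced, keeping input timestamps strictly increasing without consulting the wall clock. A small illustrative progression, assuming the graph reports each input timestamp back on its output streams (which is what the listener changes in this commit rely on):

```ts
// Illustrative progression across three clips (values are made up; the real
// bookkeeping lives in TaskRunner and AudioTaskRunner).
let latestOutputTimestamp = 0;                                // initial value
const t1 = latestOutputTimestamp + 1;                         // clip 1 -> 1
latestOutputTimestamp = Math.max(latestOutputTimestamp, t1);  // listener fires
const t2 = latestOutputTimestamp + 1;                         // clip 2 -> 2
latestOutputTimestamp = Math.max(latestOutputTimestamp, t2);
const t3 = latestOutputTimestamp + 1;                         // clip 3 -> 3
```
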
View File

@@ -50,7 +50,7 @@ export async function createTaskRunner<T extends TaskRunner>(
}
};
// Initialize a canvas if requested. If OffscreenCanvas is availble, we
// Initialize a canvas if requested. If OffscreenCanvas is available, we
// let the graph runner initialize it by passing `undefined`.
const canvas = initializeCanvas ? (typeof OffscreenCanvas === 'undefined' ?
document.createElement('canvas') :
@@ -66,6 +66,7 @@
export abstract class TaskRunner {
protected abstract baseOptions: BaseOptionsProto;
private processingErrors: Error[] = [];
private latestOutputTimestamp = 0;
/**
* Creates a new instance of a Mediapipe Task. Determines if SIMD is
@@ -162,6 +163,21 @@ export abstract class TaskRunner {
this.handleErrors();
}
/*
* Sets the latest output timestamp received from the graph (in ms).
* Timestamps that are smaller than the currently latest output timestamp are
* ignored.
*/
protected setLatestOutputTimestamp(timestamp: number): void {
this.latestOutputTimestamp =
Math.max(this.latestOutputTimestamp, timestamp);
}
/** Returns the latest output timestamp. */
protected getLatestOutputTimestamp() {
return this.latestOutputTimestamp;
}
/** Throws the error from the error listener if an error was raised. */
private handleErrors() {
try {

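One invocation can attach several output listeners (the gesture recognizer below attaches four), each of which reports a timestamp, so `setLatestOutputTimestamp` keeps only the largest value and the stored timestamp never moves backwards regardless of listener order. A standalone mirror of that update rule (the real method is a protected member of `TaskRunner`):

```ts
// Standalone illustration of the max-based update added above.
let latestOutputTimestamp = 0;

function setLatestOutputTimestamp(timestamp: number): void {
  latestOutputTimestamp = Math.max(latestOutputTimestamp, timestamp);
}

setLatestOutputTimestamp(5);  // latest -> 5
setLatestOutputTimestamp(3);  // earlier than the current latest: ignored
setLatestOutputTimestamp(5);  // duplicate report from another listener: no change
setLatestOutputTimestamp(8);  // latest -> 8
```
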
View File

@@ -131,10 +131,11 @@ export class TextClassifier extends TaskRunner {
* @return The classification result of the text
*/
classify(text: string): TextClassifierResult {
// Get classification result by running our MediaPipe graph.
// Increment the timestamp by 1 millisecond to guarantee that we send
// monotonically increasing timestamps to the graph.
const syntheticTimestamp = this.getLatestOutputTimestamp() + 1;
this.classificationResult = {classifications: []};
this.graphRunner.addStringToStream(
text, INPUT_STREAM, /* timestamp= */ performance.now());
this.graphRunner.addStringToStream(text, INPUT_STREAM, syntheticTimestamp);
this.finishProcessing();
return this.classificationResult;
}
@@ -158,9 +159,10 @@ export class TextClassifier extends TaskRunner {
graphConfig.addNode(classifierNode);
this.graphRunner.attachProtoListener(
CLASSIFICATIONS_STREAM, binaryProto => {
CLASSIFICATIONS_STREAM, (binaryProto, timestamp) => {
this.classificationResult = convertFromClassificationResultProto(
ClassificationResult.deserializeBinary(binaryProto));
this.setLatestOutputTimestamp(timestamp);
});
const binaryGraph = graphConfig.serializeBinary();

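The user-visible effect of the hunks above, assuming an already constructed `TextClassifier`: back-to-back synchronous calls no longer depend on `performance.now()`, which browsers can coarsen enough that two rapid calls observe the same value and violate the graph's requirement of monotonically increasing input timestamps. Each call now goes out at the previous output timestamp plus one millisecond:

```ts
// Hypothetical back-to-back calls; `classifier` stands in for an already
// constructed TextClassifier instance (setup omitted).
declare const classifier: {classify(text: string): unknown};

// With the change above, the first call is sent at synthetic timestamp 1 and
// the second at 2, no matter how the wall clock behaves between them.
const resultA = classifier.classify('The weather is lovely today.');
const resultB = classifier.classify('I did not enjoy this movie at all.');
```
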
View File

@@ -135,9 +135,10 @@ export class TextEmbedder extends TaskRunner {
* @return The embedding results of the text
*/
embed(text: string): TextEmbedderResult {
// Get text embeddings by running our MediaPipe graph.
this.graphRunner.addStringToStream(
text, INPUT_STREAM, /* timestamp= */ performance.now());
// Increment the timestamp by 1 millisecond to guarantee that we send
// monotonically increasing timestamps to the graph.
const syntheticTimestamp = this.getLatestOutputTimestamp() + 1;
this.graphRunner.addStringToStream(text, INPUT_STREAM, syntheticTimestamp);
this.finishProcessing();
return this.embeddingResult;
}
@@ -173,9 +174,13 @@ export class TextEmbedder extends TaskRunner {
graphConfig.addNode(embedderNode);
this.graphRunner.attachProtoListener(EMBEDDINGS_STREAM, binaryProto => {
const embeddingResult = EmbeddingResult.deserializeBinary(binaryProto);
this.embeddingResult = convertFromEmbeddingResultProto(embeddingResult);
this.graphRunner.attachProtoListener(
EMBEDDINGS_STREAM, (binaryProto, timestamp) => {
const embeddingResult =
EmbeddingResult.deserializeBinary(binaryProto);
this.embeddingResult =
convertFromEmbeddingResultProto(embeddingResult);
this.setLatestOutputTimestamp(timestamp);
});
const binaryGraph = graphConfig.serializeBinary();

View File

@@ -71,7 +71,11 @@ export abstract class VisionTaskRunner extends TaskRunner {
'Task is not initialized with image mode. ' +
'\'runningMode\' must be set to \'image\'.');
}
this.process(image, imageProcessingOptions, performance.now());
// Increment the timestamp by 1 millisecond to guarantee that we send
// monotonically increasing timestamps to the graph.
const syntheticTimestamp = this.getLatestOutputTimestamp() + 1;
this.process(image, imageProcessingOptions, syntheticTimestamp);
}
/** Sends a single video frame to the graph and awaits results. */

View File

@@ -380,23 +380,27 @@ export class GestureRecognizer extends VisionTaskRunner {
graphConfig.addNode(recognizerNode);
this.graphRunner.attachProtoVectorListener(
LANDMARKS_STREAM, binaryProto => {
LANDMARKS_STREAM, (binaryProto, timestamp) => {
this.addJsLandmarks(binaryProto);
this.setLatestOutputTimestamp(timestamp);
});
this.graphRunner.attachProtoVectorListener(
WORLD_LANDMARKS_STREAM, binaryProto => {
WORLD_LANDMARKS_STREAM, (binaryProto, timestamp) => {
this.adddJsWorldLandmarks(binaryProto);
this.setLatestOutputTimestamp(timestamp);
});
this.graphRunner.attachProtoVectorListener(
HAND_GESTURES_STREAM, binaryProto => {
HAND_GESTURES_STREAM, (binaryProto, timestamp) => {
// Gesture index is not used, because the final gesture result comes
// from multiple classifiers.
this.gestures.push(
...this.toJsCategories(binaryProto, /* populateIndex= */ false));
this.setLatestOutputTimestamp(timestamp);
});
this.graphRunner.attachProtoVectorListener(
HANDEDNESS_STREAM, binaryProto => {
HANDEDNESS_STREAM, (binaryProto, timestamp) => {
this.handednesses.push(...this.toJsCategories(binaryProto));
this.setLatestOutputTimestamp(timestamp);
});
const binaryGraph = graphConfig.serializeBinary();

View File

@@ -313,16 +313,19 @@ export class HandLandmarker extends VisionTaskRunner {
graphConfig.addNode(landmarkerNode);
this.graphRunner.attachProtoVectorListener(
LANDMARKS_STREAM, binaryProto => {
LANDMARKS_STREAM, (binaryProto, timestamp) => {
this.addJsLandmarks(binaryProto);
this.setLatestOutputTimestamp(timestamp);
});
this.graphRunner.attachProtoVectorListener(
WORLD_LANDMARKS_STREAM, binaryProto => {
WORLD_LANDMARKS_STREAM, (binaryProto, timestamp) => {
this.adddJsWorldLandmarks(binaryProto);
this.setLatestOutputTimestamp(timestamp);
});
this.graphRunner.attachProtoVectorListener(
HANDEDNESS_STREAM, binaryProto => {
HANDEDNESS_STREAM, (binaryProto, timestamp) => {
this.handednesses.push(...this.toJsCategories(binaryProto));
this.setLatestOutputTimestamp(timestamp);
});
const binaryGraph = graphConfig.serializeBinary();

View File

@@ -187,9 +187,10 @@ export class ImageClassifier extends VisionTaskRunner {
graphConfig.addNode(classifierNode);
this.graphRunner.attachProtoListener(
CLASSIFICATIONS_STREAM, binaryProto => {
CLASSIFICATIONS_STREAM, (binaryProto, timestamp) => {
this.classificationResult = convertFromClassificationResultProto(
ClassificationResult.deserializeBinary(binaryProto));
this.setLatestOutputTimestamp(timestamp);
});
const binaryGraph = graphConfig.serializeBinary();

View File

@@ -206,8 +206,10 @@ export class ImageEmbedder extends VisionTaskRunner {
graphConfig.addNode(embedderNode);
this.graphRunner.attachProtoListener(EMBEDDINGS_STREAM, binaryProto => {
this.graphRunner.attachProtoListener(
EMBEDDINGS_STREAM, (binaryProto, timestamp) => {
this.addJsImageEmdedding(binaryProto);
this.setLatestOutputTimestamp(timestamp);
});
const binaryGraph = graphConfig.serializeBinary();

View File

@@ -176,7 +176,7 @@ export class ObjectDetector extends VisionTaskRunner {
}
/**
* Performs object detection on the provided vidoe frame and waits
* Performs object detection on the provided video frame and waits
* synchronously for the response. Only use this method when the
* ObjectDetector is created with running mode `video`.
*
@@ -248,8 +248,9 @@ export class ObjectDetector extends VisionTaskRunner {
graphConfig.addNode(detectorNode);
this.graphRunner.attachProtoVectorListener(
DETECTIONS_STREAM, binaryProto => {
DETECTIONS_STREAM, (binaryProto, timestamp) => {
this.addJsObjectDetections(binaryProto);
this.setLatestOutputTimestamp(timestamp);
});
const binaryGraph = graphConfig.serializeBinary();