From 11270d0c93c456dcee6ed736c5f0f9ed304a8916 Mon Sep 17 00:00:00 2001 From: Sebastian Schmidt Date: Mon, 14 Nov 2022 14:24:11 -0800 Subject: [PATCH] Image Embedder for Web PiperOrigin-RevId: 488468214 --- mediapipe/tasks/web/vision/BUILD | 1 + mediapipe/tasks/web/vision/core/BUILD | 11 + .../tasks/web/vision/core/running_mode.ts | 36 +++ .../tasks/web/vision/image_embedder/BUILD | 33 +++ .../vision/image_embedder/image_embedder.ts | 214 ++++++++++++++++++ .../image_embedder/image_embedder_options.ts | 31 +++ .../image_embedder/image_embedder_result.ts | 17 ++ mediapipe/tasks/web/vision/index.ts | 5 + 8 files changed, 348 insertions(+) create mode 100644 mediapipe/tasks/web/vision/core/BUILD create mode 100644 mediapipe/tasks/web/vision/core/running_mode.ts create mode 100644 mediapipe/tasks/web/vision/image_embedder/BUILD create mode 100644 mediapipe/tasks/web/vision/image_embedder/image_embedder.ts create mode 100644 mediapipe/tasks/web/vision/image_embedder/image_embedder_options.ts create mode 100644 mediapipe/tasks/web/vision/image_embedder/image_embedder_result.ts diff --git a/mediapipe/tasks/web/vision/BUILD b/mediapipe/tasks/web/vision/BUILD index 395860892..3c45fbfa6 100644 --- a/mediapipe/tasks/web/vision/BUILD +++ b/mediapipe/tasks/web/vision/BUILD @@ -11,6 +11,7 @@ mediapipe_ts_library( "//mediapipe/tasks/web/vision/gesture_recognizer", "//mediapipe/tasks/web/vision/hand_landmarker", "//mediapipe/tasks/web/vision/image_classifier", + "//mediapipe/tasks/web/vision/image_embedder", "//mediapipe/tasks/web/vision/object_detector", ], ) diff --git a/mediapipe/tasks/web/vision/core/BUILD b/mediapipe/tasks/web/vision/core/BUILD new file mode 100644 index 000000000..7ab822b7c --- /dev/null +++ b/mediapipe/tasks/web/vision/core/BUILD @@ -0,0 +1,11 @@ +# This package contains options shared by all MediaPipe Tasks for Web. 
+
+load("//mediapipe/framework/port:build_config.bzl", "mediapipe_ts_library")
+
+package(default_visibility = ["//mediapipe/tasks:internal"])
+
+mediapipe_ts_library(
+    name = "running_mode",
+    srcs = ["running_mode.ts"],
+    deps = ["//mediapipe/tasks/cc/core/proto:base_options_jspb_proto"],
+)
diff --git a/mediapipe/tasks/web/vision/core/running_mode.ts b/mediapipe/tasks/web/vision/core/running_mode.ts
new file mode 100644
index 000000000..1e9b1b9a7
--- /dev/null
+++ b/mediapipe/tasks/web/vision/core/running_mode.ts
@@ -0,0 +1,36 @@
+/**
+ * Copyright 2022 The MediaPipe Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import {BaseOptions as BaseOptionsProto} from '../../../../tasks/cc/core/proto/base_options_pb';
+
+/**
+ * The running mode of a task.
+ * 1) The image mode for processing single image inputs.
+ * 2) The video mode for processing decoded frames of a video.
+ */
+export type RunningMode = 'image'|'video';
+
+/** Sets `useStreamMode` (true for 'video') when `runningMode` is given. */
+export function configureRunningMode(
+    options: {runningMode?: RunningMode},
+    proto?: BaseOptionsProto): BaseOptionsProto {
+  proto = proto ?? new BaseOptionsProto();
+  if ('runningMode' in options) {  // also true for an explicit `undefined`
+    const useStreamMode = options.runningMode === 'video';
+    proto.setUseStreamMode(useStreamMode);
+  }
+  return proto;
+}
diff --git a/mediapipe/tasks/web/vision/image_embedder/BUILD b/mediapipe/tasks/web/vision/image_embedder/BUILD
new file mode 100644
index 000000000..d12a05ad9
--- /dev/null
+++ b/mediapipe/tasks/web/vision/image_embedder/BUILD
@@ -0,0 +1,33 @@
+# This contains the MediaPipe Image Embedder Task.
+#
+# This task performs embedding extraction on images.
+
+load("//mediapipe/framework/port:build_config.bzl", "mediapipe_ts_library")
+
+package(default_visibility = ["//mediapipe/tasks:internal"])
+
+licenses(["notice"])
+
+mediapipe_ts_library(
+    name = "image_embedder",
+    srcs = [
+        "image_embedder.ts",
+        "image_embedder_options.ts",
+        "image_embedder_result.ts",
+    ],
+    deps = [
+        "//mediapipe/framework:calculator_jspb_proto",
+        "//mediapipe/framework:calculator_options_jspb_proto",
+        "//mediapipe/tasks/cc/components/containers/proto:embeddings_jspb_proto",
+        "//mediapipe/tasks/cc/vision/image_embedder/proto:image_embedder_graph_options_jspb_proto",
+        "//mediapipe/tasks/web/components/containers:embedding_result",
+        "//mediapipe/tasks/web/components/processors:base_options",
+        "//mediapipe/tasks/web/components/processors:embedder_options",
+        "//mediapipe/tasks/web/components/processors:embedder_result",
+        "//mediapipe/tasks/web/core",
+        "//mediapipe/tasks/web/core:embedder_options",
+        "//mediapipe/tasks/web/core:task_runner",
+        "//mediapipe/tasks/web/vision/core:running_mode",
+        "//mediapipe/web/graph_runner:wasm_mediapipe_lib_ts",
+    ],
+)
diff --git a/mediapipe/tasks/web/vision/image_embedder/image_embedder.ts b/mediapipe/tasks/web/vision/image_embedder/image_embedder.ts
new file mode 100644
index 000000000..4184e763c
--- /dev/null
+++ b/mediapipe/tasks/web/vision/image_embedder/image_embedder.ts
@@ -0,0 +1,214 @@
+/**
+ * Copyright 2022 The MediaPipe Authors. All Rights Reserved.
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import {CalculatorGraphConfig} from '../../../../framework/calculator_pb'; +import {CalculatorOptions} from '../../../../framework/calculator_options_pb'; +import {EmbeddingResult} from '../../../../tasks/cc/components/containers/proto/embeddings_pb'; +import {ImageEmbedderGraphOptions} from '../../../../tasks/cc/vision/image_embedder/proto/image_embedder_graph_options_pb'; +import {convertBaseOptionsToProto} from '../../../../tasks/web/components/processors/base_options'; +import {convertEmbedderOptionsToProto} from '../../../../tasks/web/components/processors/embedder_options'; +import {convertFromEmbeddingResultProto} from '../../../../tasks/web/components/processors/embedder_result'; +import {TaskRunner} from '../../../../tasks/web/core/task_runner'; +import {WasmLoaderOptions} from '../../../../tasks/web/core/wasm_loader_options'; +import {configureRunningMode} from '../../../../tasks/web/vision/core/running_mode'; +import {createMediaPipeLib, FileLocator, ImageSource} from '../../../../web/graph_runner/wasm_mediapipe_lib'; +// Placeholder for internal dependency on trusted resource url + +import {ImageEmbedderOptions} from './image_embedder_options'; +import {ImageEmbedderResult} from './image_embedder_result'; + +// The OSS JS API does not support the builder pattern. 
+// tslint:disable:jspb-use-builder-pattern
+
+const INPUT_STREAM = 'image_in';
+const EMBEDDINGS_STREAM = 'embeddings_out';
+const IMAGE_EMBEDDER_GRAPH =
+    'mediapipe.tasks.vision.image_embedder.ImageEmbedderGraph';
+
+export {ImageSource};  // Used in the public API
+
+/** Performs embedding extraction on images. */
+export class ImageEmbedder extends TaskRunner {
+  private readonly options = new ImageEmbedderGraphOptions();
+  private embeddings: ImageEmbedderResult = {embeddings: []};
+
+  /**
+   * Initializes the Wasm runtime and creates a new image embedder from the
+   * provided options.
+   * @param wasmLoaderOptions A configuration object that provides the location
+   *     of the Wasm binary and its loader.
+   * @param imageEmbedderOptions The options for the image embedder. Note that
+   *     either a path to the TFLite model or the model itself needs to be
+   *     provided (via `baseOptions`).
+   */
+  static async createFromOptions(
+      wasmLoaderOptions: WasmLoaderOptions,
+      imageEmbedderOptions: ImageEmbedderOptions): Promise<ImageEmbedder> {
+    // Create a file locator based on the loader options
+    const fileLocator: FileLocator = {
+      locateFile() {
+        // The only file we load is the Wasm binary
+        return wasmLoaderOptions.wasmBinaryPath.toString();
+      }
+    };
+
+    const embedder = await createMediaPipeLib(
+        ImageEmbedder, wasmLoaderOptions.wasmLoaderPath,
+        /* assetLoaderScript= */ undefined,
+        /* glCanvas= */ undefined, fileLocator);
+    await embedder.setOptions(imageEmbedderOptions);
+    return embedder;
+  }
+
+  /**
+   * Initializes the Wasm runtime and creates a new image embedder based on the
+   * provided model asset buffer.
+   * @param wasmLoaderOptions A configuration object that provides the location
+   *     of the Wasm binary and its loader.
+   * @param modelAssetBuffer A binary representation of the TFLite model.
+   */
+  static createFromModelBuffer(
+      wasmLoaderOptions: WasmLoaderOptions,
+      modelAssetBuffer: Uint8Array): Promise<ImageEmbedder> {
+    return ImageEmbedder.createFromOptions(
+        wasmLoaderOptions, {baseOptions: {modelAssetBuffer}});
+  }
+
+  /**
+   * Initializes the Wasm runtime and creates a new image embedder based on the
+   * path to the model asset.
+   * @param wasmLoaderOptions A configuration object that provides the location
+   *     of the Wasm binary and its loader.
+   * @param modelAssetPath The path to the TFLite model.
+   */
+  static async createFromModelPath(
+      wasmLoaderOptions: WasmLoaderOptions,
+      modelAssetPath: string): Promise<ImageEmbedder> {
+    const response = await fetch(modelAssetPath.toString());
+    const graphData = await response.arrayBuffer();
+    return ImageEmbedder.createFromModelBuffer(
+        wasmLoaderOptions, new Uint8Array(graphData));
+  }
+
+  /**
+   * Sets new options for the image embedder.
+   *
+   * Calling `setOptions()` with a subset of options only affects those options.
+   * You can reset an option back to its default value by explicitly setting it
+   * to `undefined`.
+   *
+   * @param options The options for the image embedder.
+   */
+  async setOptions(options: ImageEmbedderOptions): Promise<void> {
+    let baseOptionsProto = this.options.getBaseOptions();
+    if (options.baseOptions) {
+      baseOptionsProto = await convertBaseOptionsToProto(
+          options.baseOptions, baseOptionsProto);
+    }
+    baseOptionsProto = configureRunningMode(options, baseOptionsProto);
+    this.options.setBaseOptions(baseOptionsProto);
+
+    this.options.setEmbedderOptions(convertEmbedderOptionsToProto(
+        options, this.options.getEmbedderOptions()));
+
+    this.refreshGraph();
+  }
+
+  /**
+   * Performs embedding extraction on the provided image and waits synchronously
+   * for the response.
+   *
+   * Only use this method when the `runningMode` option is not set or is
+   * explicitly set to `'image'`; it throws if the task is in video mode.
+   *
+   * @param image The image to process.
+   * @return The embedding result of the image
+   */
+  embed(image: ImageSource): ImageEmbedderResult {
+    if (this.options.getBaseOptions()?.getUseStreamMode()) {
+      throw new Error(
+          'Task is not initialized with image mode. ' +
+          '\'runningMode\' must be set to \'image\'.');
+    }
+    return this.performEmbeddingExtraction(image, performance.now());
+  }
+
+  /**
+   * Performs embedding extraction on the provided video frame and waits
+   * synchronously for the response.
+   *
+   * Only use this method when `runningMode` is set to `'video'`; else throws.
+   *
+   * @param imageFrame The image frame to process.
+   * @param timestamp The timestamp of the current frame, in ms.
+   * @return The embedding result of the image frame
+   */
+  embedForVideo(imageFrame: ImageSource, timestamp: number):
+      ImageEmbedderResult {
+    if (!this.options.getBaseOptions()?.getUseStreamMode()) {
+      throw new Error(
+          'Task is not initialized with video mode. ' +
+          '\'runningMode\' must be set to \'video\'.');
+    }
+    return this.performEmbeddingExtraction(imageFrame, timestamp);
+  }
+
+  /** Runs the embedding extraction and blocks on the response. */
+  private performEmbeddingExtraction(image: ImageSource, timestamp: number):
+      ImageEmbedderResult {
+    // Get embeddings by running our MediaPipe graph.
+    this.addGpuBufferAsImageToStream(
+        image, INPUT_STREAM, timestamp ?? performance.now());
+    this.finishProcessing();
+    return this.embeddings;
+  }
+
+  /**
+   * Internal function for converting raw data into an embedding, and setting it
+   * as our embeddings result.
+   */
+  private addJsImageEmbedding(binaryProto: Uint8Array): void {
+    const embeddingResult = EmbeddingResult.deserializeBinary(binaryProto);
+    this.embeddings = convertFromEmbeddingResultProto(embeddingResult);
+  }
+
+  /** Updates the MediaPipe graph configuration. */
+  private refreshGraph(): void {
+    const graphConfig = new CalculatorGraphConfig();
+    graphConfig.addInputStream(INPUT_STREAM);
+    graphConfig.addOutputStream(EMBEDDINGS_STREAM);
+
+    const calculatorOptions = new CalculatorOptions();
+    calculatorOptions.setExtension(ImageEmbedderGraphOptions.ext, this.options);
+
+    const embedderNode = new CalculatorGraphConfig.Node();
+    embedderNode.setCalculator(IMAGE_EMBEDDER_GRAPH);
+    embedderNode.addInputStream('IMAGE:' + INPUT_STREAM);
+    embedderNode.addOutputStream('EMBEDDINGS:' + EMBEDDINGS_STREAM);
+    embedderNode.setOptions(calculatorOptions);
+
+    graphConfig.addNode(embedderNode);
+
+    this.attachProtoListener(EMBEDDINGS_STREAM, binaryProto => {
+      this.addJsImageEmbedding(binaryProto);
+    });
+
+    const binaryGraph = graphConfig.serializeBinary();
+    this.setGraph(new Uint8Array(binaryGraph), /* isBinary= */ true);
+  }
+}
+
+
diff --git a/mediapipe/tasks/web/vision/image_embedder/image_embedder_options.ts b/mediapipe/tasks/web/vision/image_embedder/image_embedder_options.ts
new file mode 100644
index 000000000..4d795d0d8
--- /dev/null
+++ b/mediapipe/tasks/web/vision/image_embedder/image_embedder_options.ts
@@ -0,0 +1,31 @@
+/**
+ * Copyright 2022 The MediaPipe Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +import {EmbedderOptions} from '../../../../tasks/web/core/embedder_options'; +import {RunningMode} from '../../../../tasks/web/vision/core/running_mode'; + +/** The options for configuring a MediaPipe image embedder task. */ +export declare interface ImageEmbedderOptions extends EmbedderOptions { + /** + * The running mode of the task. Defaults to the image mode. + * The image embedder supports two running modes: + * 1) The image mode for embedding single image inputs. + * 2) The video mode for embedding the decoded frames of a video. + * A live stream mode is not offered: `RunningMode` is declared as + * `'image'|'video'` in `vision/core/running_mode`. + */ + runningMode?: RunningMode; +} diff --git a/mediapipe/tasks/web/vision/image_embedder/image_embedder_result.ts b/mediapipe/tasks/web/vision/image_embedder/image_embedder_result.ts new file mode 100644 index 000000000..156636505 --- /dev/null +++ b/mediapipe/tasks/web/vision/image_embedder/image_embedder_result.ts @@ -0,0 +1,17 @@ +/** + * Copyright 2022 The MediaPipe Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +export {Embedding, EmbeddingResult as ImageEmbedderResult} from '../../../../tasks/web/components/containers/embedding_result'; diff --git a/mediapipe/tasks/web/vision/index.ts b/mediapipe/tasks/web/vision/index.ts index 2c46dbd3b..6dda83e55 100644 --- a/mediapipe/tasks/web/vision/index.ts +++ b/mediapipe/tasks/web/vision/index.ts @@ -19,6 +19,11 @@ export * from '../../../tasks/web/vision/image_classifier/image_classifier_optio export * from '../../../tasks/web/vision/image_classifier/image_classifier_result'; export * from '../../../tasks/web/vision/image_classifier/image_classifier'; +// Image Embedder +export * from '../../../tasks/web/vision/image_embedder/image_embedder_options'; +export * from '../../../tasks/web/vision/image_embedder/image_embedder_result'; +export * from '../../../tasks/web/vision/image_embedder/image_embedder'; + // Gesture Recognizer export * from '../../../tasks/web/vision/gesture_recognizer/gesture_recognizer_options'; export * from '../../../tasks/web/vision/gesture_recognizer/gesture_recognizer_result';