From b00236e86e00105145e47bef8498b1b715f6bf36 Mon Sep 17 00:00:00 2001 From: Sebastian Schmidt Date: Mon, 14 Nov 2022 12:11:40 -0800 Subject: [PATCH] Hand Landmarker Web API PiperOrigin-RevId: 488434079 --- mediapipe/tasks/testdata/vision/BUILD | 1 + mediapipe/tasks/web/vision/BUILD | 1 + .../tasks/web/vision/hand_landmarker/BUILD | 35 ++ .../vision/hand_landmarker/hand_landmarker.ts | 319 ++++++++++++++++++ .../hand_landmarker_options.ts | 47 +++ .../hand_landmarker/hand_landmarker_result.ts | 32 ++ mediapipe/tasks/web/vision/index.ts | 5 + 7 files changed, 440 insertions(+) create mode 100644 mediapipe/tasks/web/vision/hand_landmarker/BUILD create mode 100644 mediapipe/tasks/web/vision/hand_landmarker/hand_landmarker.ts create mode 100644 mediapipe/tasks/web/vision/hand_landmarker/hand_landmarker_options.ts create mode 100644 mediapipe/tasks/web/vision/hand_landmarker/hand_landmarker_result.ts diff --git a/mediapipe/tasks/testdata/vision/BUILD b/mediapipe/tasks/testdata/vision/BUILD index ad8072b87..95b721fdb 100644 --- a/mediapipe/tasks/testdata/vision/BUILD +++ b/mediapipe/tasks/testdata/vision/BUILD @@ -40,6 +40,7 @@ mediapipe_files(srcs = [ "fist.jpg", "hand_landmark_full.tflite", "hand_landmark_lite.tflite", + "hand_landmarker.task", "left_hands.jpg", "left_hands_rotated.jpg", "mobilenet_v1_0.25_192_quantized_1_default_1.tflite", diff --git a/mediapipe/tasks/web/vision/BUILD b/mediapipe/tasks/web/vision/BUILD index abdbc54ea..395860892 100644 --- a/mediapipe/tasks/web/vision/BUILD +++ b/mediapipe/tasks/web/vision/BUILD @@ -9,6 +9,7 @@ mediapipe_ts_library( srcs = ["index.ts"], deps = [ "//mediapipe/tasks/web/vision/gesture_recognizer", + "//mediapipe/tasks/web/vision/hand_landmarker", "//mediapipe/tasks/web/vision/image_classifier", "//mediapipe/tasks/web/vision/object_detector", ], diff --git a/mediapipe/tasks/web/vision/hand_landmarker/BUILD b/mediapipe/tasks/web/vision/hand_landmarker/BUILD new file mode 100644 index 000000000..9006b54ef --- /dev/null +++ 
b/mediapipe/tasks/web/vision/hand_landmarker/BUILD @@ -0,0 +1,35 @@ +# This contains the MediaPipe Hand Landmarker Task. +# +# This task takes video frames and outputs synchronized frames along with +# the detection results for one or more hand categories, using Hand Landmarker. + +load("//mediapipe/framework/port:build_config.bzl", "mediapipe_ts_library") + +package(default_visibility = ["//mediapipe/tasks:internal"]) + +licenses(["notice"]) + +mediapipe_ts_library( + name = "hand_landmarker", + srcs = [ + "hand_landmarker.ts", + "hand_landmarker_options.ts", + "hand_landmarker_result.ts", + ], + deps = [ + "//mediapipe/framework:calculator_jspb_proto", + "//mediapipe/framework:calculator_options_jspb_proto", + "//mediapipe/framework/formats:classification_jspb_proto", + "//mediapipe/framework/formats:landmark_jspb_proto", + "//mediapipe/framework/formats:rect_jspb_proto", + "//mediapipe/tasks/cc/vision/hand_detector/proto:hand_detector_graph_options_jspb_proto", + "//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarker_graph_options_jspb_proto", + "//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarks_detector_graph_options_jspb_proto", + "//mediapipe/tasks/web/components/containers:category", + "//mediapipe/tasks/web/components/containers:landmark", + "//mediapipe/tasks/web/components/processors:base_options", + "//mediapipe/tasks/web/core", + "//mediapipe/tasks/web/core:task_runner", + "//mediapipe/web/graph_runner:wasm_mediapipe_lib_ts", + ], +) diff --git a/mediapipe/tasks/web/vision/hand_landmarker/hand_landmarker.ts b/mediapipe/tasks/web/vision/hand_landmarker/hand_landmarker.ts new file mode 100644 index 000000000..017a9098c --- /dev/null +++ b/mediapipe/tasks/web/vision/hand_landmarker/hand_landmarker.ts @@ -0,0 +1,319 @@ +/** + * Copyright 2022 The MediaPipe Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import {CalculatorGraphConfig} from '../../../../framework/calculator_pb'; +import {CalculatorOptions} from '../../../../framework/calculator_options_pb'; +import {ClassificationList} from '../../../../framework/formats/classification_pb'; +import {LandmarkList, NormalizedLandmarkList} from '../../../../framework/formats/landmark_pb'; +import {NormalizedRect} from '../../../../framework/formats/rect_pb'; +import {HandDetectorGraphOptions} from '../../../../tasks/cc/vision/hand_detector/proto/hand_detector_graph_options_pb'; +import {HandLandmarkerGraphOptions} from '../../../../tasks/cc/vision/hand_landmarker/proto/hand_landmarker_graph_options_pb'; +import {HandLandmarksDetectorGraphOptions} from '../../../../tasks/cc/vision/hand_landmarker/proto/hand_landmarks_detector_graph_options_pb'; +import {Category} from '../../../../tasks/web/components/containers/category'; +import {Landmark} from '../../../../tasks/web/components/containers/landmark'; +import {convertBaseOptionsToProto} from '../../../../tasks/web/components/processors/base_options'; +import {TaskRunner} from '../../../../tasks/web/core/task_runner'; +import {WasmLoaderOptions} from '../../../../tasks/web/core/wasm_loader_options'; +import {createMediaPipeLib, FileLocator, ImageSource, WasmModule} from '../../../../web/graph_runner/wasm_mediapipe_lib'; +// Placeholder for internal dependency on trusted resource url + +import {HandLandmarkerOptions} from './hand_landmarker_options'; +import {HandLandmarkerResult} from './hand_landmarker_result'; + +export {ImageSource}; + +// The 
OSS JS API does not support the builder pattern.
+// tslint:disable:jspb-use-builder-pattern
+
+const IMAGE_STREAM = 'image_in';
+const NORM_RECT_STREAM = 'norm_rect';
+const LANDMARKS_STREAM = 'hand_landmarks';
+const WORLD_LANDMARKS_STREAM = 'world_hand_landmarks';
+const HANDEDNESS_STREAM = 'handedness';
+const HAND_LANDMARKER_GRAPH =
+    'mediapipe.tasks.vision.hand_landmarker.HandLandmarkerGraph';
+
+const DEFAULT_NUM_HANDS = 1;
+const DEFAULT_SCORE_THRESHOLD = 0.5;
+const DEFAULT_CATEGORY_INDEX = -1;
+const FULL_IMAGE_RECT = new NormalizedRect();
+FULL_IMAGE_RECT.setXCenter(0.5);
+FULL_IMAGE_RECT.setYCenter(0.5);
+FULL_IMAGE_RECT.setWidth(1);
+FULL_IMAGE_RECT.setHeight(1);
+
+/** Performs hand landmarks detection on images. */
+export class HandLandmarker extends TaskRunner {
+  private landmarks: Landmark[][] = [];
+  private worldLandmarks: Landmark[][] = [];
+  private handednesses: Category[][] = [];
+
+  private readonly options: HandLandmarkerGraphOptions;
+  private readonly handLandmarksDetectorGraphOptions:
+      HandLandmarksDetectorGraphOptions;
+  private readonly handDetectorGraphOptions: HandDetectorGraphOptions;
+
+  /**
+   * Initializes the Wasm runtime and creates a new `HandLandmarker` from the
+   * provided options.
+   * @param wasmLoaderOptions A configuration object that provides the location
+   *     of the Wasm binary and its loader.
+   * @param handLandmarkerOptions The options for the HandLandmarker.
+   *     Note that either a path to the model asset or a model buffer needs to
+   *     be provided (via `baseOptions`).
+   */
+  static async createFromOptions(
+      wasmLoaderOptions: WasmLoaderOptions,
+      handLandmarkerOptions: HandLandmarkerOptions): Promise<HandLandmarker> {
+    // Create a file locator based on the loader options
+    const fileLocator: FileLocator = {
+      locateFile() {
+        // The only file we load via this mechanism is the Wasm binary
+        return wasmLoaderOptions.wasmBinaryPath.toString();
+      }
+    };
+
+    const landmarker = await createMediaPipeLib(
+        HandLandmarker, wasmLoaderOptions.wasmLoaderPath,
+        /* assetLoaderScript= */ undefined,
+        /* glCanvas= */ undefined, fileLocator);
+    await landmarker.setOptions(handLandmarkerOptions);
+    return landmarker;
+  }
+
+  /**
+   * Initializes the Wasm runtime and creates a new `HandLandmarker` based on
+   * the provided model asset buffer.
+   * @param wasmLoaderOptions A configuration object that provides the location
+   *     of the Wasm binary and its loader.
+   * @param modelAssetBuffer A binary representation of the model.
+   */
+  static createFromModelBuffer(
+      wasmLoaderOptions: WasmLoaderOptions,
+      modelAssetBuffer: Uint8Array): Promise<HandLandmarker> {
+    return HandLandmarker.createFromOptions(
+        wasmLoaderOptions, {baseOptions: {modelAssetBuffer}});
+  }
+
+  /**
+   * Fetches the model from `modelAssetPath` and creates a new `HandLandmarker`
+   * from it. The returned promise rejects if the HTTP response is not OK.
+   * @param wasmLoaderOptions A configuration object that provides the location
+   *     of the Wasm binary and its loader.
+   * @param modelAssetPath The path to the model asset.
+   */
+  static async createFromModelPath(
+      wasmLoaderOptions: WasmLoaderOptions,
+      modelAssetPath: string): Promise<HandLandmarker> {
+    const response = await fetch(modelAssetPath.toString());
+    if (!response.ok) {
+      throw new Error(`Failed to fetch model: ${modelAssetPath}`);
+    }
+    return HandLandmarker.createFromModelBuffer(
+        wasmLoaderOptions, new Uint8Array(await response.arrayBuffer()));
+  }
+
+  constructor(wasmModule: WasmModule) {
+    super(wasmModule);
+
+    this.options = new HandLandmarkerGraphOptions();
+    this.handLandmarksDetectorGraphOptions =
+        new HandLandmarksDetectorGraphOptions();
+    this.options.setHandLandmarksDetectorGraphOptions(
+        this.handLandmarksDetectorGraphOptions);
+    this.handDetectorGraphOptions = new HandDetectorGraphOptions();
+    this.options.setHandDetectorGraphOptions(this.handDetectorGraphOptions);
+
+    this.initDefaults();
+
+    // Disables the automatic render-to-screen code, which allows for pure
+    // CPU processing.
+    this.setAutoRenderToScreen(false);
+  }
+
+  /**
+   * Sets new options for this `HandLandmarker`.
+   *
+   * Calling `setOptions()` with a subset of options only affects those options.
+   * You can reset an option back to its default value by explicitly setting it
+   * to `undefined`.
+   *
+   * @param options The options for the hand landmarker.
+   */
+  async setOptions(options: HandLandmarkerOptions): Promise<void> {
+    if (options.baseOptions) {
+      const baseOptionsProto = await convertBaseOptionsToProto(
+          options.baseOptions, this.options.getBaseOptions());
+      this.options.setBaseOptions(baseOptionsProto);
+    }
+
+    // Configure hand detector options.
+    if ('numHands' in options) {
+      this.handDetectorGraphOptions.setNumHands(
+          options.numHands ?? DEFAULT_NUM_HANDS);
+    }
+    if ('minHandDetectionConfidence' in options) {
+      this.handDetectorGraphOptions.setMinDetectionConfidence(
+          options.minHandDetectionConfidence ?? DEFAULT_SCORE_THRESHOLD);
+    }
+
+    // Configure hand landmark detector options.
+    if ('minTrackingConfidence' in options) {
+      this.options.setMinTrackingConfidence(
+          options.minTrackingConfidence ?? DEFAULT_SCORE_THRESHOLD);
+    }
+    if ('minHandPresenceConfidence' in options) {
+      this.handLandmarksDetectorGraphOptions.setMinDetectionConfidence(
+          options.minHandPresenceConfidence ?? DEFAULT_SCORE_THRESHOLD);
+    }
+
+    this.refreshGraph();
+  }
+
+  /**
+   * Performs hand landmarks detection on the provided single image and waits
+   * synchronously for the response.
+   * @param imageSource An image source to process.
+   * @param timestamp The timestamp of the current frame, in ms. If not
+   *    provided, defaults to `performance.now()`.
+   * @return The detected hand landmarks.
+   */
+  detect(imageSource: ImageSource, timestamp: number = performance.now()):
+      HandLandmarkerResult {
+    this.landmarks = [];
+    this.worldLandmarks = [];
+    this.handednesses = [];
+
+    this.addGpuBufferAsImageToStream(imageSource, IMAGE_STREAM, timestamp);
+    this.addProtoToStream(
+        FULL_IMAGE_RECT.serializeBinary(), 'mediapipe.NormalizedRect',
+        NORM_RECT_STREAM, timestamp);
+    this.finishProcessing();
+    // Results are collected by the listeners attached in `refreshGraph()`.
+    return {
+      landmarks: this.landmarks,
+      worldLandmarks: this.worldLandmarks,
+      handednesses: this.handednesses
+    };
+  }
+
+  /** Sets the default values for the graph. */
+  private initDefaults(): void {
+    this.handDetectorGraphOptions.setNumHands(DEFAULT_NUM_HANDS);
+    this.handDetectorGraphOptions.setMinDetectionConfidence(
+        DEFAULT_SCORE_THRESHOLD);
+    this.handLandmarksDetectorGraphOptions.setMinDetectionConfidence(
+        DEFAULT_SCORE_THRESHOLD);
+    this.options.setMinTrackingConfidence(DEFAULT_SCORE_THRESHOLD);
+  }
+
+  /** Converts the proto data to a Category[][] structure. */
+  private toJsCategories(data: Uint8Array[]): Category[][] {
+    const result: Category[][] = [];
+    for (const binaryProto of data) {
+      const inputList = ClassificationList.deserializeBinary(binaryProto);
+      const outputList: Category[] = [];
+      for (const classification of inputList.getClassificationList()) {
+        outputList.push({
+          score: classification.getScore() ?? 0,
+          index: classification.getIndex() ?? DEFAULT_CATEGORY_INDEX,
+          categoryName: classification.getLabel() ?? '',
+          displayName: classification.getDisplayName() ?? '',
+        });
+      }
+      result.push(outputList);
+    }
+    return result;
+  }
+
+  /** Deserializes the landmark protos and appends them to `landmarks`. */
+  private addJsLandmarks(data: Uint8Array[]): void {
+    for (const binaryProto of data) {
+      const handLandmarksProto =
+          NormalizedLandmarkList.deserializeBinary(binaryProto);
+      const landmarks: Landmark[] = [];
+      for (const handLandmarkProto of handLandmarksProto.getLandmarkList()) {
+        landmarks.push({
+          x: handLandmarkProto.getX() ?? 0,
+          y: handLandmarkProto.getY() ?? 0,
+          z: handLandmarkProto.getZ() ?? 0,
+          normalized: true
+        });
+      }
+      this.landmarks.push(landmarks);
+    }
+  }
+
+  /**
+   * Deserializes the world landmark protos and appends them to
+   * `worldLandmarks`.
+   */
+  private addJsWorldLandmarks(data: Uint8Array[]): void {
+    for (const binaryProto of data) {
+      const handWorldLandmarksProto =
+          LandmarkList.deserializeBinary(binaryProto);
+      const worldLandmarks: Landmark[] = [];
+      for (const handWorldLandmarkProto of
+               handWorldLandmarksProto.getLandmarkList()) {
+        worldLandmarks.push({
+          x: handWorldLandmarkProto.getX() ?? 0,
+          y: handWorldLandmarkProto.getY() ?? 0,
+          z: handWorldLandmarkProto.getZ() ?? 0,
+          normalized: false
+        });
+      }
+      this.worldLandmarks.push(worldLandmarks);
+    }
+  }
+
+  /** Updates the MediaPipe graph configuration. */
+  private refreshGraph(): void {
+    const graphConfig = new CalculatorGraphConfig();
+    graphConfig.addInputStream(IMAGE_STREAM);
+    graphConfig.addInputStream(NORM_RECT_STREAM);
+    graphConfig.addOutputStream(LANDMARKS_STREAM);
+    graphConfig.addOutputStream(WORLD_LANDMARKS_STREAM);
+    graphConfig.addOutputStream(HANDEDNESS_STREAM);
+
+    const calculatorOptions = new CalculatorOptions();
+    calculatorOptions.setExtension(
+        HandLandmarkerGraphOptions.ext, this.options);
+
+    const landmarkerNode = new CalculatorGraphConfig.Node();
+    landmarkerNode.setCalculator(HAND_LANDMARKER_GRAPH);
+    landmarkerNode.addInputStream('IMAGE:' + IMAGE_STREAM);
+    landmarkerNode.addInputStream('NORM_RECT:' + NORM_RECT_STREAM);
+    landmarkerNode.addOutputStream('LANDMARKS:' + LANDMARKS_STREAM);
+    landmarkerNode.addOutputStream('WORLD_LANDMARKS:' + WORLD_LANDMARKS_STREAM);
+    landmarkerNode.addOutputStream('HANDEDNESS:' + HANDEDNESS_STREAM);
+    landmarkerNode.setOptions(calculatorOptions);
+
+    graphConfig.addNode(landmarkerNode);
+
+    this.attachProtoVectorListener(LANDMARKS_STREAM, binaryProto => {
+      this.addJsLandmarks(binaryProto);
+    });
+    this.attachProtoVectorListener(WORLD_LANDMARKS_STREAM, binaryProto => {
+      this.addJsWorldLandmarks(binaryProto);
+    });
+    this.attachProtoVectorListener(HANDEDNESS_STREAM, binaryProto => {
+      this.handednesses.push(...this.toJsCategories(binaryProto));
+    });
+
+    const binaryGraph = graphConfig.serializeBinary();
+    this.setGraph(new Uint8Array(binaryGraph), /* isBinary= */ true);
+  }
+}
diff --git a/mediapipe/tasks/web/vision/hand_landmarker/hand_landmarker_options.ts b/mediapipe/tasks/web/vision/hand_landmarker/hand_landmarker_options.ts
new file mode 100644
index 000000000..53ad9440a
--- /dev/null
+++ b/mediapipe/tasks/web/vision/hand_landmarker/hand_landmarker_options.ts
@@ -0,0 +1,47 @@
+/**
+ * Copyright 2022 The MediaPipe Authors. All Rights Reserved.
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import {BaseOptions} from '../../../../tasks/web/core/base_options'; + +/** Options to configure the MediaPipe HandLandmarker Task. */ +export declare interface HandLandmarkerOptions { + /** Options to configure the loading of the model assets. */ + baseOptions?: BaseOptions; + + /** + * The maximum number of hands that can be detected by the HandLandmarker. + * Defaults to 1. + */ + numHands?: number|undefined; + + /** + * The minimum confidence score for the hand detection to be considered + * successful. Defaults to 0.5. + */ + minHandDetectionConfidence?: number|undefined; + + /** + * The minimum confidence score for hand presence in the hand landmark + * detection. Defaults to 0.5. + */ + minHandPresenceConfidence?: number|undefined; + + /** + * The minimum confidence score for the hand tracking to be considered + * successful. Defaults to 0.5. + */ + minTrackingConfidence?: number|undefined; +} diff --git a/mediapipe/tasks/web/vision/hand_landmarker/hand_landmarker_result.ts b/mediapipe/tasks/web/vision/hand_landmarker/hand_landmarker_result.ts new file mode 100644 index 000000000..044bdfbe7 --- /dev/null +++ b/mediapipe/tasks/web/vision/hand_landmarker/hand_landmarker_result.ts @@ -0,0 +1,32 @@ +/** + * Copyright 2022 The MediaPipe Authors. All Rights Reserved. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import {Category} from '../../../../tasks/web/components/containers/category'; +import {Landmark} from '../../../../tasks/web/components/containers/landmark'; + +/** + * Represents the hand landmark detection results generated by `HandLandmarker`. + */ +export declare interface HandLandmarkerResult { + /** Hand landmarks of detected hands. */ + landmarks: Landmark[][]; + + /** Hand landmarks in world coordinates of detected hands. */ + worldLandmarks: Landmark[][]; + + /** Handedness of detected hands. 
*/ + handednesses: Category[][]; +} diff --git a/mediapipe/tasks/web/vision/index.ts b/mediapipe/tasks/web/vision/index.ts index 7cc915f25..2c46dbd3b 100644 --- a/mediapipe/tasks/web/vision/index.ts +++ b/mediapipe/tasks/web/vision/index.ts @@ -24,6 +24,11 @@ export * from '../../../tasks/web/vision/gesture_recognizer/gesture_recognizer_o export * from '../../../tasks/web/vision/gesture_recognizer/gesture_recognizer_result'; export * from '../../../tasks/web/vision/gesture_recognizer/gesture_recognizer'; +// Hand Landmarker +export * from '../../../tasks/web/vision/hand_landmarker/hand_landmarker_options'; +export * from '../../../tasks/web/vision/hand_landmarker/hand_landmarker_result'; +export * from '../../../tasks/web/vision/hand_landmarker/hand_landmarker'; + // Object Detector export * from '../../../tasks/web/vision/object_detector/object_detector_options'; export * from '../../../tasks/web/vision/object_detector/object_detector_result';