/**
|
|
* Copyright 2022 The MediaPipe Authors. All Rights Reserved.
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
import {CalculatorGraphConfig} from '../../../../framework/calculator_pb';
|
|
import {CalculatorOptions} from '../../../../framework/calculator_options_pb';
|
|
import {ClassificationList} from '../../../../framework/formats/classification_pb';
|
|
import {LandmarkList, NormalizedLandmarkList} from '../../../../framework/formats/landmark_pb';
|
|
import {BaseOptions as BaseOptionsProto} from '../../../../tasks/cc/core/proto/base_options_pb';
|
|
import {GestureClassifierGraphOptions} from '../../../../tasks/cc/vision/gesture_recognizer/proto/gesture_classifier_graph_options_pb';
|
|
import {GestureRecognizerGraphOptions} from '../../../../tasks/cc/vision/gesture_recognizer/proto/gesture_recognizer_graph_options_pb';
|
|
import {HandGestureRecognizerGraphOptions} from '../../../../tasks/cc/vision/gesture_recognizer/proto/hand_gesture_recognizer_graph_options_pb';
|
|
import {HandDetectorGraphOptions} from '../../../../tasks/cc/vision/hand_detector/proto/hand_detector_graph_options_pb';
|
|
import {HandLandmarkerGraphOptions} from '../../../../tasks/cc/vision/hand_landmarker/proto/hand_landmarker_graph_options_pb';
|
|
import {HandLandmarksDetectorGraphOptions} from '../../../../tasks/cc/vision/hand_landmarker/proto/hand_landmarks_detector_graph_options_pb';
|
|
import {Category} from '../../../../tasks/web/components/containers/category';
|
|
import {Landmark, NormalizedLandmark} from '../../../../tasks/web/components/containers/landmark';
|
|
import {convertClassifierOptionsToProto} from '../../../../tasks/web/components/processors/classifier_options';
|
|
import {WasmFileset} from '../../../../tasks/web/core/wasm_fileset';
|
|
import {ImageProcessingOptions} from '../../../../tasks/web/vision/core/image_processing_options';
|
|
import {VisionGraphRunner, VisionTaskRunner} from '../../../../tasks/web/vision/core/vision_task_runner';
|
|
import {ImageSource, WasmModule} from '../../../../web/graph_runner/graph_runner';
|
|
// Placeholder for internal dependency on trusted resource url
|
|
|
|
import {GestureRecognizerOptions} from './gesture_recognizer_options';
|
|
import {GestureRecognizerResult} from './gesture_recognizer_result';
|
|
|
|
export * from './gesture_recognizer_options';
|
|
export * from './gesture_recognizer_result';
|
|
export {ImageSource};
|
|
|
|
// The OSS JS API does not support the builder pattern.
|
|
// tslint:disable:jspb-use-builder-pattern
|
|
|
|
// Names of the input/output streams wired into the GestureRecognizerGraph in
// refreshGraph() below.
const IMAGE_STREAM = 'image_in';
const NORM_RECT_STREAM = 'norm_rect';
const HAND_GESTURES_STREAM = 'hand_gestures';
const LANDMARKS_STREAM = 'hand_landmarks';
const WORLD_LANDMARKS_STREAM = 'world_hand_landmarks';
const HANDEDNESS_STREAM = 'handedness';
// Fully qualified registered name of the MediaPipe calculator graph that
// performs the recognition.
const GESTURE_RECOGNIZER_GRAPH =
    'mediapipe.tasks.vision.gesture_recognizer.GestureRecognizerGraph';

// Defaults applied by initDefaults() and used as fallbacks in setOptions()
// when an option is explicitly reset to `undefined`.
const DEFAULT_NUM_HANDS = 1;
const DEFAULT_SCORE_THRESHOLD = 0.5;
// Sentinel category index reported when the classification proto carries no
// index (or when indices are intentionally not populated).
const DEFAULT_CATEGORY_INDEX = -1;
|
|
|
|
/** Performs hand gesture recognition on images. */
|
|
export class GestureRecognizer extends VisionTaskRunner {
|
|
private gestures: Category[][] = [];
|
|
private landmarks: NormalizedLandmark[][] = [];
|
|
private worldLandmarks: Landmark[][] = [];
|
|
private handednesses: Category[][] = [];
|
|
|
|
private readonly options: GestureRecognizerGraphOptions;
|
|
private readonly handLandmarkerGraphOptions: HandLandmarkerGraphOptions;
|
|
private readonly handLandmarksDetectorGraphOptions:
|
|
HandLandmarksDetectorGraphOptions;
|
|
private readonly handDetectorGraphOptions: HandDetectorGraphOptions;
|
|
private readonly handGestureRecognizerGraphOptions:
|
|
HandGestureRecognizerGraphOptions;
|
|
|
|
/**
|
|
* Initializes the Wasm runtime and creates a new gesture recognizer from the
|
|
* provided options.
|
|
* @param wasmFileset A configuration object that provides the location of the
|
|
* Wasm binary and its loader.
|
|
* @param gestureRecognizerOptions The options for the gesture recognizer.
|
|
* Note that either a path to the model asset or a model buffer needs to
|
|
* be provided (via `baseOptions`).
|
|
*/
|
|
static createFromOptions(
|
|
wasmFileset: WasmFileset,
|
|
gestureRecognizerOptions: GestureRecognizerOptions):
|
|
Promise<GestureRecognizer> {
|
|
return VisionTaskRunner.createInstance(
|
|
GestureRecognizer, /* initializeCanvas= */ true, wasmFileset,
|
|
gestureRecognizerOptions);
|
|
}
|
|
|
|
/**
|
|
* Initializes the Wasm runtime and creates a new gesture recognizer based on
|
|
* the provided model asset buffer.
|
|
* @param wasmFileset A configuration object that provides the location of the
|
|
* Wasm binary and its loader.
|
|
* @param modelAssetBuffer A binary representation of the model.
|
|
*/
|
|
static createFromModelBuffer(
|
|
wasmFileset: WasmFileset,
|
|
modelAssetBuffer: Uint8Array): Promise<GestureRecognizer> {
|
|
return VisionTaskRunner.createInstance(
|
|
GestureRecognizer, /* initializeCanvas= */ true, wasmFileset,
|
|
{baseOptions: {modelAssetBuffer}});
|
|
}
|
|
|
|
/**
|
|
* Initializes the Wasm runtime and creates a new gesture recognizer based on
|
|
* the path to the model asset.
|
|
* @param wasmFileset A configuration object that provides the location of the
|
|
* Wasm binary and its loader.
|
|
* @param modelAssetPath The path to the model asset.
|
|
*/
|
|
static createFromModelPath(
|
|
wasmFileset: WasmFileset,
|
|
modelAssetPath: string): Promise<GestureRecognizer> {
|
|
return VisionTaskRunner.createInstance(
|
|
GestureRecognizer, /* initializeCanvas= */ true, wasmFileset,
|
|
{baseOptions: {modelAssetPath}});
|
|
}
|
|
|
|
/** @hideconstructor */
|
|
constructor(
|
|
wasmModule: WasmModule,
|
|
glCanvas?: HTMLCanvasElement|OffscreenCanvas|null) {
|
|
super(
|
|
new VisionGraphRunner(wasmModule, glCanvas), IMAGE_STREAM,
|
|
NORM_RECT_STREAM);
|
|
|
|
this.options = new GestureRecognizerGraphOptions();
|
|
this.options.setBaseOptions(new BaseOptionsProto());
|
|
this.handLandmarkerGraphOptions = new HandLandmarkerGraphOptions();
|
|
this.options.setHandLandmarkerGraphOptions(this.handLandmarkerGraphOptions);
|
|
this.handLandmarksDetectorGraphOptions =
|
|
new HandLandmarksDetectorGraphOptions();
|
|
this.handLandmarkerGraphOptions.setHandLandmarksDetectorGraphOptions(
|
|
this.handLandmarksDetectorGraphOptions);
|
|
this.handDetectorGraphOptions = new HandDetectorGraphOptions();
|
|
this.handLandmarkerGraphOptions.setHandDetectorGraphOptions(
|
|
this.handDetectorGraphOptions);
|
|
this.handGestureRecognizerGraphOptions =
|
|
new HandGestureRecognizerGraphOptions();
|
|
this.options.setHandGestureRecognizerGraphOptions(
|
|
this.handGestureRecognizerGraphOptions);
|
|
|
|
this.initDefaults();
|
|
}
|
|
|
|
protected override get baseOptions(): BaseOptionsProto {
|
|
return this.options.getBaseOptions()!;
|
|
}
|
|
|
|
protected override set baseOptions(proto: BaseOptionsProto) {
|
|
this.options.setBaseOptions(proto);
|
|
}
|
|
|
|
/**
|
|
* Sets new options for the gesture recognizer.
|
|
*
|
|
* Calling `setOptions()` with a subset of options only affects those options.
|
|
* You can reset an option back to its default value by explicitly setting it
|
|
* to `undefined`.
|
|
*
|
|
* @param options The options for the gesture recognizer.
|
|
*/
|
|
override setOptions(options: GestureRecognizerOptions): Promise<void> {
|
|
if ('numHands' in options) {
|
|
this.handDetectorGraphOptions.setNumHands(
|
|
options.numHands ?? DEFAULT_NUM_HANDS);
|
|
}
|
|
if ('minHandDetectionConfidence' in options) {
|
|
this.handDetectorGraphOptions.setMinDetectionConfidence(
|
|
options.minHandDetectionConfidence ?? DEFAULT_SCORE_THRESHOLD);
|
|
}
|
|
if ('minHandPresenceConfidence' in options) {
|
|
this.handLandmarksDetectorGraphOptions.setMinDetectionConfidence(
|
|
options.minHandPresenceConfidence ?? DEFAULT_SCORE_THRESHOLD);
|
|
}
|
|
if ('minTrackingConfidence' in options) {
|
|
this.handLandmarkerGraphOptions.setMinTrackingConfidence(
|
|
options.minTrackingConfidence ?? DEFAULT_SCORE_THRESHOLD);
|
|
}
|
|
|
|
if (options.cannedGesturesClassifierOptions) {
|
|
// Note that we have to support both JSPB and ProtobufJS and cannot
|
|
// use JSPB's getMutableX() APIs.
|
|
const graphOptions = new GestureClassifierGraphOptions();
|
|
graphOptions.setClassifierOptions(convertClassifierOptionsToProto(
|
|
options.cannedGesturesClassifierOptions,
|
|
this.handGestureRecognizerGraphOptions
|
|
.getCannedGestureClassifierGraphOptions()
|
|
?.getClassifierOptions()));
|
|
this.handGestureRecognizerGraphOptions
|
|
.setCannedGestureClassifierGraphOptions(graphOptions);
|
|
} else if (options.cannedGesturesClassifierOptions === undefined) {
|
|
this.handGestureRecognizerGraphOptions
|
|
.getCannedGestureClassifierGraphOptions()
|
|
?.clearClassifierOptions();
|
|
}
|
|
|
|
if (options.customGesturesClassifierOptions) {
|
|
const graphOptions = new GestureClassifierGraphOptions();
|
|
graphOptions.setClassifierOptions(convertClassifierOptionsToProto(
|
|
options.customGesturesClassifierOptions,
|
|
this.handGestureRecognizerGraphOptions
|
|
.getCustomGestureClassifierGraphOptions()
|
|
?.getClassifierOptions()));
|
|
this.handGestureRecognizerGraphOptions
|
|
.setCustomGestureClassifierGraphOptions(graphOptions);
|
|
} else if (options.customGesturesClassifierOptions === undefined) {
|
|
this.handGestureRecognizerGraphOptions
|
|
.getCustomGestureClassifierGraphOptions()
|
|
?.clearClassifierOptions();
|
|
}
|
|
|
|
return this.applyOptions(options);
|
|
}
|
|
|
|
/**
|
|
* Performs gesture recognition on the provided single image and waits
|
|
* synchronously for the response. Only use this method when the
|
|
* GestureRecognizer is created with running mode `image`.
|
|
*
|
|
* @param image A single image to process.
|
|
* @param imageProcessingOptions the `ImageProcessingOptions` specifying how
|
|
* to process the input image before running inference.
|
|
* @return The detected gestures.
|
|
*/
|
|
recognize(
|
|
image: ImageSource, imageProcessingOptions?: ImageProcessingOptions):
|
|
GestureRecognizerResult {
|
|
this.resetResults();
|
|
this.processImageData(image, imageProcessingOptions);
|
|
return this.processResults();
|
|
}
|
|
|
|
/**
|
|
* Performs gesture recognition on the provided video frame and waits
|
|
* synchronously for the response. Only use this method when the
|
|
* GestureRecognizer is created with running mode `video`.
|
|
*
|
|
* @param videoFrame A video frame to process.
|
|
* @param timestamp The timestamp of the current frame, in ms.
|
|
* @param imageProcessingOptions the `ImageProcessingOptions` specifying how
|
|
* to process the input image before running inference.
|
|
* @return The detected gestures.
|
|
*/
|
|
recognizeForVideo(
|
|
videoFrame: ImageSource, timestamp: number,
|
|
imageProcessingOptions?: ImageProcessingOptions):
|
|
GestureRecognizerResult {
|
|
this.resetResults();
|
|
this.processVideoData(videoFrame, imageProcessingOptions, timestamp);
|
|
return this.processResults();
|
|
}
|
|
|
|
private resetResults(): void {
|
|
this.gestures = [];
|
|
this.landmarks = [];
|
|
this.worldLandmarks = [];
|
|
this.handednesses = [];
|
|
}
|
|
|
|
private processResults(): GestureRecognizerResult {
|
|
if (this.gestures.length === 0) {
|
|
// If no gestures are detected in the image, just return an empty list
|
|
return {
|
|
gestures: [],
|
|
landmarks: [],
|
|
worldLandmarks: [],
|
|
handednesses: [],
|
|
};
|
|
} else {
|
|
return {
|
|
gestures: this.gestures,
|
|
landmarks: this.landmarks,
|
|
worldLandmarks: this.worldLandmarks,
|
|
handednesses: this.handednesses
|
|
};
|
|
}
|
|
}
|
|
|
|
/** Sets the default values for the graph. */
|
|
private initDefaults(): void {
|
|
this.handDetectorGraphOptions.setNumHands(DEFAULT_NUM_HANDS);
|
|
this.handDetectorGraphOptions.setMinDetectionConfidence(
|
|
DEFAULT_SCORE_THRESHOLD);
|
|
this.handLandmarksDetectorGraphOptions.setMinDetectionConfidence(
|
|
DEFAULT_SCORE_THRESHOLD);
|
|
this.handLandmarkerGraphOptions.setMinTrackingConfidence(
|
|
DEFAULT_SCORE_THRESHOLD);
|
|
}
|
|
|
|
/** Converts the proto data to a Category[][] structure. */
|
|
private toJsCategories(data: Uint8Array[], populateIndex = true):
|
|
Category[][] {
|
|
const result: Category[][] = [];
|
|
for (const binaryProto of data) {
|
|
const inputList = ClassificationList.deserializeBinary(binaryProto);
|
|
const outputList: Category[] = [];
|
|
for (const classification of inputList.getClassificationList()) {
|
|
const index = populateIndex && classification.hasIndex() ?
|
|
classification.getIndex()! :
|
|
DEFAULT_CATEGORY_INDEX;
|
|
outputList.push({
|
|
score: classification.getScore() ?? 0,
|
|
index,
|
|
categoryName: classification.getLabel() ?? '',
|
|
displayName: classification.getDisplayName() ?? '',
|
|
});
|
|
}
|
|
result.push(outputList);
|
|
}
|
|
return result;
|
|
}
|
|
|
|
/** Converts raw data into a landmark, and adds it to our landmarks list. */
|
|
private addJsLandmarks(data: Uint8Array[]): void {
|
|
for (const binaryProto of data) {
|
|
const handLandmarksProto =
|
|
NormalizedLandmarkList.deserializeBinary(binaryProto);
|
|
const landmarks: NormalizedLandmark[] = [];
|
|
for (const handLandmarkProto of handLandmarksProto.getLandmarkList()) {
|
|
landmarks.push({
|
|
x: handLandmarkProto.getX() ?? 0,
|
|
y: handLandmarkProto.getY() ?? 0,
|
|
z: handLandmarkProto.getZ() ?? 0
|
|
});
|
|
}
|
|
this.landmarks.push(landmarks);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Converts raw data into a landmark, and adds it to our worldLandmarks
|
|
* list.
|
|
*/
|
|
private adddJsWorldLandmarks(data: Uint8Array[]): void {
|
|
for (const binaryProto of data) {
|
|
const handWorldLandmarksProto =
|
|
LandmarkList.deserializeBinary(binaryProto);
|
|
const worldLandmarks: Landmark[] = [];
|
|
for (const handWorldLandmarkProto of
|
|
handWorldLandmarksProto.getLandmarkList()) {
|
|
worldLandmarks.push({
|
|
x: handWorldLandmarkProto.getX() ?? 0,
|
|
y: handWorldLandmarkProto.getY() ?? 0,
|
|
z: handWorldLandmarkProto.getZ() ?? 0
|
|
});
|
|
}
|
|
this.worldLandmarks.push(worldLandmarks);
|
|
}
|
|
}
|
|
|
|
/** Updates the MediaPipe graph configuration. */
|
|
protected override refreshGraph(): void {
|
|
const graphConfig = new CalculatorGraphConfig();
|
|
graphConfig.addInputStream(IMAGE_STREAM);
|
|
graphConfig.addInputStream(NORM_RECT_STREAM);
|
|
graphConfig.addOutputStream(HAND_GESTURES_STREAM);
|
|
graphConfig.addOutputStream(LANDMARKS_STREAM);
|
|
graphConfig.addOutputStream(WORLD_LANDMARKS_STREAM);
|
|
graphConfig.addOutputStream(HANDEDNESS_STREAM);
|
|
|
|
const calculatorOptions = new CalculatorOptions();
|
|
calculatorOptions.setExtension(
|
|
GestureRecognizerGraphOptions.ext, this.options);
|
|
|
|
const recognizerNode = new CalculatorGraphConfig.Node();
|
|
recognizerNode.setCalculator(GESTURE_RECOGNIZER_GRAPH);
|
|
recognizerNode.addInputStream('IMAGE:' + IMAGE_STREAM);
|
|
recognizerNode.addInputStream('NORM_RECT:' + NORM_RECT_STREAM);
|
|
recognizerNode.addOutputStream('HAND_GESTURES:' + HAND_GESTURES_STREAM);
|
|
recognizerNode.addOutputStream('LANDMARKS:' + LANDMARKS_STREAM);
|
|
recognizerNode.addOutputStream('WORLD_LANDMARKS:' + WORLD_LANDMARKS_STREAM);
|
|
recognizerNode.addOutputStream('HANDEDNESS:' + HANDEDNESS_STREAM);
|
|
recognizerNode.setOptions(calculatorOptions);
|
|
|
|
graphConfig.addNode(recognizerNode);
|
|
|
|
this.graphRunner.attachProtoVectorListener(
|
|
LANDMARKS_STREAM, (binaryProto, timestamp) => {
|
|
this.addJsLandmarks(binaryProto);
|
|
this.setLatestOutputTimestamp(timestamp);
|
|
});
|
|
this.graphRunner.attachProtoVectorListener(
|
|
WORLD_LANDMARKS_STREAM, (binaryProto, timestamp) => {
|
|
this.adddJsWorldLandmarks(binaryProto);
|
|
this.setLatestOutputTimestamp(timestamp);
|
|
});
|
|
this.graphRunner.attachProtoVectorListener(
|
|
HAND_GESTURES_STREAM, (binaryProto, timestamp) => {
|
|
// Gesture index is not used, because the final gesture result comes
|
|
// from multiple classifiers.
|
|
this.gestures.push(
|
|
...this.toJsCategories(binaryProto, /* populateIndex= */ false));
|
|
this.setLatestOutputTimestamp(timestamp);
|
|
});
|
|
this.graphRunner.attachProtoVectorListener(
|
|
HANDEDNESS_STREAM, (binaryProto, timestamp) => {
|
|
this.handednesses.push(...this.toJsCategories(binaryProto));
|
|
this.setLatestOutputTimestamp(timestamp);
|
|
});
|
|
|
|
const binaryGraph = graphConfig.serializeBinary();
|
|
this.setGraph(new Uint8Array(binaryGraph), /* isBinary= */ true);
|
|
}
|
|
}
|
|
|
|
|