diff --git a/mediapipe/tasks/web/vision/core/image.test.ts b/mediapipe/tasks/web/vision/core/image.test.ts index 31d30eefe..e991aa7dc 100644 --- a/mediapipe/tasks/web/vision/core/image.test.ts +++ b/mediapipe/tasks/web/vision/core/image.test.ts @@ -29,17 +29,19 @@ if (skip) { /** The image types supported by MPImage. */ type ImageType = ImageData|ImageBitmap|WebGLTexture; -const IMAGE_2_2 = [1, 0, 0, 255, 2, 0, 0, 255, 3, 0, 0, 255, 4, 0, 0, 255]; -const IMAGE_2_1 = [1, 0, 0, 255, 2, 0, 0, 255]; +const IMAGE_2_2 = [1, 1, 1, 255, 2, 2, 2, 255, 3, 3, 3, 255, 4, 4, 4, 255]; +const IMAGE_2_1 = [1, 1, 1, 255, 2, 2, 2, 255]; const IMAGE_2_3 = [ - 1, 0, 0, 255, 2, 0, 0, 255, 3, 0, 0, 255, - 4, 0, 0, 255, 5, 0, 0, 255, 6, 0, 0, 255 + 1, 1, 1, 255, 2, 2, 2, 255, 3, 3, 3, 255, + 4, 4, 4, 255, 5, 5, 5, 255, 6, 6, 6, 255 ]; /** The test images and data to use for the unit tests below. */ class MPImageTestContext { canvas!: OffscreenCanvas; gl!: WebGL2RenderingContext; + uint8ClampedArray!: Uint8ClampedArray; + float32Array!: Float32Array; imageData!: ImageData; imageBitmap!: ImageBitmap; webGLTexture!: WebGLTexture; @@ -52,6 +54,13 @@ class MPImageTestContext { this.gl = this.canvas.getContext('webgl2') as WebGL2RenderingContext; const gl = this.gl; + + this.uint8ClampedArray = new Uint8ClampedArray(pixels.length / 4); + this.float32Array = new Float32Array(pixels.length / 4); + for (let i = 0; i < this.uint8ClampedArray.length; ++i) { + this.uint8ClampedArray[i] = pixels[i * 4]; + this.float32Array[i] = pixels[i * 4] / 255; + } this.imageData = new ImageData(new Uint8ClampedArray(pixels), width, height); this.imageBitmap = await createImageBitmap(this.imageData); @@ -65,6 +74,10 @@ class MPImageTestContext { get(type: unknown) { switch (type) { + case Uint8ClampedArray: + return this.uint8ClampedArray; + case Float32Array: + return this.float32Array; case ImageData: return this.imageData; case ImageBitmap: @@ -116,7 +129,13 @@ class MPImageTestContext { } function assertEquality(image: MPImage, expected: ImageType): void { - if (expected instanceof ImageData) { + if (expected instanceof Uint8ClampedArray) { + const result = image.getImage(MPImageStorageType.UINT8_CLAMPED_ARRAY); + expect(result).toEqual(expected); + } else if (expected instanceof Float32Array) { + const result = image.getImage(MPImageStorageType.FLOAT32_ARRAY); + expect(result).toEqual(expected); + } else if (expected instanceof ImageData) { const result = image.getImage(MPImageStorageType.IMAGE_DATA); expect(result).toEqual(expected); } else if (expected instanceof ImageBitmap) { @@ -158,7 +177,9 @@ class MPImageTestContext { shaderContext.close(); } - const sources = skip ? [] : [ImageData, ImageBitmap, WebGLTexture]; + const sources = skip ? + [] : + [Uint8ClampedArray, Float32Array, ImageData, ImageBitmap, WebGLTexture]; for (let i = 0; i < sources.length; i++) { for (let j = 0; j < sources.length; j++) { @@ -220,15 +241,41 @@ class MPImageTestContext { const shaderContext = new MPImageShaderContext(); const image = createImage(shaderContext, context.imageData, WIDTH, HEIGHT); + expect(image.hasType(MPImageStorageType.IMAGE_DATA)).toBe(true); + expect(image.hasType(MPImageStorageType.UINT8_CLAMPED_ARRAY)).toBe(false); + expect(image.hasType(MPImageStorageType.FLOAT32_ARRAY)).toBe(false); + expect(image.hasType(MPImageStorageType.WEBGL_TEXTURE)).toBe(false); + expect(image.hasType(MPImageStorageType.IMAGE_BITMAP)).toBe(false); + + image.getImage(MPImageStorageType.UINT8_CLAMPED_ARRAY); + + expect(image.hasType(MPImageStorageType.IMAGE_DATA)).toBe(true); + expect(image.hasType(MPImageStorageType.UINT8_CLAMPED_ARRAY)).toBe(true); + expect(image.hasType(MPImageStorageType.FLOAT32_ARRAY)).toBe(false); + expect(image.hasType(MPImageStorageType.WEBGL_TEXTURE)).toBe(false); + expect(image.hasType(MPImageStorageType.IMAGE_BITMAP)).toBe(false); + + image.getImage(MPImageStorageType.FLOAT32_ARRAY); + + expect(image.hasType(MPImageStorageType.IMAGE_DATA)).toBe(true); + expect(image.hasType(MPImageStorageType.UINT8_CLAMPED_ARRAY)).toBe(true); + expect(image.hasType(MPImageStorageType.FLOAT32_ARRAY)).toBe(true); + expect(image.hasType(MPImageStorageType.WEBGL_TEXTURE)).toBe(false); + expect(image.hasType(MPImageStorageType.IMAGE_BITMAP)).toBe(false); + image.getImage(MPImageStorageType.WEBGL_TEXTURE); expect(image.hasType(MPImageStorageType.IMAGE_DATA)).toBe(true); + expect(image.hasType(MPImageStorageType.UINT8_CLAMPED_ARRAY)).toBe(true); + expect(image.hasType(MPImageStorageType.FLOAT32_ARRAY)).toBe(true); expect(image.hasType(MPImageStorageType.WEBGL_TEXTURE)).toBe(true); expect(image.hasType(MPImageStorageType.IMAGE_BITMAP)).toBe(false); image.getImage(MPImageStorageType.IMAGE_BITMAP); expect(image.hasType(MPImageStorageType.IMAGE_DATA)).toBe(true); + expect(image.hasType(MPImageStorageType.UINT8_CLAMPED_ARRAY)).toBe(true); + expect(image.hasType(MPImageStorageType.FLOAT32_ARRAY)).toBe(true); expect(image.hasType(MPImageStorageType.WEBGL_TEXTURE)).toBe(true); expect(image.hasType(MPImageStorageType.IMAGE_BITMAP)).toBe(true); diff --git a/mediapipe/tasks/web/vision/core/image.ts b/mediapipe/tasks/web/vision/core/image.ts index 3c87e0d2b..739f05f0f 100644 --- a/mediapipe/tasks/web/vision/core/image.ts +++ b/mediapipe/tasks/web/vision/core/image.ts @@ -16,6 +16,12 @@ /** The underlying type of the image. */ export enum MPImageStorageType { + /** Represents the native `UInt8ClampedArray` type. */ + UINT8_CLAMPED_ARRAY, + /** + * Represents the native `Float32Array` type. Values range from [0.0, 1.0]. + */ + FLOAT32_ARRAY, /** Represents the native `ImageData` type. */ IMAGE_DATA, /** Represents the native `ImageBitmap` type. */ @@ -213,6 +219,7 @@ export class MPImageShaderContext { return result; } + /** * Binds a framebuffer to the canvas. If the framebuffer does not yet exist, * creates it first. Binds the provided texture to the framebuffer. @@ -251,6 +258,97 @@ export class MPImageShaderContext { } } +/** + * An interface that can be used to provide custom conversion functions. These + * functions are invoked to convert pixel values between different channel + * counts and value ranges. Any conversion function that is not specified will + * result in a default conversion. + */ +export interface MPImageChannelConverter { + /** + * A conversion function to convert a number in the [0.0, 1.0] range to RGBA. + * The output is an array with four elemeents whose values range from 0 to 255 + * inclusive. + * + * The default conversion function is `[v * 255, v * 255, v * 255, 255]` + * and will log a warning if invoked. + */ + floatToRGBAConverter?: (value: number) => [number, number, number, number]; + + /* + * A conversion function to convert a number in the [0, 255] range to RGBA. + * The output is an array with four elemeents whose values range from 0 to 255 + * inclusive. + * + * The default conversion function is `[v, v , v , 255]` and will log a + * warning if invoked. + */ + uint8ToRGBAConverter?: (value: number) => [number, number, number, number]; + + /** + * A conversion function to convert an RGBA value in the range of 0 to 255 to + * a single value in the [0.0, 1.0] range. + * + * The default conversion function is `(r / 3 + g / 3 + b / 3) / 255` and will + * log a warning if invoked. + */ + rgbaToFloatConverter?: (r: number, g: number, b: number, a: number) => number; + + /** + * A conversion function to convert an RGBA value in the range of 0 to 255 to + * a single value in the [0, 255] range. + * + * The default conversion function is `r / 3 + g / 3 + b / 3` and will log a + * warning if invoked. + */ + rgbaToUint8Converter?: (r: number, g: number, b: number, a: number) => number; + + /** + * A conversion function to convert a single value in the 0.0 to 1.0 range to + * [0, 255]. + * + * The default conversion function is `r * 255` and will log a warning if + * invoked. + */ + floatToUint8Converter?: (value: number) => number; + + /** + * A conversion function to convert a single value in the 0 to 255 range to + * [0.0, 1.0] . + * + * The default conversion function is `r / 255` and will log a warning if + * invoked. + */ + uint8ToFloatConverter?: (value: number) => number; +} + +const DEFAULT_CONVERTER: Required = { + floatToRGBAConverter: v => { + console.log('Using default floatToRGBAConverter'); + return [v * 255, v * 255, v * 255, 255]; + }, + uint8ToRGBAConverter: v => { + console.log('Using default uint8ToRGBAConverter'); + return [v, v, v, 255]; + }, + rgbaToFloatConverter: (r, g, b) => { + console.log('Using default floatToRGBAConverter'); + return (r / 3 + g / 3 + b / 3) / 255; + }, + rgbaToUint8Converter: (r, g, b) => { + console.log('Using default rgbaToUint8Converter'); + return r / 3 + g / 3 + b / 3; + }, + floatToUint8Converter: v => { + console.log('Using default floatToUint8Converter'); + return v * 255; + }, + uint8ToFloatConverter: v => { + console.log('Using default uint8ToFloatConverter'); + return v / 255; + }, +}; + /** * The wrapper class for MediaPipe Image objects. * @@ -270,6 +368,14 @@ export class MPImageShaderContext { * initialized with an `OffscreenCanvas`. As we require WebGL2 support, this * places some limitations on Browser support as outlined here: * https://developer.mozilla.org/en-US/docs/Web/API/OffscreenCanvas/getContext + * + * Some MediaPipe tasks return single channel masks. These masks are stored + * using an underlying `Uint8ClampedArray` an `Float32Array` (represented as + * single-channel arrays). To convert these type to other formats a conversion + * function is invoked to convert pixel values between single channel and four + * channel RGBA values. To customize this conversion, you can specify these + * conversion functions when you invoke `getImage()`. If you use the default + * conversion function a warning will be logged to the console. */ export class MPImage { private gl?: WebGL2RenderingContext; @@ -297,49 +403,110 @@ export class MPImage { return !!this.getContainer(type); } + /** + * Returns the underlying image as a single channel `Uint8ClampedArray`. Note + * that this involves an expensive GPU to CPU transfer if the current image is + * only available as an `ImageBitmap` or `WebGLTexture`. If necessary, this + * function converts RGBA data pixel-by-pixel to a single channel value by + * invoking a conversion function (see class comment for detail). + * + * @param type The type of image to return. + * @param converter A set of conversion functions that will be invoked to + * convert the underlying pixel data if necessary. You may omit this + * function if the requested conversion does not change the pixel format. + * @return The current data as a Uint8ClampedArray. + */ + getImage( + type: MPImageStorageType.UINT8_CLAMPED_ARRAY, + converter?: MPImageChannelConverter): Uint8ClampedArray; + /** + * Returns the underlying image as a single channel `Float32Array`. Note + * that this involves an expensive GPU to CPU transfer if the current image is + * only available as an `ImageBitmap` or `WebGLTexture`. If necessary, this + * function converts RGBA data pixel-by-pixel to a single channel value by + * invoking a conversion function (see class comment for detail). + * + * @param type The type of image to return. + * @param converter A set of conversion functions that will be invoked to + * convert the underlying pixel data if necessary. You may omit this + * function if the requested conversion does not change the pixel format. + * @return The current image as a Float32Array. + */ + getImage( + type: MPImageStorageType.FLOAT32_ARRAY, + converter?: MPImageChannelConverter): Float32Array; /** * Returns the underlying image as an `ImageData` object. Note that this * involves an expensive GPU to CPU transfer if the current image is only - * available as an `ImageBitmap` or `WebGLTexture`. + * available as an `ImageBitmap` or `WebGLTexture`. If necessary, this + * function converts single channel pixel values to RGBA by invoking a + * conversion function (see class comment for detail). * * @return The current image as an ImageData object. */ - getImage(type: MPImageStorageType.IMAGE_DATA): ImageData; + getImage( + type: MPImageStorageType.IMAGE_DATA, + converter?: MPImageChannelConverter): ImageData; /** * Returns the underlying image as an `ImageBitmap`. Note that * conversions to `ImageBitmap` are expensive, especially if the data - * currently resides on CPU. + * currently resides on CPU. If necessary, this function first converts single + * channel pixel values to RGBA by invoking a conversion function (see class + * comment for detail). * * Processing with `ImageBitmap`s requires that the MediaPipe Task was * initialized with an `OffscreenCanvas` with WebGL2 support. See * https://developer.mozilla.org/en-US/docs/Web/API/OffscreenCanvas/getContext * for a list of supported platforms. * + * @param type The type of image to return. + * @param converter A set of conversion functions that will be invoked to + * convert the underlying pixel data if necessary. You may omit this + * function if the requested conversion does not change the pixel format. * @return The current image as an ImageBitmap object. */ - getImage(type: MPImageStorageType.IMAGE_BITMAP): ImageBitmap; + getImage( + type: MPImageStorageType.IMAGE_BITMAP, + converter?: MPImageChannelConverter): ImageBitmap; /** * Returns the underlying image as a `WebGLTexture` object. Note that this * involves a CPU to GPU transfer if the current image is only available as * an `ImageData` object. The returned texture is bound to the current * canvas (see `.canvas`). * + * @param type The type of image to return. + * @param converter A set of conversion functions that will be invoked to + * convert the underlying pixel data if necessary. You may omit this + * function if the requested conversion does not change the pixel format. * @return The current image as a WebGLTexture. */ - getImage(type: MPImageStorageType.WEBGL_TEXTURE): WebGLTexture; - getImage(type?: MPImageStorageType): MPImageNativeContainer { + getImage( + type: MPImageStorageType.WEBGL_TEXTURE, + converter?: MPImageChannelConverter): WebGLTexture; + getImage(type?: MPImageStorageType, converter?: MPImageChannelConverter): + MPImageNativeContainer { + const internalConverter = {...DEFAULT_CONVERTER, ...converter}; switch (type) { + case MPImageStorageType.UINT8_CLAMPED_ARRAY: + return this.convertToUint8ClampedArray(internalConverter); + case MPImageStorageType.FLOAT32_ARRAY: + return this.convertToFloat32Array(internalConverter); case MPImageStorageType.IMAGE_DATA: - return this.convertToImageData(); + return this.convertToImageData(internalConverter); case MPImageStorageType.IMAGE_BITMAP: - return this.convertToImageBitmap(); + return this.convertToImageBitmap(internalConverter); case MPImageStorageType.WEBGL_TEXTURE: - return this.convertToWebGLTexture(); + return this.convertToWebGLTexture(internalConverter); default: throw new Error(`Type is not supported: ${type}`); } } + + private getContainer(type: MPImageStorageType.UINT8_CLAMPED_ARRAY): + Uint8ClampedArray|undefined; + private getContainer(type: MPImageStorageType.FLOAT32_ARRAY): Float32Array + |undefined; private getContainer(type: MPImageStorageType.IMAGE_DATA): ImageData |undefined; private getContainer(type: MPImageStorageType.IMAGE_BITMAP): ImageBitmap @@ -348,9 +515,14 @@ export class MPImage { |undefined; private getContainer(type: MPImageStorageType): MPImageNativeContainer |undefined; + /** Returns the container for the requested storage type iff it exists. */ private getContainer(type: MPImageStorageType): MPImageNativeContainer |undefined { switch (type) { + case MPImageStorageType.UINT8_CLAMPED_ARRAY: + return this.containers.find(img => img instanceof Uint8ClampedArray); + case MPImageStorageType.FLOAT32_ARRAY: + return this.containers.find(img => img instanceof Float32Array); case MPImageStorageType.IMAGE_DATA: return this.containers.find(img => img instanceof ImageData); case MPImageStorageType.IMAGE_BITMAP: @@ -377,7 +549,11 @@ export class MPImage { for (const container of this.containers) { let destinationContainer: MPImageNativeContainer; - if (container instanceof ImageData) { + if (container instanceof Uint8ClampedArray) { + destinationContainer = new Uint8ClampedArray(container); + } else if (container instanceof Float32Array) { + destinationContainer = new Float32Array(container); + } else if (container instanceof ImageData) { destinationContainer = new ImageData(container.data, this.width, this.height); } else if (container instanceof WebGLTexture) { @@ -406,7 +582,7 @@ export class MPImage { this.unbindTexture(); } else if (container instanceof ImageBitmap) { - this.convertToWebGLTexture(); + this.convertToWebGLTexture(DEFAULT_CONVERTER); this.bindTexture(); destinationContainer = this.copyTextureToBitmap(); this.unbindTexture(); @@ -423,7 +599,6 @@ export class MPImage { this.shaderContext, this.width, this.height); } - private getOffscreenCanvas(): OffscreenCanvas { if (!(this.canvas instanceof OffscreenCanvas)) { throw new Error( @@ -455,48 +630,131 @@ export class MPImage { return this.shaderContext; } - private convertToImageBitmap(): ImageBitmap { + private convertToImageBitmap(converter: Required): + ImageBitmap { let imageBitmap = this.getContainer(MPImageStorageType.IMAGE_BITMAP); if (!imageBitmap) { - this.convertToWebGLTexture(); + this.convertToWebGLTexture(converter); imageBitmap = this.convertWebGLTextureToImageBitmap(); this.containers.push(imageBitmap); this.ownsImageBitmap = true; } + return imageBitmap; } - private convertToImageData(): ImageData { + private convertToImageData(converter: Required): + ImageData { let imageData = this.getContainer(MPImageStorageType.IMAGE_DATA); if (!imageData) { - const gl = this.getGL(); - const shaderContext = this.getShaderContext(); - const pixels = new Uint8Array(this.width * this.height * 4); + if (this.hasType(MPImageStorageType.UINT8_CLAMPED_ARRAY)) { + const source = + this.getContainer(MPImageStorageType.UINT8_CLAMPED_ARRAY)!; + const destination = new Uint8ClampedArray(this.width * this.height * 4); + for (let i = 0; i < this.width * this.height; i++) { + const rgba = converter.uint8ToRGBAConverter(source[i]); + destination[i * 4] = rgba[0]; + destination[i * 4 + 1] = rgba[1]; + destination[i * 4 + 2] = rgba[2]; + destination[i * 4 + 3] = rgba[3]; + } + imageData = new ImageData(destination, this.width, this.height); + this.containers.push(imageData); + } else if (this.hasType(MPImageStorageType.FLOAT32_ARRAY)) { + const source = this.getContainer(MPImageStorageType.FLOAT32_ARRAY)!; + const destination = new Uint8ClampedArray(this.width * this.height * 4); + for (let i = 0; i < this.width * this.height; i++) { + const rgba = converter.floatToRGBAConverter(source[i]); + destination[i * 4] = rgba[0]; + destination[i * 4 + 1] = rgba[1]; + destination[i * 4 + 2] = rgba[2]; + destination[i * 4 + 3] = rgba[3]; + } + imageData = new ImageData(destination, this.width, this.height); + this.containers.push(imageData); + } else if ( + this.hasType(MPImageStorageType.IMAGE_BITMAP) || + this.hasType(MPImageStorageType.WEBGL_TEXTURE)) { + const gl = this.getGL(); + const shaderContext = this.getShaderContext(); + const pixels = new Uint8Array(this.width * this.height * 4); - // Create texture if needed - const webGLTexture = this.convertToWebGLTexture(); + // Create texture if needed + const webGlTexture = this.convertToWebGLTexture(converter); - // Create a framebuffer from the texture and read back pixels - shaderContext.bindFramebuffer(gl, webGLTexture); - gl.readPixels( - 0, 0, this.width, this.height, gl.RGBA, gl.UNSIGNED_BYTE, pixels); - shaderContext.unbindFramebuffer(); + // Create a framebuffer from the texture and read back pixels + shaderContext.bindFramebuffer(gl, webGlTexture); + gl.readPixels( + 0, 0, this.width, this.height, gl.RGBA, gl.UNSIGNED_BYTE, pixels); + shaderContext.unbindFramebuffer(); - imageData = new ImageData( - new Uint8ClampedArray(pixels.buffer), this.width, this.height); - this.containers.push(imageData); + imageData = new ImageData( + new Uint8ClampedArray(pixels.buffer), this.width, this.height); + this.containers.push(imageData); + } else { + throw new Error('Couldn\t find backing image for ImageData conversion'); + } } return imageData; } - private convertToWebGLTexture(): WebGLTexture { + private convertToUint8ClampedArray( + converter: Required): Uint8ClampedArray { + let uint8ClampedArray = + this.getContainer(MPImageStorageType.UINT8_CLAMPED_ARRAY); + if (!uint8ClampedArray) { + if (this.hasType(MPImageStorageType.FLOAT32_ARRAY)) { + const source = this.getContainer(MPImageStorageType.FLOAT32_ARRAY)!; + uint8ClampedArray = new Uint8ClampedArray( + source.map(v => converter.floatToUint8Converter(v))); + } else { + const source = this.convertToImageData(converter).data; + uint8ClampedArray = new Uint8ClampedArray(this.width * this.height); + for (let i = 0; i < this.width * this.height; i++) { + uint8ClampedArray[i] = converter.rgbaToUint8Converter( + source[i * 4], source[i * 4 + 1], source[i * 4 + 2], + source[i * 4 + 3]); + } + } + this.containers.push(uint8ClampedArray); + } + + return uint8ClampedArray; + } + + private convertToFloat32Array(converter: Required): + Float32Array { + let float32Array = this.getContainer(MPImageStorageType.FLOAT32_ARRAY); + if (!float32Array) { + if (this.hasType(MPImageStorageType.UINT8_CLAMPED_ARRAY)) { + const source = + this.getContainer(MPImageStorageType.UINT8_CLAMPED_ARRAY)!; + float32Array = new Float32Array(source).map( + v => converter.uint8ToFloatConverter(v)); + } else { + const source = this.convertToImageData(converter).data; + float32Array = new Float32Array(this.width * this.height); + for (let i = 0; i < this.width * this.height; i++) { + float32Array[i] = converter.rgbaToFloatConverter( + source[i * 4], source[i * 4 + 1], source[i * 4 + 2], + source[i * 4 + 3]); + } + } + this.containers.push(float32Array); + } + + return float32Array; + } + + private convertToWebGLTexture(converter: Required): + WebGLTexture { let webGLTexture = this.getContainer(MPImageStorageType.WEBGL_TEXTURE); if (!webGLTexture) { const gl = this.getGL(); webGLTexture = this.bindTexture(); const source = this.getContainer(MPImageStorageType.IMAGE_BITMAP) || - this.convertToImageData(); + this.convertToImageData(converter); gl.texImage2D( gl.TEXTURE_2D, 0, gl.RGBA, gl.RGBA, gl.UNSIGNED_BYTE, source); this.unbindTexture();