Add support for single-channel images to MPImage

PiperOrigin-RevId: 527629970
This commit is contained in:
Sebastian Schmidt 2023-04-27 11:10:00 -07:00 committed by Copybara-Service
parent a5852b0513
commit 1b82821f15
2 changed files with 341 additions and 36 deletions

View File

@ -29,17 +29,19 @@ if (skip) {
/** The image types supported by MPImage. */
type ImageType = ImageData|ImageBitmap|WebGLTexture;
const IMAGE_2_2 = [1, 0, 0, 255, 2, 0, 0, 255, 3, 0, 0, 255, 4, 0, 0, 255];
const IMAGE_2_1 = [1, 0, 0, 255, 2, 0, 0, 255];
const IMAGE_2_2 = [1, 1, 1, 255, 2, 2, 2, 255, 3, 3, 3, 255, 4, 4, 4, 255];
const IMAGE_2_1 = [1, 1, 1, 255, 2, 2, 2, 255];
const IMAGE_2_3 = [
1, 0, 0, 255, 2, 0, 0, 255, 3, 0, 0, 255,
4, 0, 0, 255, 5, 0, 0, 255, 6, 0, 0, 255
1, 1, 1, 255, 2, 2, 2, 255, 3, 3, 3, 255,
4, 4, 4, 255, 5, 5, 5, 255, 6, 6, 6, 255
];
/** The test images and data to use for the unit tests below. */
class MPImageTestContext {
canvas!: OffscreenCanvas;
gl!: WebGL2RenderingContext;
uint8ClampedArray!: Uint8ClampedArray;
float32Array!: Float32Array;
imageData!: ImageData;
imageBitmap!: ImageBitmap;
webGLTexture!: WebGLTexture;
@ -52,6 +54,13 @@ class MPImageTestContext {
this.gl = this.canvas.getContext('webgl2') as WebGL2RenderingContext;
const gl = this.gl;
this.uint8ClampedArray = new Uint8ClampedArray(pixels.length / 4);
this.float32Array = new Float32Array(pixels.length / 4);
for (let i = 0; i < this.uint8ClampedArray.length; ++i) {
this.uint8ClampedArray[i] = pixels[i * 4];
this.float32Array[i] = pixels[i * 4] / 255;
}
this.imageData =
new ImageData(new Uint8ClampedArray(pixels), width, height);
this.imageBitmap = await createImageBitmap(this.imageData);
@ -65,6 +74,10 @@ class MPImageTestContext {
get(type: unknown) {
switch (type) {
case Uint8ClampedArray:
return this.uint8ClampedArray;
case Float32Array:
return this.float32Array;
case ImageData:
return this.imageData;
case ImageBitmap:
@ -116,7 +129,13 @@ class MPImageTestContext {
}
function assertEquality(image: MPImage, expected: ImageType): void {
if (expected instanceof ImageData) {
if (expected instanceof Uint8ClampedArray) {
const result = image.getImage(MPImageStorageType.UINT8_CLAMPED_ARRAY);
expect(result).toEqual(expected);
} else if (expected instanceof Float32Array) {
const result = image.getImage(MPImageStorageType.FLOAT32_ARRAY);
expect(result).toEqual(expected);
} else if (expected instanceof ImageData) {
const result = image.getImage(MPImageStorageType.IMAGE_DATA);
expect(result).toEqual(expected);
} else if (expected instanceof ImageBitmap) {
@ -158,7 +177,9 @@ class MPImageTestContext {
shaderContext.close();
}
const sources = skip ? [] : [ImageData, ImageBitmap, WebGLTexture];
const sources = skip ?
[] :
[Uint8ClampedArray, Float32Array, ImageData, ImageBitmap, WebGLTexture];
for (let i = 0; i < sources.length; i++) {
for (let j = 0; j < sources.length; j++) {
@ -220,15 +241,41 @@ class MPImageTestContext {
const shaderContext = new MPImageShaderContext();
const image = createImage(shaderContext, context.imageData, WIDTH, HEIGHT);
expect(image.hasType(MPImageStorageType.IMAGE_DATA)).toBe(true);
expect(image.hasType(MPImageStorageType.UINT8_CLAMPED_ARRAY)).toBe(false);
expect(image.hasType(MPImageStorageType.FLOAT32_ARRAY)).toBe(false);
expect(image.hasType(MPImageStorageType.WEBGL_TEXTURE)).toBe(false);
expect(image.hasType(MPImageStorageType.IMAGE_BITMAP)).toBe(false);
image.getImage(MPImageStorageType.UINT8_CLAMPED_ARRAY);
expect(image.hasType(MPImageStorageType.IMAGE_DATA)).toBe(true);
expect(image.hasType(MPImageStorageType.UINT8_CLAMPED_ARRAY)).toBe(true);
expect(image.hasType(MPImageStorageType.FLOAT32_ARRAY)).toBe(false);
expect(image.hasType(MPImageStorageType.WEBGL_TEXTURE)).toBe(false);
expect(image.hasType(MPImageStorageType.IMAGE_BITMAP)).toBe(false);
image.getImage(MPImageStorageType.FLOAT32_ARRAY);
expect(image.hasType(MPImageStorageType.IMAGE_DATA)).toBe(true);
expect(image.hasType(MPImageStorageType.UINT8_CLAMPED_ARRAY)).toBe(true);
expect(image.hasType(MPImageStorageType.FLOAT32_ARRAY)).toBe(true);
expect(image.hasType(MPImageStorageType.WEBGL_TEXTURE)).toBe(false);
expect(image.hasType(MPImageStorageType.IMAGE_BITMAP)).toBe(false);
image.getImage(MPImageStorageType.WEBGL_TEXTURE);
expect(image.hasType(MPImageStorageType.IMAGE_DATA)).toBe(true);
expect(image.hasType(MPImageStorageType.UINT8_CLAMPED_ARRAY)).toBe(true);
expect(image.hasType(MPImageStorageType.FLOAT32_ARRAY)).toBe(true);
expect(image.hasType(MPImageStorageType.WEBGL_TEXTURE)).toBe(true);
expect(image.hasType(MPImageStorageType.IMAGE_BITMAP)).toBe(false);
image.getImage(MPImageStorageType.IMAGE_BITMAP);
expect(image.hasType(MPImageStorageType.IMAGE_DATA)).toBe(true);
expect(image.hasType(MPImageStorageType.UINT8_CLAMPED_ARRAY)).toBe(true);
expect(image.hasType(MPImageStorageType.FLOAT32_ARRAY)).toBe(true);
expect(image.hasType(MPImageStorageType.WEBGL_TEXTURE)).toBe(true);
expect(image.hasType(MPImageStorageType.IMAGE_BITMAP)).toBe(true);

View File

@ -16,6 +16,12 @@
/** The underlying type of the image. */
export enum MPImageStorageType {
/** Represents the native `Uint8ClampedArray` type. */
UINT8_CLAMPED_ARRAY,
/**
* Represents the native `Float32Array` type. Values range from [0.0, 1.0].
*/
FLOAT32_ARRAY,
/** Represents the native `ImageData` type. */
IMAGE_DATA,
/** Represents the native `ImageBitmap` type. */
@ -213,6 +219,7 @@ export class MPImageShaderContext {
return result;
}
/**
* Binds a framebuffer to the canvas. If the framebuffer does not yet exist,
* creates it first. Binds the provided texture to the framebuffer.
@ -251,6 +258,97 @@ export class MPImageShaderContext {
}
}
/**
* An interface that can be used to provide custom conversion functions. These
* functions are invoked to convert pixel values between different channel
* counts and value ranges. Any conversion function that is not specified will
* result in a default conversion.
*/
export interface MPImageChannelConverter {
/**
* A conversion function to convert a number in the [0.0, 1.0] range to RGBA.
* The output is an array with four elements whose values range from 0 to 255
* inclusive.
*
* The default conversion function is `[v * 255, v * 255, v * 255, 255]`
* and will log a warning if invoked.
*/
floatToRGBAConverter?: (value: number) => [number, number, number, number];
/**
* A conversion function to convert a number in the [0, 255] range to RGBA.
* The output is an array with four elements whose values range from 0 to 255
* inclusive.
*
* The default conversion function is `[v, v, v, 255]` and will log a
* warning if invoked.
*/
uint8ToRGBAConverter?: (value: number) => [number, number, number, number];
/**
* A conversion function to convert an RGBA value in the range of 0 to 255 to
* a single value in the [0.0, 1.0] range.
*
* The default conversion function is `(r / 3 + g / 3 + b / 3) / 255` and will
* log a warning if invoked. The default ignores the alpha component.
*/
rgbaToFloatConverter?: (r: number, g: number, b: number, a: number) => number;
/**
* A conversion function to convert an RGBA value in the range of 0 to 255 to
* a single value in the [0, 255] range.
*
* The default conversion function is `r / 3 + g / 3 + b / 3` and will log a
* warning if invoked. The default ignores the alpha component.
*/
rgbaToUint8Converter?: (r: number, g: number, b: number, a: number) => number;
/**
* A conversion function to convert a single value in the 0.0 to 1.0 range to
* [0, 255].
*
* The default conversion function is `v * 255` and will log a warning if
* invoked.
*/
floatToUint8Converter?: (value: number) => number;
/**
* A conversion function to convert a single value in the 0 to 255 range to
* [0.0, 1.0].
*
* The default conversion function is `v / 255` and will log a warning if
* invoked.
*/
uint8ToFloatConverter?: (value: number) => number;
}
/** Names of the default converters that have already logged their notice. */
const DEFAULT_CONVERTERS_LOGGED = new Set<string>();

/**
 * Logs that the default converter `name` is being used.
 *
 * The converters are invoked once per pixel, so the notice is emitted only
 * the first time a given converter is used instead of on every call.
 *
 * @param name The property name of the default converter that was invoked.
 */
function logDefaultConverterUse(name: string): void {
  if (DEFAULT_CONVERTERS_LOGGED.has(name)) {
    return;
  }
  DEFAULT_CONVERTERS_LOGGED.add(name);
  console.log(`Using default ${name}`);
}

/**
 * The conversion functions that are used for every converter the caller does
 * not supply. Each one treats the single-channel value as a gray level and
 * logs a notice the first time it is used.
 */
const DEFAULT_CONVERTER: Required<MPImageChannelConverter> = {
  floatToRGBAConverter: (v: number) => {
    logDefaultConverterUse('floatToRGBAConverter');
    return [v * 255, v * 255, v * 255, 255];
  },
  uint8ToRGBAConverter: (v: number) => {
    logDefaultConverterUse('uint8ToRGBAConverter');
    return [v, v, v, 255];
  },
  rgbaToFloatConverter: (r: number, g: number, b: number) => {
    // Previously logged 'floatToRGBAConverter' (copy-paste error).
    logDefaultConverterUse('rgbaToFloatConverter');
    return (r / 3 + g / 3 + b / 3) / 255;
  },
  rgbaToUint8Converter: (r: number, g: number, b: number) => {
    logDefaultConverterUse('rgbaToUint8Converter');
    return r / 3 + g / 3 + b / 3;
  },
  floatToUint8Converter: (v: number) => {
    logDefaultConverterUse('floatToUint8Converter');
    return v * 255;
  },
  uint8ToFloatConverter: (v: number) => {
    logDefaultConverterUse('uint8ToFloatConverter');
    return v / 255;
  },
};
/**
* The wrapper class for MediaPipe Image objects.
*
@ -270,6 +368,14 @@ export class MPImageShaderContext {
* initialized with an `OffscreenCanvas`. As we require WebGL2 support, this
* places some limitations on Browser support as outlined here:
* https://developer.mozilla.org/en-US/docs/Web/API/OffscreenCanvas/getContext
*
* Some MediaPipe tasks return single channel masks. These masks are stored
* using an underlying `Uint8ClampedArray` or `Float32Array` (represented as
* single-channel arrays). To convert these types to other formats, a
* conversion function is invoked to convert pixel values between single
* channel and four channel RGBA values. To customize this conversion, you can
* specify these conversion functions when you invoke `getImage()`. If you use
* the default conversion function, a warning will be logged to the console.
*/
export class MPImage {
private gl?: WebGL2RenderingContext;
@ -297,49 +403,110 @@ export class MPImage {
return !!this.getContainer(type);
}
/**
* Returns the underlying image as a single channel `Uint8ClampedArray`. Note
* that this involves an expensive GPU to CPU transfer if the current image is
* only available as an `ImageBitmap` or `WebGLTexture`. If necessary, this
* function converts RGBA data pixel-by-pixel to a single channel value by
* invoking a conversion function (see class comment for detail).
*
* @param type The type of image to return.
* @param converter A set of conversion functions that will be invoked to
* convert the underlying pixel data if necessary. You may omit this
* argument if the requested conversion does not change the pixel format.
* @return The current image as a Uint8ClampedArray.
*/
getImage(
type: MPImageStorageType.UINT8_CLAMPED_ARRAY,
converter?: MPImageChannelConverter): Uint8ClampedArray;
/**
* Returns the underlying image as a single channel `Float32Array`. Note
* that this involves an expensive GPU to CPU transfer if the current image is
* only available as an `ImageBitmap` or `WebGLTexture`. If necessary, this
* function converts RGBA data pixel-by-pixel to a single channel value by
* invoking a conversion function (see class comment for detail).
*
* @param type The type of image to return.
* @param converter A set of conversion functions that will be invoked to
* convert the underlying pixel data if necessary. You may omit this
* argument if the requested conversion does not change the pixel format.
* @return The current image as a Float32Array.
*/
getImage(
type: MPImageStorageType.FLOAT32_ARRAY,
converter?: MPImageChannelConverter): Float32Array;
/**
* Returns the underlying image as an `ImageData` object. Note that this
* involves an expensive GPU to CPU transfer if the current image is only
* available as an `ImageBitmap` or `WebGLTexture`.
* available as an `ImageBitmap` or `WebGLTexture`. If necessary, this
* function converts single channel pixel values to RGBA by invoking a
* conversion function (see class comment for detail).
*
* @param type The type of image to return.
* @param converter A set of conversion functions that will be invoked to
* convert the underlying pixel data if necessary. You may omit this
* argument if the requested conversion does not change the pixel format.
* @return The current image as an ImageData object.
*/
getImage(type: MPImageStorageType.IMAGE_DATA): ImageData;
getImage(
type: MPImageStorageType.IMAGE_DATA,
converter?: MPImageChannelConverter): ImageData;
/**
* Returns the underlying image as an `ImageBitmap`. Note that
* conversions to `ImageBitmap` are expensive, especially if the data
* currently resides on CPU.
* currently resides on CPU. If necessary, this function first converts single
* channel pixel values to RGBA by invoking a conversion function (see class
* comment for detail).
*
* Processing with `ImageBitmap`s requires that the MediaPipe Task was
* initialized with an `OffscreenCanvas` with WebGL2 support. See
* https://developer.mozilla.org/en-US/docs/Web/API/OffscreenCanvas/getContext
* for a list of supported platforms.
*
* @param type The type of image to return.
* @param converter A set of conversion functions that will be invoked to
* convert the underlying pixel data if necessary. You may omit this
* argument if the requested conversion does not change the pixel format.
* @return The current image as an ImageBitmap object.
*/
getImage(type: MPImageStorageType.IMAGE_BITMAP): ImageBitmap;
getImage(
type: MPImageStorageType.IMAGE_BITMAP,
converter?: MPImageChannelConverter): ImageBitmap;
/**
* Returns the underlying image as a `WebGLTexture` object. Note that this
* involves a CPU to GPU transfer if the current image is only available as
* an `ImageData` object. The returned texture is bound to the current
* canvas (see `.canvas`).
*
* @param type The type of image to return.
* @param converter A set of conversion functions that will be invoked to
* convert the underlying pixel data if necessary. You may omit this
* argument if the requested conversion does not change the pixel format.
* @return The current image as a WebGLTexture.
*/
getImage(type: MPImageStorageType.WEBGL_TEXTURE): WebGLTexture;
getImage(type?: MPImageStorageType): MPImageNativeContainer {
getImage(
type: MPImageStorageType.WEBGL_TEXTURE,
converter?: MPImageChannelConverter): WebGLTexture;
getImage(type?: MPImageStorageType, converter?: MPImageChannelConverter):
MPImageNativeContainer {
// Caller-supplied conversion functions override the defaults one by one;
// any function the caller leaves out falls back to DEFAULT_CONVERTER.
const internalConverter = {...DEFAULT_CONVERTER, ...converter};
switch (type) {
case MPImageStorageType.UINT8_CLAMPED_ARRAY:
return this.convertToUint8ClampedArray(internalConverter);
case MPImageStorageType.FLOAT32_ARRAY:
return this.convertToFloat32Array(internalConverter);
case MPImageStorageType.IMAGE_DATA:
return this.convertToImageData();
return this.convertToImageData(internalConverter);
case MPImageStorageType.IMAGE_BITMAP:
return this.convertToImageBitmap();
return this.convertToImageBitmap(internalConverter);
case MPImageStorageType.WEBGL_TEXTURE:
return this.convertToWebGLTexture();
return this.convertToWebGLTexture(internalConverter);
default:
// Reached when `type` is omitted or is not one of the cases above.
throw new Error(`Type is not supported: ${type}`);
}
}
private getContainer(type: MPImageStorageType.UINT8_CLAMPED_ARRAY):
Uint8ClampedArray|undefined;
private getContainer(type: MPImageStorageType.FLOAT32_ARRAY): Float32Array
|undefined;
private getContainer(type: MPImageStorageType.IMAGE_DATA): ImageData
|undefined;
private getContainer(type: MPImageStorageType.IMAGE_BITMAP): ImageBitmap
@ -348,9 +515,14 @@ export class MPImage {
|undefined;
private getContainer(type: MPImageStorageType): MPImageNativeContainer
|undefined;
/** Returns the container for the requested storage type iff it exists. */
private getContainer(type: MPImageStorageType): MPImageNativeContainer
|undefined {
switch (type) {
case MPImageStorageType.UINT8_CLAMPED_ARRAY:
return this.containers.find(img => img instanceof Uint8ClampedArray);
case MPImageStorageType.FLOAT32_ARRAY:
return this.containers.find(img => img instanceof Float32Array);
case MPImageStorageType.IMAGE_DATA:
return this.containers.find(img => img instanceof ImageData);
case MPImageStorageType.IMAGE_BITMAP:
@ -377,7 +549,11 @@ export class MPImage {
for (const container of this.containers) {
let destinationContainer: MPImageNativeContainer;
if (container instanceof ImageData) {
if (container instanceof Uint8ClampedArray) {
destinationContainer = new Uint8ClampedArray(container);
} else if (container instanceof Float32Array) {
destinationContainer = new Float32Array(container);
} else if (container instanceof ImageData) {
destinationContainer =
new ImageData(container.data, this.width, this.height);
} else if (container instanceof WebGLTexture) {
@ -406,7 +582,7 @@ export class MPImage {
this.unbindTexture();
} else if (container instanceof ImageBitmap) {
this.convertToWebGLTexture();
this.convertToWebGLTexture(DEFAULT_CONVERTER);
this.bindTexture();
destinationContainer = this.copyTextureToBitmap();
this.unbindTexture();
@ -423,7 +599,6 @@ export class MPImage {
this.shaderContext, this.width, this.height);
}
private getOffscreenCanvas(): OffscreenCanvas {
if (!(this.canvas instanceof OffscreenCanvas)) {
throw new Error(
@ -455,48 +630,131 @@ export class MPImage {
return this.shaderContext;
}
/**
* Returns the image as an `ImageBitmap`. If no `ImageBitmap` container exists
* yet, the image is first converted to a `WebGLTexture`, the texture is turned
* into a bitmap, and the bitmap is added to the image's containers.
*
* @param converter The conversion functions that are forwarded to the
* texture conversion.
* @return The image as an ImageBitmap object.
*/
private convertToImageBitmap(): ImageBitmap {
private convertToImageBitmap(converter: Required<MPImageChannelConverter>):
ImageBitmap {
let imageBitmap = this.getContainer(MPImageStorageType.IMAGE_BITMAP);
if (!imageBitmap) {
this.convertToWebGLTexture();
this.convertToWebGLTexture(converter);
imageBitmap = this.convertWebGLTextureToImageBitmap();
this.containers.push(imageBitmap);
// The bitmap was created by this image rather than supplied to it.
this.ownsImageBitmap = true;
}
return imageBitmap;
}
/**
* Returns the image as `ImageData`, creating the container and adding it to
* the image's containers if it does not exist yet. Single-channel containers
* are expanded to RGBA pixel-by-pixel through `converter`; `ImageBitmap` and
* `WebGLTexture` containers are read back through a framebuffer.
*
* @param converter The conversion functions used to expand single-channel
* pixel values to RGBA.
* @return The image as an ImageData object.
* @throws If the image has no container that can be converted.
*/
private convertToImageData(): ImageData {
private convertToImageData(converter: Required<MPImageChannelConverter>):
ImageData {
let imageData = this.getContainer(MPImageStorageType.IMAGE_DATA);
if (!imageData) {
const gl = this.getGL();
const shaderContext = this.getShaderContext();
const pixels = new Uint8Array(this.width * this.height * 4);
if (this.hasType(MPImageStorageType.UINT8_CLAMPED_ARRAY)) {
const source =
this.getContainer(MPImageStorageType.UINT8_CLAMPED_ARRAY)!;
const destination = new Uint8ClampedArray(this.width * this.height * 4);
for (let i = 0; i < this.width * this.height; i++) {
const rgba = converter.uint8ToRGBAConverter(source[i]);
destination[i * 4] = rgba[0];
destination[i * 4 + 1] = rgba[1];
destination[i * 4 + 2] = rgba[2];
destination[i * 4 + 3] = rgba[3];
}
imageData = new ImageData(destination, this.width, this.height);
this.containers.push(imageData);
} else if (this.hasType(MPImageStorageType.FLOAT32_ARRAY)) {
const source = this.getContainer(MPImageStorageType.FLOAT32_ARRAY)!;
const destination = new Uint8ClampedArray(this.width * this.height * 4);
for (let i = 0; i < this.width * this.height; i++) {
const rgba = converter.floatToRGBAConverter(source[i]);
destination[i * 4] = rgba[0];
destination[i * 4 + 1] = rgba[1];
destination[i * 4 + 2] = rgba[2];
destination[i * 4 + 3] = rgba[3];
}
imageData = new ImageData(destination, this.width, this.height);
this.containers.push(imageData);
} else if (
this.hasType(MPImageStorageType.IMAGE_BITMAP) ||
this.hasType(MPImageStorageType.WEBGL_TEXTURE)) {
const gl = this.getGL();
const shaderContext = this.getShaderContext();
const pixels = new Uint8Array(this.width * this.height * 4);
// Create texture if needed
const webGLTexture = this.convertToWebGLTexture();
// Create texture if needed
const webGlTexture = this.convertToWebGLTexture(converter);
// Create a framebuffer from the texture and read back pixels
shaderContext.bindFramebuffer(gl, webGLTexture);
gl.readPixels(
0, 0, this.width, this.height, gl.RGBA, gl.UNSIGNED_BYTE, pixels);
shaderContext.unbindFramebuffer();
// Create a framebuffer from the texture and read back pixels
shaderContext.bindFramebuffer(gl, webGlTexture);
gl.readPixels(
0, 0, this.width, this.height, gl.RGBA, gl.UNSIGNED_BYTE, pixels);
shaderContext.unbindFramebuffer();
imageData = new ImageData(
new Uint8ClampedArray(pixels.buffer), this.width, this.height);
this.containers.push(imageData);
imageData = new ImageData(
new Uint8ClampedArray(pixels.buffer), this.width, this.height);
this.containers.push(imageData);
} else {
// The apostrophe must be escaped as \' ; the previous `\t` produced a TAB
// character in the middle of the message.
throw new Error('Couldn\'t find backing image for ImageData conversion');
}
}
return imageData;
}
private convertToWebGLTexture(): WebGLTexture {
/**
 * Returns the image as a single-channel `Uint8ClampedArray`. An existing
 * container is returned as-is; otherwise one is derived from the
 * `Float32Array` container if present, or else from the RGBA `ImageData`,
 * and added to the image's containers.
 *
 * @param converter The conversion functions applied to each pixel.
 * @return The image as a single-channel Uint8ClampedArray.
 */
private convertToUint8ClampedArray(
    converter: Required<MPImageChannelConverter>): Uint8ClampedArray {
  const existing = this.getContainer(MPImageStorageType.UINT8_CLAMPED_ARRAY);
  if (existing) {
    return existing;
  }

  let converted: Uint8ClampedArray;
  const floats = this.getContainer(MPImageStorageType.FLOAT32_ARRAY);
  if (floats) {
    // Converted values pass through a Float32Array before being clamped.
    const scaled = Float32Array.from(
        floats, value => converter.floatToUint8Converter(value));
    converted = new Uint8ClampedArray(scaled);
  } else {
    const rgba = this.convertToImageData(converter).data;
    const pixelCount = this.width * this.height;
    converted = new Uint8ClampedArray(pixelCount);
    for (let pixel = 0, offset = 0; pixel < pixelCount; pixel++, offset += 4) {
      converted[pixel] = converter.rgbaToUint8Converter(
          rgba[offset], rgba[offset + 1], rgba[offset + 2], rgba[offset + 3]);
    }
  }

  this.containers.push(converted);
  return converted;
}
/**
 * Returns the image as a single-channel `Float32Array`. An existing container
 * is returned as-is; otherwise one is derived from the `Uint8ClampedArray`
 * container if present, or else from the RGBA `ImageData`, and added to the
 * image's containers.
 *
 * @param converter The conversion functions applied to each pixel.
 * @return The image as a single-channel Float32Array.
 */
private convertToFloat32Array(converter: Required<MPImageChannelConverter>):
    Float32Array {
  const existing = this.getContainer(MPImageStorageType.FLOAT32_ARRAY);
  if (existing) {
    return existing;
  }

  let converted: Float32Array;
  const bytes = this.getContainer(MPImageStorageType.UINT8_CLAMPED_ARRAY);
  if (bytes) {
    converted = Float32Array.from(
        bytes, value => converter.uint8ToFloatConverter(value));
  } else {
    const rgba = this.convertToImageData(converter).data;
    const pixelCount = this.width * this.height;
    converted = new Float32Array(pixelCount);
    for (let pixel = 0, offset = 0; pixel < pixelCount; pixel++, offset += 4) {
      converted[pixel] = converter.rgbaToFloatConverter(
          rgba[offset], rgba[offset + 1], rgba[offset + 2], rgba[offset + 3]);
    }
  }

  this.containers.push(converted);
  return converted;
}
private convertToWebGLTexture(converter: Required<MPImageChannelConverter>):
WebGLTexture {
let webGLTexture = this.getContainer(MPImageStorageType.WEBGL_TEXTURE);
if (!webGLTexture) {
const gl = this.getGL();
webGLTexture = this.bindTexture();
const source = this.getContainer(MPImageStorageType.IMAGE_BITMAP) ||
this.convertToImageData();
this.convertToImageData(converter);
gl.texImage2D(
gl.TEXTURE_2D, 0, gl.RGBA, gl.RGBA, gl.UNSIGNED_BYTE, source);
this.unbindTexture();