From 47e217896c53e47d420830daaa38449afa247a8b Mon Sep 17 00:00:00 2001 From: Sebastian Schmidt Date: Wed, 15 Nov 2023 06:10:05 -0800 Subject: [PATCH] Add drawConfidenceMask() to our public API PiperOrigin-RevId: 582647409 --- mediapipe/tasks/web/vision/core/BUILD | 6 +- .../web/vision/core/drawing_utils.test.ts | 94 +++++++++++++ .../tasks/web/vision/core/drawing_utils.ts | 75 +++++++++++ .../core/drawing_utils_confidence_mask.ts | 125 ++++++++++++++++++ mediapipe/tasks/web/vision/core/image.ts | 31 +---- mediapipe/tasks/web/vision/core/mask.ts | 31 ++--- .../tasks/web/vision/core/render_utils.ts | 31 ----- 7 files changed, 309 insertions(+), 84 deletions(-) create mode 100644 mediapipe/tasks/web/vision/core/drawing_utils_confidence_mask.ts delete mode 100644 mediapipe/tasks/web/vision/core/render_utils.ts diff --git a/mediapipe/tasks/web/vision/core/BUILD b/mediapipe/tasks/web/vision/core/BUILD index 31bad937d..db9c27e0f 100644 --- a/mediapipe/tasks/web/vision/core/BUILD +++ b/mediapipe/tasks/web/vision/core/BUILD @@ -34,6 +34,7 @@ mediapipe_ts_library( srcs = [ "drawing_utils.ts", "drawing_utils_category_mask.ts", + "drawing_utils_confidence_mask.ts", ], deps = [ ":image", @@ -149,11 +150,6 @@ mediapipe_ts_library( ], ) -mediapipe_ts_library( - name = "render_utils", - srcs = ["render_utils.ts"], -) - jasmine_node_test( name = "vision_task_runner_test", deps = [":vision_task_runner_test_lib"], diff --git a/mediapipe/tasks/web/vision/core/drawing_utils.test.ts b/mediapipe/tasks/web/vision/core/drawing_utils.test.ts index b5ba8e9a4..aaef42bbf 100644 --- a/mediapipe/tasks/web/vision/core/drawing_utils.test.ts +++ b/mediapipe/tasks/web/vision/core/drawing_utils.test.ts @@ -59,6 +59,100 @@ if (skip) { drawingUtilsWebGL.close(); }); + describe( + 'drawConfidenceMask() blends background with foreground color', () => { + const foreground = new ImageData( + new Uint8ClampedArray( + [0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255]), + WIDTH, HEIGHT); + const background = [255, 255, 255, 255]; + const expectedResult = new Uint8Array([ + 255, 255, 255, 255, 178, 178, 178, 255, 102, 102, 102, 255, 0, 0, 0, + 255 + ]); + + it('on 2D canvas', () => { + const confidenceMask = new MPMask( + [new Float32Array([0.0, 0.3, 0.6, 1.0])], + /* ownsWebGLTexture= */ false, canvas2D, shaderContext, WIDTH, + HEIGHT); + + drawingUtils2D.drawConfidenceMask( + confidenceMask, background, foreground); + + const actualResult = context2D.getImageData(0, 0, WIDTH, HEIGHT).data; + expect(actualResult) + .toEqual(new Uint8ClampedArray(expectedResult.buffer)); + }); + + it('on WebGL canvas', () => { + const confidenceMask = new MPMask( + [new Float32Array( + [0.6, 1.0, 0.0, 0.3])], // Note: Vertically flipped + /* ownsWebGLTexture= */ false, canvasWebGL, shaderContext, WIDTH, + HEIGHT); + + drawingUtilsWebGL.drawConfidenceMask( + confidenceMask, background, foreground); + + const actualResult = new Uint8Array(WIDTH * HEIGHT * 4); + contextWebGL.readPixels( + 0, 0, WIDTH, HEIGHT, contextWebGL.RGBA, + contextWebGL.UNSIGNED_BYTE, actualResult); + expect(actualResult).toEqual(expectedResult); + }); + }); + + + describe( + 'drawConfidenceMask() blends background with foreground image', () => { + const foreground = new ImageData( + new Uint8ClampedArray( + [0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255]), + WIDTH, HEIGHT); + const background = new ImageData( + new Uint8ClampedArray([ + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255 + ]), + WIDTH, HEIGHT); + const expectedResult = new Uint8Array([ + 255, 255, 255, 255, 178, 178, 178, 255, 102, 102, 102, 255, 0, 0, 0, + 255 + ]); + + it('on 2D canvas', () => { + const confidenceMask = new MPMask( + [new Float32Array([0.0, 0.3, 0.6, 1.0])], + /* ownsWebGLTexture= */ false, canvas2D, shaderContext, WIDTH, + HEIGHT); + + drawingUtils2D.drawConfidenceMask( + confidenceMask, background, foreground); + + const actualResult = context2D.getImageData(0, 0, WIDTH, HEIGHT).data; + expect(actualResult) + .toEqual(new Uint8ClampedArray(expectedResult.buffer)); + }); + + it('on WebGL canvas', () => { + const confidenceMask = new MPMask( + [new Float32Array( + [0.6, 1.0, 0.0, 0.3])], // Note: Vertically flipped + /* ownsWebGLTexture= */ false, canvasWebGL, shaderContext, WIDTH, + HEIGHT); + + drawingUtilsWebGL.drawConfidenceMask( + confidenceMask, background, foreground); + + const actualResult = new Uint8Array(WIDTH * HEIGHT * 4); + contextWebGL.readPixels( + 0, 0, WIDTH, HEIGHT, contextWebGL.RGBA, + contextWebGL.UNSIGNED_BYTE, actualResult); + expect(actualResult).toEqual(expectedResult); + }); + }); + describe('drawCategoryMask() ', () => { const colors = [ [0, 0, 0, 255], diff --git a/mediapipe/tasks/web/vision/core/drawing_utils.ts b/mediapipe/tasks/web/vision/core/drawing_utils.ts index 796d7dcb6..154420f6b 100644 --- a/mediapipe/tasks/web/vision/core/drawing_utils.ts +++ b/mediapipe/tasks/web/vision/core/drawing_utils.ts @@ -17,6 +17,7 @@ import {BoundingBox} from '../../../../tasks/web/components/containers/bounding_box'; import {NormalizedLandmark} from '../../../../tasks/web/components/containers/landmark'; import {CategoryMaskShaderContext, CategoryToColorMap, RGBAColor} from '../../../../tasks/web/vision/core/drawing_utils_category_mask'; +import {ConfidenceMaskShaderContext} from '../../../../tasks/web/vision/core/drawing_utils_confidence_mask'; import {MPImageShaderContext} from '../../../../tasks/web/vision/core/image_shader_context'; import {MPMask} from '../../../../tasks/web/vision/core/mask'; import {Connection} from '../../../../tasks/web/vision/core/types'; @@ -115,6 +116,7 @@ export {RGBAColor, CategoryToColorMap}; /** Helper class to visualize the result of a MediaPipe Vision task. */ export class DrawingUtils { private categoryMaskShaderContext?: CategoryMaskShaderContext; + private confidenceMaskShaderContext?: ConfidenceMaskShaderContext; private convertToWebGLTextureShaderContext?: MPImageShaderContext; private readonly context2d?: CanvasRenderingContext2D| OffscreenCanvasRenderingContext2D; @@ -213,6 +215,13 @@ export class DrawingUtils { return this.categoryMaskShaderContext; } + private getConfidenceMaskShaderContext(): ConfidenceMaskShaderContext { + if (!this.confidenceMaskShaderContext) { + this.confidenceMaskShaderContext = new ConfidenceMaskShaderContext(); + } + return this.confidenceMaskShaderContext; + } + /** * Draws circles onto the provided landmarks. * @@ -422,6 +431,70 @@ export class DrawingUtils { callback(mask.getAsWebGLTexture()); } } + + /** Draws a confidence mask on a WebGL2RenderingContext2D. */ + private drawConfidenceMaskWebGL( + maskTexture: WebGLTexture, defaultTexture: RGBAColor|ImageSource, + overlayTexture: RGBAColor|ImageSource): void { + const gl = this.getWebGLRenderingContext(); + const shaderContext = this.getConfidenceMaskShaderContext(); + const defaultImage = Array.isArray(defaultTexture) ? + new ImageData(new Uint8ClampedArray(defaultTexture), 1, 1) : + defaultTexture; + const overlayImage = Array.isArray(overlayTexture) ? + new ImageData(new Uint8ClampedArray(overlayTexture), 1, 1) : + overlayTexture; + + shaderContext.run(gl, /* flipTexturesVertically= */ true, () => { + shaderContext.bindAndUploadTextures( + defaultImage, overlayImage, maskTexture); + gl.clearColor(0, 0, 0, 0); + gl.clear(gl.COLOR_BUFFER_BIT); + gl.drawArrays(gl.TRIANGLE_FAN, 0, 4); + gl.bindTexture(gl.TEXTURE_2D, null); + shaderContext.unbindTextures(); + }); + } + + /** Draws a confidence mask on a CanvasRenderingContext2D. */ + private drawConfidenceMask2D( + mask: MPMask, defaultTexture: RGBAColor|ImageSource, + overlayTexture: RGBAColor|ImageSource): void { + // Use the WebGL renderer to draw result on our internal canvas. + const gl = this.getWebGLRenderingContext(); + this.runWithWebGLTexture(mask, texture => { + this.drawConfidenceMaskWebGL(texture, defaultTexture, overlayTexture); + // Draw the result on the user canvas. + const ctx = this.getCanvasRenderingContext(); + ctx.drawImage(gl.canvas, 0, 0, ctx.canvas.width, ctx.canvas.height); + }); + } + + /** + * Blends two images using the provided confidence mask. + * + * If you are using an `ImageData` or `HTMLImageElement` as your data source + * and drawing the result onto a `WebGL2RenderingContext`, this method uploads + * the image data to the GPU. For still image input that gets re-used every + * frame, you can reduce the cost of re-uploading these images by passing a + * `HTMLCanvasElement` instead. + * + * @param mask A confidence mask that was returned from a segmentation task. + * @param defaultTexture An image or a four-channel color that will be used + * when confidence values are low. + * @param overlayTexture An image or four-channel color that will be used when + * confidence values are high. + */ + drawConfidenceMask( + mask: MPMask, defaultTexture: RGBAColor|ImageSource, + overlayTexture: RGBAColor|ImageSource): void { + if (this.context2d) { + this.drawConfidenceMask2D(mask, defaultTexture, overlayTexture); + } else { + this.drawConfidenceMaskWebGL( + mask.getAsWebGLTexture(), defaultTexture, overlayTexture); + } + } /** * Frees all WebGL resources held by this class. * @export @@ -429,6 +502,8 @@ export class DrawingUtils { close(): void { this.categoryMaskShaderContext?.close(); this.categoryMaskShaderContext = undefined; + this.confidenceMaskShaderContext?.close(); + this.confidenceMaskShaderContext = undefined; this.convertToWebGLTextureShaderContext?.close(); this.convertToWebGLTextureShaderContext = undefined; } diff --git a/mediapipe/tasks/web/vision/core/drawing_utils_confidence_mask.ts b/mediapipe/tasks/web/vision/core/drawing_utils_confidence_mask.ts new file mode 100644 index 000000000..c8d30c9ee --- /dev/null +++ b/mediapipe/tasks/web/vision/core/drawing_utils_confidence_mask.ts @@ -0,0 +1,125 @@ +/** + * Copyright 2023 The MediaPipe Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import {assertNotNull, MPImageShaderContext} from '../../../../tasks/web/vision/core/image_shader_context'; +import {ImageSource} from '../../../../web/graph_runner/graph_runner'; + +/** + * A fragment shader that blends a default image and overlay texture based on an + * input texture that contains confidence values. + */ +const FRAGMENT_SHADER = ` + precision mediump float; + uniform sampler2D maskTexture; + uniform sampler2D defaultTexture; + uniform sampler2D overlayTexture; + varying vec2 vTex; + void main() { + float confidence = texture2D(maskTexture, vTex).r; + vec4 defaultColor = texture2D(defaultTexture, vTex); + vec4 overlayColor = texture2D(overlayTexture, vTex); + // Apply the alpha from the overlay and merge in the default color + overlayColor = mix(defaultColor, overlayColor, overlayColor.a); + gl_FragColor = mix(defaultColor, overlayColor, confidence); + } + `; + +/** A drawing util class for confidence masks. */ +export class ConfidenceMaskShaderContext extends MPImageShaderContext { + defaultTexture?: WebGLTexture; + overlayTexture?: WebGLTexture; + defaultTextureUniform?: WebGLUniformLocation; + overlayTextureUniform?: WebGLUniformLocation; + maskTextureUniform?: WebGLUniformLocation; + + protected override getFragmentShader(): string { + return FRAGMENT_SHADER; + } + + protected override setupTextures(): void { + const gl = this.gl!; + gl.activeTexture(gl.TEXTURE0); + this.defaultTexture = this.createTexture(gl); + gl.activeTexture(gl.TEXTURE1); + this.overlayTexture = this.createTexture(gl); + } + + protected override setupShaders(): void { + super.setupShaders(); + const gl = this.gl!; + this.defaultTextureUniform = assertNotNull( + gl.getUniformLocation(this.program!, 'defaultTexture'), + 'Uniform location'); + this.overlayTextureUniform = assertNotNull( + gl.getUniformLocation(this.program!, 'overlayTexture'), + 'Uniform location'); + this.maskTextureUniform = assertNotNull( + gl.getUniformLocation(this.program!, 'maskTexture'), + 'Uniform location'); + } + + protected override configureUniforms(): void { + super.configureUniforms(); + const gl = this.gl!; + gl.uniform1i(this.defaultTextureUniform!, 0); + gl.uniform1i(this.overlayTextureUniform!, 1); + gl.uniform1i(this.maskTextureUniform!, 2); + } + + bindAndUploadTextures( + defaultImage: ImageSource, overlayImage: ImageSource, + confidenceMask: WebGLTexture) { + // TODO: We should avoid uploading textures from CPU to GPU + // if the textures haven't changed. This can lead to drastic performance + // slowdowns (~50ms per frame). Users can reduce the penalty by passing a + // canvas object instead of ImageData/HTMLImageElement. + const gl = this.gl!; + gl.activeTexture(gl.TEXTURE0); + gl.bindTexture(gl.TEXTURE_2D, this.defaultTexture!); + gl.texImage2D( + gl.TEXTURE_2D, 0, gl.RGBA, gl.RGBA, gl.UNSIGNED_BYTE, defaultImage); + + gl.activeTexture(gl.TEXTURE1); + gl.bindTexture(gl.TEXTURE_2D, this.overlayTexture!); + gl.texImage2D( + gl.TEXTURE_2D, 0, gl.RGBA, gl.RGBA, gl.UNSIGNED_BYTE, overlayImage); + + gl.activeTexture(gl.TEXTURE2); + gl.bindTexture(gl.TEXTURE_2D, confidenceMask); + } + + unbindTextures() { + const gl = this.gl!; + gl.activeTexture(gl.TEXTURE0); + gl.bindTexture(gl.TEXTURE_2D, null); + + gl.activeTexture(gl.TEXTURE1); + gl.bindTexture(gl.TEXTURE_2D, null); + + gl.activeTexture(gl.TEXTURE2); + gl.bindTexture(gl.TEXTURE_2D, null); + } + + override close(): void { + if (this.defaultTexture) { + this.gl!.deleteTexture(this.defaultTexture); + } + if (this.overlayTexture) { + this.gl!.deleteTexture(this.overlayTexture); + } + super.close(); + } +} diff --git a/mediapipe/tasks/web/vision/core/image.ts b/mediapipe/tasks/web/vision/core/image.ts index 570d32318..bb88c0ee1 100644 --- a/mediapipe/tasks/web/vision/core/image.ts +++ b/mediapipe/tasks/web/vision/core/image.ts @@ -198,10 +198,8 @@ export class MPImage { // Create a new texture and use it to back a framebuffer gl.activeTexture(gl.TEXTURE1); - destinationContainer = - assertNotNull(gl.createTexture(), 'Failed to create texture'); + destinationContainer = shaderContext.createTexture(gl); gl.bindTexture(gl.TEXTURE_2D, destinationContainer); - this.configureTextureParams(); gl.texImage2D( gl.TEXTURE_2D, 0, gl.RGBA, this.width, this.height, 0, gl.RGBA, gl.UNSIGNED_BYTE, null); @@ -252,7 +250,7 @@ export class MPImage { } if (!this.gl) { this.gl = assertNotNull( - this.canvas.getContext('webgl2') as WebGL2RenderingContext | null, + this.canvas.getContext('webgl2'), 'You cannot use a canvas that is already bound to a different ' + 'type of rendering context.'); } @@ -317,20 +315,6 @@ export class MPImage { return webGLTexture; } - /** Sets texture params for the currently bound texture. */ - private configureTextureParams() { - const gl = this.getGL(); - // `gl.LINEAR` might break rendering for some textures, but it allows us to - // do smooth resizing. Ideally, this would be user-configurable, but for now - // we hard-code the value here to `gl.LINEAR` (versus `gl.NEAREST` for - // `MPMask` where we do not want to interpolate mask values, especially for - // category masks). - gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_S, gl.CLAMP_TO_EDGE); - gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_T, gl.CLAMP_TO_EDGE); - gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MIN_FILTER, gl.LINEAR); - gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MAG_FILTER, gl.LINEAR); - } - /** * Binds the backing texture to the canvas. If the texture does not yet * exist, creates it first. @@ -343,16 +327,13 @@ export class MPImage { let webGLTexture = this.getContainer(MPImageType.WEBGL_TEXTURE); if (!webGLTexture) { - webGLTexture = - assertNotNull(gl.createTexture(), 'Failed to create texture'); + const shaderContext = this.getShaderContext(); + webGLTexture = shaderContext.createTexture(gl); this.containers.push(webGLTexture); this.ownsWebGLTexture = true; - - gl.bindTexture(gl.TEXTURE_2D, webGLTexture); - this.configureTextureParams(); - } else { - gl.bindTexture(gl.TEXTURE_2D, webGLTexture); } + + gl.bindTexture(gl.TEXTURE_2D, webGLTexture); return webGLTexture; } diff --git a/mediapipe/tasks/web/vision/core/mask.ts b/mediapipe/tasks/web/vision/core/mask.ts index 6ef852508..b463589e4 100644 --- a/mediapipe/tasks/web/vision/core/mask.ts +++ b/mediapipe/tasks/web/vision/core/mask.ts @@ -215,10 +215,8 @@ export class MPMask { // Create a new texture and use it to back a framebuffer gl.activeTexture(gl.TEXTURE1); - destinationContainer = - assertNotNull(gl.createTexture(), 'Failed to create texture'); + destinationContainer = shaderContext.createTexture(gl, gl.NEAREST); gl.bindTexture(gl.TEXTURE_2D, destinationContainer); - this.configureTextureParams(); const format = this.getTexImage2DFormat(); gl.texImage2D( gl.TEXTURE_2D, 0, format, this.width, this.height, 0, gl.RED, @@ -339,19 +337,6 @@ export class MPMask { return webGLTexture; } - /** Sets texture params for the currently bound texture. */ - private configureTextureParams() { - const gl = this.getGL(); - // `gl.NEAREST` ensures that we do not get interpolated values for - // masks. In some cases, the user might want interpolation (e.g. for - // confidence masks), so we might want to make this user-configurable. - // Note that `MPImage` uses `gl.LINEAR`. - gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_S, gl.CLAMP_TO_EDGE); - gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_T, gl.CLAMP_TO_EDGE); - gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MIN_FILTER, gl.NEAREST); - gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MAG_FILTER, gl.NEAREST); - } - /** * Binds the backing texture to the canvas. If the texture does not yet * exist, creates it first. @@ -364,17 +349,17 @@ export class MPMask { let webGLTexture = this.getContainer(MPMaskType.WEBGL_TEXTURE); if (!webGLTexture) { - webGLTexture = - assertNotNull(gl.createTexture(), 'Failed to create texture'); + const shaderContext = this.getShaderContext(); + // `gl.NEAREST` ensures that we do not get interpolated values for + // masks. In some cases, the user might want interpolation (e.g. for + // confidence masks), so we might want to make this user-configurable. + // Note that `MPImage` uses `gl.LINEAR`. + webGLTexture = shaderContext.createTexture(gl, gl.NEAREST); this.containers.push(webGLTexture); this.ownsWebGLTexture = true; - - gl.bindTexture(gl.TEXTURE_2D, webGLTexture); - this.configureTextureParams(); - } else { - gl.bindTexture(gl.TEXTURE_2D, webGLTexture); } + gl.bindTexture(gl.TEXTURE_2D, webGLTexture); return webGLTexture; } diff --git a/mediapipe/tasks/web/vision/core/render_utils.ts b/mediapipe/tasks/web/vision/core/render_utils.ts deleted file mode 100644 index 3ee981bab..000000000 --- a/mediapipe/tasks/web/vision/core/render_utils.ts +++ /dev/null @@ -1,31 +0,0 @@ -/** @fileoverview Utility functions used in the vision demos. */ - -/** - * Copyright 2023 The MediaPipe Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** Helper function to draw a confidence mask */ -export function drawConfidenceMask( - ctx: CanvasRenderingContext2D, image: Float32Array, width: number, - height: number): void { - const uint8Array = new Uint8ClampedArray(width * height * 4); - for (let i = 0; i < image.length; i++) { - uint8Array[4 * i] = 128; - uint8Array[4 * i + 1] = 0; - uint8Array[4 * i + 2] = 0; - uint8Array[4 * i + 3] = image[i] * 255; - } - ctx.putImageData(new ImageData(uint8Array, width, height), 0, 0); -}