Create non-callback APIs for APIs that return callbacks.

PiperOrigin-RevId: 529799515
This commit is contained in:
Sebastian Schmidt 2023-05-05 13:26:29 -07:00 committed by Copybara-Service
parent ecc8dca8ba
commit f065910559
10 changed files with 465 additions and 132 deletions

View File

@ -273,9 +273,13 @@ export class MPImage {
case MPImageType.IMAGE_DATA:
return this.containers.find(img => img instanceof ImageData);
case MPImageType.IMAGE_BITMAP:
return this.containers.find(img => img instanceof ImageBitmap);
return this.containers.find(
img => typeof ImageBitmap !== 'undefined' &&
img instanceof ImageBitmap);
case MPImageType.WEBGL_TEXTURE:
return this.containers.find(img => img instanceof WebGLTexture);
return this.containers.find(
img => typeof WebGLTexture !== 'undefined' &&
img instanceof WebGLTexture);
default:
throw new Error(`Type is not supported: ${type}`);
}

View File

@ -230,7 +230,8 @@ export abstract class VisionTaskRunner extends TaskRunner {
* (adding an alpha channel if necessary), passes through WebGLTextures and
* throws for Float32Array-backed images.
*/
protected convertToMPImage(wasmImage: WasmImage): MPImage {
protected convertToMPImage(wasmImage: WasmImage, shouldCopyData: boolean):
MPImage {
const {data, width, height} = wasmImage;
const pixels = width * height;
@ -263,10 +264,11 @@ export abstract class VisionTaskRunner extends TaskRunner {
container = data;
}
return new MPImage(
[container], /* ownsImageBitmap= */ false, /* ownsWebGLTexture= */ false,
this.graphRunner.wasmModule.canvas!, this.shaderContext, width,
height);
const image = new MPImage(
[container], /* ownsImageBitmap= */ false,
/* ownsWebGLTexture= */ false, this.graphRunner.wasmModule.canvas!,
this.shaderContext, width, height);
return shouldCopyData ? image.clone() : image;
}
/** Closes and cleans up the resources held by this task. */

View File

@ -50,7 +50,8 @@ export type FaceStylizerCallback = (image: MPImage|null) => void;
/** Performs face stylization on images. */
export class FaceStylizer extends VisionTaskRunner {
private userCallback: FaceStylizerCallback = () => {};
private userCallback?: FaceStylizerCallback;
private result?: MPImage|null;
private readonly options: FaceStylizerGraphOptionsProto;
/**
@ -130,21 +131,58 @@ export class FaceStylizer extends VisionTaskRunner {
return super.applyOptions(options);
}
/**
* Performs face stylization on the provided single image. The method returns
* synchronously once the callback returns. Only use this method when the
* FaceStylizer is created with the image running mode.
* Performs face stylization on the provided single image and invokes the
* callback with result. The method returns synchronously once the callback
* returns. Only use this method when the FaceStylizer is created with the
* image running mode.
*
* @param image An image to process.
* @param callback The callback that is invoked with the stylized image. The
* lifetime of the returned data is only guaranteed for the duration of the
* callback.
* @param callback The callback that is invoked with the stylized image or
* `null` if no face was detected. The lifetime of the returned data is
* only guaranteed for the duration of the callback.
*/
stylize(image: ImageSource, callback: FaceStylizerCallback): void;
/**
* Performs face stylization on the provided single image. The method returns
* synchronously once the callback returns. Only use this method when the
* Performs face stylization on the provided single image and invokes the
* callback with result. The method returns synchronously once the callback
* returns. Only use this method when the FaceStylizer is created with the
* image running mode.
*
* The 'imageProcessingOptions' parameter can be used to specify one or all
* of:
* - the rotation to apply to the image before performing stylization, by
* setting its 'rotationDegrees' property.
* - the region-of-interest on which to perform stylization, by setting its
* 'regionOfInterest' property. If not specified, the full image is used.
* If both are specified, the crop around the region-of-interest is extracted
* first, then the specified rotation is applied to the crop.
*
* @param image An image to process.
* @param imageProcessingOptions the `ImageProcessingOptions` specifying how
* to process the input image before running inference.
* @param callback The callback that is invoked with the stylized image or
* `null` if no face was detected. The lifetime of the returned data is
* only guaranteed for the duration of the callback.
*/
stylize(
image: ImageSource, imageProcessingOptions: ImageProcessingOptions,
callback: FaceStylizerCallback): void;
/**
* Performs face stylization on the provided single image and returns the
* result. This method creates a copy of the resulting image and should not be
* used in high-throughput applications. Only use this method when the
* FaceStylizer is created with the image running mode.
*
* @param image An image to process.
* @return A stylized face or `null` if no face was detected. The result is
* copied to avoid lifetime issues.
*/
stylize(image: ImageSource): MPImage|null;
/**
* Performs face stylization on the provided single image and returns the
* result. This method creates a copy of the resulting image and should not be
* used in high-throughput applications. Only use this method when the
* FaceStylizer is created with the image running mode.
*
* The 'imageProcessingOptions' parameter can be used to specify one or all
@ -159,18 +197,16 @@ export class FaceStylizer extends VisionTaskRunner {
* @param image An image to process.
* @param imageProcessingOptions the `ImageProcessingOptions` specifying how
* to process the input image before running inference.
* @param callback The callback that is invoked with the stylized image. The
* lifetime of the returned data is only guaranteed for the duration of the
* callback.
* @return A stylized face or `null` if no face was detected. The result is
* copied to avoid lifetime issues.
*/
stylize(
image: ImageSource, imageProcessingOptions: ImageProcessingOptions,
callback: FaceStylizerCallback): void;
stylize(image: ImageSource, imageProcessingOptions: ImageProcessingOptions):
MPImage|null;
stylize(
image: ImageSource,
imageProcessingOptionsOrCallback: ImageProcessingOptions|
imageProcessingOptionsOrCallback?: ImageProcessingOptions|
FaceStylizerCallback,
callback?: FaceStylizerCallback): void {
callback?: FaceStylizerCallback): MPImage|null|void {
const imageProcessingOptions =
typeof imageProcessingOptionsOrCallback !== 'function' ?
imageProcessingOptionsOrCallback :
@ -178,14 +214,19 @@ export class FaceStylizer extends VisionTaskRunner {
this.userCallback = typeof imageProcessingOptionsOrCallback === 'function' ?
imageProcessingOptionsOrCallback :
callback!;
callback;
this.processImageData(image, imageProcessingOptions ?? {});
this.userCallback = () => {};
if (!this.userCallback) {
return this.result;
}
}
/**
* Performs face stylization on the provided video frame. Only use this method
* when the FaceStylizer is created with the video running mode.
* Performs face stylization on the provided video frame and invokes the
* callback with result. The method returns synchronously once the callback
* returns. Only use this method when the FaceStylizer is created with the
* video running mode.
*
* The input frame can be of any size. It's required to provide the video
* frame's timestamp (in milliseconds). The input timestamps must be
@ -193,16 +234,18 @@ export class FaceStylizer extends VisionTaskRunner {
*
* @param videoFrame A video frame to process.
* @param timestamp The timestamp of the current frame, in ms.
* @param callback The callback that is invoked with the stylized image. The
* lifetime of the returned data is only guaranteed for the duration of
* the callback.
* @param callback The callback that is invoked with the stylized image or
* `null` if no face was detected. The lifetime of the returned data is only
* guaranteed for the duration of the callback.
*/
stylizeForVideo(
videoFrame: ImageSource, timestamp: number,
callback: FaceStylizerCallback): void;
/**
* Performs face stylization on the provided video frame. Only use this
* method when the FaceStylizer is created with the video running mode.
* Performs face stylization on the provided video frame and invokes the
* callback with result. The method returns synchronously once the callback
* returns. Only use this method when the FaceStylizer is created with the
* video running mode.
*
* The 'imageProcessingOptions' parameter can be used to specify one or all
* of:
@ -221,18 +264,63 @@ export class FaceStylizer extends VisionTaskRunner {
* @param imageProcessingOptions the `ImageProcessingOptions` specifying how
* to process the input image before running inference.
* @param timestamp The timestamp of the current frame, in ms.
* @param callback The callback that is invoked with the stylized image. The
* lifetime of the returned data is only guaranteed for the duration of
* the callback.
* @param callback The callback that is invoked with the stylized image or
* `null` if no face was detected. The lifetime of the returned data is only
* guaranteed for the duration of the callback.
*/
stylizeForVideo(
videoFrame: ImageSource, imageProcessingOptions: ImageProcessingOptions,
timestamp: number, callback: FaceStylizerCallback): void;
/**
* Performs face stylization on the provided video frame. This method creates
* a copy of the resulting image and should not be used in high-throughput
* applications. Only use this method when the FaceStylizer is created with the
* video running mode.
*
* The input frame can be of any size. It's required to provide the video
* frame's timestamp (in milliseconds). The input timestamps must be
* monotonically increasing.
*
* @param videoFrame A video frame to process.
* @param timestamp The timestamp of the current frame, in ms.
* @return A stylized face or `null` if no face was detected. The result is
* copied to avoid lifetime issues.
*/
stylizeForVideo(videoFrame: ImageSource, timestamp: number): MPImage|null;
/**
* Performs face stylization on the provided video frame. This method creates
* a copy of the resulting image and should not be used in high-throughput
* applications. Only use this method when the FaceStylizer is created with the
* video running mode.
*
* The 'imageProcessingOptions' parameter can be used to specify one or all
* of:
* - the rotation to apply to the image before performing stylization, by
* setting its 'rotationDegrees' property.
* - the region-of-interest on which to perform stylization, by setting its
* 'regionOfInterest' property. If not specified, the full image is used.
* If both are specified, the crop around the region-of-interest is
* extracted first, then the specified rotation is applied to the crop.
*
* The input frame can be of any size. It's required to provide the video
* frame's timestamp (in milliseconds). The input timestamps must be
* monotonically increasing.
*
* @param videoFrame A video frame to process.
* @param imageProcessingOptions the `ImageProcessingOptions` specifying how
* to process the input image before running inference.
* @param timestamp The timestamp of the current frame, in ms.
* @return A stylized face or `null` if no face was detected. The result is
* copied to avoid lifetime issues.
*/
stylizeForVideo(
videoFrame: ImageSource, imageProcessingOptions: ImageProcessingOptions,
timestamp: number): MPImage|null;
stylizeForVideo(
videoFrame: ImageSource,
timestampOrImageProcessingOptions: number|ImageProcessingOptions,
timestampOrCallback: number|FaceStylizerCallback,
callback?: FaceStylizerCallback): void {
timestampOrCallback?: number|FaceStylizerCallback,
callback?: FaceStylizerCallback): MPImage|null|void {
const imageProcessingOptions =
typeof timestampOrImageProcessingOptions !== 'number' ?
timestampOrImageProcessingOptions :
@ -243,9 +331,13 @@ export class FaceStylizer extends VisionTaskRunner {
this.userCallback = typeof timestampOrCallback === 'function' ?
timestampOrCallback :
callback!;
callback;
this.processVideoData(videoFrame, imageProcessingOptions, timestamp);
this.userCallback = () => {};
this.userCallback = undefined;
if (!this.userCallback) {
return this.result;
}
}
/** Updates the MediaPipe graph configuration. */
@ -270,13 +362,20 @@ export class FaceStylizer extends VisionTaskRunner {
this.graphRunner.attachImageListener(
STYLIZED_IMAGE_STREAM, (wasmImage, timestamp) => {
const mpImage = this.convertToMPImage(wasmImage);
const mpImage = this.convertToMPImage(
wasmImage, /* shouldCopyData= */ !this.userCallback);
this.result = mpImage;
if (this.userCallback) {
this.userCallback(mpImage);
}
this.setLatestOutputTimestamp(timestamp);
});
this.graphRunner.attachEmptyPacketListener(
STYLIZED_IMAGE_STREAM, timestamp => {
this.result = null;
if (this.userCallback) {
this.userCallback(null);
}
this.setLatestOutputTimestamp(timestamp);
});

View File

@ -99,6 +99,30 @@ describe('FaceStylizer', () => {
]);
});
it('returns result', () => {
if (typeof ImageData === 'undefined') {
console.log('ImageData tests are not supported on Node');
return;
}
// Pass the test data to our listener
faceStylizer.fakeWasmModule._waitUntilIdle.and.callFake(() => {
verifyListenersRegistered(faceStylizer);
faceStylizer.imageListener!
({data: new Uint8ClampedArray([1, 1, 1, 1]), width: 1, height: 1},
/* timestamp= */ 1337);
});
// Invoke the face stylizer
const image = faceStylizer.stylize({} as HTMLImageElement);
expect(faceStylizer.fakeWasmModule._waitUntilIdle).toHaveBeenCalled();
expect(image).not.toBeNull();
expect(image!.has(MPImage.TYPE.IMAGE_DATA)).toBeTrue();
expect(image!.width).toEqual(1);
expect(image!.height).toEqual(1);
image!.close();
});
it('invokes callback', (done) => {
if (typeof ImageData === 'undefined') {
console.log('ImageData tests are not supported on Node');
@ -125,28 +149,7 @@ describe('FaceStylizer', () => {
});
});
it('invokes callback even when no faes are detected', (done) => {
if (typeof ImageData === 'undefined') {
console.log('ImageData tests are not supported on Node');
done();
return;
}
// Pass the test data to our listener
faceStylizer.fakeWasmModule._waitUntilIdle.and.callFake(() => {
verifyListenersRegistered(faceStylizer);
faceStylizer.emptyPacketListener!(/* timestamp= */ 1337);
});
// Invoke the face stylizer
faceStylizer.stylize({} as HTMLImageElement, image => {
expect(faceStylizer.fakeWasmModule._waitUntilIdle).toHaveBeenCalled();
expect(image).toBeNull();
done();
});
});
it('invokes callback even when no faes are detected', (done) => {
it('invokes callback even when no faces are detected', (done) => {
// Pass the test data to our listener
faceStylizer.fakeWasmModule._waitUntilIdle.and.callFake(() => {
verifyListenersRegistered(faceStylizer);

View File

@ -60,7 +60,7 @@ export type ImageSegmenterCallback = (result: ImageSegmenterResult) => void;
export class ImageSegmenter extends VisionTaskRunner {
private result: ImageSegmenterResult = {};
private labels: string[] = [];
private userCallback: ImageSegmenterCallback = () => {};
private userCallback?: ImageSegmenterCallback;
private outputCategoryMask = DEFAULT_OUTPUT_CATEGORY_MASK;
private outputConfidenceMasks = DEFAULT_OUTPUT_CONFIDENCE_MASKS;
private readonly options: ImageSegmenterGraphOptionsProto;
@ -224,22 +224,51 @@ export class ImageSegmenter extends VisionTaskRunner {
segment(
image: ImageSource, imageProcessingOptions: ImageProcessingOptions,
callback: ImageSegmenterCallback): void;
/**
* Performs image segmentation on the provided single image and returns the
* segmentation result. This method creates a copy of the resulting masks and
* should not be used in high-throughput applications. Only use this method
* when the ImageSegmenter is created with running mode `image`.
*
* @param image An image to process.
* @return The segmentation result. The data is copied to avoid lifetime
* issues.
*/
segment(image: ImageSource): ImageSegmenterResult;
/**
* Performs image segmentation on the provided single image and returns the
* segmentation result. This method creates a copy of the resulting masks and
* should not be used in high-throughput applications. Only use this method when
* the ImageSegmenter is created with running mode `image`.
*
* @param image An image to process.
* @param imageProcessingOptions the `ImageProcessingOptions` specifying how
* to process the input image before running inference.
* @return The segmentation result. The data is copied to avoid lifetime
* issues.
*/
segment(image: ImageSource, imageProcessingOptions: ImageProcessingOptions):
ImageSegmenterResult;
segment(
image: ImageSource,
imageProcessingOptionsOrCallback: ImageProcessingOptions|
imageProcessingOptionsOrCallback?: ImageProcessingOptions|
ImageSegmenterCallback,
callback?: ImageSegmenterCallback): void {
callback?: ImageSegmenterCallback): ImageSegmenterResult|void {
const imageProcessingOptions =
typeof imageProcessingOptionsOrCallback !== 'function' ?
imageProcessingOptionsOrCallback :
{};
this.userCallback = typeof imageProcessingOptionsOrCallback === 'function' ?
imageProcessingOptionsOrCallback :
callback!;
callback;
this.reset();
this.processImageData(image, imageProcessingOptions);
this.userCallback = () => {};
if (!this.userCallback) {
return this.result;
}
}
/**
@ -265,7 +294,7 @@ export class ImageSegmenter extends VisionTaskRunner {
*
* @param videoFrame A video frame to process.
* @param imageProcessingOptions the `ImageProcessingOptions` specifying how
* to process the input image before running inference.
* to process the input frame before running inference.
* @param timestamp The timestamp of the current frame, in ms.
* @param callback The callback that is invoked with the segmented masks. The
* lifetime of the returned data is only guaranteed for the duration of the
@ -274,11 +303,41 @@ export class ImageSegmenter extends VisionTaskRunner {
segmentForVideo(
videoFrame: ImageSource, imageProcessingOptions: ImageProcessingOptions,
timestamp: number, callback: ImageSegmenterCallback): void;
/**
* Performs image segmentation on the provided video frame and returns the
* segmentation result. This method creates a copy of the resulting masks and
* should not be used in high-throughput applications. Only use this method
* when the ImageSegmenter is created with running mode `video`.
*
* @param videoFrame A video frame to process.
* @return The segmentation result. The data is copied to avoid lifetime
* issues.
*/
segmentForVideo(videoFrame: ImageSource, timestamp: number):
ImageSegmenterResult;
/**
* Performs image segmentation on the provided video frame and returns the
* segmentation result. This method creates a copy of the resulting masks and
* should not be used in high-throughput applications. Only use this method when
* the ImageSegmenter is created with running mode `video`.
*
* @param videoFrame A video frame to process.
* @param imageProcessingOptions the `ImageProcessingOptions` specifying how
* to process the input frame before running inference.
* @param timestamp The timestamp of the current frame, in ms.
* @return The segmentation result. The data is copied to avoid lifetime
* issues.
*/
segmentForVideo(
videoFrame: ImageSource,
imageProcessingOptions: ImageProcessingOptions,
timestamp: number,
): ImageSegmenterResult;
segmentForVideo(
videoFrame: ImageSource,
timestampOrImageProcessingOptions: number|ImageProcessingOptions,
timestampOrCallback: number|ImageSegmenterCallback,
callback?: ImageSegmenterCallback): void {
timestampOrCallback?: number|ImageSegmenterCallback,
callback?: ImageSegmenterCallback): ImageSegmenterResult|void {
const imageProcessingOptions =
typeof timestampOrImageProcessingOptions !== 'number' ?
timestampOrImageProcessingOptions :
@ -288,11 +347,14 @@ export class ImageSegmenter extends VisionTaskRunner {
timestampOrCallback as number;
this.userCallback = typeof timestampOrCallback === 'function' ?
timestampOrCallback :
callback!;
callback;
this.reset();
this.processVideoData(videoFrame, imageProcessingOptions, timestamp);
this.userCallback = () => {};
if (!this.userCallback) {
return this.result;
}
}
/**
@ -323,8 +385,10 @@ export class ImageSegmenter extends VisionTaskRunner {
return;
}
if (this.userCallback) {
this.userCallback(this.result);
}
}
/** Updates the MediaPipe graph configuration. */
protected override refreshGraph(): void {
@ -351,8 +415,9 @@ export class ImageSegmenter extends VisionTaskRunner {
this.graphRunner.attachImageVectorListener(
CONFIDENCE_MASKS_STREAM, (masks, timestamp) => {
this.result.confidenceMasks =
masks.map(wasmImage => this.convertToMPImage(wasmImage));
this.result.confidenceMasks = masks.map(
wasmImage => this.convertToMPImage(
wasmImage, /* shouldCopyData= */ !this.userCallback));
this.setLatestOutputTimestamp(timestamp);
this.maybeInvokeCallback();
});
@ -370,7 +435,8 @@ export class ImageSegmenter extends VisionTaskRunner {
this.graphRunner.attachImageListener(
CATEGORY_MASK_STREAM, (mask, timestamp) => {
this.result.categoryMask = this.convertToMPImage(mask);
this.result.categoryMask = this.convertToMPImage(
mask, /* shouldCopyData= */ !this.userCallback);
this.setLatestOutputTimestamp(timestamp);
this.maybeInvokeCallback();
});

View File

@ -292,4 +292,21 @@ describe('ImageSegmenter', () => {
});
});
});
it('returns result', () => {
const confidenceMask = new Float32Array([0.0]);
// Pass the test data to our listener
imageSegmenter.fakeWasmModule._waitUntilIdle.and.callFake(() => {
imageSegmenter.confidenceMasksListener!(
[
{data: confidenceMask, width: 1, height: 1},
],
1337);
});
const result = imageSegmenter.segment({} as HTMLImageElement);
expect(result.confidenceMasks![0]).toBeInstanceOf(MPImage);
result.confidenceMasks![0].close();
});
});

View File

@ -86,7 +86,7 @@ export class InteractiveSegmenter extends VisionTaskRunner {
private result: InteractiveSegmenterResult = {};
private outputCategoryMask = DEFAULT_OUTPUT_CATEGORY_MASK;
private outputConfidenceMasks = DEFAULT_OUTPUT_CONFIDENCE_MASKS;
private userCallback: InteractiveSegmenterCallback = () => {};
private userCallback?: InteractiveSegmenterCallback;
private readonly options: ImageSegmenterGraphOptionsProto;
private readonly segmenterOptions: SegmenterOptionsProto;
@ -186,14 +186,9 @@ export class InteractiveSegmenter extends VisionTaskRunner {
/**
* Performs interactive segmentation on the provided single image and invokes
* the callback with the response. The `roi` parameter is used to represent a
* user's region of interest for segmentation.
*
* If the output_type is `CATEGORY_MASK`, the callback is invoked with vector
* of images that represent per-category segmented image mask. If the
* output_type is `CONFIDENCE_MASK`, the callback is invoked with a vector of
* images that contains only one confidence image mask. The method returns
* synchronously once the callback returns.
* the callback with the response. The method returns synchronously once the
* callback returns. The `roi` parameter is used to represent a user's region
* of interest for segmentation.
*
* @param image An image to process.
* @param roi The region of interest for segmentation.
@ -206,8 +201,9 @@ export class InteractiveSegmenter extends VisionTaskRunner {
callback: InteractiveSegmenterCallback): void;
/**
* Performs interactive segmentation on the provided single image and invokes
* the callback with the response. The `roi` parameter is used to represent a
* user's region of interest for segmentation.
* the callback with the response. The method returns synchronously once the
* callback returns. The `roi` parameter is used to represent a user's region
* of interest for segmentation.
*
* The 'image_processing_options' parameter can be used to specify the
* rotation to apply to the image before performing segmentation, by setting
@ -215,12 +211,6 @@ export class InteractiveSegmenter extends VisionTaskRunner {
* using the 'regionOfInterest' field is NOT supported and will result in an
* error.
*
* If the output_type is `CATEGORY_MASK`, the callback is invoked with vector
* of images that represent per-category segmented image mask. If the
* output_type is `CONFIDENCE_MASK`, the callback is invoked with a vector of
* images that contains only one confidence image mask. The method returns
* synchronously once the callback returns.
*
* @param image An image to process.
* @param roi The region of interest for segmentation.
* @param imageProcessingOptions the `ImageProcessingOptions` specifying how
@ -233,23 +223,63 @@ export class InteractiveSegmenter extends VisionTaskRunner {
image: ImageSource, roi: RegionOfInterest,
imageProcessingOptions: ImageProcessingOptions,
callback: InteractiveSegmenterCallback): void;
/**
* Performs interactive segmentation on the provided single image and returns
* the segmentation result. This method creates a copy of the resulting masks
* and should not be used in high-throughput applications. The `roi` parameter
* is used to represent a user's region of interest for segmentation.
*
* @param image An image to process.
* @param roi The region of interest for segmentation.
* @return The segmentation result. The data is copied to avoid lifetime
* limits.
*/
segment(image: ImageSource, roi: RegionOfInterest):
InteractiveSegmenterResult;
/**
* Performs interactive segmentation on the provided single image and returns
* the segmentation result. This method creates a copy of the resulting masks
* and should not be used in high-throughput applications. The `roi` parameter
* is used to represent a user's region of interest for segmentation.
*
* The 'image_processing_options' parameter can be used to specify the
* rotation to apply to the image before performing segmentation, by setting
* its 'rotationDegrees' field. Note that specifying a region-of-interest
* using the 'regionOfInterest' field is NOT supported and will result in an
* error.
*
* @param image An image to process.
* @param roi The region of interest for segmentation.
* @param imageProcessingOptions the `ImageProcessingOptions` specifying how
* to process the input image before running inference.
* @return The segmentation result. The data is copied to avoid lifetime
* limits.
*/
segment(
image: ImageSource, roi: RegionOfInterest,
imageProcessingOptionsOrCallback: ImageProcessingOptions|
imageProcessingOptions: ImageProcessingOptions):
InteractiveSegmenterResult;
segment(
image: ImageSource, roi: RegionOfInterest,
imageProcessingOptionsOrCallback?: ImageProcessingOptions|
InteractiveSegmenterCallback,
callback?: InteractiveSegmenterCallback): void {
callback?: InteractiveSegmenterCallback): InteractiveSegmenterResult|
void {
const imageProcessingOptions =
typeof imageProcessingOptionsOrCallback !== 'function' ?
imageProcessingOptionsOrCallback :
{};
this.userCallback = typeof imageProcessingOptionsOrCallback === 'function' ?
imageProcessingOptionsOrCallback :
callback!;
callback;
this.reset();
this.processRenderData(roi, this.getSynctheticTimestamp());
this.processImageData(image, imageProcessingOptions);
this.userCallback = () => {};
if (!this.userCallback) {
return this.result;
}
}
private reset(): void {
@ -265,8 +295,10 @@ export class InteractiveSegmenter extends VisionTaskRunner {
return;
}
if (this.userCallback) {
this.userCallback(this.result);
}
}
/** Updates the MediaPipe graph configuration. */
protected override refreshGraph(): void {
@ -295,8 +327,9 @@ export class InteractiveSegmenter extends VisionTaskRunner {
this.graphRunner.attachImageVectorListener(
CONFIDENCE_MASKS_STREAM, (masks, timestamp) => {
this.result.confidenceMasks =
masks.map(wasmImage => this.convertToMPImage(wasmImage));
this.result.confidenceMasks = masks.map(
wasmImage => this.convertToMPImage(
wasmImage, /* shouldCopyData= */ !this.userCallback));
this.setLatestOutputTimestamp(timestamp);
this.maybeInvokeCallback();
});
@ -314,7 +347,8 @@ export class InteractiveSegmenter extends VisionTaskRunner {
this.graphRunner.attachImageListener(
CATEGORY_MASK_STREAM, (mask, timestamp) => {
this.result.categoryMask = this.convertToMPImage(mask);
this.result.categoryMask = this.convertToMPImage(
mask, /* shouldCopyData= */ !this.userCallback);
this.setLatestOutputTimestamp(timestamp);
this.maybeInvokeCallback();
});

View File

@ -306,4 +306,22 @@ describe('InteractiveSegmenter', () => {
});
});
});
it('returns result', () => {
const confidenceMask = new Float32Array([0.0]);
// Pass the test data to our listener
interactiveSegmenter.fakeWasmModule._waitUntilIdle.and.callFake(() => {
interactiveSegmenter.confidenceMasksListener!(
[
{data: confidenceMask, width: 1, height: 1},
],
1337);
});
const result =
interactiveSegmenter.segment({} as HTMLImageElement, KEYPOINT);
expect(result.confidenceMasks![0]).toBeInstanceOf(MPImage);
result.confidenceMasks![0].close();
});
});

View File

@ -64,7 +64,7 @@ export type PoseLandmarkerCallback = (result: PoseLandmarkerResult) => void;
export class PoseLandmarker extends VisionTaskRunner {
private result: Partial<PoseLandmarkerResult> = {};
private outputSegmentationMasks = false;
private userCallback: PoseLandmarkerCallback = () => {};
private userCallback?: PoseLandmarkerCallback;
private readonly options: PoseLandmarkerGraphOptions;
private readonly poseLandmarksDetectorGraphOptions:
PoseLandmarksDetectorGraphOptions;
@ -200,21 +200,22 @@ export class PoseLandmarker extends VisionTaskRunner {
}
/**
* Performs pose detection on the provided single image and waits
* synchronously for the response. Only use this method when the
* PoseLandmarker is created with running mode `image`.
* Performs pose detection on the provided single image and invokes the
* callback with the response. The method returns synchronously once the
* callback returns. Only use this method when the PoseLandmarker is created
* with running mode `image`.
*
* @param image An image to process.
* @param callback The callback that is invoked with the result. The
* lifetime of the returned masks is only guaranteed for the duration of
* the callback.
* @return The detected pose landmarks.
*/
detect(image: ImageSource, callback: PoseLandmarkerCallback): void;
/**
* Performs pose detection on the provided single image and waits
* synchronously for the response. Only use this method when the
* PoseLandmarker is created with running mode `image`.
* Performs pose detection on the provided single image and invokes the
* callback with the response. The method returns synchronously once the
* callback returns. Only use this method when the PoseLandmarker is created
* with running mode `image`.
*
* @param image An image to process.
* @param imageProcessingOptions the `ImageProcessingOptions` specifying how
@ -222,16 +223,42 @@ export class PoseLandmarker extends VisionTaskRunner {
* @param callback The callback that is invoked with the result. The
* lifetime of the returned masks is only guaranteed for the duration of
* the callback.
* @return The detected pose landmarks.
*/
detect(
image: ImageSource, imageProcessingOptions: ImageProcessingOptions,
callback: PoseLandmarkerCallback): void;
/**
* Performs pose detection on the provided single image and waits
* synchronously for the response. This method creates a copy of the resulting
* masks and should not be used in high-throughput applications. Only
* use this method when the PoseLandmarker is created with running mode
* `image`.
*
* @param image An image to process.
* @return The landmarker result. Any masks are copied to avoid lifetime
* limits.
* @return The detected pose landmarks.
*/
detect(image: ImageSource): PoseLandmarkerResult;
/**
* Performs pose detection on the provided single image and waits
* synchronously for the response. This method creates a copy of the resulting
* masks and should not be used in high-throughput applications. Only
* use this method when the PoseLandmarker is created with running mode
* `image`.
*
* @param image An image to process.
* @return The landmarker result. Any masks are copied to avoid lifetime
* limits.
* @return The detected pose landmarks.
*/
detect(image: ImageSource, imageProcessingOptions: ImageProcessingOptions):
PoseLandmarkerResult;
detect(
image: ImageSource,
imageProcessingOptionsOrCallback: ImageProcessingOptions|
imageProcessingOptionsOrCallback?: ImageProcessingOptions|
PoseLandmarkerCallback,
callback?: PoseLandmarkerCallback): void {
callback?: PoseLandmarkerCallback): PoseLandmarkerResult|void {
const imageProcessingOptions =
typeof imageProcessingOptionsOrCallback !== 'function' ?
imageProcessingOptionsOrCallback :
@ -242,28 +269,32 @@ export class PoseLandmarker extends VisionTaskRunner {
this.resetResults();
this.processImageData(image, imageProcessingOptions);
this.userCallback = () => {};
if (!this.userCallback) {
return this.result as PoseLandmarkerResult;
}
}
/**
* Performs pose detection on the provided video frame and waits
* synchronously for the response. Only use this method when the
* PoseLandmarker is created with running mode `video`.
* Performs pose detection on the provided video frame and invokes the
* callback with the response. The method returns synchronously once the
* callback returns. Only use this method when the PoseLandmarker is created
* with running mode `video`.
*
* @param videoFrame A video frame to process.
* @param timestamp The timestamp of the current frame, in ms.
* @param callback The callback that is invoked with the result. The
* lifetime of the returned masks is only guaranteed for the duration of
* the callback.
* @return The detected pose landmarks.
*/
detectForVideo(
videoFrame: ImageSource, timestamp: number,
callback: PoseLandmarkerCallback): void;
/**
* Performs pose detection on the provided video frame and waits
* synchronously for the response. Only use this method when the
* PoseLandmarker is created with running mode `video`.
* Performs pose detection on the provided video frame and invokes the
* callback with the response. The method returns synchronously once the
* callback returns. Only use this method when the PoseLandmarker is created
* with running mode `video`.
*
* @param videoFrame A video frame to process.
* @param imageProcessingOptions the `ImageProcessingOptions` specifying how
@ -272,16 +303,45 @@ export class PoseLandmarker extends VisionTaskRunner {
* @param callback The callback that is invoked with the result. The
* lifetime of the returned masks is only guaranteed for the duration of
* the callback.
* @return The detected pose landmarks.
*/
detectForVideo(
videoFrame: ImageSource, imageProcessingOptions: ImageProcessingOptions,
timestamp: number, callback: PoseLandmarkerCallback): void;
/**
* Performs pose detection on the provided video frame and returns the result.
* This method creates a copy of the resulting masks and should not be used
 * in high-throughput applications. Only use this method when the
* PoseLandmarker is created with running mode `video`.
*
* @param videoFrame A video frame to process.
* @param timestamp The timestamp of the current frame, in ms.
* @return The landmarker result. Any masks are copied to extend the
* lifetime of the returned data.
*/
detectForVideo(videoFrame: ImageSource, timestamp: number):
PoseLandmarkerResult;
/**
* Performs pose detection on the provided video frame and returns the result.
* This method creates a copy of the resulting masks and should not be used
 * in high-throughput applications. The method returns synchronously once the
* callback returns. Only use this method when the PoseLandmarker is created
* with running mode `video`.
*
* @param videoFrame A video frame to process.
* @param imageProcessingOptions the `ImageProcessingOptions` specifying how
* to process the input image before running inference.
* @param timestamp The timestamp of the current frame, in ms.
* @return The landmarker result. Any masks are copied to extend the lifetime
* of the returned data.
*/
detectForVideo(
videoFrame: ImageSource, imageProcessingOptions: ImageProcessingOptions,
timestamp: number): PoseLandmarkerResult;
detectForVideo(
videoFrame: ImageSource,
timestampOrImageProcessingOptions: number|ImageProcessingOptions,
timestampOrCallback: number|PoseLandmarkerCallback,
callback?: PoseLandmarkerCallback): void {
timestampOrCallback?: number|PoseLandmarkerCallback,
callback?: PoseLandmarkerCallback): PoseLandmarkerResult|void {
const imageProcessingOptions =
typeof timestampOrImageProcessingOptions !== 'number' ?
timestampOrImageProcessingOptions :
@ -291,10 +351,14 @@ export class PoseLandmarker extends VisionTaskRunner {
timestampOrCallback as number;
this.userCallback = typeof timestampOrCallback === 'function' ?
timestampOrCallback :
callback!;
callback;
this.resetResults();
this.processVideoData(videoFrame, imageProcessingOptions, timestamp);
this.userCallback = () => {};
if (!this.userCallback) {
return this.result as PoseLandmarkerResult;
}
}
private resetResults(): void {
@ -315,8 +379,11 @@ export class PoseLandmarker extends VisionTaskRunner {
if (this.outputSegmentationMasks && !('segmentationMasks' in this.result)) {
return;
}
if (this.userCallback) {
this.userCallback(this.result as Required<PoseLandmarkerResult>);
}
}
/** Sets the default values for the graph. */
private initDefaults(): void {
@ -438,8 +505,9 @@ export class PoseLandmarker extends VisionTaskRunner {
'SEGMENTATION_MASK:' + SEGMENTATION_MASK_STREAM);
this.graphRunner.attachImageVectorListener(
SEGMENTATION_MASK_STREAM, (masks, timestamp) => {
this.result.segmentationMasks =
masks.map(wasmImage => this.convertToMPImage(wasmImage));
this.result.segmentationMasks = masks.map(
wasmImage => this.convertToMPImage(
wasmImage, /* shouldCopyData= */ !this.userCallback));
this.setLatestOutputTimestamp(timestamp);
this.maybeInvokeCallback();
});

View File

@ -331,4 +331,26 @@ describe('PoseLandmarker', () => {
listenerCalled = true;
});
});
it('returns result', () => {
const landmarksProto = [createLandmarks().serializeBinary()];
const worldLandmarksProto = [createWorldLandmarks().serializeBinary()];
// Pass the test data to our listener
poseLandmarker.fakeWasmModule._waitUntilIdle.and.callFake(() => {
poseLandmarker.listeners.get('normalized_landmarks')!
(landmarksProto, 1337);
poseLandmarker.listeners.get('world_landmarks')!
(worldLandmarksProto, 1337);
poseLandmarker.listeners.get('auxiliary_landmarks')!
(landmarksProto, 1337);
});
// Invoke the pose landmarker
const result = poseLandmarker.detect({} as HTMLImageElement);
expect(poseLandmarker.fakeWasmModule._waitUntilIdle).toHaveBeenCalled();
expect(result.landmarks).toEqual([[{'x': 0, 'y': 0, 'z': 0}]]);
expect(result.worldLandmarks).toEqual([[{'x': 0, 'y': 0, 'z': 0}]]);
expect(result.auxilaryLandmarks).toEqual([[{'x': 0, 'y': 0, 'z': 0}]]);
});
});