Add interactive segmenter java API

PiperOrigin-RevId: 518303391
2023-03-21 09:55:59 -07:00 · 2023-03-21 09:55:59 -07:00 · 2be66e8eb0
commit 2be66e8eb0
parent 6e0542c16a
7 changed files with 739 additions and 0 deletions
--- a/mediapipe/java/com/google/mediapipe/mediapipe_aar.bzl
+++ b/mediapipe/java/com/google/mediapipe/mediapipe_aar.bzl
@ -358,11 +358,21 @@ def mediapipe_java_proto_srcs(name = ""):
        src_out = "com/google/mediapipe/formats/proto/RectProto.java",
    ))
    proto_src_list.append(mediapipe_java_proto_src_extractor(
        target = "//mediapipe/util:color_java_proto_lite",
        src_out = "com/google/mediapipe/util/proto/Color.java",
    ))
    proto_src_list.append(mediapipe_java_proto_src_extractor(
        target = "//mediapipe/util:label_map_java_proto_lite",
        src_out = "com/google/mediapipe/util/proto/LabelMapProto.java",
    ))
    proto_src_list.append(mediapipe_java_proto_src_extractor(
        target = "//mediapipe/util:render_data_java_proto_lite",
        src_out = "com/google/mediapipe/util/proto/RenderData.java",
    ))
    return proto_src_list
 def mediapipe_logging_java_proto_srcs(name = ""):
--- a/mediapipe/tasks/java/com/google/mediapipe/tasks/vision/BUILD
+++ b/mediapipe/tasks/java/com/google/mediapipe/tasks/vision/BUILD
@ -50,6 +50,7 @@ cc_binary(
        "//mediapipe/tasks/cc/vision/image_classifier:image_classifier_graph",
        "//mediapipe/tasks/cc/vision/image_embedder:image_embedder_graph",
        "//mediapipe/tasks/cc/vision/image_segmenter:image_segmenter_graph",
        "//mediapipe/tasks/cc/vision/interactive_segmenter:interactive_segmenter_graph",
        "//mediapipe/tasks/cc/vision/object_detector:object_detector_graph",
        "//mediapipe/tasks/java:version_script.lds",
        "//mediapipe/tasks/java/com/google/mediapipe/tasks/core/jni:model_resources_cache_jni",
@ -206,6 +207,35 @@ android_library(
    ],
 )
 android_library(
    name = "interactivesegmenter",
    srcs = [
        "imagesegmenter/ImageSegmenterResult.java",
        "interactivesegmenter/InteractiveSegmenter.java",
    ],
    javacopts = [
        "-Xep:AndroidJdkLibsChecker:OFF",
    ],
    manifest = "interactivesegmenter/AndroidManifest.xml",
    deps = [
        ":core",
        "//mediapipe/framework:calculator_options_java_proto_lite",
        "//mediapipe/java/com/google/mediapipe/framework:android_framework",
        "//mediapipe/java/com/google/mediapipe/framework/image",
        "//mediapipe/tasks/cc/core/proto:base_options_java_proto_lite",
        "//mediapipe/tasks/cc/vision/image_segmenter/calculators:tensors_to_segmentation_calculator_java_proto_lite",
        "//mediapipe/tasks/cc/vision/image_segmenter/proto:image_segmenter_graph_options_java_proto_lite",
        "//mediapipe/tasks/cc/vision/image_segmenter/proto:segmenter_options_java_proto_lite",
        "//mediapipe/tasks/java/com/google/mediapipe/tasks/components/containers:normalizedkeypoint",
        "//mediapipe/tasks/java/com/google/mediapipe/tasks/core",
        "//mediapipe/util:color_java_proto_lite",
        "//mediapipe/util:render_data_java_proto_lite",
        "//third_party:autovalue",
        "@maven//:androidx_annotation_annotation",
        "@maven//:com_google_guava_guava",
    ],
 )
 android_library(
    name = "imageembedder",
    srcs = [
--- a/mediapipe/tasks/java/com/google/mediapipe/tasks/vision/interactivesegmenter/AndroidManifest.xml
+++ b/mediapipe/tasks/java/com/google/mediapipe/tasks/vision/interactivesegmenter/AndroidManifest.xml
@ -0,0 +1,8 @@
 <?xml version="1.0" encoding="utf-8"?>
 <manifest xmlns:android="http://schemas.android.com/apk/res/android"
    package="com.google.mediapipe.tasks.vision.interactivesegmenter">
    <uses-sdk android:minSdkVersion="24"
        android:targetSdkVersion="30" />
 </manifest>
--- a/mediapipe/tasks/java/com/google/mediapipe/tasks/vision/interactivesegmenter/InteractiveSegmenter.java
+++ b/mediapipe/tasks/java/com/google/mediapipe/tasks/vision/interactivesegmenter/InteractiveSegmenter.java
@ -0,0 +1,556 @@
 // Copyright 2023 The MediaPipe Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //      http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 package com.google.mediapipe.tasks.vision.interactivesegmenter;
 import android.content.Context;
 import com.google.auto.value.AutoValue;
 import com.google.mediapipe.proto.CalculatorOptionsProto.CalculatorOptions;
 import com.google.mediapipe.proto.CalculatorProto.CalculatorGraphConfig;
 import com.google.mediapipe.framework.AndroidPacketGetter;
 import com.google.mediapipe.framework.MediaPipeException;
 import com.google.mediapipe.framework.Packet;
 import com.google.mediapipe.framework.PacketGetter;
 import com.google.mediapipe.framework.ProtoUtil;
 import com.google.mediapipe.framework.image.BitmapImageBuilder;
 import com.google.mediapipe.framework.image.ByteBufferImageBuilder;
 import com.google.mediapipe.framework.image.MPImage;
 import com.google.mediapipe.tasks.TensorsToSegmentationCalculatorOptionsProto;
 import com.google.mediapipe.tasks.components.containers.NormalizedKeypoint;
 import com.google.mediapipe.tasks.core.BaseOptions;
 import com.google.mediapipe.tasks.core.ErrorListener;
 import com.google.mediapipe.tasks.core.OutputHandler;
 import com.google.mediapipe.tasks.core.OutputHandler.ResultListener;
 import com.google.mediapipe.tasks.core.TaskInfo;
 import com.google.mediapipe.tasks.core.TaskOptions;
 import com.google.mediapipe.tasks.core.TaskRunner;
 import com.google.mediapipe.tasks.core.proto.BaseOptionsProto;
 import com.google.mediapipe.tasks.vision.core.BaseVisionTaskApi;
 import com.google.mediapipe.tasks.vision.core.ImageProcessingOptions;
 import com.google.mediapipe.tasks.vision.core.RunningMode;
 import com.google.mediapipe.tasks.vision.imagesegmenter.ImageSegmenterResult;
 import com.google.mediapipe.tasks.vision.imagesegmenter.proto.ImageSegmenterGraphOptionsProto;
 import com.google.mediapipe.tasks.vision.imagesegmenter.proto.SegmenterOptionsProto;
 import com.google.mediapipe.util.proto.ColorProto.Color;
 import com.google.mediapipe.util.proto.RenderDataProto.RenderAnnotation;
 import com.google.mediapipe.util.proto.RenderDataProto.RenderData;
 import java.nio.ByteBuffer;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
 /**
 * Performs interactive segmentation on images.
 *
 * <p>Note that, in addition to the standard segmentation API {@link segment} that takes an input
 * image and returns the outputs, but involves deep copy of the returns, InteractiveSegmenter also
 * supports the callback API, {@link segmentWithResultListener}, which allows you to access the
 * outputs through zero copy. Set {@link ResultListener} in {@link InteractiveSegmenterOptions}
 * properly to use the callback API.
 *
 * <p>The API expects a TFLite model with,<a
 * href="https://www.tensorflow.org/lite/convert/metadata">TFLite Model Metadata.</a>. The model
 * expects input with 4 channels, where the first 3 channels represent RGB image, and the last
 * channel represents the user's region of interest.
 *
 * <ul>
 *   <li>Input image {@link MPImage}
 *       <ul>
 *         <li>The image that image segmenter runs on.
 *       </ul>
 *   <li>Input roi {@link RegionOfInterest}
 *       <ul>
 *         <li>Region of interest based on user interaction.
 *       </ul>
 *   <li>Output ImageSegmenterResult {@link ImageSegmenterResult}
 *       <ul>
 *         <li>An ImageSegmenterResult containing segmented masks.
 *       </ul>
 * </ul>
 */
 public final class InteractiveSegmenter extends BaseVisionTaskApi {
  private static final String TAG = InteractiveSegmenter.class.getSimpleName();
  private static final String IMAGE_IN_STREAM_NAME = "image_in";
  private static final String ROI_IN_STREAM_NAME = "roi_in";
  private static final String NORM_RECT_IN_STREAM_NAME = "norm_rect_in";
  private static final List<String> INPUT_STREAMS =
      Collections.unmodifiableList(
          Arrays.asList(
              "IMAGE:" + IMAGE_IN_STREAM_NAME,
              "ROI:" + ROI_IN_STREAM_NAME,
              "NORM_RECT:" + NORM_RECT_IN_STREAM_NAME));
  private static final List<String> OUTPUT_STREAMS =
      Collections.unmodifiableList(
          Arrays.asList(
              "GROUPED_SEGMENTATION:segmented_mask_out",
              "IMAGE:image_out",
              "SEGMENTATION:0:segmentation"));
  private static final int GROUPED_SEGMENTATION_OUT_STREAM_INDEX = 0;
  private static final int IMAGE_OUT_STREAM_INDEX = 1;
  private static final int SEGMENTATION_OUT_STREAM_INDEX = 2;
  private static final String TASK_GRAPH_NAME =
      "mediapipe.tasks.vision.interactive_segmenter.InteractiveSegmenterGraph";
  private static final String TENSORS_TO_SEGMENTATION_CALCULATOR_NAME =
      "mediapipe.tasks.TensorsToSegmentationCalculator";
  private boolean hasResultListener = false;
  private List<String> labels = new ArrayList<>();
  static {
    ProtoUtil.registerTypeName(RenderData.class, "mediapipe.RenderData");
  }
  /**
   * Creates an {@link InteractiveSegmenter} instance from an {@link InteractiveSegmenterOptions}.
   *
   * @param context an Android {@link Context}.
   * @param segmenterOptions an {@link InteractiveSegmenterOptions} instance.
   * @throws MediaPipeException if there is an error during {@link InteractiveSegmenter} creation.
   */
  public static InteractiveSegmenter createFromOptions(
      Context context, InteractiveSegmenterOptions segmenterOptions) {
    // TODO: Consolidate OutputHandler and TaskRunner.
    OutputHandler<ImageSegmenterResult, MPImage> handler = new OutputHandler<>();
    handler.setOutputPacketConverter(
        new OutputHandler.OutputPacketConverter<ImageSegmenterResult, MPImage>() {
          @Override
          public ImageSegmenterResult convertToTaskResult(List<Packet> packets)
              throws MediaPipeException {
            if (packets.get(GROUPED_SEGMENTATION_OUT_STREAM_INDEX).isEmpty()) {
              return ImageSegmenterResult.create(
                  new ArrayList<>(),
                  packets.get(GROUPED_SEGMENTATION_OUT_STREAM_INDEX).getTimestamp());
            }
            List<MPImage> segmentedMasks = new ArrayList<>();
            int width = PacketGetter.getImageWidth(packets.get(SEGMENTATION_OUT_STREAM_INDEX));
            int height = PacketGetter.getImageHeight(packets.get(SEGMENTATION_OUT_STREAM_INDEX));
            int imageFormat =
                segmenterOptions.outputType()
                        == InteractiveSegmenterOptions.OutputType.CONFIDENCE_MASK
                    ? MPImage.IMAGE_FORMAT_VEC32F1
                    : MPImage.IMAGE_FORMAT_ALPHA;
            int imageListSize =
                PacketGetter.getImageListSize(packets.get(GROUPED_SEGMENTATION_OUT_STREAM_INDEX));
            ByteBuffer[] buffersArray = new ByteBuffer[imageListSize];
            // If resultListener is not provided, the resulted MPImage is deep copied from mediapipe
            // graph. If provided, the result MPImage is wrapping the mediapipe packet memory.
            if (!segmenterOptions.resultListener().isPresent()) {
              for (int i = 0; i < imageListSize; i++) {
                buffersArray[i] =
                    ByteBuffer.allocateDirect(
                        width * height * (imageFormat == MPImage.IMAGE_FORMAT_VEC32F1 ? 4 : 1));
              }
            }
            if (!PacketGetter.getImageList(
                packets.get(GROUPED_SEGMENTATION_OUT_STREAM_INDEX),
                buffersArray,
                !segmenterOptions.resultListener().isPresent())) {
              throw new MediaPipeException(
                  MediaPipeException.StatusCode.INTERNAL.ordinal(),
                  "There is an error getting segmented masks. It usually results from incorrect"
                      + " options of unsupported OutputType of given model.");
            }
            for (ByteBuffer buffer : buffersArray) {
              ByteBufferImageBuilder builder =
                  new ByteBufferImageBuilder(buffer, width, height, imageFormat);
              segmentedMasks.add(builder.build());
            }
            return ImageSegmenterResult.create(
                segmentedMasks,
                BaseVisionTaskApi.generateResultTimestampMs(
                    RunningMode.IMAGE, packets.get(GROUPED_SEGMENTATION_OUT_STREAM_INDEX)));
          }
          @Override
          public MPImage convertToTaskInput(List<Packet> packets) {
            return new BitmapImageBuilder(
                    AndroidPacketGetter.getBitmapFromRgb(packets.get(IMAGE_OUT_STREAM_INDEX)))
                .build();
          }
        });
    segmenterOptions.resultListener().ifPresent(handler::setResultListener);
    segmenterOptions.errorListener().ifPresent(handler::setErrorListener);
    TaskRunner runner =
        TaskRunner.create(
            context,
            TaskInfo.<InteractiveSegmenterOptions>builder()
                .setTaskName(InteractiveSegmenter.class.getSimpleName())
                .setTaskRunningModeName(RunningMode.IMAGE.name())
                .setTaskGraphName(TASK_GRAPH_NAME)
                .setInputStreams(INPUT_STREAMS)
                .setOutputStreams(OUTPUT_STREAMS)
                .setTaskOptions(segmenterOptions)
                .setEnableFlowLimiting(false)
                .build(),
            handler);
    return new InteractiveSegmenter(runner, segmenterOptions.resultListener().isPresent());
  }
  /**
   * Constructor to initialize an {@link InteractiveSegmenter} from a {@link TaskRunner}.
   *
   * @param taskRunner a {@link TaskRunner}.
   */
  private InteractiveSegmenter(TaskRunner taskRunner, boolean hasResultListener) {
    super(taskRunner, RunningMode.IMAGE, IMAGE_IN_STREAM_NAME, NORM_RECT_IN_STREAM_NAME);
    this.hasResultListener = hasResultListener;
    populateLabels();
  }
  /**
   * Populate the labelmap in TensorsToSegmentationCalculator to labels field.
   *
   * @throws MediaPipeException if there is an error during finding TensorsToSegmentationCalculator.
   */
  private void populateLabels() {
    CalculatorGraphConfig graphConfig = this.runner.getCalculatorGraphConfig();
    boolean foundTensorsToSegmentation = false;
    for (CalculatorGraphConfig.Node node : graphConfig.getNodeList()) {
      if (node.getName().contains(TENSORS_TO_SEGMENTATION_CALCULATOR_NAME)) {
        if (foundTensorsToSegmentation) {
          throw new MediaPipeException(
              MediaPipeException.StatusCode.INTERNAL.ordinal(),
              "The graph has more than one mediapipe.tasks.TensorsToSegmentationCalculator.");
        }
        foundTensorsToSegmentation = true;
        TensorsToSegmentationCalculatorOptionsProto.TensorsToSegmentationCalculatorOptions options =
            node.getOptions()
                .getExtension(
                    TensorsToSegmentationCalculatorOptionsProto
                        .TensorsToSegmentationCalculatorOptions.ext);
        for (int i = 0; i < options.getLabelItemsMap().size(); i++) {
          Long labelKey = Long.valueOf(i);
          if (!options.getLabelItemsMap().containsKey(labelKey)) {
            throw new MediaPipeException(
                MediaPipeException.StatusCode.INTERNAL.ordinal(),
                "The lablemap have no expected key: " + labelKey);
          }
          labels.add(options.getLabelItemsMap().get(labelKey).getName());
        }
      }
    }
  }
  /**
   * Performs segmentation on the provided single image with default image processing options, given
   * user's region-of-interest, i.e. without any rotation applied. TODO update java doc
   * for input image format.
   *
   * <p>Users can represent user interaction through {@link RegionOfInterest}, which gives a hint to
   * perform segmentation focusing on the given region of interest.
   *
   * <p>{@link InteractiveSegmenter} supports the following color space types:
   *
   * <ul>
   *   <li>{@link Bitmap.Config.ARGB_8888}
   * </ul>
   *
   * @param image a MediaPipe {@link MPImage} object for processing.
   * @param roi a {@link RegionOfInterest} object to represent user interaction.
   * @throws MediaPipeException if there is an internal error. Or if {@link InteractiveSegmenter} is
   *     created with a {@link ResultListener}.
   */
  public ImageSegmenterResult segment(MPImage image, RegionOfInterest roi) {
    return segment(image, roi, ImageProcessingOptions.builder().build());
  }
  /**
   * Performs segmentation on the provided single image, given user's region-of-interest.
   * TODO update java doc for input image format.
   *
   * <p>Users can represent user interaction through {@link RegionOfInterest}, which gives a hint to
   * perform segmentation focusing on the given region of interest.
   *
   * <p>{@link InteractiveSegmenter} supports the following color space types:
   *
   * <ul>
   *   <li>{@link Bitmap.Config.ARGB_8888}
   * </ul>
   *
   * @param image a MediaPipe {@link MPImage} object for processing.
   * @param roi a {@link RegionOfInterest} object to represent user interaction.
   * @param imageProcessingOptions the {@link ImageProcessingOptions} specifying how to process the
   *     input image before running inference. Note that region-of-interest is <b>not</b> supported
   *     by this task: specifying {@link ImageProcessingOptions#regionOfInterest()} will result in
   *     this method throwing an IllegalArgumentException.
   * @throws IllegalArgumentException if the {@link ImageProcessingOptions} specify a
   *     region-of-interest.
   * @throws MediaPipeException if there is an internal error. Or if {@link InteractiveSegmenter} is
   *     created with a {@link ResultListener}.
   */
  public ImageSegmenterResult segment(
      MPImage image, RegionOfInterest roi, ImageProcessingOptions imageProcessingOptions) {
    if (hasResultListener) {
      throw new MediaPipeException(
          MediaPipeException.StatusCode.FAILED_PRECONDITION.ordinal(),
          "ResultListener is provided in the InteractiveSegmenterOptions, but this method will"
              + " return an ImageSegmentationResult.");
    }
    validateImageProcessingOptions(imageProcessingOptions);
    return processImageWithRoi(image, roi, imageProcessingOptions);
  }
  /**
   * Performs segmentation on the provided single image with default image processing options, given
   * user's region-of-interest, i.e. without any rotation applied, and provides zero-copied results
   * via {@link ResultListener} in {@link InteractiveSegmenterOptions}.
   *
   * <p>TODO update java doc for input image format.
   *
   * <p>Users can represent user interaction through {@link RegionOfInterest}, which gives a hint to
   * perform segmentation focusing on the given region of interest.
   *
   * <p>{@link InteractiveSegmenter} supports the following color space types:
   *
   * <ul>
   *   <li>{@link Bitmap.Config.ARGB_8888}
   * </ul>
   *
   * @param image a MediaPipe {@link MPImage} object for processing.
   * @param roi a {@link RegionOfInterest} object to represent user interaction.
   * @throws IllegalArgumentException if the {@link ImageProcessingOptions} specify a
   *     region-of-interest.
   * @throws MediaPipeException if there is an internal error. Or if {@link InteractiveSegmenter} is
   *     not created wtih {@link ResultListener} set in {@link InteractiveSegmenterOptions}.
   */
  public void segmentWithResultListener(MPImage image, RegionOfInterest roi) {
    segmentWithResultListener(image, roi, ImageProcessingOptions.builder().build());
  }
  /**
   * Performs segmentation on the provided single image given user's region-of-interest, and
   * provides zero-copied results via {@link ResultListener} in {@link InteractiveSegmenterOptions}.
   *
   * <p>TODO update java doc for input image format.
   *
   * <p>Users can represent user interaction through {@link RegionOfInterest}, which gives a hint to
   * perform segmentation focusing on the given region of interest.
   *
   * <p>{@link InteractiveSegmenter} supports the following color space types:
   *
   * <ul>
   *   <li>{@link Bitmap.Config.ARGB_8888}
   * </ul>
   *
   * @param image a MediaPipe {@link MPImage} object for processing.
   * @param roi a {@link RegionOfInterest} object to represent user interaction.
   * @param imageProcessingOptions the {@link ImageProcessingOptions} specifying how to process the
   *     input image before running inference. Note that region-of-interest is <b>not</b> supported
   *     by this task: specifying {@link ImageProcessingOptions#regionOfInterest()} will result in
   *     this method throwing an IllegalArgumentException.
   * @throws IllegalArgumentException if the {@link ImageProcessingOptions} specify a
   *     region-of-interest.
   * @throws MediaPipeException if there is an internal error. Or if {@link InteractiveSegmenter} is
   *     not created wtih {@link ResultListener} set in {@link InteractiveSegmenterOptions}.
   */
  public void segmentWithResultListener(
      MPImage image, RegionOfInterest roi, ImageProcessingOptions imageProcessingOptions) {
    if (!hasResultListener) {
      throw new MediaPipeException(
          MediaPipeException.StatusCode.FAILED_PRECONDITION.ordinal(),
          "ResultListener is not set in the InteractiveSegmenterOptions, but this method expects a"
              + " ResultListener to process ImageSegmentationResult.");
    }
    validateImageProcessingOptions(imageProcessingOptions);
    ImageSegmenterResult unused = processImageWithRoi(image, roi, imageProcessingOptions);
  }
  /**
   * Get the category label list of the ImageSegmenter can recognize. For CATEGORY_MASK type, the
   * index in the category mask corresponds to the category in the label list. For CONFIDENCE_MASK
   * type, the output mask list at index corresponds to the category in the label list.
   *
   * <p>If there is no labelmap provided in the model file, empty label list is returned.
   */
  List<String> getLabels() {
    return labels;
  }
  /** Options for setting up an {@link InteractiveSegmenter}. */
  @AutoValue
  public abstract static class InteractiveSegmenterOptions extends TaskOptions {
    /** Builder for {@link InteractiveSegmenterOptions}. */
    @AutoValue.Builder
    public abstract static class Builder {
      /** Sets the base options for the image segmenter task. */
      public abstract Builder setBaseOptions(BaseOptions value);
      /** The output type from image segmenter. */
      public abstract Builder setOutputType(OutputType value);
      /**
       * Sets an optional {@link ResultListener} to receive the segmentation results when the graph
       * pipeline is done processing an image.
       */
      public abstract Builder setResultListener(
          ResultListener<ImageSegmenterResult, MPImage> value);
      /** Sets an optional {@link ErrorListener}}. */
      public abstract Builder setErrorListener(ErrorListener value);
      abstract InteractiveSegmenterOptions autoBuild();
      /** Builds the {@link InteractiveSegmenterOptions} instance. */
      public final InteractiveSegmenterOptions build() {
        return autoBuild();
      }
    }
    abstract BaseOptions baseOptions();
    abstract OutputType outputType();
    abstract Optional<ResultListener<ImageSegmenterResult, MPImage>> resultListener();
    abstract Optional<ErrorListener> errorListener();
    /** The output type of segmentation results. */
    public enum OutputType {
      // Gives a single output mask where each pixel represents the class which
      // the pixel in the original image was predicted to belong to.
      CATEGORY_MASK,
      // Gives a list of output masks where, for each mask, each pixel represents
      // the prediction confidence, usually in the [0, 1] range.
      CONFIDENCE_MASK
    }
    public static Builder builder() {
      return new AutoValue_InteractiveSegmenter_InteractiveSegmenterOptions.Builder()
          .setOutputType(OutputType.CATEGORY_MASK);
    }
    /**
     * Converts an {@link InteractiveSegmenterOptions} to a {@link CalculatorOptions} protobuf
     * message.
     */
    @Override
    public CalculatorOptions convertToCalculatorOptionsProto() {
      ImageSegmenterGraphOptionsProto.ImageSegmenterGraphOptions.Builder taskOptionsBuilder =
          ImageSegmenterGraphOptionsProto.ImageSegmenterGraphOptions.newBuilder()
              .setBaseOptions(
                  BaseOptionsProto.BaseOptions.newBuilder()
                      .setUseStreamMode(false)
                      .mergeFrom(convertBaseOptionsToProto(baseOptions()))
                      .build());
      SegmenterOptionsProto.SegmenterOptions.Builder segmenterOptionsBuilder =
          SegmenterOptionsProto.SegmenterOptions.newBuilder();
      if (outputType() == OutputType.CONFIDENCE_MASK) {
        segmenterOptionsBuilder.setOutputType(
            SegmenterOptionsProto.SegmenterOptions.OutputType.CONFIDENCE_MASK);
      } else if (outputType() == OutputType.CATEGORY_MASK) {
        segmenterOptionsBuilder.setOutputType(
            SegmenterOptionsProto.SegmenterOptions.OutputType.CATEGORY_MASK);
      }
      taskOptionsBuilder.setSegmenterOptions(segmenterOptionsBuilder);
      return CalculatorOptions.newBuilder()
          .setExtension(
              ImageSegmenterGraphOptionsProto.ImageSegmenterGraphOptions.ext,
              taskOptionsBuilder.build())
          .build();
    }
  }
  /**
   * Validates that the provided {@link ImageProcessingOptions} doesn't contain a
   * region-of-interest.
   */
  private static void validateImageProcessingOptions(
      ImageProcessingOptions imageProcessingOptions) {
    if (imageProcessingOptions.regionOfInterest().isPresent()) {
      throw new IllegalArgumentException(
          "InteractiveSegmenter doesn't support region-of-interest.");
    }
  }
  /** The Region-Of-Interest (ROI) to interact with. */
  public static class RegionOfInterest {
    private NormalizedKeypoint keypoint;
    private RegionOfInterest() {}
    /**
     * Creates a {@link RegionOfInterest} instance representing a single point pointing to the
     * object that the user wants to segment.
     */
    public static RegionOfInterest create(NormalizedKeypoint keypoint) {
      RegionOfInterest roi = new RegionOfInterest();
      roi.keypoint = keypoint;
      return roi;
    }
  }
  /**
   * Converts a {@link RegionOfInterest} instance into a {@link RenderData} protobuf message
   *
   * @param roi a {@link RegionOfInterest} object to represent user interaction.
   * @throws IllegalArgumentException if {@link RegionOfInterest} does not represent a valid user
   *     interaction.
   */
  private static RenderData convertToRenderData(RegionOfInterest roi) {
    RenderData.Builder builder = RenderData.newBuilder();
    if (roi.keypoint != null) {
      return builder
          .addRenderAnnotations(
              RenderAnnotation.newBuilder()
                  .setColor(Color.newBuilder().setR(255))
                  .setPoint(
                      RenderAnnotation.Point.newBuilder()
                          .setX(roi.keypoint.x())
                          .setY(roi.keypoint.y())))
          .build();
    }
    throw new IllegalArgumentException(
        "RegionOfInterest does not include a valid user interaction");
  }
  /**
   * A synchronous method to process single image inputs. The call blocks the current thread until a
   * failure status or a successful result is returned.
   *
   * <p>This is almost the same as {@link BaseVisionTaskApi.processImageData} except accepting an
   * additional {@link RegionOfInterest}.
   *
   * @param image a MediaPipe {@link MPImage} object for processing.
   * @param roi a {@link RegionOfInterest} object to represent user interaction.
   * @param imageProcessingOptions the {@link ImageProcessingOptions} specifying how to process the
   *     input image before running inference.
   * @throws MediaPipeException if the task is not in the image mode.
   */
  private ImageSegmenterResult processImageWithRoi(
      MPImage image, RegionOfInterest roi, ImageProcessingOptions imageProcessingOptions) {
    if (runningMode != RunningMode.IMAGE) {
      throw new MediaPipeException(
          MediaPipeException.StatusCode.FAILED_PRECONDITION.ordinal(),
          "Task is not initialized with the image mode. Current running mode:"
              + runningMode.name());
    }
    Map<String, Packet> inputPackets = new HashMap<>();
    inputPackets.put(IMAGE_IN_STREAM_NAME, runner.getPacketCreator().createImage(image));
    RenderData renderData = convertToRenderData(roi);
    inputPackets.put(ROI_IN_STREAM_NAME, runner.getPacketCreator().createProto(renderData));
    inputPackets.put(
        NORM_RECT_IN_STREAM_NAME,
        runner.getPacketCreator().createProto(convertToNormalizedRect(imageProcessingOptions)));
    return (ImageSegmenterResult) runner.process(inputPackets);
  }
 }
--- a/mediapipe/tasks/javatests/com/google/mediapipe/tasks/vision/interactivesegmenter/AndroidManifest.xml
+++ b/mediapipe/tasks/javatests/com/google/mediapipe/tasks/vision/interactivesegmenter/AndroidManifest.xml
@ -0,0 +1,24 @@
 <?xml version="1.0" encoding="utf-8"?>
 <manifest xmlns:android="http://schemas.android.com/apk/res/android"
    package="com.google.mediapipe.tasks.vision.interactivesegmentertest"
    android:versionCode="1"
    android:versionName="1.0" >
    <uses-permission android:name="android.permission.READ_EXTERNAL_STORAGE"/>
    <uses-permission android:name="android.permission.WRITE_EXTERNAL_STORAGE"/>
    <uses-sdk android:minSdkVersion="24"
        android:targetSdkVersion="30" />
    <application
        android:label="interactivesegmentertest"
        android:name="android.support.multidex.MultiDexApplication"
        android:taskAffinity="">
        <uses-library android:name="android.test.runner" />
    </application>
    <instrumentation
        android:name="com.google.android.apps.common.testing.testrunner.GoogleInstrumentationTestRunner"
        android:targetPackage="com.google.mediapipe.tasks.vision.interactivesegmentertest" />
 </manifest>
--- a/mediapipe/tasks/javatests/com/google/mediapipe/tasks/vision/interactivesegmenter/BUILD
+++ b/mediapipe/tasks/javatests/com/google/mediapipe/tasks/vision/interactivesegmenter/BUILD
@ -0,0 +1,19 @@
 # Copyright 2023 The MediaPipe Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #      http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 package(default_visibility = ["//mediapipe/tasks:internal"])
 licenses(["notice"])
 # TODO: Enable this in OSS
--- a/mediapipe/tasks/javatests/com/google/mediapipe/tasks/vision/interactivesegmenter/InteractiveSegmenterTest.java
+++ b/mediapipe/tasks/javatests/com/google/mediapipe/tasks/vision/interactivesegmenter/InteractiveSegmenterTest.java
@ -0,0 +1,92 @@
 // Copyright 2023 The MediaPipe Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //      http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 package com.google.mediapipe.tasks.vision.interactivesegmenter;
 import static com.google.common.truth.Truth.assertThat;
 import android.content.res.AssetManager;
 import android.graphics.BitmapFactory;
 import androidx.test.core.app.ApplicationProvider;
 import androidx.test.ext.junit.runners.AndroidJUnit4;
 import com.google.mediapipe.framework.image.BitmapImageBuilder;
 import com.google.mediapipe.framework.image.MPImage;
 import com.google.mediapipe.tasks.components.containers.NormalizedKeypoint;
 import com.google.mediapipe.tasks.core.BaseOptions;
 import com.google.mediapipe.tasks.vision.imagesegmenter.ImageSegmenterResult;
 import com.google.mediapipe.tasks.vision.interactivesegmenter.InteractiveSegmenter.InteractiveSegmenterOptions;
 import java.io.InputStream;
 import java.util.List;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.Suite;
 import org.junit.runners.Suite.SuiteClasses;
 /** Test for {@link InteractiveSegmenter}. */
@RunWith(Suite.class)
@SuiteClasses({
  InteractiveSegmenterTest.General.class,
 })
 public class InteractiveSegmenterTest {
  private static final String DEEPLAB_MODEL_FILE = "ptm_512_hdt_ptm_woid.tflite";
  private static final String CATS_AND_DOGS_IMAGE = "cats_and_dogs.jpg";
  private static final int MAGNIFICATION_FACTOR = 10;
  @RunWith(AndroidJUnit4.class)
  public static final class General extends InteractiveSegmenterTest {
    @Test
    public void segment_successWithCategoryMask() throws Exception {
      final String inputImageName = CATS_AND_DOGS_IMAGE;
      final InteractiveSegmenter.RegionOfInterest roi =
          InteractiveSegmenter.RegionOfInterest.create(NormalizedKeypoint.create(0.25f, 0.9f));
      InteractiveSegmenterOptions options =
          InteractiveSegmenterOptions.builder()
              .setBaseOptions(BaseOptions.builder().setModelAssetPath(DEEPLAB_MODEL_FILE).build())
              .setOutputType(InteractiveSegmenterOptions.OutputType.CATEGORY_MASK)
              .build();
      InteractiveSegmenter imageSegmenter =
          InteractiveSegmenter.createFromOptions(
              ApplicationProvider.getApplicationContext(), options);
      MPImage image = getImageFromAsset(inputImageName);
      ImageSegmenterResult actualResult = imageSegmenter.segment(image, roi);
      List<MPImage> segmentations = actualResult.segmentations();
      assertThat(segmentations.size()).isEqualTo(1);
    }
    @Test
    public void segment_successWithConfidenceMask() throws Exception {
      final String inputImageName = CATS_AND_DOGS_IMAGE;
      final InteractiveSegmenter.RegionOfInterest roi =
          InteractiveSegmenter.RegionOfInterest.create(NormalizedKeypoint.create(0.25f, 0.9f));
      InteractiveSegmenterOptions options =
          InteractiveSegmenterOptions.builder()
              .setBaseOptions(BaseOptions.builder().setModelAssetPath(DEEPLAB_MODEL_FILE).build())
              .setOutputType(InteractiveSegmenterOptions.OutputType.CONFIDENCE_MASK)
              .build();
      InteractiveSegmenter imageSegmenter =
          InteractiveSegmenter.createFromOptions(
              ApplicationProvider.getApplicationContext(), options);
      ImageSegmenterResult actualResult =
          imageSegmenter.segment(getImageFromAsset(inputImageName), roi);
      List<MPImage> segmentations = actualResult.segmentations();
      assertThat(segmentations.size()).isEqualTo(2);
    }
  }
  private static MPImage getImageFromAsset(String filePath) throws Exception {
    AssetManager assetManager = ApplicationProvider.getApplicationContext().getAssets();
    InputStream istr = assetManager.open(filePath);
    return new BitmapImageBuilder(BitmapFactory.decodeStream(istr)).build();
  }
 }