Add support for image rotation in Java vision tasks.
PiperOrigin-RevId: 483493729
parent b8502decff
commit 94cd134809
@@ -31,6 +31,7 @@ android_binary(
     multidex = "native",
     resource_files = ["//mediapipe/tasks/examples/android:resource_files"],
     deps = [
+        "//mediapipe/java/com/google/mediapipe/framework:android_framework",
         "//mediapipe/java/com/google/mediapipe/framework/image",
         "//mediapipe/tasks/java/com/google/mediapipe/tasks/components/containers:detection",
         "//mediapipe/tasks/java/com/google/mediapipe/tasks/core",
@@ -16,7 +16,6 @@ package com.google.mediapipe.tasks.examples.objectdetector;
 
 import android.content.Intent;
 import android.graphics.Bitmap;
-import android.graphics.Matrix;
 import android.media.MediaMetadataRetriever;
 import android.os.Bundle;
 import android.provider.MediaStore;
@@ -29,9 +28,11 @@ import androidx.activity.result.ActivityResultLauncher;
 import androidx.activity.result.contract.ActivityResultContracts;
 import androidx.exifinterface.media.ExifInterface;
 // ContentResolver dependency
+import com.google.mediapipe.framework.MediaPipeException;
 import com.google.mediapipe.framework.image.BitmapImageBuilder;
 import com.google.mediapipe.framework.image.MPImage;
 import com.google.mediapipe.tasks.core.BaseOptions;
+import com.google.mediapipe.tasks.vision.core.ImageProcessingOptions;
 import com.google.mediapipe.tasks.vision.core.RunningMode;
 import com.google.mediapipe.tasks.vision.objectdetector.ObjectDetectionResult;
 import com.google.mediapipe.tasks.vision.objectdetector.ObjectDetector;
@@ -82,6 +83,7 @@ public class MainActivity extends AppCompatActivity {
           if (resultIntent != null) {
             if (result.getResultCode() == RESULT_OK) {
               Bitmap bitmap = null;
+              int rotation = 0;
               try {
                 bitmap =
                     downscaleBitmap(
@@ -93,13 +95,16 @@ public class MainActivity extends AppCompatActivity {
               try {
                 InputStream imageData =
                     this.getContentResolver().openInputStream(resultIntent.getData());
-                bitmap = rotateBitmap(bitmap, imageData);
-              } catch (IOException e) {
+                rotation = getImageRotation(imageData);
+              } catch (IOException | MediaPipeException e) {
                 Log.e(TAG, "Bitmap rotation error:" + e);
               }
               if (bitmap != null) {
                 MPImage image = new BitmapImageBuilder(bitmap).build();
-                ObjectDetectionResult detectionResult = objectDetector.detect(image);
+                ObjectDetectionResult detectionResult =
+                    objectDetector.detect(
+                        image,
+                        ImageProcessingOptions.builder().setRotationDegrees(rotation).build());
                 imageView.setData(image, detectionResult);
                 runOnUiThread(() -> imageView.update());
               }
@@ -210,28 +215,25 @@ public class MainActivity extends AppCompatActivity {
     return Bitmap.createScaledBitmap(originalBitmap, width, height, false);
   }
 
-  private Bitmap rotateBitmap(Bitmap inputBitmap, InputStream imageData) throws IOException {
+  private int getImageRotation(InputStream imageData) throws IOException, MediaPipeException {
     int orientation =
         new ExifInterface(imageData)
             .getAttributeInt(ExifInterface.TAG_ORIENTATION, ExifInterface.ORIENTATION_NORMAL);
-    if (orientation == ExifInterface.ORIENTATION_NORMAL) {
-      return inputBitmap;
-    }
-    Matrix matrix = new Matrix();
     switch (orientation) {
+      case ExifInterface.ORIENTATION_NORMAL:
+        return 0;
       case ExifInterface.ORIENTATION_ROTATE_90:
-        matrix.postRotate(90);
-        break;
+        return 90;
       case ExifInterface.ORIENTATION_ROTATE_180:
-        matrix.postRotate(180);
-        break;
+        return 180;
      case ExifInterface.ORIENTATION_ROTATE_270:
-        matrix.postRotate(270);
-        break;
+        return 270;
       default:
-        matrix.postRotate(0);
+        // TODO: use getRotationDegrees() and isFlipped() instead of switch once flip
+        // is supported.
+        throw new MediaPipeException(
+            MediaPipeException.StatusCode.UNIMPLEMENTED.ordinal(),
+            "Flipped images are not supported yet.");
     }
-    return Bitmap.createBitmap(
-        inputBitmap, 0, 0, inputBitmap.getWidth(), inputBitmap.getHeight(), matrix, true);
   }
 }
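Note: the example no longer bakes the EXIF rotation into the Bitmap pixels with a Matrix; it reads the orientation tag and hands the rotation to the task. A minimal sketch of the resulting call pattern, assuming a configured ObjectDetector named detector and a content URI (names are illustrative):

    // Read the EXIF-derived rotation (0, 90, 180 or 270), then let the task rotate internally.
    int rotation = getImageRotation(getContentResolver().openInputStream(imageUri));
    MPImage image = new BitmapImageBuilder(bitmap).build();
    ObjectDetectionResult result =
        detector.detect(
            image, ImageProcessingOptions.builder().setRotationDegrees(rotation).build());

Flipped EXIF orientations now surface as a MediaPipeException with status UNIMPLEMENTED, which the activity catches alongside IOException.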
@@ -28,6 +28,7 @@ android_library(
         "//mediapipe/java/com/google/mediapipe/framework:android_framework_no_mff",
         "//mediapipe/java/com/google/mediapipe/framework/image",
         "//mediapipe/tasks/java/com/google/mediapipe/tasks/core",
+        "//third_party:autovalue",
         "@maven//:com_google_guava_guava",
     ],
 )
@@ -24,7 +24,6 @@ import com.google.mediapipe.tasks.core.TaskResult;
 import com.google.mediapipe.tasks.core.TaskRunner;
 import java.util.HashMap;
 import java.util.Map;
-import java.util.Optional;
 
 /** The base class of MediaPipe vision tasks. */
 public class BaseVisionTaskApi implements AutoCloseable {
@@ -32,7 +31,7 @@ public class BaseVisionTaskApi implements AutoCloseable {
   private final TaskRunner runner;
   private final RunningMode runningMode;
   private final String imageStreamName;
-  private final Optional<String> normRectStreamName;
+  private final String normRectStreamName;
 
   static {
     System.loadLibrary("mediapipe_tasks_vision_jni");
@@ -40,27 +39,13 @@ public class BaseVisionTaskApi implements AutoCloseable {
   }
 
   /**
-   * Constructor to initialize a {@link BaseVisionTaskApi} only taking images as input.
+   * Constructor to initialize a {@link BaseVisionTaskApi}.
    *
    * @param runner a {@link TaskRunner}.
    * @param runningMode a mediapipe vision task {@link RunningMode}.
    * @param imageStreamName the name of the input image stream.
-   */
-  public BaseVisionTaskApi(TaskRunner runner, RunningMode runningMode, String imageStreamName) {
-    this.runner = runner;
-    this.runningMode = runningMode;
-    this.imageStreamName = imageStreamName;
-    this.normRectStreamName = Optional.empty();
-  }
-
-  /**
-   * Constructor to initialize a {@link BaseVisionTaskApi} taking images and normalized rects as
-   * input.
-   *
-   * @param runner a {@link TaskRunner}.
-   * @param runningMode a mediapipe vision task {@link RunningMode}.
-   * @param imageStreamName the name of the input image stream.
-   * @param normRectStreamName the name of the input normalized rect image stream.
+   * @param normRectStreamName the name of the input normalized rect image stream used to provide
+   *     (mandatory) rotation and (optional) region-of-interest.
    */
   public BaseVisionTaskApi(
       TaskRunner runner,
@@ -70,7 +55,7 @@ public class BaseVisionTaskApi implements AutoCloseable {
     this.runner = runner;
     this.runningMode = runningMode;
     this.imageStreamName = imageStreamName;
-    this.normRectStreamName = Optional.of(normRectStreamName);
+    this.normRectStreamName = normRectStreamName;
   }
 
   /**
@@ -78,53 +63,23 @@ public class BaseVisionTaskApi implements AutoCloseable {
    * failure status or a successful result is returned.
    *
    * @param image a MediaPipe {@link MPImage} object for processing.
-   * @throws MediaPipeException if the task is not in the image mode or requires a normalized rect
-   *     input.
+   * @param imageProcessingOptions the {@link ImageProcessingOptions} specifying how to process the
+   *     input image before running inference.
+   * @throws MediaPipeException if the task is not in the image mode.
    */
-  protected TaskResult processImageData(MPImage image) {
+  protected TaskResult processImageData(
+      MPImage image, ImageProcessingOptions imageProcessingOptions) {
     if (runningMode != RunningMode.IMAGE) {
       throw new MediaPipeException(
           MediaPipeException.StatusCode.FAILED_PRECONDITION.ordinal(),
           "Task is not initialized with the image mode. Current running mode:"
               + runningMode.name());
     }
-    if (normRectStreamName.isPresent()) {
-      throw new MediaPipeException(
-          MediaPipeException.StatusCode.FAILED_PRECONDITION.ordinal(),
-          "Task expects a normalized rect as input.");
-    }
     Map<String, Packet> inputPackets = new HashMap<>();
     inputPackets.put(imageStreamName, runner.getPacketCreator().createImage(image));
-    return runner.process(inputPackets);
-  }
-
-  /**
-   * A synchronous method to process single image inputs. The call blocks the current thread until a
-   * failure status or a successful result is returned.
-   *
-   * @param image a MediaPipe {@link MPImage} object for processing.
-   * @param roi a {@link RectF} defining the region-of-interest to process in the image. Coordinates
-   *     are expected to be specified as normalized values in [0,1].
-   * @throws MediaPipeException if the task is not in the image mode or doesn't require a normalized
-   *     rect.
-   */
-  protected TaskResult processImageData(MPImage image, RectF roi) {
-    if (runningMode != RunningMode.IMAGE) {
-      throw new MediaPipeException(
-          MediaPipeException.StatusCode.FAILED_PRECONDITION.ordinal(),
-          "Task is not initialized with the image mode. Current running mode:"
-              + runningMode.name());
-    }
-    if (!normRectStreamName.isPresent()) {
-      throw new MediaPipeException(
-          MediaPipeException.StatusCode.FAILED_PRECONDITION.ordinal(),
-          "Task doesn't expect a normalized rect as input.");
-    }
-    Map<String, Packet> inputPackets = new HashMap<>();
-    inputPackets.put(imageStreamName, runner.getPacketCreator().createImage(image));
     inputPackets.put(
-        normRectStreamName.get(),
-        runner.getPacketCreator().createProto(convertToNormalizedRect(roi)));
+        normRectStreamName,
+        runner.getPacketCreator().createProto(convertToNormalizedRect(imageProcessingOptions)));
     return runner.process(inputPackets);
   }
 
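With the Optional gone, the base class now always feeds both the image stream and the normalized-rect stream. A sketch of how a concrete task would forward a call into this unified entry point (MyTaskResult and run are hypothetical; the concrete tasks further down in this commit do exactly this):

    // Hypothetical subclass method mirroring ObjectDetector.detect / ImageClassifier.classify.
    public MyTaskResult run(MPImage image, ImageProcessingOptions imageProcessingOptions) {
      return (MyTaskResult) processImageData(image, imageProcessingOptions);
    }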
@@ -133,55 +88,24 @@ public class BaseVisionTaskApi implements AutoCloseable {
    * until a failure status or a successful result is returned.
    *
    * @param image a MediaPipe {@link MPImage} object for processing.
+   * @param imageProcessingOptions the {@link ImageProcessingOptions} specifying how to process the
+   *     input image before running inference.
    * @param timestampMs the corresponding timestamp of the input image in milliseconds.
-   * @throws MediaPipeException if the task is not in the video mode or requires a normalized rect
-   *     input.
+   * @throws MediaPipeException if the task is not in the video mode.
    */
-  protected TaskResult processVideoData(MPImage image, long timestampMs) {
+  protected TaskResult processVideoData(
+      MPImage image, ImageProcessingOptions imageProcessingOptions, long timestampMs) {
     if (runningMode != RunningMode.VIDEO) {
       throw new MediaPipeException(
           MediaPipeException.StatusCode.FAILED_PRECONDITION.ordinal(),
           "Task is not initialized with the video mode. Current running mode:"
               + runningMode.name());
     }
-    if (normRectStreamName.isPresent()) {
-      throw new MediaPipeException(
-          MediaPipeException.StatusCode.FAILED_PRECONDITION.ordinal(),
-          "Task expects a normalized rect as input.");
-    }
     Map<String, Packet> inputPackets = new HashMap<>();
     inputPackets.put(imageStreamName, runner.getPacketCreator().createImage(image));
-    return runner.process(inputPackets, timestampMs * MICROSECONDS_PER_MILLISECOND);
-  }
-
-  /**
-   * A synchronous method to process continuous video frames. The call blocks the current thread
-   * until a failure status or a successful result is returned.
-   *
-   * @param image a MediaPipe {@link MPImage} object for processing.
-   * @param roi a {@link RectF} defining the region-of-interest to process in the image. Coordinates
-   *     are expected to be specified as normalized values in [0,1].
-   * @param timestampMs the corresponding timestamp of the input image in milliseconds.
-   * @throws MediaPipeException if the task is not in the video mode or doesn't require a normalized
-   *     rect.
-   */
-  protected TaskResult processVideoData(MPImage image, RectF roi, long timestampMs) {
-    if (runningMode != RunningMode.VIDEO) {
-      throw new MediaPipeException(
-          MediaPipeException.StatusCode.FAILED_PRECONDITION.ordinal(),
-          "Task is not initialized with the video mode. Current running mode:"
-              + runningMode.name());
-    }
-    if (!normRectStreamName.isPresent()) {
-      throw new MediaPipeException(
-          MediaPipeException.StatusCode.FAILED_PRECONDITION.ordinal(),
-          "Task doesn't expect a normalized rect as input.");
-    }
-    Map<String, Packet> inputPackets = new HashMap<>();
-    inputPackets.put(imageStreamName, runner.getPacketCreator().createImage(image));
     inputPackets.put(
-        normRectStreamName.get(),
-        runner.getPacketCreator().createProto(convertToNormalizedRect(roi)));
+        normRectStreamName,
+        runner.getPacketCreator().createProto(convertToNormalizedRect(imageProcessingOptions)));
     return runner.process(inputPackets, timestampMs * MICROSECONDS_PER_MILLISECOND);
   }
 
@@ -190,55 +114,24 @@ public class BaseVisionTaskApi implements AutoCloseable {
    * available in the user-defined result listener.
    *
    * @param image a MediaPipe {@link MPImage} object for processing.
+   * @param imageProcessingOptions the {@link ImageProcessingOptions} specifying how to process the
+   *     input image before running inference.
    * @param timestampMs the corresponding timestamp of the input image in milliseconds.
-   * @throws MediaPipeException if the task is not in the video mode or requires a normalized rect
-   *     input.
+   * @throws MediaPipeException if the task is not in the stream mode.
    */
-  protected void sendLiveStreamData(MPImage image, long timestampMs) {
+  protected void sendLiveStreamData(
+      MPImage image, ImageProcessingOptions imageProcessingOptions, long timestampMs) {
     if (runningMode != RunningMode.LIVE_STREAM) {
       throw new MediaPipeException(
           MediaPipeException.StatusCode.FAILED_PRECONDITION.ordinal(),
           "Task is not initialized with the live stream mode. Current running mode:"
               + runningMode.name());
     }
-    if (normRectStreamName.isPresent()) {
-      throw new MediaPipeException(
-          MediaPipeException.StatusCode.FAILED_PRECONDITION.ordinal(),
-          "Task expects a normalized rect as input.");
-    }
     Map<String, Packet> inputPackets = new HashMap<>();
     inputPackets.put(imageStreamName, runner.getPacketCreator().createImage(image));
-    runner.send(inputPackets, timestampMs * MICROSECONDS_PER_MILLISECOND);
-  }
-
-  /**
-   * An asynchronous method to send live stream data to the {@link TaskRunner}. The results will be
-   * available in the user-defined result listener.
-   *
-   * @param image a MediaPipe {@link MPImage} object for processing.
-   * @param roi a {@link RectF} defining the region-of-interest to process in the image. Coordinates
-   *     are expected to be specified as normalized values in [0,1].
-   * @param timestampMs the corresponding timestamp of the input image in milliseconds.
-   * @throws MediaPipeException if the task is not in the video mode or doesn't require a normalized
-   *     rect.
-   */
-  protected void sendLiveStreamData(MPImage image, RectF roi, long timestampMs) {
-    if (runningMode != RunningMode.LIVE_STREAM) {
-      throw new MediaPipeException(
-          MediaPipeException.StatusCode.FAILED_PRECONDITION.ordinal(),
-          "Task is not initialized with the live stream mode. Current running mode:"
-              + runningMode.name());
-    }
-    if (!normRectStreamName.isPresent()) {
-      throw new MediaPipeException(
-          MediaPipeException.StatusCode.FAILED_PRECONDITION.ordinal(),
-          "Task doesn't expect a normalized rect as input.");
-    }
-    Map<String, Packet> inputPackets = new HashMap<>();
-    inputPackets.put(imageStreamName, runner.getPacketCreator().createImage(image));
     inputPackets.put(
-        normRectStreamName.get(),
-        runner.getPacketCreator().createProto(convertToNormalizedRect(roi)));
+        normRectStreamName,
+        runner.getPacketCreator().createProto(convertToNormalizedRect(imageProcessingOptions)));
     runner.send(inputPackets, timestampMs * MICROSECONDS_PER_MILLISECOND);
   }
 
@@ -248,13 +141,23 @@ public class BaseVisionTaskApi implements AutoCloseable {
     runner.close();
   }
 
-  /** Converts a {@link RectF} object into a {@link NormalizedRect} protobuf message. */
-  private static NormalizedRect convertToNormalizedRect(RectF rect) {
+  /**
+   * Converts an {@link ImageProcessingOptions} instance into a {@link NormalizedRect} protobuf
+   * message.
+   */
+  private static NormalizedRect convertToNormalizedRect(
+      ImageProcessingOptions imageProcessingOptions) {
+    RectF regionOfInterest =
+        imageProcessingOptions.regionOfInterest().isPresent()
+            ? imageProcessingOptions.regionOfInterest().get()
+            : new RectF(0, 0, 1, 1);
     return NormalizedRect.newBuilder()
-        .setXCenter(rect.centerX())
-        .setYCenter(rect.centerY())
-        .setWidth(rect.width())
-        .setHeight(rect.height())
+        .setXCenter(regionOfInterest.centerX())
+        .setYCenter(regionOfInterest.centerY())
+        .setWidth(regionOfInterest.width())
+        .setHeight(regionOfInterest.height())
+        // Convert to radians anti-clockwise.
+        .setRotation(-(float) Math.PI * imageProcessingOptions.rotationDegrees() / 180.0f)
+        .build();
   }
 }
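The sign flip in setRotation is the crux of the conversion: ImageProcessingOptions counts degrees clockwise, while NormalizedRect.rotation expects radians counter-clockwise. A quick worked check of the formula (a sketch, not part of the commit):

    // 90 degrees clockwise in ImageProcessingOptions becomes -PI/2 radians in NormalizedRect.
    float rotation = -(float) Math.PI * 90 / 180.0f; // == -1.5707964f
    // -90 degrees (i.e. counter-clockwise) becomes +PI/2.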
@@ -0,0 +1,92 @@
+// Copyright 2022 The MediaPipe Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.mediapipe.tasks.vision.core;
+
+import android.graphics.RectF;
+import com.google.auto.value.AutoValue;
+import java.util.Optional;
+
+// TODO: add support for image flipping.
+/** Options for image processing. */
+@AutoValue
+public abstract class ImageProcessingOptions {
+
+  /**
+   * Builder for {@link ImageProcessingOptions}.
+   *
+   * <p>If both region-of-interest and rotation are specified, the crop around the
+   * region-of-interest is extracted first, then the specified rotation is applied to the crop.
+   */
+  @AutoValue.Builder
+  public abstract static class Builder {
+    /**
+     * Sets the optional region-of-interest to crop from the image. If not specified, the full image
+     * is used.
+     *
+     * <p>Coordinates must be in [0,1], {@code left} must be < {@code right} and {@code top} must be
+     * < {@code bottom}, otherwise an IllegalArgumentException will be thrown when {@link #build()}
+     * is called.
+     */
+    public abstract Builder setRegionOfInterest(RectF value);
+
+    /**
+     * Sets the rotation to apply to the image (or cropped region-of-interest), in degrees
+     * clockwise. Defaults to 0.
+     *
+     * <p>The rotation must be a multiple (positive or negative) of 90°, otherwise an
+     * IllegalArgumentException will be thrown when {@link #build()} is called.
+     */
+    public abstract Builder setRotationDegrees(int value);
+
+    abstract ImageProcessingOptions autoBuild();
+
+    /**
+     * Validates and builds the {@link ImageProcessingOptions} instance.
+     *
+     * @throws IllegalArgumentException if some of the provided values do not meet their
+     *     requirements.
+     */
+    public final ImageProcessingOptions build() {
+      ImageProcessingOptions options = autoBuild();
+      if (options.regionOfInterest().isPresent()) {
+        RectF roi = options.regionOfInterest().get();
+        if (roi.left >= roi.right || roi.top >= roi.bottom) {
+          throw new IllegalArgumentException(
+              String.format(
+                  "Expected left < right and top < bottom, found: %s.", roi.toShortString()));
+        }
+        if (roi.left < 0 || roi.right > 1 || roi.top < 0 || roi.bottom > 1) {
+          throw new IllegalArgumentException(
+              String.format("Expected RectF values in [0,1], found: %s.", roi.toShortString()));
+        }
+      }
+      if (options.rotationDegrees() % 90 != 0) {
+        throw new IllegalArgumentException(
+            String.format(
+                "Expected rotation to be a multiple of 90°, found: %d.",
+                options.rotationDegrees()));
+      }
+      return options;
+    }
+  }
+
+  public abstract Optional<RectF> regionOfInterest();
+
+  public abstract int rotationDegrees();
+
+  public static Builder builder() {
+    return new AutoValue_ImageProcessingOptions.Builder().setRotationDegrees(0);
+  }
+}
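A short usage sketch of the new options class; per the Builder javadoc above, the region-of-interest crop is extracted first and the rotation is then applied to the crop:

    // Crop the top half of the image, then rotate that crop 90 degrees clockwise.
    ImageProcessingOptions options =
        ImageProcessingOptions.builder()
            .setRegionOfInterest(new RectF(0.0f, 0.0f, 1.0f, 0.5f))
            .setRotationDegrees(90)
            .build();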
@@ -15,7 +15,6 @@
 package com.google.mediapipe.tasks.vision.gesturerecognizer;
 
 import android.content.Context;
-import android.graphics.RectF;
 import android.os.ParcelFileDescriptor;
 import com.google.auto.value.AutoValue;
 import com.google.mediapipe.formats.proto.LandmarkProto.LandmarkList;
@@ -37,6 +36,7 @@ import com.google.mediapipe.tasks.core.TaskOptions;
 import com.google.mediapipe.tasks.core.TaskRunner;
 import com.google.mediapipe.tasks.core.proto.BaseOptionsProto;
 import com.google.mediapipe.tasks.vision.core.BaseVisionTaskApi;
+import com.google.mediapipe.tasks.vision.core.ImageProcessingOptions;
 import com.google.mediapipe.tasks.vision.core.RunningMode;
 import com.google.mediapipe.tasks.vision.gesturerecognizer.proto.GestureClassifierGraphOptionsProto;
 import com.google.mediapipe.tasks.vision.gesturerecognizer.proto.GestureRecognizerGraphOptionsProto;
@@ -212,6 +212,25 @@ public final class GestureRecognizer extends BaseVisionTaskApi {
     super(taskRunner, runningMode, IMAGE_IN_STREAM_NAME, NORM_RECT_IN_STREAM_NAME);
   }
 
+  /**
+   * Performs gesture recognition on the provided single image with default image processing
+   * options, i.e. without any rotation applied. Only use this method when the {@link
+   * GestureRecognizer} is created with {@link RunningMode.IMAGE}. TODO update java doc
+   * for input image format.
+   *
+   * <p>{@link GestureRecognizer} supports the following color space types:
+   *
+   * <ul>
+   *   <li>{@link Bitmap.Config.ARGB_8888}
+   * </ul>
+   *
+   * @param image a MediaPipe {@link MPImage} object for processing.
+   * @throws MediaPipeException if there is an internal error.
+   */
+  public GestureRecognitionResult recognize(MPImage image) {
+    return recognize(image, ImageProcessingOptions.builder().build());
+  }
+
   /**
    * Performs gesture recognition on the provided single image. Only use this method when the {@link
    * GestureRecognizer} is created with {@link RunningMode.IMAGE}. TODO update java doc
@@ -223,12 +242,41 @@ public final class GestureRecognizer extends BaseVisionTaskApi {
    *   <li>{@link Bitmap.Config.ARGB_8888}
    * </ul>
    *
-   * @param inputImage a MediaPipe {@link MPImage} object for processing.
+   * @param image a MediaPipe {@link MPImage} object for processing.
+   * @param imageProcessingOptions the {@link ImageProcessingOptions} specifying how to process the
+   *     input image before running inference. Note that region-of-interest is <b>not</b> supported
+   *     by this task: specifying {@link ImageProcessingOptions#regionOfInterest()} will result in
+   *     this method throwing an IllegalArgumentException.
+   * @throws IllegalArgumentException if the {@link ImageProcessingOptions} specify a
+   *     region-of-interest.
    * @throws MediaPipeException if there is an internal error.
    */
-  public GestureRecognitionResult recognize(MPImage inputImage) {
-    // TODO: add proper support for rotations.
-    return (GestureRecognitionResult) processImageData(inputImage, buildFullImageRectF());
+  public GestureRecognitionResult recognize(
+      MPImage image, ImageProcessingOptions imageProcessingOptions) {
+    validateImageProcessingOptions(imageProcessingOptions);
+    return (GestureRecognitionResult) processImageData(image, imageProcessingOptions);
   }
 
   /**
+   * Performs gesture recognition on the provided video frame with default image processing options,
+   * i.e. without any rotation applied. Only use this method when the {@link GestureRecognizer} is
+   * created with {@link RunningMode.VIDEO}.
+   *
+   * <p>It's required to provide the video frame's timestamp (in milliseconds). The input timestamps
+   * must be monotonically increasing.
+   *
+   * <p>{@link GestureRecognizer} supports the following color space types:
+   *
+   * <ul>
+   *   <li>{@link Bitmap.Config.ARGB_8888}
+   * </ul>
+   *
+   * @param image a MediaPipe {@link MPImage} object for processing.
+   * @param timestampMs the input timestamp (in milliseconds).
+   * @throws MediaPipeException if there is an internal error.
+   */
+  public GestureRecognitionResult recognizeForVideo(MPImage image, long timestampMs) {
+    return recognizeForVideo(image, ImageProcessingOptions.builder().build(), timestampMs);
+  }
+
+  /**
|
@ -244,14 +292,43 @@ public final class GestureRecognizer extends BaseVisionTaskApi {
|
|||
* <li>{@link Bitmap.Config.ARGB_8888}
|
||||
* </ul>
|
||||
*
|
||||
* @param inputImage a MediaPipe {@link MPImage} object for processing.
|
||||
* @param inputTimestampMs the input timestamp (in milliseconds).
|
||||
* @param image a MediaPipe {@link MPImage} object for processing.
|
||||
* @param imageProcessingOptions the {@link ImageProcessingOptions} specifying how to process the
|
||||
* input image before running inference. Note that region-of-interest is <b>not</b> supported
|
||||
* by this task: specifying {@link ImageProcessingOptions#regionOfInterest()} will result in
|
||||
* this method throwing an IllegalArgumentException.
|
||||
* @param timestampMs the input timestamp (in milliseconds).
|
||||
* @throws IllegalArgumentException if the {@link ImageProcessingOptions} specify a
|
||||
* region-of-interest.
|
||||
* @throws MediaPipeException if there is an internal error.
|
||||
*/
|
||||
public GestureRecognitionResult recognizeForVideo(MPImage inputImage, long inputTimestampMs) {
|
||||
// TODO: add proper support for rotations.
|
||||
return (GestureRecognitionResult)
|
||||
processVideoData(inputImage, buildFullImageRectF(), inputTimestampMs);
|
||||
public GestureRecognitionResult recognizeForVideo(
|
||||
MPImage image, ImageProcessingOptions imageProcessingOptions, long timestampMs) {
|
||||
validateImageProcessingOptions(imageProcessingOptions);
|
||||
return (GestureRecognitionResult) processVideoData(image, imageProcessingOptions, timestampMs);
|
||||
}
|
||||
|
||||
/**
|
||||
* Sends live image data to perform gesture recognition with default image processing options,
|
||||
* i.e. without any rotation applied, and the results will be available via the {@link
|
||||
* ResultListener} provided in the {@link GestureRecognizerOptions}. Only use this method when the
|
||||
* {@link GestureRecognition} is created with {@link RunningMode.LIVE_STREAM}.
|
||||
*
|
||||
* <p>It's required to provide a timestamp (in milliseconds) to indicate when the input image is
|
||||
* sent to the gesture recognizer. The input timestamps must be monotonically increasing.
|
||||
*
|
||||
* <p>{@link GestureRecognizer} supports the following color space types:
|
||||
*
|
||||
* <ul>
|
||||
* <li>{@link Bitmap.Config.ARGB_8888}
|
||||
* </ul>
|
||||
*
|
||||
* @param image a MediaPipe {@link MPImage} object for processing.
|
||||
* @param timestampMs the input timestamp (in milliseconds).
|
||||
* @throws MediaPipeException if there is an internal error.
|
||||
*/
|
||||
public void recognizeAsync(MPImage image, long timestampMs) {
|
||||
recognizeAsync(image, ImageProcessingOptions.builder().build(), timestampMs);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@@ -268,13 +345,20 @@ public final class GestureRecognizer extends BaseVisionTaskApi {
    *   <li>{@link Bitmap.Config.ARGB_8888}
    * </ul>
    *
-   * @param inputImage a MediaPipe {@link MPImage} object for processing.
-   * @param inputTimestampMs the input timestamp (in milliseconds).
+   * @param image a MediaPipe {@link MPImage} object for processing.
+   * @param imageProcessingOptions the {@link ImageProcessingOptions} specifying how to process the
+   *     input image before running inference. Note that region-of-interest is <b>not</b> supported
+   *     by this task: specifying {@link ImageProcessingOptions#regionOfInterest()} will result in
+   *     this method throwing an IllegalArgumentException.
+   * @param timestampMs the input timestamp (in milliseconds).
+   * @throws IllegalArgumentException if the {@link ImageProcessingOptions} specify a
+   *     region-of-interest.
    * @throws MediaPipeException if there is an internal error.
    */
-  public void recognizeAsync(MPImage inputImage, long inputTimestampMs) {
-    // TODO: add proper support for rotations.
-    sendLiveStreamData(inputImage, buildFullImageRectF(), inputTimestampMs);
+  public void recognizeAsync(
+      MPImage image, ImageProcessingOptions imageProcessingOptions, long timestampMs) {
+    validateImageProcessingOptions(imageProcessingOptions);
+    sendLiveStreamData(image, imageProcessingOptions, timestampMs);
   }
 
   /** Options for setting up an {@link GestureRecognizer}. */
@@ -445,8 +529,14 @@ public final class GestureRecognizer extends BaseVisionTaskApi {
       }
     }
 
-  /** Creates a RectF covering the full image. */
-  private static RectF buildFullImageRectF() {
-    return new RectF(0, 0, 1, 1);
+  /**
+   * Validates that the provided {@link ImageProcessingOptions} doesn't contain a
+   * region-of-interest.
+   */
+  private static void validateImageProcessingOptions(
+      ImageProcessingOptions imageProcessingOptions) {
+    if (imageProcessingOptions.regionOfInterest().isPresent()) {
+      throw new IllegalArgumentException("GestureRecognizer doesn't support region-of-interest.");
+    }
   }
 }
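Caller-side sketch for the new GestureRecognizer overloads (gestureRecognizer and image assumed to be set up already); a negative value rotates counter-clockwise, as exercised by the new test further down:

    ImageProcessingOptions imageProcessingOptions =
        ImageProcessingOptions.builder().setRotationDegrees(-90).build();
    GestureRecognitionResult result = gestureRecognizer.recognize(image, imageProcessingOptions);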
@@ -15,7 +15,6 @@
 package com.google.mediapipe.tasks.vision.imageclassifier;
 
 import android.content.Context;
-import android.graphics.RectF;
 import android.os.ParcelFileDescriptor;
 import com.google.auto.value.AutoValue;
 import com.google.mediapipe.proto.CalculatorOptionsProto.CalculatorOptions;
@@ -37,6 +36,7 @@ import com.google.mediapipe.tasks.core.TaskOptions;
 import com.google.mediapipe.tasks.core.TaskRunner;
 import com.google.mediapipe.tasks.core.proto.BaseOptionsProto;
 import com.google.mediapipe.tasks.vision.core.BaseVisionTaskApi;
+import com.google.mediapipe.tasks.vision.core.ImageProcessingOptions;
 import com.google.mediapipe.tasks.vision.core.RunningMode;
 import com.google.mediapipe.tasks.vision.imageclassifier.proto.ImageClassifierGraphOptionsProto;
 import java.io.File;
@@ -215,6 +215,24 @@ public final class ImageClassifier extends BaseVisionTaskApi {
     super(taskRunner, runningMode, IMAGE_IN_STREAM_NAME, NORM_RECT_IN_STREAM_NAME);
   }
 
+  /**
+   * Performs classification on the provided single image with default image processing options,
+   * i.e. using the whole image as region-of-interest and without any rotation applied. Only use
+   * this method when the {@link ImageClassifier} is created with {@link RunningMode.IMAGE}.
+   *
+   * <p>{@link ImageClassifier} supports the following color space types:
+   *
+   * <ul>
+   *   <li>{@link Bitmap.Config.ARGB_8888}
+   * </ul>
+   *
+   * @param image a MediaPipe {@link MPImage} object for processing.
+   * @throws MediaPipeException if there is an internal error.
+   */
+  public ImageClassificationResult classify(MPImage image) {
+    return classify(image, ImageProcessingOptions.builder().build());
+  }
+
   /**
    * Performs classification on the provided single image. Only use this method when the {@link
    * ImageClassifier} is created with {@link RunningMode.IMAGE}.
@@ -225,16 +243,23 @@ public final class ImageClassifier extends BaseVisionTaskApi {
    *   <li>{@link Bitmap.Config.ARGB_8888}
    * </ul>
    *
-   * @param inputImage a MediaPipe {@link MPImage} object for processing.
+   * @param image a MediaPipe {@link MPImage} object for processing.
+   * @param imageProcessingOptions the {@link ImageProcessingOptions} specifying how to process the
+   *     input image before running inference.
    * @throws MediaPipeException if there is an internal error.
    */
-  public ImageClassificationResult classify(MPImage inputImage) {
-    return (ImageClassificationResult) processImageData(inputImage, buildFullImageRectF());
+  public ImageClassificationResult classify(
+      MPImage image, ImageProcessingOptions imageProcessingOptions) {
+    return (ImageClassificationResult) processImageData(image, imageProcessingOptions);
   }
 
   /**
-   * Performs classification on the provided single image and region-of-interest. Only use this
-   * method when the {@link ImageClassifier} is created with {@link RunningMode.IMAGE}.
+   * Performs classification on the provided video frame with default image processing options, i.e.
+   * using the whole image as region-of-interest and without any rotation applied. Only use this
+   * method when the {@link ImageClassifier} is created with {@link RunningMode.VIDEO}.
    *
+   * <p>It's required to provide the video frame's timestamp (in milliseconds). The input timestamps
+   * must be monotonically increasing.
+   *
    * <p>{@link ImageClassifier} supports the following color space types:
    *
@@ -242,13 +267,12 @@ public final class ImageClassifier extends BaseVisionTaskApi {
    *   <li>{@link Bitmap.Config.ARGB_8888}
    * </ul>
    *
-   * @param inputImage a MediaPipe {@link MPImage} object for processing.
-   * @param roi a {@link RectF} specifying the region of interest on which to perform
-   *     classification. Coordinates are expected to be specified as normalized values in [0,1].
+   * @param image a MediaPipe {@link MPImage} object for processing.
+   * @param timestampMs the input timestamp (in milliseconds).
    * @throws MediaPipeException if there is an internal error.
    */
-  public ImageClassificationResult classify(MPImage inputImage, RectF roi) {
-    return (ImageClassificationResult) processImageData(inputImage, roi);
+  public ImageClassificationResult classifyForVideo(MPImage image, long timestampMs) {
+    return classifyForVideo(image, ImageProcessingOptions.builder().build(), timestampMs);
   }
 
   /**
@@ -264,21 +288,26 @@ public final class ImageClassifier extends BaseVisionTaskApi {
    *   <li>{@link Bitmap.Config.ARGB_8888}
    * </ul>
    *
-   * @param inputImage a MediaPipe {@link MPImage} object for processing.
-   * @param inputTimestampMs the input timestamp (in milliseconds).
+   * @param image a MediaPipe {@link MPImage} object for processing.
+   * @param imageProcessingOptions the {@link ImageProcessingOptions} specifying how to process the
+   *     input image before running inference.
+   * @param timestampMs the input timestamp (in milliseconds).
    * @throws MediaPipeException if there is an internal error.
    */
-  public ImageClassificationResult classifyForVideo(MPImage inputImage, long inputTimestampMs) {
-    return (ImageClassificationResult)
-        processVideoData(inputImage, buildFullImageRectF(), inputTimestampMs);
+  public ImageClassificationResult classifyForVideo(
+      MPImage image, ImageProcessingOptions imageProcessingOptions, long timestampMs) {
+    return (ImageClassificationResult) processVideoData(image, imageProcessingOptions, timestampMs);
   }
 
   /**
-   * Performs classification on the provided video frame with additional region-of-interest. Only
-   * use this method when the {@link ImageClassifier} is created with {@link RunningMode.VIDEO}.
+   * Sends live image data to perform classification with default image processing options, i.e.
+   * using the whole image as region-of-interest and without any rotation applied, and the results
+   * will be available via the {@link ResultListener} provided in the {@link
+   * ImageClassifierOptions}. Only use this method when the {@link ImageClassifier} is created with
+   * {@link RunningMode.LIVE_STREAM}.
    *
-   * <p>It's required to provide the video frame's timestamp (in milliseconds). The input timestamps
-   * must be monotonically increasing.
+   * <p>It's required to provide a timestamp (in milliseconds) to indicate when the input image is
+   * sent to the object detector. The input timestamps must be monotonically increasing.
    *
    * <p>{@link ImageClassifier} supports the following color space types:
    *
@@ -286,15 +315,12 @@ public final class ImageClassifier extends BaseVisionTaskApi {
    *   <li>{@link Bitmap.Config.ARGB_8888}
    * </ul>
    *
-   * @param inputImage a MediaPipe {@link MPImage} object for processing.
-   * @param roi a {@link RectF} specifying the region of interest on which to perform
-   *     classification. Coordinates are expected to be specified as normalized values in [0,1].
-   * @param inputTimestampMs the input timestamp (in milliseconds).
+   * @param image a MediaPipe {@link MPImage} object for processing.
+   * @param timestampMs the input timestamp (in milliseconds).
    * @throws MediaPipeException if there is an internal error.
    */
-  public ImageClassificationResult classifyForVideo(
-      MPImage inputImage, RectF roi, long inputTimestampMs) {
-    return (ImageClassificationResult) processVideoData(inputImage, roi, inputTimestampMs);
+  public void classifyAsync(MPImage image, long timestampMs) {
+    classifyAsync(image, ImageProcessingOptions.builder().build(), timestampMs);
   }
 
   /**
@@ -311,37 +337,15 @@ public final class ImageClassifier extends BaseVisionTaskApi {
    *   <li>{@link Bitmap.Config.ARGB_8888}
    * </ul>
    *
-   * @param inputImage a MediaPipe {@link MPImage} object for processing.
-   * @param inputTimestampMs the input timestamp (in milliseconds).
+   * @param image a MediaPipe {@link MPImage} object for processing.
+   * @param imageProcessingOptions the {@link ImageProcessingOptions} specifying how to process the
+   *     input image before running inference.
+   * @param timestampMs the input timestamp (in milliseconds).
    * @throws MediaPipeException if there is an internal error.
    */
-  public void classifyAsync(MPImage inputImage, long inputTimestampMs) {
-    sendLiveStreamData(inputImage, buildFullImageRectF(), inputTimestampMs);
-  }
-
-  /**
-   * Sends live image data and additional region-of-interest to perform classification, and the
-   * results will be available via the {@link ResultListener} provided in the {@link
-   * ImageClassifierOptions}. Only use this method when the {@link ImageClassifier} is created with
-   * {@link RunningMode.LIVE_STREAM}.
-   *
-   * <p>It's required to provide a timestamp (in milliseconds) to indicate when the input image is
-   * sent to the object detector. The input timestamps must be monotonically increasing.
-   *
-   * <p>{@link ImageClassifier} supports the following color space types:
-   *
-   * <ul>
-   *   <li>{@link Bitmap.Config.ARGB_8888}
-   * </ul>
-   *
-   * @param inputImage a MediaPipe {@link MPImage} object for processing.
-   * @param roi a {@link RectF} specifying the region of interest on which to perform
-   *     classification. Coordinates are expected to be specified as normalized values in [0,1].
-   * @param inputTimestampMs the input timestamp (in milliseconds).
-   * @throws MediaPipeException if there is an internal error.
-   */
-  public void classifyAsync(MPImage inputImage, RectF roi, long inputTimestampMs) {
-    sendLiveStreamData(inputImage, roi, inputTimestampMs);
+  public void classifyAsync(
+      MPImage image, ImageProcessingOptions imageProcessingOptions, long timestampMs) {
+    sendLiveStreamData(image, imageProcessingOptions, timestampMs);
   }
 
   /** Options for setting up and {@link ImageClassifier}. */
@@ -447,9 +451,4 @@ public final class ImageClassifier extends BaseVisionTaskApi {
           .build();
     }
   }
-
-  /** Creates a RectF covering the full image. */
-  private static RectF buildFullImageRectF() {
-    return new RectF(0, 0, 1, 1);
-  }
 }
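Since the RectF overloads are removed, existing ImageClassifier callers migrate by wrapping the same normalized rect in ImageProcessingOptions (classifier and image assumed; a sketch):

    // Before: classifier.classify(image, new RectF(0.25f, 0.25f, 0.75f, 0.75f));
    ImageProcessingOptions options =
        ImageProcessingOptions.builder()
            .setRegionOfInterest(new RectF(0.25f, 0.25f, 0.75f, 0.75f))
            .build();
    ImageClassificationResult result = classifier.classify(image, options);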
@@ -32,6 +32,7 @@ import com.google.mediapipe.tasks.core.TaskOptions;
 import com.google.mediapipe.tasks.core.TaskRunner;
 import com.google.mediapipe.tasks.core.proto.BaseOptionsProto;
 import com.google.mediapipe.tasks.vision.core.BaseVisionTaskApi;
+import com.google.mediapipe.tasks.vision.core.ImageProcessingOptions;
 import com.google.mediapipe.tasks.vision.core.RunningMode;
 import com.google.mediapipe.tasks.vision.objectdetector.proto.ObjectDetectorOptionsProto;
 import com.google.mediapipe.formats.proto.DetectionProto.Detection;
@@ -96,8 +97,10 @@ import java.util.Optional;
 public final class ObjectDetector extends BaseVisionTaskApi {
   private static final String TAG = ObjectDetector.class.getSimpleName();
   private static final String IMAGE_IN_STREAM_NAME = "image_in";
+  private static final String NORM_RECT_IN_STREAM_NAME = "norm_rect_in";
   private static final List<String> INPUT_STREAMS =
-      Collections.unmodifiableList(Arrays.asList("IMAGE:" + IMAGE_IN_STREAM_NAME));
+      Collections.unmodifiableList(
+          Arrays.asList("IMAGE:" + IMAGE_IN_STREAM_NAME, "NORM_RECT:" + NORM_RECT_IN_STREAM_NAME));
   private static final List<String> OUTPUT_STREAMS =
       Collections.unmodifiableList(Arrays.asList("DETECTIONS:detections_out", "IMAGE:image_out"));
   private static final int DETECTIONS_OUT_STREAM_INDEX = 0;
@@ -204,7 +207,25 @@ public final class ObjectDetector extends BaseVisionTaskApi {
    * @param runningMode a mediapipe vision task {@link RunningMode}.
    */
   private ObjectDetector(TaskRunner taskRunner, RunningMode runningMode) {
-    super(taskRunner, runningMode, IMAGE_IN_STREAM_NAME);
+    super(taskRunner, runningMode, IMAGE_IN_STREAM_NAME, NORM_RECT_IN_STREAM_NAME);
   }
 
+  /**
+   * Performs object detection on the provided single image with default image processing options,
+   * i.e. without any rotation applied. Only use this method when the {@link ObjectDetector} is
+   * created with {@link RunningMode.IMAGE}.
+   *
+   * <p>{@link ObjectDetector} supports the following color space types:
+   *
+   * <ul>
+   *   <li>{@link Bitmap.Config.ARGB_8888}
+   * </ul>
+   *
+   * @param image a MediaPipe {@link MPImage} object for processing.
+   * @throws MediaPipeException if there is an internal error.
+   */
+  public ObjectDetectionResult detect(MPImage image) {
+    return detect(image, ImageProcessingOptions.builder().build());
+  }
+
   /**
@@ -217,11 +238,41 @@ public final class ObjectDetector extends BaseVisionTaskApi {
    *   <li>{@link Bitmap.Config.ARGB_8888}
    * </ul>
    *
-   * @param inputImage a MediaPipe {@link MPImage} object for processing.
+   * @param image a MediaPipe {@link MPImage} object for processing.
+   * @param imageProcessingOptions the {@link ImageProcessingOptions} specifying how to process the
+   *     input image before running inference. Note that region-of-interest is <b>not</b> supported
+   *     by this task: specifying {@link ImageProcessingOptions#regionOfInterest()} will result in
+   *     this method throwing an IllegalArgumentException.
+   * @throws IllegalArgumentException if the {@link ImageProcessingOptions} specify a
+   *     region-of-interest.
    * @throws MediaPipeException if there is an internal error.
    */
-  public ObjectDetectionResult detect(MPImage inputImage) {
-    return (ObjectDetectionResult) processImageData(inputImage);
+  public ObjectDetectionResult detect(
+      MPImage image, ImageProcessingOptions imageProcessingOptions) {
+    validateImageProcessingOptions(imageProcessingOptions);
+    return (ObjectDetectionResult) processImageData(image, imageProcessingOptions);
   }
 
   /**
+   * Performs object detection on the provided video frame with default image processing options,
+   * i.e. without any rotation applied. Only use this method when the {@link ObjectDetector} is
+   * created with {@link RunningMode.VIDEO}.
+   *
+   * <p>It's required to provide the video frame's timestamp (in milliseconds). The input timestamps
+   * must be monotonically increasing.
+   *
+   * <p>{@link ObjectDetector} supports the following color space types:
+   *
+   * <ul>
+   *   <li>{@link Bitmap.Config.ARGB_8888}
+   * </ul>
+   *
+   * @param image a MediaPipe {@link MPImage} object for processing.
+   * @param timestampMs the input timestamp (in milliseconds).
+   * @throws MediaPipeException if there is an internal error.
+   */
+  public ObjectDetectionResult detectForVideo(MPImage image, long timestampMs) {
+    return detectForVideo(image, ImageProcessingOptions.builder().build(), timestampMs);
+  }
+
+  /**
@@ -237,12 +288,43 @@ public final class ObjectDetector extends BaseVisionTaskApi {
    *   <li>{@link Bitmap.Config.ARGB_8888}
    * </ul>
    *
-   * @param inputImage a MediaPipe {@link MPImage} object for processing.
-   * @param inputTimestampMs the input timestamp (in milliseconds).
+   * @param image a MediaPipe {@link MPImage} object for processing.
+   * @param imageProcessingOptions the {@link ImageProcessingOptions} specifying how to process the
+   *     input image before running inference. Note that region-of-interest is <b>not</b> supported
+   *     by this task: specifying {@link ImageProcessingOptions#regionOfInterest()} will result in
+   *     this method throwing an IllegalArgumentException.
+   * @param timestampMs the input timestamp (in milliseconds).
+   * @throws IllegalArgumentException if the {@link ImageProcessingOptions} specify a
+   *     region-of-interest.
    * @throws MediaPipeException if there is an internal error.
    */
-  public ObjectDetectionResult detectForVideo(MPImage inputImage, long inputTimestampMs) {
-    return (ObjectDetectionResult) processVideoData(inputImage, inputTimestampMs);
+  public ObjectDetectionResult detectForVideo(
+      MPImage image, ImageProcessingOptions imageProcessingOptions, long timestampMs) {
+    validateImageProcessingOptions(imageProcessingOptions);
+    return (ObjectDetectionResult) processVideoData(image, imageProcessingOptions, timestampMs);
   }
 
   /**
+   * Sends live image data to perform object detection with default image processing options, i.e.
+   * without any rotation applied, and the results will be available via the {@link ResultListener}
+   * provided in the {@link ObjectDetectorOptions}. Only use this method when the {@link
+   * ObjectDetector} is created with {@link RunningMode.LIVE_STREAM}.
+   *
+   * <p>It's required to provide a timestamp (in milliseconds) to indicate when the input image is
+   * sent to the object detector. The input timestamps must be monotonically increasing.
+   *
+   * <p>{@link ObjectDetector} supports the following color space types:
+   *
+   * <ul>
+   *   <li>{@link Bitmap.Config.ARGB_8888}
+   * </ul>
+   *
+   * @param image a MediaPipe {@link MPImage} object for processing.
+   * @param timestampMs the input timestamp (in milliseconds).
+   * @throws MediaPipeException if there is an internal error.
+   */
+  public void detectAsync(MPImage image, long timestampMs) {
+    detectAsync(image, ImageProcessingOptions.builder().build(), timestampMs);
+  }
+
+  /**
@@ -259,12 +341,20 @@ public final class ObjectDetector extends BaseVisionTaskApi {
    *   <li>{@link Bitmap.Config.ARGB_8888}
    * </ul>
    *
-   * @param inputImage a MediaPipe {@link MPImage} object for processing.
-   * @param inputTimestampMs the input timestamp (in milliseconds).
+   * @param image a MediaPipe {@link MPImage} object for processing.
+   * @param imageProcessingOptions the {@link ImageProcessingOptions} specifying how to process the
+   *     input image before running inference. Note that region-of-interest is <b>not</b> supported
+   *     by this task: specifying {@link ImageProcessingOptions#regionOfInterest()} will result in
+   *     this method throwing an IllegalArgumentException.
+   * @param timestampMs the input timestamp (in milliseconds).
+   * @throws IllegalArgumentException if the {@link ImageProcessingOptions} specify a
+   *     region-of-interest.
    * @throws MediaPipeException if there is an internal error.
    */
-  public void detectAsync(MPImage inputImage, long inputTimestampMs) {
-    sendLiveStreamData(inputImage, inputTimestampMs);
+  public void detectAsync(
+      MPImage image, ImageProcessingOptions imageProcessingOptions, long timestampMs) {
+    validateImageProcessingOptions(imageProcessingOptions);
+    sendLiveStreamData(image, imageProcessingOptions, timestampMs);
   }
 
   /** Options for setting up an {@link ObjectDetector}. */
@@ -415,4 +505,15 @@ public final class ObjectDetector extends BaseVisionTaskApi {
           .build();
     }
   }
+
+  /**
+   * Validates that the provided {@link ImageProcessingOptions} doesn't contain a
+   * region-of-interest.
+   */
+  private static void validateImageProcessingOptions(
+      ImageProcessingOptions imageProcessingOptions) {
+    if (imageProcessingOptions.regionOfInterest().isPresent()) {
+      throw new IllegalArgumentException("ObjectDetector doesn't support region-of-interest.");
+    }
+  }
 }
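Live-stream sketch for the new ObjectDetector API (detector configured with RunningMode.LIVE_STREAM and a ResultListener; image and timestampMs assumed); passing a region-of-interest here would throw IllegalArgumentException:

    detector.detectAsync(
        image,
        ImageProcessingOptions.builder().setRotationDegrees(90).build(),
        timestampMs);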
@@ -0,0 +1,24 @@
+<?xml version="1.0" encoding="utf-8"?>
+<manifest xmlns:android="http://schemas.android.com/apk/res/android"
+    package="com.google.mediapipe.tasks.vision.coretest"
+    android:versionCode="1"
+    android:versionName="1.0" >
+
+  <uses-permission android:name="android.permission.READ_EXTERNAL_STORAGE"/>
+  <uses-permission android:name="android.permission.WRITE_EXTERNAL_STORAGE"/>
+
+  <uses-sdk android:minSdkVersion="24"
+      android:targetSdkVersion="30" />
+
+  <application
+      android:label="coretest"
+      android:name="android.support.multidex.MultiDexApplication"
+      android:taskAffinity="">
+    <uses-library android:name="android.test.runner" />
+  </application>
+
+  <instrumentation
+      android:name="com.google.android.apps.common.testing.testrunner.GoogleInstrumentationTestRunner"
+      android:targetPackage="com.google.mediapipe.tasks.vision.coretest" />
+
+</manifest>
@@ -0,0 +1,19 @@
+# Copyright 2022 The MediaPipe Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+package(default_visibility = ["//mediapipe/tasks:internal"])
+
+licenses(["notice"])
+
+# TODO: Enable this in OSS
@@ -0,0 +1,70 @@
+// Copyright 2022 The MediaPipe Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.mediapipe.tasks.vision.core;
+
+import static com.google.common.truth.Truth.assertThat;
+import static org.junit.Assert.assertThrows;
+
+import android.graphics.RectF;
+import androidx.test.ext.junit.runners.AndroidJUnit4;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+
+/** Test for {@link ImageProcessingOptions}/ */
+@RunWith(AndroidJUnit4.class)
+public final class ImageProcessingOptionsTest {
+
+  @Test
+  public void succeedsWithValidInputs() throws Exception {
+    ImageProcessingOptions options =
+        ImageProcessingOptions.builder()
+            .setRegionOfInterest(new RectF(0.0f, 0.1f, 1.0f, 0.9f))
+            .setRotationDegrees(270)
+            .build();
+  }
+
+  @Test
+  public void failsWithLeftHigherThanRight() {
+    IllegalArgumentException exception =
+        assertThrows(
+            IllegalArgumentException.class,
+            () ->
+                ImageProcessingOptions.builder()
+                    .setRegionOfInterest(new RectF(0.9f, 0.0f, 0.1f, 1.0f))
+                    .build());
+    assertThat(exception).hasMessageThat().contains("Expected left < right and top < bottom");
+  }
+
+  @Test
+  public void failsWithBottomHigherThanTop() {
+    IllegalArgumentException exception =
+        assertThrows(
+            IllegalArgumentException.class,
+            () ->
+                ImageProcessingOptions.builder()
+                    .setRegionOfInterest(new RectF(0.0f, 0.9f, 1.0f, 0.1f))
+                    .build());
+    assertThat(exception).hasMessageThat().contains("Expected left < right and top < bottom");
+  }
+
+  @Test
+  public void failsWithInvalidRotation() {
+    IllegalArgumentException exception =
+        assertThrows(
+            IllegalArgumentException.class,
+            () -> ImageProcessingOptions.builder().setRotationDegrees(1).build());
+    assertThat(exception).hasMessageThat().contains("Expected rotation to be a multiple of 90°");
+  }
+}
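As these tests show, validation fires in build(), not in the setters. A caller probing an option value would therefore do something like (a sketch):

    try {
      ImageProcessingOptions.builder().setRotationDegrees(45).build();
    } catch (IllegalArgumentException e) {
      // Message: "Expected rotation to be a multiple of 90°, found: 45."
    }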
@ -19,6 +19,7 @@ import static org.junit.Assert.assertThrows;

import android.content.res.AssetManager;
import android.graphics.BitmapFactory;
import android.graphics.RectF;
import androidx.test.core.app.ApplicationProvider;
import androidx.test.ext.junit.runners.AndroidJUnit4;
import com.google.common.truth.Correspondence;

@ -30,6 +31,7 @@ import com.google.mediapipe.tasks.components.containers.Category;
import com.google.mediapipe.tasks.components.containers.Landmark;
import com.google.mediapipe.tasks.components.containers.proto.LandmarksDetectionResultProto.LandmarksDetectionResult;
import com.google.mediapipe.tasks.core.BaseOptions;
import com.google.mediapipe.tasks.vision.core.ImageProcessingOptions;
import com.google.mediapipe.tasks.vision.core.RunningMode;
import com.google.mediapipe.tasks.vision.gesturerecognizer.GestureRecognizer.GestureRecognizerOptions;
import java.io.InputStream;

@ -46,11 +48,14 @@ public class GestureRecognizerTest {
  private static final String GESTURE_RECOGNIZER_BUNDLE_ASSET_FILE = "gesture_recognizer.task";
  private static final String TWO_HANDS_IMAGE = "right_hands.jpg";
  private static final String THUMB_UP_IMAGE = "thumb_up.jpg";
  private static final String POINTING_UP_ROTATED_IMAGE = "pointing_up_rotated.jpg";
  private static final String NO_HANDS_IMAGE = "cats_and_dogs.jpg";
  private static final String THUMB_UP_LANDMARKS = "thumb_up_landmarks.pb";
  private static final String TAG = "Gesture Recognizer Test";
  private static final String THUMB_UP_LABEL = "Thumb_Up";
  private static final int THUMB_UP_INDEX = 5;
  private static final String POINTING_UP_LABEL = "Pointing_Up";
  private static final int POINTING_UP_INDEX = 3;
  private static final float LANDMARKS_ERROR_TOLERANCE = 0.03f;
  private static final int IMAGE_WIDTH = 382;
  private static final int IMAGE_HEIGHT = 406;

@ -135,6 +140,53 @@ public class GestureRecognizerTest {
        gestureRecognizer.recognize(getImageFromAsset(TWO_HANDS_IMAGE));
    assertThat(actualResult.handednesses()).hasSize(2);
  }

  @Test
  public void recognize_successWithRotation() throws Exception {
    GestureRecognizerOptions options =
        GestureRecognizerOptions.builder()
            .setBaseOptions(
                BaseOptions.builder()
                    .setModelAssetPath(GESTURE_RECOGNIZER_BUNDLE_ASSET_FILE)
                    .build())
            .setNumHands(1)
            .build();
    GestureRecognizer gestureRecognizer =
        GestureRecognizer.createFromOptions(ApplicationProvider.getApplicationContext(), options);
    ImageProcessingOptions imageProcessingOptions =
        ImageProcessingOptions.builder().setRotationDegrees(-90).build();
    GestureRecognitionResult actualResult =
        gestureRecognizer.recognize(
            getImageFromAsset(POINTING_UP_ROTATED_IMAGE), imageProcessingOptions);
    assertThat(actualResult.gestures()).hasSize(1);
    assertThat(actualResult.gestures().get(0).get(0).index()).isEqualTo(POINTING_UP_INDEX);
    assertThat(actualResult.gestures().get(0).get(0).categoryName()).isEqualTo(POINTING_UP_LABEL);
  }

  @Test
  public void recognize_failsWithRegionOfInterest() throws Exception {
    GestureRecognizerOptions options =
        GestureRecognizerOptions.builder()
            .setBaseOptions(
                BaseOptions.builder()
                    .setModelAssetPath(GESTURE_RECOGNIZER_BUNDLE_ASSET_FILE)
                    .build())
            .setNumHands(1)
            .build();
    GestureRecognizer gestureRecognizer =
        GestureRecognizer.createFromOptions(ApplicationProvider.getApplicationContext(), options);
    ImageProcessingOptions imageProcessingOptions =
        ImageProcessingOptions.builder().setRegionOfInterest(new RectF(0, 0, 1, 1)).build();
    IllegalArgumentException exception =
        assertThrows(
            IllegalArgumentException.class,
            () ->
                gestureRecognizer.recognize(
                    getImageFromAsset(THUMB_UP_IMAGE), imageProcessingOptions));
    assertThat(exception)
        .hasMessageThat()
        .contains("GestureRecognizer doesn't support region-of-interest");
  }
}

@RunWith(AndroidJUnit4.class)

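As the test above documents, GestureRecognizer rejects a region of interest at call time. A caller sharing one ImageProcessingOptions across tasks could strip the ROI first; the sketch below assumes the builder's values are readable back through `regionOfInterest()` and `rotationDegrees()` accessors, which is an assumption about the generated options class rather than something shown in this diff:

    // Sketch only: drop the ROI but keep the rotation for ROI-less tasks.
    // Assumes an Optional-returning regionOfInterest() accessor exists.
    ImageProcessingOptions forGestures =
        options.regionOfInterest().isPresent()
            ? ImageProcessingOptions.builder()
                .setRotationDegrees(options.rotationDegrees())
                .build()
            : options;
    GestureRecognitionResult result = gestureRecognizer.recognize(image, forGestures);
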
@ -195,12 +247,16 @@ public class GestureRecognizerTest {
    MediaPipeException exception =
        assertThrows(
            MediaPipeException.class,
            () -> gestureRecognizer.recognizeForVideo(getImageFromAsset(THUMB_UP_IMAGE), 0));
            () ->
                gestureRecognizer.recognizeForVideo(
                    getImageFromAsset(THUMB_UP_IMAGE), /*timestampMs=*/ 0));
    assertThat(exception).hasMessageThat().contains("not initialized with the video mode");
    exception =
        assertThrows(
            MediaPipeException.class,
            () -> gestureRecognizer.recognizeAsync(getImageFromAsset(THUMB_UP_IMAGE), 0));
            () ->
                gestureRecognizer.recognizeAsync(
                    getImageFromAsset(THUMB_UP_IMAGE), /*timestampMs=*/ 0));
    assertThat(exception).hasMessageThat().contains("not initialized with the live stream mode");
  }

@ -225,7 +281,9 @@ public class GestureRecognizerTest {
    exception =
        assertThrows(
            MediaPipeException.class,
            () -> gestureRecognizer.recognizeAsync(getImageFromAsset(THUMB_UP_IMAGE), 0));
            () ->
                gestureRecognizer.recognizeAsync(
                    getImageFromAsset(THUMB_UP_IMAGE), /*timestampMs=*/ 0));
    assertThat(exception).hasMessageThat().contains("not initialized with the live stream mode");
  }

@ -251,7 +309,9 @@ public class GestureRecognizerTest {
    exception =
        assertThrows(
            MediaPipeException.class,
            () -> gestureRecognizer.recognizeForVideo(getImageFromAsset(THUMB_UP_IMAGE), 0));
            () ->
                gestureRecognizer.recognizeForVideo(
                    getImageFromAsset(THUMB_UP_IMAGE), /*timestampMs=*/ 0));
    assertThat(exception).hasMessageThat().contains("not initialized with the video mode");
  }

@ -291,7 +351,8 @@ public class GestureRecognizerTest {
        getExpectedGestureRecognitionResult(THUMB_UP_LANDMARKS, THUMB_UP_LABEL, THUMB_UP_INDEX);
    for (int i = 0; i < 3; i++) {
      GestureRecognitionResult actualResult =
          gestureRecognizer.recognizeForVideo(getImageFromAsset(THUMB_UP_IMAGE), i);
          gestureRecognizer.recognizeForVideo(
              getImageFromAsset(THUMB_UP_IMAGE), /*timestampMs=*/ i);
      assertActualResultApproximatelyEqualsToExpectedResult(actualResult, expectedResult);
    }
  }

@ -317,9 +378,11 @@ public class GestureRecognizerTest {
            .build();
    try (GestureRecognizer gestureRecognizer =
        GestureRecognizer.createFromOptions(ApplicationProvider.getApplicationContext(), options)) {
      gestureRecognizer.recognizeAsync(image, 1);
      gestureRecognizer.recognizeAsync(image, /*timestampMs=*/ 1);
      MediaPipeException exception =
          assertThrows(MediaPipeException.class, () -> gestureRecognizer.recognizeAsync(image, 0));
          assertThrows(
              MediaPipeException.class,
              () -> gestureRecognizer.recognizeAsync(image, /*timestampMs=*/ 0));
      assertThat(exception)
          .hasMessageThat()
          .contains("having a smaller timestamp than the processed timestamp");

@ -348,7 +411,7 @@ public class GestureRecognizerTest {
    try (GestureRecognizer gestureRecognizer =
        GestureRecognizer.createFromOptions(ApplicationProvider.getApplicationContext(), options)) {
      for (int i = 0; i < 3; i++) {
        gestureRecognizer.recognizeAsync(image, i);
        gestureRecognizer.recognizeAsync(image, /*timestampMs=*/ i);
      }
    }
  }

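The timestamp tests above pin down the live-stream contract: every recognizeAsync call must carry a strictly increasing millisecond timestamp, otherwise the task throws "having a smaller timestamp than the processed timestamp". One way to satisfy this on Android is to read a monotonic clock at capture time; this is a sketch, not code from this commit:

    // Sketch only: SystemClock.uptimeMillis() is monotonic, unlike
    // System.currentTimeMillis(), so it cannot move backwards between frames.
    long timestampMs = android.os.SystemClock.uptimeMillis();
    gestureRecognizer.recognizeAsync(image, timestampMs);
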
@ -29,6 +29,7 @@ import com.google.mediapipe.tasks.components.containers.Category;
import com.google.mediapipe.tasks.components.processors.ClassifierOptions;
import com.google.mediapipe.tasks.core.BaseOptions;
import com.google.mediapipe.tasks.core.TestUtils;
import com.google.mediapipe.tasks.vision.core.ImageProcessingOptions;
import com.google.mediapipe.tasks.vision.core.RunningMode;
import com.google.mediapipe.tasks.vision.imageclassifier.ImageClassifier.ImageClassifierOptions;
import java.io.InputStream;

@ -47,7 +48,9 @@ public class ImageClassifierTest {
  private static final String FLOAT_MODEL_FILE = "mobilenet_v2_1.0_224.tflite";
  private static final String QUANTIZED_MODEL_FILE = "mobilenet_v1_0.25_224_quant.tflite";
  private static final String BURGER_IMAGE = "burger.jpg";
  private static final String BURGER_ROTATED_IMAGE = "burger_rotated.jpg";
  private static final String MULTI_OBJECTS_IMAGE = "multi_objects.jpg";
  private static final String MULTI_OBJECTS_ROTATED_IMAGE = "multi_objects_rotated.jpg";

  @RunWith(AndroidJUnit4.class)
  public static final class General extends ImageClassifierTest {

@ -209,13 +212,60 @@ public class ImageClassifierTest {
    ImageClassifier imageClassifier =
        ImageClassifier.createFromOptions(ApplicationProvider.getApplicationContext(), options);
    // RectF around the soccer ball.
    RectF roi = new RectF(0.450f, 0.308f, 0.614f, 0.734f);
    ImageProcessingOptions imageProcessingOptions =
        ImageProcessingOptions.builder().setRegionOfInterest(roi).build();
    ImageClassificationResult results =
        imageClassifier.classify(getImageFromAsset(MULTI_OBJECTS_IMAGE), roi);
        imageClassifier.classify(getImageFromAsset(MULTI_OBJECTS_IMAGE), imageProcessingOptions);

    assertHasOneHeadAndOneTimestamp(results, 0);
    assertCategoriesAre(
        results, Arrays.asList(Category.create(0.9969325f, 806, "soccer ball", "")));
  }

  @Test
  public void classify_succeedsWithRotation() throws Exception {
    ImageClassifierOptions options =
        ImageClassifierOptions.builder()
            .setBaseOptions(BaseOptions.builder().setModelAssetPath(FLOAT_MODEL_FILE).build())
            .setClassifierOptions(ClassifierOptions.builder().setMaxResults(3).build())
            .build();
    ImageClassifier imageClassifier =
        ImageClassifier.createFromOptions(ApplicationProvider.getApplicationContext(), options);
    ImageProcessingOptions imageProcessingOptions =
        ImageProcessingOptions.builder().setRotationDegrees(-90).build();
    ImageClassificationResult results =
        imageClassifier.classify(getImageFromAsset(BURGER_ROTATED_IMAGE), imageProcessingOptions);

    assertHasOneHeadAndOneTimestamp(results, 0);
    assertCategoriesAre(
        results,
        Arrays.asList(
            Category.create(0.6390683f, 934, "cheeseburger", ""),
            Category.create(0.0495407f, 963, "meat loaf", ""),
            Category.create(0.0469720f, 925, "guacamole", "")));
  }

  @Test
  public void classify_succeedsWithRegionOfInterestAndRotation() throws Exception {
    ImageClassifierOptions options =
        ImageClassifierOptions.builder()
            .setBaseOptions(BaseOptions.builder().setModelAssetPath(FLOAT_MODEL_FILE).build())
            .setClassifierOptions(ClassifierOptions.builder().setMaxResults(1).build())
            .build();
    ImageClassifier imageClassifier =
        ImageClassifier.createFromOptions(ApplicationProvider.getApplicationContext(), options);
    // RectF around the chair.
    RectF roi = new RectF(0.0f, 0.1763f, 0.5642f, 0.3049f);
    ImageProcessingOptions imageProcessingOptions =
        ImageProcessingOptions.builder().setRegionOfInterest(roi).setRotationDegrees(-90).build();
    ImageClassificationResult results =
        imageClassifier.classify(
            getImageFromAsset(MULTI_OBJECTS_ROTATED_IMAGE), imageProcessingOptions);

    assertHasOneHeadAndOneTimestamp(results, 0);
    assertCategoriesAre(
        results, Arrays.asList(Category.create(0.686824f, 560, "folding chair", "")));
  }
}

@RunWith(AndroidJUnit4.class)

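The region of interest in these tests is always normalized to [0, 1] on both axes (the soccer-ball and chair rectangles above). A hypothetical pixel-space helper, not part of this commit; the name and signature are mine:

    // Sketch only: convert a pixel-space ROI into the normalized coordinates
    // expected by setRegionOfInterest(). width and height describe the frame
    // the rectangle was measured on.
    static RectF normalizedRoi(android.graphics.Rect roiPx, int width, int height) {
      return new RectF(
          (float) roiPx.left / width,
          (float) roiPx.top / height,
          (float) roiPx.right / width,
          (float) roiPx.bottom / height);
    }
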
@ -269,12 +319,16 @@ public class ImageClassifierTest {
      MediaPipeException exception =
          assertThrows(
              MediaPipeException.class,
              () -> imageClassifier.classifyForVideo(getImageFromAsset(BURGER_IMAGE), 0));
              () ->
                  imageClassifier.classifyForVideo(
                      getImageFromAsset(BURGER_IMAGE), /*timestampMs=*/ 0));
      assertThat(exception).hasMessageThat().contains("not initialized with the video mode");
      exception =
          assertThrows(
              MediaPipeException.class,
              () -> imageClassifier.classifyAsync(getImageFromAsset(BURGER_IMAGE), 0));
              () ->
                  imageClassifier.classifyAsync(
                      getImageFromAsset(BURGER_IMAGE), /*timestampMs=*/ 0));
      assertThat(exception).hasMessageThat().contains("not initialized with the live stream mode");
    }

@ -296,7 +350,9 @@ public class ImageClassifierTest {
      exception =
          assertThrows(
              MediaPipeException.class,
              () -> imageClassifier.classifyAsync(getImageFromAsset(BURGER_IMAGE), 0));
              () ->
                  imageClassifier.classifyAsync(
                      getImageFromAsset(BURGER_IMAGE), /*timestampMs=*/ 0));
      assertThat(exception).hasMessageThat().contains("not initialized with the live stream mode");
    }

@ -320,7 +376,9 @@ public class ImageClassifierTest {
      exception =
          assertThrows(
              MediaPipeException.class,
              () -> imageClassifier.classifyForVideo(getImageFromAsset(BURGER_IMAGE), 0));
              () ->
                  imageClassifier.classifyForVideo(
                      getImageFromAsset(BURGER_IMAGE), /*timestampMs=*/ 0));
      assertThat(exception).hasMessageThat().contains("not initialized with the video mode");
    }

@ -352,7 +410,8 @@ public class ImageClassifierTest {
      ImageClassifier imageClassifier =
          ImageClassifier.createFromOptions(ApplicationProvider.getApplicationContext(), options);
      for (int i = 0; i < 3; i++) {
        ImageClassificationResult results = imageClassifier.classifyForVideo(image, i);
        ImageClassificationResult results =
            imageClassifier.classifyForVideo(image, /*timestampMs=*/ i);
        assertHasOneHeadAndOneTimestamp(results, i);
        assertCategoriesAre(
            results, Arrays.asList(Category.create(0.7952058f, 934, "cheeseburger", "")));

@ -377,9 +436,11 @@ public class ImageClassifierTest {
              .build();
      try (ImageClassifier imageClassifier =
          ImageClassifier.createFromOptions(ApplicationProvider.getApplicationContext(), options)) {
        imageClassifier.classifyAsync(getImageFromAsset(BURGER_IMAGE), 1);
        imageClassifier.classifyAsync(getImageFromAsset(BURGER_IMAGE), /*timestampMs=*/ 1);
        MediaPipeException exception =
            assertThrows(MediaPipeException.class, () -> imageClassifier.classifyAsync(image, 0));
            assertThrows(
                MediaPipeException.class,
                () -> imageClassifier.classifyAsync(image, /*timestampMs=*/ 0));
        assertThat(exception)
            .hasMessageThat()
            .contains("having a smaller timestamp than the processed timestamp");

@ -405,7 +466,7 @@ public class ImageClassifierTest {
      try (ImageClassifier imageClassifier =
          ImageClassifier.createFromOptions(ApplicationProvider.getApplicationContext(), options)) {
        for (int i = 0; i < 3; ++i) {
          imageClassifier.classifyAsync(image, i);
          imageClassifier.classifyAsync(image, /*timestampMs=*/ i);
        }
      }
    }

@ -29,6 +29,7 @@ import com.google.mediapipe.tasks.components.containers.Category;
import com.google.mediapipe.tasks.components.containers.Detection;
import com.google.mediapipe.tasks.core.BaseOptions;
import com.google.mediapipe.tasks.core.TestUtils;
import com.google.mediapipe.tasks.vision.core.ImageProcessingOptions;
import com.google.mediapipe.tasks.vision.core.RunningMode;
import com.google.mediapipe.tasks.vision.objectdetector.ObjectDetector.ObjectDetectorOptions;
import java.io.InputStream;

@ -45,10 +46,11 @@ import org.junit.runners.Suite.SuiteClasses;
public class ObjectDetectorTest {
  private static final String MODEL_FILE = "coco_ssd_mobilenet_v1_1.0_quant_2018_06_29.tflite";
  private static final String CAT_AND_DOG_IMAGE = "cats_and_dogs.jpg";
  private static final String CAT_AND_DOG_ROTATED_IMAGE = "cats_and_dogs_rotated.jpg";
  private static final int IMAGE_WIDTH = 1200;
  private static final int IMAGE_HEIGHT = 600;
  private static final float CAT_SCORE = 0.69f;
  private static final RectF catBoundingBox = new RectF(611, 164, 986, 596);
  private static final RectF CAT_BOUNDING_BOX = new RectF(611, 164, 986, 596);
  // TODO: Figure out why android_x86 and android_arm tests have slightly different
  // scores (0.6875 vs 0.69921875).
  private static final float SCORE_DIFF_TOLERANCE = 0.01f;

@ -67,7 +69,7 @@ public class ObjectDetectorTest {
    ObjectDetector objectDetector =
        ObjectDetector.createFromOptions(ApplicationProvider.getApplicationContext(), options);
    ObjectDetectionResult results = objectDetector.detect(getImageFromAsset(CAT_AND_DOG_IMAGE));
    assertContainsOnlyCat(results, catBoundingBox, CAT_SCORE);
    assertContainsOnlyCat(results, CAT_BOUNDING_BOX, CAT_SCORE);
  }

  @Test

@ -104,7 +106,7 @@ public class ObjectDetectorTest {
        ObjectDetector.createFromOptions(ApplicationProvider.getApplicationContext(), options);
    ObjectDetectionResult results = objectDetector.detect(getImageFromAsset(CAT_AND_DOG_IMAGE));
    // The score threshold should block all other objects except the cat.
    assertContainsOnlyCat(results, catBoundingBox, CAT_SCORE);
    assertContainsOnlyCat(results, CAT_BOUNDING_BOX, CAT_SCORE);
  }

  @Test

@ -175,7 +177,7 @@ public class ObjectDetectorTest {
    ObjectDetector objectDetector =
        ObjectDetector.createFromOptions(ApplicationProvider.getApplicationContext(), options);
    ObjectDetectionResult results = objectDetector.detect(getImageFromAsset(CAT_AND_DOG_IMAGE));
    assertContainsOnlyCat(results, catBoundingBox, CAT_SCORE);
    assertContainsOnlyCat(results, CAT_BOUNDING_BOX, CAT_SCORE);
  }

  @Test

@ -228,6 +230,46 @@ public class ObjectDetectorTest {
        .contains("`category_allowlist` and `category_denylist` are mutually exclusive options.");
  }

  @Test
  public void detect_succeedsWithRotation() throws Exception {
    ObjectDetectorOptions options =
        ObjectDetectorOptions.builder()
            .setBaseOptions(BaseOptions.builder().setModelAssetPath(MODEL_FILE).build())
            .setMaxResults(1)
            .setCategoryAllowlist(Arrays.asList("cat"))
            .build();
    ObjectDetector objectDetector =
        ObjectDetector.createFromOptions(ApplicationProvider.getApplicationContext(), options);
    ImageProcessingOptions imageProcessingOptions =
        ImageProcessingOptions.builder().setRotationDegrees(-90).build();
    ObjectDetectionResult results =
        objectDetector.detect(
            getImageFromAsset(CAT_AND_DOG_ROTATED_IMAGE), imageProcessingOptions);

    assertContainsOnlyCat(results, new RectF(22.0f, 611.0f, 452.0f, 890.0f), 0.7109375f);
  }

  @Test
  public void detect_failsWithRegionOfInterest() throws Exception {
    ObjectDetectorOptions options =
        ObjectDetectorOptions.builder()
            .setBaseOptions(BaseOptions.builder().setModelAssetPath(MODEL_FILE).build())
            .build();
    ObjectDetector objectDetector =
        ObjectDetector.createFromOptions(ApplicationProvider.getApplicationContext(), options);
    ImageProcessingOptions imageProcessingOptions =
        ImageProcessingOptions.builder().setRegionOfInterest(new RectF(0, 0, 1, 1)).build();
    IllegalArgumentException exception =
        assertThrows(
            IllegalArgumentException.class,
            () ->
                objectDetector.detect(
                    getImageFromAsset(CAT_AND_DOG_IMAGE), imageProcessingOptions));
    assertThat(exception)
        .hasMessageThat()
        .contains("ObjectDetector doesn't support region-of-interest");
  }

  // TODO: Implement detect_succeedsWithFloatImages, detect_succeedsWithOrientation,
  // detect_succeedsWithNumThreads, detect_successWithNumThreadsFromBaseOptions,
  // detect_failsWithInvalidNegativeNumThreads, detect_failsWithInvalidNumThreadsAsZero.

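For callers without EXIF data, the rotation can also be derived from the display rotation: Surface.ROTATION_0 through ROTATION_270 are the integers 0 through 3, so multiplying by 90 yields degrees. Whether the sign must be flipped depends on the task's rotation convention (the tests in this change pass -90 for assets rotated 90°), so treat this as a sketch under those assumptions:

    // Sketch only: derive a multiple-of-90 rotation from the current display.
    // `activity` is a placeholder; getDefaultDisplay() is deprecated on
    // API 30+ in favor of Context#getDisplay().
    int surfaceRotation = activity.getWindowManager().getDefaultDisplay().getRotation();
    ImageProcessingOptions imageProcessingOptions =
        ImageProcessingOptions.builder().setRotationDegrees(surfaceRotation * 90).build();
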
@ -282,12 +324,16 @@ public class ObjectDetectorTest {
      MediaPipeException exception =
          assertThrows(
              MediaPipeException.class,
              () -> objectDetector.detectForVideo(getImageFromAsset(CAT_AND_DOG_IMAGE), 0));
              () ->
                  objectDetector.detectForVideo(
                      getImageFromAsset(CAT_AND_DOG_IMAGE), /*timestampMs=*/ 0));
      assertThat(exception).hasMessageThat().contains("not initialized with the video mode");
      exception =
          assertThrows(
              MediaPipeException.class,
              () -> objectDetector.detectAsync(getImageFromAsset(CAT_AND_DOG_IMAGE), 0));
              () ->
                  objectDetector.detectAsync(
                      getImageFromAsset(CAT_AND_DOG_IMAGE), /*timestampMs=*/ 0));
      assertThat(exception).hasMessageThat().contains("not initialized with the live stream mode");
    }

@ -309,7 +355,9 @@ public class ObjectDetectorTest {
      exception =
          assertThrows(
              MediaPipeException.class,
              () -> objectDetector.detectAsync(getImageFromAsset(CAT_AND_DOG_IMAGE), 0));
              () ->
                  objectDetector.detectAsync(
                      getImageFromAsset(CAT_AND_DOG_IMAGE), /*timestampMs=*/ 0));
      assertThat(exception).hasMessageThat().contains("not initialized with the live stream mode");
    }

@ -333,7 +381,9 @@ public class ObjectDetectorTest {
      exception =
          assertThrows(
              MediaPipeException.class,
              () -> objectDetector.detectForVideo(getImageFromAsset(CAT_AND_DOG_IMAGE), 0));
              () ->
                  objectDetector.detectForVideo(
                      getImageFromAsset(CAT_AND_DOG_IMAGE), /*timestampMs=*/ 0));
      assertThat(exception).hasMessageThat().contains("not initialized with the video mode");
    }

@ -348,7 +398,7 @@ public class ObjectDetectorTest {
      ObjectDetector objectDetector =
          ObjectDetector.createFromOptions(ApplicationProvider.getApplicationContext(), options);
      ObjectDetectionResult results = objectDetector.detect(getImageFromAsset(CAT_AND_DOG_IMAGE));
      assertContainsOnlyCat(results, catBoundingBox, CAT_SCORE);
      assertContainsOnlyCat(results, CAT_BOUNDING_BOX, CAT_SCORE);
    }

    @Test

@ -363,8 +413,9 @@ public class ObjectDetectorTest {
          ObjectDetector.createFromOptions(ApplicationProvider.getApplicationContext(), options);
      for (int i = 0; i < 3; i++) {
        ObjectDetectionResult results =
            objectDetector.detectForVideo(getImageFromAsset(CAT_AND_DOG_IMAGE), i);
        assertContainsOnlyCat(results, catBoundingBox, CAT_SCORE);
            objectDetector.detectForVideo(
                getImageFromAsset(CAT_AND_DOG_IMAGE), /*timestampMs=*/ i);
        assertContainsOnlyCat(results, CAT_BOUNDING_BOX, CAT_SCORE);
      }
    }

@ -377,16 +428,18 @@ public class ObjectDetectorTest {
              .setRunningMode(RunningMode.LIVE_STREAM)
              .setResultListener(
                  (objectDetectionResult, inputImage) -> {
                    assertContainsOnlyCat(objectDetectionResult, catBoundingBox, CAT_SCORE);
                    assertContainsOnlyCat(objectDetectionResult, CAT_BOUNDING_BOX, CAT_SCORE);
                    assertImageSizeIsExpected(inputImage);
                  })
              .setMaxResults(1)
              .build();
      try (ObjectDetector objectDetector =
          ObjectDetector.createFromOptions(ApplicationProvider.getApplicationContext(), options)) {
        objectDetector.detectAsync(image, 1);
        objectDetector.detectAsync(image, /*timestampMs=*/ 1);
        MediaPipeException exception =
            assertThrows(MediaPipeException.class, () -> objectDetector.detectAsync(image, 0));
            assertThrows(
                MediaPipeException.class,
                () -> objectDetector.detectAsync(image, /*timestampMs=*/ 0));
        assertThat(exception)
            .hasMessageThat()
            .contains("having a smaller timestamp than the processed timestamp");

@ -402,7 +455,7 @@ public class ObjectDetectorTest {
              .setRunningMode(RunningMode.LIVE_STREAM)
              .setResultListener(
                  (objectDetectionResult, inputImage) -> {
                    assertContainsOnlyCat(objectDetectionResult, catBoundingBox, CAT_SCORE);
                    assertContainsOnlyCat(objectDetectionResult, CAT_BOUNDING_BOX, CAT_SCORE);
                    assertImageSizeIsExpected(inputImage);
                  })
              .setMaxResults(1)

@ -410,7 +463,7 @@ public class ObjectDetectorTest {
      try (ObjectDetector objectDetector =
          ObjectDetector.createFromOptions(ApplicationProvider.getApplicationContext(), options)) {
        for (int i = 0; i < 3; i++) {
          objectDetector.detectAsync(image, i);
          objectDetector.detectAsync(image, /*timestampMs=*/ i);
        }
      }
    }
