From 4a6c23a76a70369ba5a1a65789fcfc2d6497cc82 Mon Sep 17 00:00:00 2001 From: MediaPipe Team Date: Fri, 21 Oct 2022 15:47:13 -0700 Subject: [PATCH] Internal change PiperOrigin-RevId: 482906478 --- mediapipe/tasks/cc/vision/core/BUILD | 11 ++ .../cc/vision/core/base_vision_task_api.h | 59 ++++++++++ .../cc/vision/core/image_processing_options.h | 52 +++++++++ .../tasks/cc/vision/gesture_recognizer/BUILD | 1 + .../gesture_recognizer/gesture_recognizer.cc | 47 +++----- .../gesture_recognizer/gesture_recognizer.h | 36 +++--- .../tasks/cc/vision/image_classifier/BUILD | 1 + .../image_classifier/image_classifier.cc | 37 ++---- .../image_classifier/image_classifier.h | 31 ++--- .../image_classifier/image_classifier_test.cc | 109 +++++++++++++----- .../tasks/cc/vision/object_detector/BUILD | 1 + .../vision/object_detector/object_detector.cc | 47 +++----- .../vision/object_detector/object_detector.h | 29 +++-- .../object_detector/object_detector_test.cc | 22 ++-- 14 files changed, 302 insertions(+), 181 deletions(-) create mode 100644 mediapipe/tasks/cc/vision/core/image_processing_options.h diff --git a/mediapipe/tasks/cc/vision/core/BUILD b/mediapipe/tasks/cc/vision/core/BUILD index 12d789901..e8e197a1d 100644 --- a/mediapipe/tasks/cc/vision/core/BUILD +++ b/mediapipe/tasks/cc/vision/core/BUILD @@ -21,12 +21,23 @@ cc_library( hdrs = ["running_mode.h"], ) +cc_library( + name = "image_processing_options", + hdrs = ["image_processing_options.h"], + deps = [ + "//mediapipe/tasks/cc/components/containers:rect", + ], +) + cc_library( name = "base_vision_task_api", hdrs = ["base_vision_task_api.h"], deps = [ + ":image_processing_options", ":running_mode", "//mediapipe/calculators/core:flow_limiter_calculator", + "//mediapipe/framework/formats:rect_cc_proto", + "//mediapipe/tasks/cc/components/containers:rect", "//mediapipe/tasks/cc/core:base_task_api", "//mediapipe/tasks/cc/core:task_runner", "@com_google_absl//absl/status", diff --git a/mediapipe/tasks/cc/vision/core/base_vision_task_api.h b/mediapipe/tasks/cc/vision/core/base_vision_task_api.h index 4586cbbdd..c3c0a0261 100644 --- a/mediapipe/tasks/cc/vision/core/base_vision_task_api.h +++ b/mediapipe/tasks/cc/vision/core/base_vision_task_api.h @@ -16,15 +16,20 @@ limitations under the License. #ifndef MEDIAPIPE_TASKS_CC_VISION_CORE_BASE_VISION_TASK_API_H_ #define MEDIAPIPE_TASKS_CC_VISION_CORE_BASE_VISION_TASK_API_H_ +#include #include +#include #include #include #include "absl/status/status.h" #include "absl/status/statusor.h" #include "absl/strings/str_cat.h" +#include "mediapipe/framework/formats/rect.pb.h" +#include "mediapipe/tasks/cc/components/containers/rect.h" #include "mediapipe/tasks/cc/core/base_task_api.h" #include "mediapipe/tasks/cc/core/task_runner.h" +#include "mediapipe/tasks/cc/vision/core/image_processing_options.h" #include "mediapipe/tasks/cc/vision/core/running_mode.h" namespace mediapipe { @@ -87,6 +92,60 @@ class BaseVisionTaskApi : public tasks::core::BaseTaskApi { return runner_->Send(std::move(inputs)); } + // Convert from ImageProcessingOptions to NormalizedRect, performing sanity + // checks on-the-fly. If the input ImageProcessingOptions is not present, + // returns a default NormalizedRect covering the whole image with rotation set + // to 0. If 'roi_allowed' is false, an error will be returned if the input + // ImageProcessingOptions has its 'region_or_interest' field set. 
+  static absl::StatusOr<mediapipe::NormalizedRect> ConvertToNormalizedRect(
+      std::optional<ImageProcessingOptions> options, bool roi_allowed = true) {
+    mediapipe::NormalizedRect normalized_rect;
+    normalized_rect.set_rotation(0);
+    normalized_rect.set_x_center(0.5);
+    normalized_rect.set_y_center(0.5);
+    normalized_rect.set_width(1.0);
+    normalized_rect.set_height(1.0);
+    if (!options.has_value()) {
+      return normalized_rect;
+    }
+
+    if (options->rotation_degrees % 90 != 0) {
+      return CreateStatusWithPayload(
+          absl::StatusCode::kInvalidArgument,
+          "Expected rotation to be a multiple of 90°.",
+          MediaPipeTasksStatus::kImageProcessingInvalidArgumentError);
+    }
+    // Convert to radians counter-clockwise.
+    normalized_rect.set_rotation(-options->rotation_degrees * M_PI / 180.0);
+
+    if (options->region_of_interest.has_value()) {
+      if (!roi_allowed) {
+        return CreateStatusWithPayload(
+            absl::StatusCode::kInvalidArgument,
+            "This task doesn't support region-of-interest.",
+            MediaPipeTasksStatus::kImageProcessingInvalidArgumentError);
+      }
+      auto& roi = *options->region_of_interest;
+      if (roi.left >= roi.right || roi.top >= roi.bottom) {
+        return CreateStatusWithPayload(
+            absl::StatusCode::kInvalidArgument,
+            "Expected Rect with left < right and top < bottom.",
+            MediaPipeTasksStatus::kImageProcessingInvalidArgumentError);
+      }
+      if (roi.left < 0 || roi.top < 0 || roi.right > 1 || roi.bottom > 1) {
+        return CreateStatusWithPayload(
+            absl::StatusCode::kInvalidArgument,
+            "Expected Rect values to be in [0,1].",
+            MediaPipeTasksStatus::kImageProcessingInvalidArgumentError);
+      }
+      normalized_rect.set_x_center((roi.left + roi.right) / 2.0);
+      normalized_rect.set_y_center((roi.top + roi.bottom) / 2.0);
+      normalized_rect.set_width(roi.right - roi.left);
+      normalized_rect.set_height(roi.bottom - roi.top);
+    }
+    return normalized_rect;
+  }
+
  private:
   RunningMode running_mode_;
 };
diff --git a/mediapipe/tasks/cc/vision/core/image_processing_options.h b/mediapipe/tasks/cc/vision/core/image_processing_options.h
new file mode 100644
index 000000000..7e764c1fe
--- /dev/null
+++ b/mediapipe/tasks/cc/vision/core/image_processing_options.h
@@ -0,0 +1,52 @@
+/* Copyright 2022 The MediaPipe Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef MEDIAPIPE_TASKS_CC_VISION_CORE_IMAGE_PROCESSING_OPTIONS_H_
+#define MEDIAPIPE_TASKS_CC_VISION_CORE_IMAGE_PROCESSING_OPTIONS_H_
+
+#include <optional>
+
+#include "mediapipe/tasks/cc/components/containers/rect.h"
+
+namespace mediapipe {
+namespace tasks {
+namespace vision {
+namespace core {
+
+// Options for image processing.
+//
+// If both region-of-interest and rotation are specified, the crop around the
+// region-of-interest is extracted first, then the specified rotation is
+// applied to the crop.
+struct ImageProcessingOptions {
+  // The optional region-of-interest to crop from the image. If not specified,
+  // the full image is used.
+  //
+  // Coordinates must be in [0,1] with 'left' < 'right' and 'top' < 'bottom'.
+  std::optional<components::containers::Rect> region_of_interest = std::nullopt;
+
+  // The rotation to apply to the image (or cropped region-of-interest), in
+  // degrees clockwise.
+  //
+  // The rotation must be a multiple (positive or negative) of 90°.
+  int rotation_degrees = 0;
+};
+
+}  // namespace core
+}  // namespace vision
+}  // namespace tasks
+}  // namespace mediapipe
+
+#endif  // MEDIAPIPE_TASKS_CC_VISION_CORE_IMAGE_PROCESSING_OPTIONS_H_
diff --git a/mediapipe/tasks/cc/vision/gesture_recognizer/BUILD b/mediapipe/tasks/cc/vision/gesture_recognizer/BUILD
index e5b1f0479..a766c6b3f 100644
--- a/mediapipe/tasks/cc/vision/gesture_recognizer/BUILD
+++ b/mediapipe/tasks/cc/vision/gesture_recognizer/BUILD
@@ -137,6 +137,7 @@ cc_library(
         "//mediapipe/tasks/cc/core:utils",
         "//mediapipe/tasks/cc/core/proto:inference_subgraph_cc_proto",
         "//mediapipe/tasks/cc/vision/core:base_vision_task_api",
+        "//mediapipe/tasks/cc/vision/core:image_processing_options",
         "//mediapipe/tasks/cc/vision/core:running_mode",
         "//mediapipe/tasks/cc/vision/core:vision_task_api_factory",
         "//mediapipe/tasks/cc/vision/gesture_recognizer/proto:gesture_recognizer_graph_options_cc_proto",
diff --git a/mediapipe/tasks/cc/vision/gesture_recognizer/gesture_recognizer.cc b/mediapipe/tasks/cc/vision/gesture_recognizer/gesture_recognizer.cc
index 333edb6fb..000a2e141 100644
--- a/mediapipe/tasks/cc/vision/gesture_recognizer/gesture_recognizer.cc
+++ b/mediapipe/tasks/cc/vision/gesture_recognizer/gesture_recognizer.cc
@@ -39,6 +39,7 @@ limitations under the License.
 #include "mediapipe/tasks/cc/core/task_runner.h"
 #include "mediapipe/tasks/cc/core/utils.h"
 #include "mediapipe/tasks/cc/vision/core/base_vision_task_api.h"
+#include "mediapipe/tasks/cc/vision/core/image_processing_options.h"
 #include "mediapipe/tasks/cc/vision/core/vision_task_api_factory.h"
 #include "mediapipe/tasks/cc/vision/gesture_recognizer/proto/gesture_recognizer_graph_options.pb.h"
 #include "mediapipe/tasks/cc/vision/gesture_recognizer/proto/hand_gesture_recognizer_graph_options.pb.h"
@@ -76,31 +77,6 @@ constexpr char kHandWorldLandmarksTag[] = "WORLD_LANDMARKS";
 constexpr char kHandWorldLandmarksStreamName[] = "world_landmarks";
 constexpr int kMicroSecondsPerMilliSecond = 1000;
-// Returns a NormalizedRect filling the whole image. If input is present, its
-// rotation is set in the returned NormalizedRect and a check is performed to
-// make sure no region-of-interest was provided. Otherwise, rotation is set to
-// 0.
-absl::StatusOr<NormalizedRect> FillNormalizedRect(
-    std::optional<NormalizedRect> normalized_rect) {
-  NormalizedRect result;
-  if (normalized_rect.has_value()) {
-    result = *normalized_rect;
-  }
-  bool has_coordinates = result.has_x_center() || result.has_y_center() ||
-                         result.has_width() || result.has_height();
-  if (has_coordinates) {
-    return CreateStatusWithPayload(
-        absl::StatusCode::kInvalidArgument,
-        "GestureRecognizer does not support region-of-interest.",
-        MediaPipeTasksStatus::kInvalidArgumentError);
-  }
-  result.set_x_center(0.5);
-  result.set_y_center(0.5);
-  result.set_width(1);
-  result.set_height(1);
-  return result;
-}
-
 // Creates a MediaPipe graph config that contains a subgraph node of
 // "mediapipe.tasks.vision.GestureRecognizerGraph".
If the task is running // in the live stream mode, a "FlowLimiterCalculator" will be added to limit the @@ -248,15 +224,16 @@ absl::StatusOr> GestureRecognizer::Create( absl::StatusOr GestureRecognizer::Recognize( mediapipe::Image image, - std::optional image_processing_options) { + std::optional image_processing_options) { if (image.UsesGpu()) { return CreateStatusWithPayload( absl::StatusCode::kInvalidArgument, "GPU input images are currently not supported.", MediaPipeTasksStatus::kRunnerUnexpectedInputError); } - ASSIGN_OR_RETURN(NormalizedRect norm_rect, - FillNormalizedRect(image_processing_options)); + ASSIGN_OR_RETURN( + NormalizedRect norm_rect, + ConvertToNormalizedRect(image_processing_options, /*roi_allowed=*/false)); ASSIGN_OR_RETURN( auto output_packets, ProcessImageData( @@ -283,15 +260,16 @@ absl::StatusOr GestureRecognizer::Recognize( absl::StatusOr GestureRecognizer::RecognizeForVideo( mediapipe::Image image, int64 timestamp_ms, - std::optional image_processing_options) { + std::optional image_processing_options) { if (image.UsesGpu()) { return CreateStatusWithPayload( absl::StatusCode::kInvalidArgument, absl::StrCat("GPU input images are currently not supported."), MediaPipeTasksStatus::kRunnerUnexpectedInputError); } - ASSIGN_OR_RETURN(NormalizedRect norm_rect, - FillNormalizedRect(image_processing_options)); + ASSIGN_OR_RETURN( + NormalizedRect norm_rect, + ConvertToNormalizedRect(image_processing_options, /*roi_allowed=*/false)); ASSIGN_OR_RETURN( auto output_packets, ProcessVideoData( @@ -321,15 +299,16 @@ absl::StatusOr GestureRecognizer::RecognizeForVideo( absl::Status GestureRecognizer::RecognizeAsync( mediapipe::Image image, int64 timestamp_ms, - std::optional image_processing_options) { + std::optional image_processing_options) { if (image.UsesGpu()) { return CreateStatusWithPayload( absl::StatusCode::kInvalidArgument, absl::StrCat("GPU input images are currently not supported."), MediaPipeTasksStatus::kRunnerUnexpectedInputError); } - ASSIGN_OR_RETURN(NormalizedRect norm_rect, - FillNormalizedRect(image_processing_options)); + ASSIGN_OR_RETURN( + NormalizedRect norm_rect, + ConvertToNormalizedRect(image_processing_options, /*roi_allowed=*/false)); return SendLiveStreamData( {{kImageInStreamName, MakePacket(std::move(image)) diff --git a/mediapipe/tasks/cc/vision/gesture_recognizer/gesture_recognizer.h b/mediapipe/tasks/cc/vision/gesture_recognizer/gesture_recognizer.h index 750a99797..29c8bea7b 100644 --- a/mediapipe/tasks/cc/vision/gesture_recognizer/gesture_recognizer.h +++ b/mediapipe/tasks/cc/vision/gesture_recognizer/gesture_recognizer.h @@ -23,10 +23,10 @@ limitations under the License. #include "mediapipe/framework/formats/classification.pb.h" #include "mediapipe/framework/formats/image.h" #include "mediapipe/framework/formats/landmark.pb.h" -#include "mediapipe/framework/formats/rect.pb.h" #include "mediapipe/tasks/cc/components/containers/gesture_recognition_result.h" #include "mediapipe/tasks/cc/core/base_options.h" #include "mediapipe/tasks/cc/vision/core/base_vision_task_api.h" +#include "mediapipe/tasks/cc/vision/core/image_processing_options.h" #include "mediapipe/tasks/cc/vision/core/running_mode.h" namespace mediapipe { @@ -129,36 +129,36 @@ class GestureRecognizer : tasks::vision::core::BaseVisionTaskApi { // Only use this method when the GestureRecognizer is created with the image // running mode. // - // image - mediapipe::Image - // Image to perform hand gesture recognition on. 
- // imageProcessingOptions - std::optional - // If provided, can be used to specify the rotation to apply to the image - // before performing classification, by setting its 'rotation' field in - // radians (e.g. 'M_PI / 2' for a 90° anti-clockwise rotation). Note that - // specifying a region-of-interest using the 'x_center', 'y_center', 'width' - // and 'height' fields is NOT supported and will result in an invalid - // argument error being returned. + // The optional 'image_processing_options' parameter can be used to specify + // the rotation to apply to the image before performing recognition, by + // setting its 'rotation_degrees' field. Note that specifying a + // region-of-interest using the 'region_of_interest' field is NOT supported + // and will result in an invalid argument error being returned. // // The image can be of any size with format RGB or RGBA. // TODO: Describes how the input image will be preprocessed // after the yuv support is implemented. - // TODO: use an ImageProcessingOptions struct instead of - // NormalizedRect. absl::StatusOr Recognize( Image image, - std::optional image_processing_options = + std::optional image_processing_options = std::nullopt); // Performs gesture recognition on the provided video frame. // Only use this method when the GestureRecognizer is created with the video // running mode. // + // The optional 'image_processing_options' parameter can be used to specify + // the rotation to apply to the image before performing recognition, by + // setting its 'rotation_degrees' field. Note that specifying a + // region-of-interest using the 'region_of_interest' field is NOT supported + // and will result in an invalid argument error being returned. + // // The image can be of any size with format RGB or RGBA. It's required to // provide the video frame's timestamp (in milliseconds). The input timestamps // must be monotonically increasing. absl::StatusOr RecognizeForVideo(Image image, int64 timestamp_ms, - std::optional + std::optional image_processing_options = std::nullopt); // Sends live image data to perform gesture recognition, and the results will @@ -171,6 +171,12 @@ class GestureRecognizer : tasks::vision::core::BaseVisionTaskApi { // sent to the gesture recognizer. The input timestamps must be monotonically // increasing. // + // The optional 'image_processing_options' parameter can be used to specify + // the rotation to apply to the image before performing recognition, by + // setting its 'rotation_degrees' field. Note that specifying a + // region-of-interest using the 'region_of_interest' field is NOT supported + // and will result in an invalid argument error being returned. + // // The "result_callback" provides // - A vector of GestureRecognitionResult, each is the recognized results // for a input frame. @@ -180,7 +186,7 @@ class GestureRecognizer : tasks::vision::core::BaseVisionTaskApi { // outside of the callback, callers need to make a copy of the image. // - The input timestamp in milliseconds. absl::Status RecognizeAsync(Image image, int64 timestamp_ms, - std::optional + std::optional image_processing_options = std::nullopt); // Shuts down the GestureRecognizer when all works are done. 
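To make the new call pattern concrete, a minimal test-style sketch of the API above. The asset and image paths are placeholders, the helpers (DecodeImageFromFile, MP_ASSERT_* macros) mirror the image_classifier tests later in this patch, and all other GestureRecognizerOptions fields are assumed to keep their defaults:

// Assumes the mediapipe::tasks::vision namespaces and the usual test includes.
auto options = std::make_unique<GestureRecognizerOptions>();
options->base_options.model_asset_path = "/path/to/gesture_recognizer.task";  // placeholder
MP_ASSERT_OK_AND_ASSIGN(std::unique_ptr<GestureRecognizer> recognizer,
                        GestureRecognizer::Create(std::move(options)));
MP_ASSERT_OK_AND_ASSIGN(Image image,
                        DecodeImageFromFile("/path/to/hand.jpg"));  // placeholder
// Rotate 90° anti-clockwise before inference; 'region_of_interest' must stay
// unset for this task or Recognize() returns an invalid argument error.
core::ImageProcessingOptions image_processing_options;
image_processing_options.rotation_degrees = -90;
MP_ASSERT_OK_AND_ASSIGN(auto results,
                        recognizer->Recognize(image, image_processing_options));
MP_ASSERT_OK(recognizer->Close());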
diff --git a/mediapipe/tasks/cc/vision/image_classifier/BUILD b/mediapipe/tasks/cc/vision/image_classifier/BUILD index dfa77cb96..3d655cd50 100644 --- a/mediapipe/tasks/cc/vision/image_classifier/BUILD +++ b/mediapipe/tasks/cc/vision/image_classifier/BUILD @@ -59,6 +59,7 @@ cc_library( "//mediapipe/tasks/cc/core/proto:base_options_cc_proto", "//mediapipe/tasks/cc/core/proto:inference_subgraph_cc_proto", "//mediapipe/tasks/cc/vision/core:base_vision_task_api", + "//mediapipe/tasks/cc/vision/core:image_processing_options", "//mediapipe/tasks/cc/vision/core:running_mode", "//mediapipe/tasks/cc/vision/core:vision_task_api_factory", "//mediapipe/tasks/cc/vision/image_classifier/proto:image_classifier_graph_options_cc_proto", diff --git a/mediapipe/tasks/cc/vision/image_classifier/image_classifier.cc b/mediapipe/tasks/cc/vision/image_classifier/image_classifier.cc index f3dcdd07d..8a32758f4 100644 --- a/mediapipe/tasks/cc/vision/image_classifier/image_classifier.cc +++ b/mediapipe/tasks/cc/vision/image_classifier/image_classifier.cc @@ -34,6 +34,7 @@ limitations under the License. #include "mediapipe/tasks/cc/core/proto/inference_subgraph.pb.h" #include "mediapipe/tasks/cc/core/task_runner.h" #include "mediapipe/tasks/cc/core/utils.h" +#include "mediapipe/tasks/cc/vision/core/image_processing_options.h" #include "mediapipe/tasks/cc/vision/core/running_mode.h" #include "mediapipe/tasks/cc/vision/core/vision_task_api_factory.h" #include "mediapipe/tasks/cc/vision/image_classifier/proto/image_classifier_graph_options.pb.h" @@ -59,26 +60,6 @@ constexpr int kMicroSecondsPerMilliSecond = 1000; using ::mediapipe::tasks::components::containers::proto::ClassificationResult; using ::mediapipe::tasks::core::PacketMap; -// Returns a NormalizedRect covering the full image if input is not present. -// Otherwise, makes sure the x_center, y_center, width and height are set in -// case only a rotation was provided in the input. -NormalizedRect FillNormalizedRect( - std::optional normalized_rect) { - NormalizedRect result; - if (normalized_rect.has_value()) { - result = *normalized_rect; - } - bool has_coordinates = result.has_x_center() || result.has_y_center() || - result.has_width() || result.has_height(); - if (!has_coordinates) { - result.set_x_center(0.5); - result.set_y_center(0.5); - result.set_width(1); - result.set_height(1); - } - return result; -} - // Creates a MediaPipe graph config that contains a subgraph node of // type "ImageClassifierGraph". 
If the task is running in the live stream mode, // a "FlowLimiterCalculator" will be added to limit the number of frames in @@ -164,14 +145,16 @@ absl::StatusOr> ImageClassifier::Create( } absl::StatusOr ImageClassifier::Classify( - Image image, std::optional image_processing_options) { + Image image, + std::optional image_processing_options) { if (image.UsesGpu()) { return CreateStatusWithPayload( absl::StatusCode::kInvalidArgument, "GPU input images are currently not supported.", MediaPipeTasksStatus::kRunnerUnexpectedInputError); } - NormalizedRect norm_rect = FillNormalizedRect(image_processing_options); + ASSIGN_OR_RETURN(NormalizedRect norm_rect, + ConvertToNormalizedRect(image_processing_options)); ASSIGN_OR_RETURN( auto output_packets, ProcessImageData( @@ -183,14 +166,15 @@ absl::StatusOr ImageClassifier::Classify( absl::StatusOr ImageClassifier::ClassifyForVideo( Image image, int64 timestamp_ms, - std::optional image_processing_options) { + std::optional image_processing_options) { if (image.UsesGpu()) { return CreateStatusWithPayload( absl::StatusCode::kInvalidArgument, "GPU input images are currently not supported.", MediaPipeTasksStatus::kRunnerUnexpectedInputError); } - NormalizedRect norm_rect = FillNormalizedRect(image_processing_options); + ASSIGN_OR_RETURN(NormalizedRect norm_rect, + ConvertToNormalizedRect(image_processing_options)); ASSIGN_OR_RETURN( auto output_packets, ProcessVideoData( @@ -206,14 +190,15 @@ absl::StatusOr ImageClassifier::ClassifyForVideo( absl::Status ImageClassifier::ClassifyAsync( Image image, int64 timestamp_ms, - std::optional image_processing_options) { + std::optional image_processing_options) { if (image.UsesGpu()) { return CreateStatusWithPayload( absl::StatusCode::kInvalidArgument, "GPU input images are currently not supported.", MediaPipeTasksStatus::kRunnerUnexpectedInputError); } - NormalizedRect norm_rect = FillNormalizedRect(image_processing_options); + ASSIGN_OR_RETURN(NormalizedRect norm_rect, + ConvertToNormalizedRect(image_processing_options)); return SendLiveStreamData( {{kImageInStreamName, MakePacket(std::move(image)) diff --git a/mediapipe/tasks/cc/vision/image_classifier/image_classifier.h b/mediapipe/tasks/cc/vision/image_classifier/image_classifier.h index 5dff06cc7..de69b7994 100644 --- a/mediapipe/tasks/cc/vision/image_classifier/image_classifier.h +++ b/mediapipe/tasks/cc/vision/image_classifier/image_classifier.h @@ -22,11 +22,11 @@ limitations under the License. #include "absl/status/statusor.h" #include "mediapipe/framework/formats/image.h" -#include "mediapipe/framework/formats/rect.pb.h" #include "mediapipe/tasks/cc/components/containers/proto/classifications.pb.h" #include "mediapipe/tasks/cc/components/processors/classifier_options.h" #include "mediapipe/tasks/cc/core/base_options.h" #include "mediapipe/tasks/cc/vision/core/base_vision_task_api.h" +#include "mediapipe/tasks/cc/vision/core/image_processing_options.h" #include "mediapipe/tasks/cc/vision/core/running_mode.h" namespace mediapipe { @@ -109,12 +109,10 @@ class ImageClassifier : tasks::vision::core::BaseVisionTaskApi { // // The optional 'image_processing_options' parameter can be used to specify: // - the rotation to apply to the image before performing classification, by - // setting its 'rotation' field in radians (e.g. 'M_PI / 2' for a 90° - // anti-clockwise rotation). + // setting its 'rotation_degrees' field. 
// and/or // - the region-of-interest on which to perform classification, by setting its - // 'x_center', 'y_center', 'width' and 'height' fields. If none of these is - // set, they will automatically be set to cover the full image. + // 'region_of_interest' field. If not specified, the full image is used. // If both are specified, the crop around the region-of-interest is extracted // first, then the specified rotation is applied to the crop. // @@ -126,19 +124,17 @@ class ImageClassifier : tasks::vision::core::BaseVisionTaskApi { // YUVToImageCalculator is integrated. absl::StatusOr Classify( mediapipe::Image image, - std::optional image_processing_options = + std::optional image_processing_options = std::nullopt); // Performs image classification on the provided video frame. // // The optional 'image_processing_options' parameter can be used to specify: // - the rotation to apply to the image before performing classification, by - // setting its 'rotation' field in radians (e.g. 'M_PI / 2' for a 90° - // anti-clockwise rotation). + // setting its 'rotation_degrees' field. // and/or // - the region-of-interest on which to perform classification, by setting its - // 'x_center', 'y_center', 'width' and 'height' fields. If none of these is - // set, they will automatically be set to cover the full image. + // 'region_of_interest' field. If not specified, the full image is used. // If both are specified, the crop around the region-of-interest is extracted // first, then the specified rotation is applied to the crop. // @@ -150,7 +146,7 @@ class ImageClassifier : tasks::vision::core::BaseVisionTaskApi { // must be monotonically increasing. absl::StatusOr ClassifyForVideo(mediapipe::Image image, int64 timestamp_ms, - std::optional + std::optional image_processing_options = std::nullopt); // Sends live image data to image classification, and the results will be @@ -158,12 +154,10 @@ class ImageClassifier : tasks::vision::core::BaseVisionTaskApi { // // The optional 'image_processing_options' parameter can be used to specify: // - the rotation to apply to the image before performing classification, by - // setting its 'rotation' field in radians (e.g. 'M_PI / 2' for a 90° - // anti-clockwise rotation). + // setting its 'rotation_degrees' field. // and/or // - the region-of-interest on which to perform classification, by setting its - // 'x_center', 'y_center', 'width' and 'height' fields. If none of these is - // set, they will automatically be set to cover the full image. + // 'region_of_interest' field. If not specified, the full image is used. // If both are specified, the crop around the region-of-interest is extracted // first, then the specified rotation is applied to the crop. // @@ -175,7 +169,7 @@ class ImageClassifier : tasks::vision::core::BaseVisionTaskApi { // sent to the object detector. The input timestamps must be monotonically // increasing. // - // The "result_callback" prvoides + // The "result_callback" provides: // - The classification results as a ClassificationResult object. // - The const reference to the corresponding input image that the image // classifier runs on. Note that the const reference to the image will no @@ -183,12 +177,9 @@ class ImageClassifier : tasks::vision::core::BaseVisionTaskApi { // outside of the callback, callers need to make a copy of the image. // - The input timestamp in milliseconds. 
absl::Status ClassifyAsync(mediapipe::Image image, int64 timestamp_ms, - std::optional + std::optional image_processing_options = std::nullopt); - // TODO: add Classify() variants taking a region of interest as - // additional argument. - // Shuts down the ImageClassifier when all works are done. absl::Status Close() { return runner_->Close(); } }; diff --git a/mediapipe/tasks/cc/vision/image_classifier/image_classifier_test.cc b/mediapipe/tasks/cc/vision/image_classifier/image_classifier_test.cc index 55830e520..0c45122c0 100644 --- a/mediapipe/tasks/cc/vision/image_classifier/image_classifier_test.cc +++ b/mediapipe/tasks/cc/vision/image_classifier/image_classifier_test.cc @@ -27,7 +27,6 @@ limitations under the License. #include "absl/strings/str_format.h" #include "mediapipe/framework/deps/file_path.h" #include "mediapipe/framework/formats/image.h" -#include "mediapipe/framework/formats/rect.pb.h" #include "mediapipe/framework/port/gmock.h" #include "mediapipe/framework/port/gtest.h" #include "mediapipe/framework/port/parse_text_proto.h" @@ -35,6 +34,8 @@ limitations under the License. #include "mediapipe/tasks/cc/common.h" #include "mediapipe/tasks/cc/components/containers/proto/category.pb.h" #include "mediapipe/tasks/cc/components/containers/proto/classifications.pb.h" +#include "mediapipe/tasks/cc/components/containers/rect.h" +#include "mediapipe/tasks/cc/vision/core/image_processing_options.h" #include "mediapipe/tasks/cc/vision/core/running_mode.h" #include "mediapipe/tasks/cc/vision/utils/image_utils.h" #include "tensorflow/lite/core/api/op_resolver.h" @@ -49,9 +50,11 @@ namespace image_classifier { namespace { using ::mediapipe::file::JoinPath; +using ::mediapipe::tasks::components::containers::Rect; using ::mediapipe::tasks::components::containers::proto::ClassificationEntry; using ::mediapipe::tasks::components::containers::proto::ClassificationResult; using ::mediapipe::tasks::components::containers::proto::Classifications; +using ::mediapipe::tasks::vision::core::ImageProcessingOptions; using ::testing::HasSubstr; using ::testing::Optional; @@ -547,12 +550,9 @@ TEST_F(ImageModeTest, SucceedsWithRegionOfInterest) { options->classifier_options.max_results = 1; MP_ASSERT_OK_AND_ASSIGN(std::unique_ptr image_classifier, ImageClassifier::Create(std::move(options))); - // Crop around the soccer ball. - NormalizedRect image_processing_options; - image_processing_options.set_x_center(0.532); - image_processing_options.set_y_center(0.521); - image_processing_options.set_width(0.164); - image_processing_options.set_height(0.427); + // Region-of-interest around the soccer ball. + Rect roi{/*left=*/0.45, /*top=*/0.3075, /*right=*/0.614, /*bottom=*/0.7345}; + ImageProcessingOptions image_processing_options{roi, /*rotation_degrees=*/0}; MP_ASSERT_OK_AND_ASSIGN(auto results, image_classifier->Classify( image, image_processing_options)); @@ -572,8 +572,8 @@ TEST_F(ImageModeTest, SucceedsWithRotation) { ImageClassifier::Create(std::move(options))); // Specify a 90° anti-clockwise rotation. 
- NormalizedRect image_processing_options; - image_processing_options.set_rotation(M_PI / 2.0); + ImageProcessingOptions image_processing_options; + image_processing_options.rotation_degrees = -90; MP_ASSERT_OK_AND_ASSIGN(auto results, image_classifier->Classify( image, image_processing_options)); @@ -616,13 +616,10 @@ TEST_F(ImageModeTest, SucceedsWithRegionOfInterestAndRotation) { options->classifier_options.max_results = 1; MP_ASSERT_OK_AND_ASSIGN(std::unique_ptr image_classifier, ImageClassifier::Create(std::move(options))); - // Crop around the chair, with 90° anti-clockwise rotation. - NormalizedRect image_processing_options; - image_processing_options.set_x_center(0.2821); - image_processing_options.set_y_center(0.2406); - image_processing_options.set_width(0.5642); - image_processing_options.set_height(0.1286); - image_processing_options.set_rotation(M_PI / 2.0); + // Region-of-interest around the chair, with 90° anti-clockwise rotation. + Rect roi{/*left=*/0.006, /*top=*/0.1763, /*right=*/0.5702, /*bottom=*/0.3049}; + ImageProcessingOptions image_processing_options{roi, + /*rotation_degrees=*/-90}; MP_ASSERT_OK_AND_ASSIGN(auto results, image_classifier->Classify( image, image_processing_options)); @@ -633,7 +630,7 @@ TEST_F(ImageModeTest, SucceedsWithRegionOfInterestAndRotation) { entries { categories { index: 560 - score: 0.6800408 + score: 0.6522213 category_name: "folding chair" } timestamp_ms: 0 @@ -643,6 +640,69 @@ TEST_F(ImageModeTest, SucceedsWithRegionOfInterestAndRotation) { })pb")); } +// Testing all these once with ImageClassifier. +TEST_F(ImageModeTest, FailsWithInvalidImageProcessingOptions) { + MP_ASSERT_OK_AND_ASSIGN(Image image, + DecodeImageFromFile(JoinPath("./", kTestDataDirectory, + "multi_objects.jpg"))); + auto options = std::make_unique(); + options->base_options.model_asset_path = + JoinPath("./", kTestDataDirectory, kMobileNetFloatWithMetadata); + MP_ASSERT_OK_AND_ASSIGN(std::unique_ptr image_classifier, + ImageClassifier::Create(std::move(options))); + + // Invalid: left > right. + Rect roi{/*left=*/0.9, /*top=*/0, /*right=*/0.1, /*bottom=*/1}; + ImageProcessingOptions image_processing_options{roi, + /*rotation_degrees=*/0}; + auto results = image_classifier->Classify(image, image_processing_options); + EXPECT_EQ(results.status().code(), absl::StatusCode::kInvalidArgument); + EXPECT_THAT(results.status().message(), + HasSubstr("Expected Rect with left < right and top < bottom")); + EXPECT_THAT( + results.status().GetPayload(kMediaPipeTasksPayload), + Optional(absl::Cord(absl::StrCat( + MediaPipeTasksStatus::kImageProcessingInvalidArgumentError)))); + + // Invalid: top > bottom. + roi = {/*left=*/0, /*top=*/0.9, /*right=*/1, /*bottom=*/0.1}; + image_processing_options = {roi, + /*rotation_degrees=*/0}; + results = image_classifier->Classify(image, image_processing_options); + EXPECT_EQ(results.status().code(), absl::StatusCode::kInvalidArgument); + EXPECT_THAT(results.status().message(), + HasSubstr("Expected Rect with left < right and top < bottom")); + EXPECT_THAT( + results.status().GetPayload(kMediaPipeTasksPayload), + Optional(absl::Cord(absl::StrCat( + MediaPipeTasksStatus::kImageProcessingInvalidArgumentError)))); + + // Invalid: coordinates out of [0,1] range. 
+ roi = {/*left=*/-0.1, /*top=*/0, /*right=*/1, /*bottom=*/1}; + image_processing_options = {roi, + /*rotation_degrees=*/0}; + results = image_classifier->Classify(image, image_processing_options); + EXPECT_EQ(results.status().code(), absl::StatusCode::kInvalidArgument); + EXPECT_THAT(results.status().message(), + HasSubstr("Expected Rect values to be in [0,1]")); + EXPECT_THAT( + results.status().GetPayload(kMediaPipeTasksPayload), + Optional(absl::Cord(absl::StrCat( + MediaPipeTasksStatus::kImageProcessingInvalidArgumentError)))); + + // Invalid: rotation not a multiple of 90°. + image_processing_options = {/*region_of_interest=*/std::nullopt, + /*rotation_degrees=*/1}; + results = image_classifier->Classify(image, image_processing_options); + EXPECT_EQ(results.status().code(), absl::StatusCode::kInvalidArgument); + EXPECT_THAT(results.status().message(), + HasSubstr("Expected rotation to be a multiple of 90°")); + EXPECT_THAT( + results.status().GetPayload(kMediaPipeTasksPayload), + Optional(absl::Cord(absl::StrCat( + MediaPipeTasksStatus::kImageProcessingInvalidArgumentError)))); +} + class VideoModeTest : public tflite_shims::testing::Test {}; TEST_F(VideoModeTest, FailsWithCallingWrongMethod) { @@ -732,11 +792,9 @@ TEST_F(VideoModeTest, SucceedsWithRegionOfInterest) { MP_ASSERT_OK_AND_ASSIGN(std::unique_ptr image_classifier, ImageClassifier::Create(std::move(options))); // Crop around the soccer ball. - NormalizedRect image_processing_options; - image_processing_options.set_x_center(0.532); - image_processing_options.set_y_center(0.521); - image_processing_options.set_width(0.164); - image_processing_options.set_height(0.427); + // Region-of-interest around the soccer ball. + Rect roi{/*left=*/0.45, /*top=*/0.3075, /*right=*/0.614, /*bottom=*/0.7345}; + ImageProcessingOptions image_processing_options{roi, /*rotation_degrees=*/0}; for (int i = 0; i < iterations; ++i) { MP_ASSERT_OK_AND_ASSIGN( @@ -877,11 +935,8 @@ TEST_F(LiveStreamModeTest, SucceedsWithRegionOfInterest) { MP_ASSERT_OK_AND_ASSIGN(std::unique_ptr image_classifier, ImageClassifier::Create(std::move(options))); // Crop around the soccer ball. 
- NormalizedRect image_processing_options; - image_processing_options.set_x_center(0.532); - image_processing_options.set_y_center(0.521); - image_processing_options.set_width(0.164); - image_processing_options.set_height(0.427); + Rect roi{/*left=*/0.45, /*top=*/0.3075, /*right=*/0.614, /*bottom=*/0.7345}; + ImageProcessingOptions image_processing_options{roi, /*rotation_degrees=*/0}; for (int i = 0; i < iterations; ++i) { MP_ASSERT_OK( diff --git a/mediapipe/tasks/cc/vision/object_detector/BUILD b/mediapipe/tasks/cc/vision/object_detector/BUILD index 186909509..8220d8b7f 100644 --- a/mediapipe/tasks/cc/vision/object_detector/BUILD +++ b/mediapipe/tasks/cc/vision/object_detector/BUILD @@ -75,6 +75,7 @@ cc_library( "//mediapipe/tasks/cc/core/proto:base_options_cc_proto", "//mediapipe/tasks/cc/core/proto:inference_subgraph_cc_proto", "//mediapipe/tasks/cc/vision/core:base_vision_task_api", + "//mediapipe/tasks/cc/vision/core:image_processing_options", "//mediapipe/tasks/cc/vision/core:running_mode", "//mediapipe/tasks/cc/vision/core:vision_task_api_factory", "//mediapipe/tasks/cc/vision/object_detector/proto:object_detector_options_cc_proto", diff --git a/mediapipe/tasks/cc/vision/object_detector/object_detector.cc b/mediapipe/tasks/cc/vision/object_detector/object_detector.cc index 9149a3cbe..dd19237ff 100644 --- a/mediapipe/tasks/cc/vision/object_detector/object_detector.cc +++ b/mediapipe/tasks/cc/vision/object_detector/object_detector.cc @@ -34,6 +34,7 @@ limitations under the License. #include "mediapipe/tasks/cc/core/proto/base_options.pb.h" #include "mediapipe/tasks/cc/core/proto/inference_subgraph.pb.h" #include "mediapipe/tasks/cc/core/utils.h" +#include "mediapipe/tasks/cc/vision/core/image_processing_options.h" #include "mediapipe/tasks/cc/vision/core/running_mode.h" #include "mediapipe/tasks/cc/vision/core/vision_task_api_factory.h" #include "mediapipe/tasks/cc/vision/object_detector/proto/object_detector_options.pb.h" @@ -58,31 +59,6 @@ constexpr int kMicroSecondsPerMilliSecond = 1000; using ObjectDetectorOptionsProto = object_detector::proto::ObjectDetectorOptions; -// Returns a NormalizedRect filling the whole image. If input is present, its -// rotation is set in the returned NormalizedRect and a check is performed to -// make sure no region-of-interest was provided. Otherwise, rotation is set to -// 0. -absl::StatusOr FillNormalizedRect( - std::optional normalized_rect) { - NormalizedRect result; - if (normalized_rect.has_value()) { - result = *normalized_rect; - } - bool has_coordinates = result.has_x_center() || result.has_y_center() || - result.has_width() || result.has_height(); - if (has_coordinates) { - return CreateStatusWithPayload( - absl::StatusCode::kInvalidArgument, - "ObjectDetector does not support region-of-interest.", - MediaPipeTasksStatus::kInvalidArgumentError); - } - result.set_x_center(0.5); - result.set_y_center(0.5); - result.set_width(1); - result.set_height(1); - return result; -} - // Creates a MediaPipe graph config that contains a subgraph node of // "mediapipe.tasks.vision.ObjectDetectorGraph". 
If the task is running in the // live stream mode, a "FlowLimiterCalculator" will be added to limit the @@ -170,15 +146,16 @@ absl::StatusOr> ObjectDetector::Create( absl::StatusOr> ObjectDetector::Detect( mediapipe::Image image, - std::optional image_processing_options) { + std::optional image_processing_options) { if (image.UsesGpu()) { return CreateStatusWithPayload( absl::StatusCode::kInvalidArgument, absl::StrCat("GPU input images are currently not supported."), MediaPipeTasksStatus::kRunnerUnexpectedInputError); } - ASSIGN_OR_RETURN(NormalizedRect norm_rect, - FillNormalizedRect(image_processing_options)); + ASSIGN_OR_RETURN( + NormalizedRect norm_rect, + ConvertToNormalizedRect(image_processing_options, /*roi_allowed=*/false)); ASSIGN_OR_RETURN( auto output_packets, ProcessImageData( @@ -189,15 +166,16 @@ absl::StatusOr> ObjectDetector::Detect( absl::StatusOr> ObjectDetector::DetectForVideo( mediapipe::Image image, int64 timestamp_ms, - std::optional image_processing_options) { + std::optional image_processing_options) { if (image.UsesGpu()) { return CreateStatusWithPayload( absl::StatusCode::kInvalidArgument, absl::StrCat("GPU input images are currently not supported."), MediaPipeTasksStatus::kRunnerUnexpectedInputError); } - ASSIGN_OR_RETURN(NormalizedRect norm_rect, - FillNormalizedRect(image_processing_options)); + ASSIGN_OR_RETURN( + NormalizedRect norm_rect, + ConvertToNormalizedRect(image_processing_options, /*roi_allowed=*/false)); ASSIGN_OR_RETURN( auto output_packets, ProcessVideoData( @@ -212,15 +190,16 @@ absl::StatusOr> ObjectDetector::DetectForVideo( absl::Status ObjectDetector::DetectAsync( Image image, int64 timestamp_ms, - std::optional image_processing_options) { + std::optional image_processing_options) { if (image.UsesGpu()) { return CreateStatusWithPayload( absl::StatusCode::kInvalidArgument, absl::StrCat("GPU input images are currently not supported."), MediaPipeTasksStatus::kRunnerUnexpectedInputError); } - ASSIGN_OR_RETURN(NormalizedRect norm_rect, - FillNormalizedRect(image_processing_options)); + ASSIGN_OR_RETURN( + NormalizedRect norm_rect, + ConvertToNormalizedRect(image_processing_options, /*roi_allowed=*/false)); return SendLiveStreamData( {{kImageInStreamName, MakePacket(std::move(image)) diff --git a/mediapipe/tasks/cc/vision/object_detector/object_detector.h b/mediapipe/tasks/cc/vision/object_detector/object_detector.h index 2e5ed7b8d..44ce68ed9 100644 --- a/mediapipe/tasks/cc/vision/object_detector/object_detector.h +++ b/mediapipe/tasks/cc/vision/object_detector/object_detector.h @@ -27,9 +27,9 @@ limitations under the License. #include "absl/status/statusor.h" #include "mediapipe/framework/formats/detection.pb.h" #include "mediapipe/framework/formats/image.h" -#include "mediapipe/framework/formats/rect.pb.h" #include "mediapipe/tasks/cc/core/base_options.h" #include "mediapipe/tasks/cc/vision/core/base_vision_task_api.h" +#include "mediapipe/tasks/cc/vision/core/image_processing_options.h" #include "mediapipe/tasks/cc/vision/core/running_mode.h" namespace mediapipe { @@ -154,10 +154,9 @@ class ObjectDetector : tasks::vision::core::BaseVisionTaskApi { // after the yuv support is implemented. // // The optional 'image_processing_options' parameter can be used to specify - // the rotation to apply to the image before performing classification, by - // setting its 'rotation' field in radians (e.g. 'M_PI / 2' for a 90° - // anti-clockwise rotation). 
Note that specifying a region-of-interest using - // the 'x_center', 'y_center', 'width' and 'height' fields is NOT supported + // the rotation to apply to the image before performing detection, by + // setting its 'rotation_degrees' field. Note that specifying a + // region-of-interest using the 'region_of_interest' field is NOT supported // and will result in an invalid argument error being returned. // // For CPU images, the returned bounding boxes are expressed in the @@ -168,7 +167,7 @@ class ObjectDetector : tasks::vision::core::BaseVisionTaskApi { // images after enabling the gpu support in MediaPipe Tasks. absl::StatusOr> Detect( mediapipe::Image image, - std::optional image_processing_options = + std::optional image_processing_options = std::nullopt); // Performs object detection on the provided video frame. @@ -180,10 +179,9 @@ class ObjectDetector : tasks::vision::core::BaseVisionTaskApi { // must be monotonically increasing. // // The optional 'image_processing_options' parameter can be used to specify - // the rotation to apply to the image before performing classification, by - // setting its 'rotation' field in radians (e.g. 'M_PI / 2' for a 90° - // anti-clockwise rotation). Note that specifying a region-of-interest using - // the 'x_center', 'y_center', 'width' and 'height' fields is NOT supported + // the rotation to apply to the image before performing detection, by + // setting its 'rotation_degrees' field. Note that specifying a + // region-of-interest using the 'region_of_interest' field is NOT supported // and will result in an invalid argument error being returned. // // For CPU images, the returned bounding boxes are expressed in the @@ -192,7 +190,7 @@ class ObjectDetector : tasks::vision::core::BaseVisionTaskApi { // underlying image data. absl::StatusOr> DetectForVideo( mediapipe::Image image, int64 timestamp_ms, - std::optional image_processing_options = + std::optional image_processing_options = std::nullopt); // Sends live image data to perform object detection, and the results will be @@ -206,10 +204,9 @@ class ObjectDetector : tasks::vision::core::BaseVisionTaskApi { // increasing. // // The optional 'image_processing_options' parameter can be used to specify - // the rotation to apply to the image before performing classification, by - // setting its 'rotation' field in radians (e.g. 'M_PI / 2' for a 90° - // anti-clockwise rotation). Note that specifying a region-of-interest using - // the 'x_center', 'y_center', 'width' and 'height' fields is NOT supported + // the rotation to apply to the image before performing detection, by + // setting its 'rotation_degrees' field. Note that specifying a + // region-of-interest using the 'region_of_interest' field is NOT supported // and will result in an invalid argument error being returned. // // The "result_callback" provides @@ -223,7 +220,7 @@ class ObjectDetector : tasks::vision::core::BaseVisionTaskApi { // outside of the callback, callers need to make a copy of the image. // - The input timestamp in milliseconds. absl::Status DetectAsync(mediapipe::Image image, int64 timestamp_ms, - std::optional + std::optional image_processing_options = std::nullopt); // Shuts down the ObjectDetector when all works are done. 
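A sketch of the live-stream flow described above. The 'running_mode' and 'result_callback' fields of ObjectDetectorOptions are assumed from their use elsewhere in the task API (they are not part of this diff), and the paths are placeholders:

// Assumes the mediapipe::tasks::vision namespace and the test helpers used in
// object_detector_test.cc below.
auto options = std::make_unique<ObjectDetectorOptions>();
options->base_options.model_asset_path = "/path/to/ssd_detector.tflite";  // placeholder
options->running_mode = core::RunningMode::LIVE_STREAM;
// Assumed callback shape, matching the documentation above: detections,
// input image, and timestamp in milliseconds.
options->result_callback = [](absl::StatusOr<std::vector<mediapipe::Detection>> detections,
                              const Image& image, int64 timestamp_ms) {
  // Consume detections here; copy `image` if it must outlive the callback.
};
MP_ASSERT_OK_AND_ASSIGN(std::unique_ptr<ObjectDetector> detector,
                        ObjectDetector::Create(std::move(options)));
MP_ASSERT_OK_AND_ASSIGN(Image frame,
                        DecodeImageFromFile("/path/to/frame.jpg"));  // placeholder
core::ImageProcessingOptions image_processing_options;
image_processing_options.rotation_degrees = -90;  // 90° anti-clockwise.
MP_ASSERT_OK(detector->DetectAsync(frame, /*timestamp_ms=*/0,
                                   image_processing_options));
MP_ASSERT_OK(detector->Close());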
diff --git a/mediapipe/tasks/cc/vision/object_detector/object_detector_test.cc b/mediapipe/tasks/cc/vision/object_detector/object_detector_test.cc index 8db3fa767..1747685dd 100644 --- a/mediapipe/tasks/cc/vision/object_detector/object_detector_test.cc +++ b/mediapipe/tasks/cc/vision/object_detector/object_detector_test.cc @@ -31,11 +31,12 @@ limitations under the License. #include "mediapipe/framework/deps/file_path.h" #include "mediapipe/framework/formats/image.h" #include "mediapipe/framework/formats/location_data.pb.h" -#include "mediapipe/framework/formats/rect.pb.h" #include "mediapipe/framework/port/gmock.h" #include "mediapipe/framework/port/gtest.h" #include "mediapipe/framework/port/parse_text_proto.h" #include "mediapipe/framework/port/status_matchers.h" +#include "mediapipe/tasks/cc/components/containers/rect.h" +#include "mediapipe/tasks/cc/vision/core/image_processing_options.h" #include "mediapipe/tasks/cc/vision/core/running_mode.h" #include "mediapipe/tasks/cc/vision/utils/image_utils.h" #include "tensorflow/lite/c/common.h" @@ -64,6 +65,8 @@ namespace vision { namespace { using ::mediapipe::file::JoinPath; +using ::mediapipe::tasks::components::containers::Rect; +using ::mediapipe::tasks::vision::core::ImageProcessingOptions; using ::testing::HasSubstr; using ::testing::Optional; @@ -532,8 +535,8 @@ TEST_F(ImageModeTest, SucceedsWithRotation) { JoinPath("./", kTestDataDirectory, kMobileSsdWithMetadata); MP_ASSERT_OK_AND_ASSIGN(std::unique_ptr object_detector, ObjectDetector::Create(std::move(options))); - NormalizedRect image_processing_options; - image_processing_options.set_rotation(M_PI / 2.0); + ImageProcessingOptions image_processing_options; + image_processing_options.rotation_degrees = -90; MP_ASSERT_OK_AND_ASSIGN( auto results, object_detector->Detect(image, image_processing_options)); MP_ASSERT_OK(object_detector->Close()); @@ -557,16 +560,17 @@ TEST_F(ImageModeTest, FailsWithRegionOfInterest) { JoinPath("./", kTestDataDirectory, kMobileSsdWithMetadata); MP_ASSERT_OK_AND_ASSIGN(std::unique_ptr object_detector, ObjectDetector::Create(std::move(options))); - NormalizedRect image_processing_options; - image_processing_options.set_x_center(0.5); - image_processing_options.set_y_center(0.5); - image_processing_options.set_width(1.0); - image_processing_options.set_height(1.0); + Rect roi{/*left=*/0.1, /*top=*/0, /*right=*/0.9, /*bottom=*/1}; + ImageProcessingOptions image_processing_options{roi, /*rotation_degrees=*/0}; auto results = object_detector->Detect(image, image_processing_options); EXPECT_EQ(results.status().code(), absl::StatusCode::kInvalidArgument); EXPECT_THAT(results.status().message(), - HasSubstr("ObjectDetector does not support region-of-interest")); + HasSubstr("This task doesn't support region-of-interest")); + EXPECT_THAT( + results.status().GetPayload(kMediaPipeTasksPayload), + Optional(absl::Cord(absl::StrCat( + MediaPipeTasksStatus::kImageProcessingInvalidArgumentError)))); } class VideoModeTest : public tflite_shims::testing::Test {};
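For reference, hand-working the ConvertToNormalizedRect conversion added in base_vision_task_api.h on a sample input (a worked example, not code from this patch):

// Input, using the Rect field order from the tests above:
//   Rect roi{/*left=*/0.25, /*top=*/0.25, /*right=*/0.75, /*bottom=*/0.75};
//   ImageProcessingOptions options{roi, /*rotation_degrees=*/-90};
// Resulting NormalizedRect fed to the graph:
//   x_center = (0.25 + 0.75) / 2   = 0.5
//   y_center = (0.25 + 0.75) / 2   = 0.5
//   width    = 0.75 - 0.25         = 0.5
//   height   = 0.75 - 0.25         = 0.5
//   rotation = -(-90) * M_PI / 180 = +M_PI / 2 radians (90° counter-clockwise)
// A rotation_degrees value that is not a multiple of 90, or an roi outside
// [0,1] or with left >= right / top >= bottom, is rejected with
// kImageProcessingInvalidArgumentError instead.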