From d7fd5b0cf5b44972840c8e2d2f75db63209b94dc Mon Sep 17 00:00:00 2001 From: MediaPipe Team Date: Thu, 30 Mar 2023 10:02:58 -0700 Subject: [PATCH] Fix incorrect rotation handling in C++ vision tasks PiperOrigin-RevId: 520670536 --- mediapipe/tasks/cc/vision/core/BUILD | 1 + .../cc/vision/core/base_vision_task_api.h | 20 ++++++++++++++++++- .../cc/vision/face_detector/face_detector.cc | 18 ++++++++--------- .../vision/face_landmarker/face_landmarker.cc | 6 +++--- .../cc/vision/face_stylizer/face_stylizer.cc | 6 +++--- .../gesture_recognizer/gesture_recognizer.cc | 18 ++++++++--------- .../vision/hand_landmarker/hand_landmarker.cc | 18 ++++++++--------- .../image_classifier/image_classifier.cc | 6 +++--- .../image_classifier/image_classifier_test.cc | 19 +++++++++--------- .../vision/image_embedder/image_embedder.cc | 6 +++--- .../image_embedder/image_embedder_test.cc | 4 ++-- .../vision/image_segmenter/image_segmenter.cc | 18 ++++++++--------- .../interactive_segmenter.cc | 6 +++--- .../vision/object_detector/object_detector.cc | 18 ++++++++--------- .../object_detector/object_detector_test.cc | 5 ++--- 15 files changed, 93 insertions(+), 76 deletions(-) diff --git a/mediapipe/tasks/cc/vision/core/BUILD b/mediapipe/tasks/cc/vision/core/BUILD index 5791c25f1..0815b5b2b 100644 --- a/mediapipe/tasks/cc/vision/core/BUILD +++ b/mediapipe/tasks/cc/vision/core/BUILD @@ -39,6 +39,7 @@ cc_library( ":running_mode", "//mediapipe/calculators/core:flow_limiter_calculator", "//mediapipe/calculators/tensor:image_to_tensor_calculator_cc_proto", + "//mediapipe/framework/formats:image", "//mediapipe/framework/formats:rect_cc_proto", "//mediapipe/tasks/cc/components/containers:rect", "//mediapipe/tasks/cc/core:base_task_api", diff --git a/mediapipe/tasks/cc/vision/core/base_vision_task_api.h b/mediapipe/tasks/cc/vision/core/base_vision_task_api.h index 8b27ca19d..c56f350b2 100644 --- a/mediapipe/tasks/cc/vision/core/base_vision_task_api.h +++ 
b/mediapipe/tasks/cc/vision/core/base_vision_task_api.h @@ -17,6 +17,7 @@ limitations under the License. #define MEDIAPIPE_TASKS_CC_VISION_CORE_BASE_VISION_TASK_API_H_ #include +#include #include #include #include @@ -26,6 +27,7 @@ limitations under the License. #include "absl/status/statusor.h" #include "absl/strings/str_cat.h" #include "mediapipe/calculators/tensor/image_to_tensor_calculator.pb.h" +#include "mediapipe/framework/formats/image.h" #include "mediapipe/framework/formats/rect.pb.h" #include "mediapipe/tasks/cc/components/containers/rect.h" #include "mediapipe/tasks/cc/core/base_task_api.h" @@ -136,7 +138,8 @@ class BaseVisionTaskApi : public tasks::core::BaseTaskApi { // to 0. If 'roi_allowed' is false, an error will be returned if the input // ImageProcessingOptions has its 'region_or_interest' field set. static absl::StatusOr ConvertToNormalizedRect( - std::optional options, bool roi_allowed = true) { + std::optional options, + const mediapipe::Image& image, bool roi_allowed = true) { mediapipe::NormalizedRect normalized_rect; normalized_rect.set_rotation(0); normalized_rect.set_x_center(0.5); @@ -181,6 +184,21 @@ class BaseVisionTaskApi : public tasks::core::BaseTaskApi { normalized_rect.set_width(roi.right - roi.left); normalized_rect.set_height(roi.bottom - roi.top); } + + // For 90° and 270° rotations, we need to swap width and height. + // This is due to the internal behavior of ImageToTensorCalculator, which: + // - first denormalizes the provided rect by multiplying the rect width or + // height by the image width or height, respectively. + // - then rotates this denormalized rect by the provided rotation, and + // uses this for cropping, + // - then finally rotates this back. 
+ if (std::abs(options->rotation_degrees) % 180 != 0) { + float w = normalized_rect.height() * image.height() / image.width(); + float h = normalized_rect.width() * image.width() / image.height(); + normalized_rect.set_width(w); + normalized_rect.set_height(h); + } + return normalized_rect; } diff --git a/mediapipe/tasks/cc/vision/face_detector/face_detector.cc b/mediapipe/tasks/cc/vision/face_detector/face_detector.cc index f11cdd14d..80e114bf8 100644 --- a/mediapipe/tasks/cc/vision/face_detector/face_detector.cc +++ b/mediapipe/tasks/cc/vision/face_detector/face_detector.cc @@ -136,9 +136,9 @@ absl::StatusOr> FaceDetector::Create( absl::StatusOr FaceDetector::Detect( mediapipe::Image image, std::optional image_processing_options) { - ASSIGN_OR_RETURN( - NormalizedRect norm_rect, - ConvertToNormalizedRect(image_processing_options, /*roi_allowed=*/false)); + ASSIGN_OR_RETURN(NormalizedRect norm_rect, + ConvertToNormalizedRect(image_processing_options, image, + /*roi_allowed=*/false)); ASSIGN_OR_RETURN( auto output_packets, ProcessImageData( @@ -156,9 +156,9 @@ absl::StatusOr FaceDetector::Detect( absl::StatusOr FaceDetector::DetectForVideo( mediapipe::Image image, uint64_t timestamp_ms, std::optional image_processing_options) { - ASSIGN_OR_RETURN( - NormalizedRect norm_rect, - ConvertToNormalizedRect(image_processing_options, /*roi_allowed=*/false)); + ASSIGN_OR_RETURN(NormalizedRect norm_rect, + ConvertToNormalizedRect(image_processing_options, image, + /*roi_allowed=*/false)); ASSIGN_OR_RETURN( auto output_packets, ProcessVideoData( @@ -179,9 +179,9 @@ absl::StatusOr FaceDetector::DetectForVideo( absl::Status FaceDetector::DetectAsync( mediapipe::Image image, uint64_t timestamp_ms, std::optional image_processing_options) { - ASSIGN_OR_RETURN( - NormalizedRect norm_rect, - ConvertToNormalizedRect(image_processing_options, /*roi_allowed=*/false)); + ASSIGN_OR_RETURN(NormalizedRect norm_rect, + ConvertToNormalizedRect(image_processing_options, image, + 
/*roi_allowed=*/false)); return SendLiveStreamData( {{kImageInStreamName, MakePacket(std::move(image)) diff --git a/mediapipe/tasks/cc/vision/face_landmarker/face_landmarker.cc b/mediapipe/tasks/cc/vision/face_landmarker/face_landmarker.cc index e006b4490..b40ea3324 100644 --- a/mediapipe/tasks/cc/vision/face_landmarker/face_landmarker.cc +++ b/mediapipe/tasks/cc/vision/face_landmarker/face_landmarker.cc @@ -194,7 +194,7 @@ absl::StatusOr FaceLandmarker::Detect( mediapipe::Image image, std::optional image_processing_options) { ASSIGN_OR_RETURN(NormalizedRect norm_rect, - ConvertToNormalizedRect(image_processing_options, + ConvertToNormalizedRect(image_processing_options, image, /*roi_allowed=*/false)); ASSIGN_OR_RETURN( auto output_packets, @@ -212,7 +212,7 @@ absl::StatusOr FaceLandmarker::DetectForVideo( mediapipe::Image image, int64_t timestamp_ms, std::optional image_processing_options) { ASSIGN_OR_RETURN(NormalizedRect norm_rect, - ConvertToNormalizedRect(image_processing_options, + ConvertToNormalizedRect(image_processing_options, image, /*roi_allowed=*/false)); ASSIGN_OR_RETURN( auto output_packets, @@ -233,7 +233,7 @@ absl::Status FaceLandmarker::DetectAsync( mediapipe::Image image, int64_t timestamp_ms, std::optional image_processing_options) { ASSIGN_OR_RETURN(NormalizedRect norm_rect, - ConvertToNormalizedRect(image_processing_options, + ConvertToNormalizedRect(image_processing_options, image, /*roi_allowed=*/false)); return SendLiveStreamData( {{kImageInStreamName, diff --git a/mediapipe/tasks/cc/vision/face_stylizer/face_stylizer.cc b/mediapipe/tasks/cc/vision/face_stylizer/face_stylizer.cc index aaabc078a..d0f9bdc6d 100644 --- a/mediapipe/tasks/cc/vision/face_stylizer/face_stylizer.cc +++ b/mediapipe/tasks/cc/vision/face_stylizer/face_stylizer.cc @@ -138,7 +138,7 @@ absl::StatusOr FaceStylizer::Stylize( MediaPipeTasksStatus::kRunnerUnexpectedInputError); } ASSIGN_OR_RETURN(NormalizedRect norm_rect, - ConvertToNormalizedRect(image_processing_options)); 
+ ConvertToNormalizedRect(image_processing_options, image)); ASSIGN_OR_RETURN( auto output_packets, ProcessImageData( @@ -157,7 +157,7 @@ absl::StatusOr FaceStylizer::StylizeForVideo( MediaPipeTasksStatus::kRunnerUnexpectedInputError); } ASSIGN_OR_RETURN(NormalizedRect norm_rect, - ConvertToNormalizedRect(image_processing_options)); + ConvertToNormalizedRect(image_processing_options, image)); ASSIGN_OR_RETURN( auto output_packets, ProcessVideoData( @@ -180,7 +180,7 @@ absl::Status FaceStylizer::StylizeAsync( MediaPipeTasksStatus::kRunnerUnexpectedInputError); } ASSIGN_OR_RETURN(NormalizedRect norm_rect, - ConvertToNormalizedRect(image_processing_options)); + ConvertToNormalizedRect(image_processing_options, image)); return SendLiveStreamData( {{kImageInStreamName, MakePacket(std::move(image)) diff --git a/mediapipe/tasks/cc/vision/gesture_recognizer/gesture_recognizer.cc b/mediapipe/tasks/cc/vision/gesture_recognizer/gesture_recognizer.cc index 91a5ec213..21ad14848 100644 --- a/mediapipe/tasks/cc/vision/gesture_recognizer/gesture_recognizer.cc +++ b/mediapipe/tasks/cc/vision/gesture_recognizer/gesture_recognizer.cc @@ -222,9 +222,9 @@ absl::StatusOr GestureRecognizer::Recognize( "GPU input images are currently not supported.", MediaPipeTasksStatus::kRunnerUnexpectedInputError); } - ASSIGN_OR_RETURN( - NormalizedRect norm_rect, - ConvertToNormalizedRect(image_processing_options, /*roi_allowed=*/false)); + ASSIGN_OR_RETURN(NormalizedRect norm_rect, + ConvertToNormalizedRect(image_processing_options, image, + /*roi_allowed=*/false)); ASSIGN_OR_RETURN( auto output_packets, ProcessImageData( @@ -258,9 +258,9 @@ absl::StatusOr GestureRecognizer::RecognizeForVideo( absl::StrCat("GPU input images are currently not supported."), MediaPipeTasksStatus::kRunnerUnexpectedInputError); } - ASSIGN_OR_RETURN( - NormalizedRect norm_rect, - ConvertToNormalizedRect(image_processing_options, /*roi_allowed=*/false)); + ASSIGN_OR_RETURN(NormalizedRect norm_rect, + 
ConvertToNormalizedRect(image_processing_options, image, + /*roi_allowed=*/false)); ASSIGN_OR_RETURN( auto output_packets, ProcessVideoData( @@ -297,9 +297,9 @@ absl::Status GestureRecognizer::RecognizeAsync( absl::StrCat("GPU input images are currently not supported."), MediaPipeTasksStatus::kRunnerUnexpectedInputError); } - ASSIGN_OR_RETURN( - NormalizedRect norm_rect, - ConvertToNormalizedRect(image_processing_options, /*roi_allowed=*/false)); + ASSIGN_OR_RETURN(NormalizedRect norm_rect, + ConvertToNormalizedRect(image_processing_options, image, + /*roi_allowed=*/false)); return SendLiveStreamData( {{kImageInStreamName, MakePacket(std::move(image)) diff --git a/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker.cc b/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker.cc index ab66fe136..b5915b6f6 100644 --- a/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker.cc +++ b/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker.cc @@ -185,9 +185,9 @@ absl::StatusOr HandLandmarker::Detect( "GPU input images are currently not supported.", MediaPipeTasksStatus::kRunnerUnexpectedInputError); } - ASSIGN_OR_RETURN( - NormalizedRect norm_rect, - ConvertToNormalizedRect(image_processing_options, /*roi_allowed=*/false)); + ASSIGN_OR_RETURN(NormalizedRect norm_rect, + ConvertToNormalizedRect(image_processing_options, image, + /*roi_allowed=*/false)); ASSIGN_OR_RETURN( auto output_packets, ProcessImageData( @@ -223,9 +223,9 @@ absl::StatusOr HandLandmarker::DetectForVideo( absl::StrCat("GPU input images are currently not supported."), MediaPipeTasksStatus::kRunnerUnexpectedInputError); } - ASSIGN_OR_RETURN( - NormalizedRect norm_rect, - ConvertToNormalizedRect(image_processing_options, /*roi_allowed=*/false)); + ASSIGN_OR_RETURN(NormalizedRect norm_rect, + ConvertToNormalizedRect(image_processing_options, image, + /*roi_allowed=*/false)); ASSIGN_OR_RETURN( auto output_packets, ProcessVideoData( @@ -264,9 +264,9 @@ absl::Status HandLandmarker::DetectAsync( 
absl::StrCat("GPU input images are currently not supported."), MediaPipeTasksStatus::kRunnerUnexpectedInputError); } - ASSIGN_OR_RETURN( - NormalizedRect norm_rect, - ConvertToNormalizedRect(image_processing_options, /*roi_allowed=*/false)); + ASSIGN_OR_RETURN(NormalizedRect norm_rect, + ConvertToNormalizedRect(image_processing_options, image, + /*roi_allowed=*/false)); return SendLiveStreamData( {{kImageInStreamName, MakePacket(std::move(image)) diff --git a/mediapipe/tasks/cc/vision/image_classifier/image_classifier.cc b/mediapipe/tasks/cc/vision/image_classifier/image_classifier.cc index 763e0a320..af684df05 100644 --- a/mediapipe/tasks/cc/vision/image_classifier/image_classifier.cc +++ b/mediapipe/tasks/cc/vision/image_classifier/image_classifier.cc @@ -156,7 +156,7 @@ absl::StatusOr ImageClassifier::Classify( MediaPipeTasksStatus::kRunnerUnexpectedInputError); } ASSIGN_OR_RETURN(NormalizedRect norm_rect, - ConvertToNormalizedRect(image_processing_options)); + ConvertToNormalizedRect(image_processing_options, image)); ASSIGN_OR_RETURN( auto output_packets, ProcessImageData( @@ -176,7 +176,7 @@ absl::StatusOr ImageClassifier::ClassifyForVideo( MediaPipeTasksStatus::kRunnerUnexpectedInputError); } ASSIGN_OR_RETURN(NormalizedRect norm_rect, - ConvertToNormalizedRect(image_processing_options)); + ConvertToNormalizedRect(image_processing_options, image)); ASSIGN_OR_RETURN( auto output_packets, ProcessVideoData( @@ -200,7 +200,7 @@ absl::Status ImageClassifier::ClassifyAsync( MediaPipeTasksStatus::kRunnerUnexpectedInputError); } ASSIGN_OR_RETURN(NormalizedRect norm_rect, - ConvertToNormalizedRect(image_processing_options)); + ConvertToNormalizedRect(image_processing_options, image)); return SendLiveStreamData( {{kImageInStreamName, MakePacket(std::move(image)) diff --git a/mediapipe/tasks/cc/vision/image_classifier/image_classifier_test.cc b/mediapipe/tasks/cc/vision/image_classifier/image_classifier_test.cc index 7aa2a148c..8fbdd23dd 100644 --- 
a/mediapipe/tasks/cc/vision/image_classifier/image_classifier_test.cc +++ b/mediapipe/tasks/cc/vision/image_classifier/image_classifier_test.cc @@ -505,11 +505,9 @@ TEST_F(ImageModeTest, SucceedsWithRotation) { ImageClassifierResult expected; expected.classifications.emplace_back(Classifications{ /*categories=*/{ - {/*index=*/934, /*score=*/0.6371766, - /*category_name=*/"cheeseburger"}, - {/*index=*/963, /*score=*/0.049443405, /*category_name=*/"meat loaf"}, - {/*index=*/925, /*score=*/0.047918003, - /*category_name=*/"guacamole"}}, + {/*index=*/934, /*score=*/0.754467, /*category_name=*/"cheeseburger"}, + {/*index=*/925, /*score=*/0.0288028, /*category_name=*/"guacamole"}, + {/*index=*/932, /*score=*/0.0286119, /*category_name=*/"bagel"}}, /*head_index=*/0, /*head_name=*/"probability"}); ExpectApproximatelyEqual(results, expected); @@ -525,9 +523,10 @@ TEST_F(ImageModeTest, SucceedsWithRegionOfInterestAndRotation) { options->classifier_options.max_results = 1; MP_ASSERT_OK_AND_ASSIGN(std::unique_ptr image_classifier, ImageClassifier::Create(std::move(options))); - // Region-of-interest around the chair, with 90° anti-clockwise rotation. - RectF roi{/*left=*/0.006, /*top=*/0.1763, /*right=*/0.5702, - /*bottom=*/0.3049}; + // Region-of-interest around the soccer ball, with 90° anti-clockwise + // rotation. 
+ RectF roi{/*left=*/0.2655, /*top=*/0.45, /*right=*/0.6925, + /*bottom=*/0.614}; ImageProcessingOptions image_processing_options{roi, /*rotation_degrees=*/-90}; @@ -536,8 +535,8 @@ TEST_F(ImageModeTest, SucceedsWithRegionOfInterestAndRotation) { ImageClassifierResult expected; expected.classifications.emplace_back( - Classifications{/*categories=*/{{/*index=*/560, /*score=*/0.6522213, - /*category_name=*/"folding chair"}}, + Classifications{/*categories=*/{{/*index=*/806, /*score=*/0.997684, + /*category_name=*/"soccer ball"}}, /*head_index=*/0, /*head_name=*/"probability"}); ExpectApproximatelyEqual(results, expected); diff --git a/mediapipe/tasks/cc/vision/image_embedder/image_embedder.cc b/mediapipe/tasks/cc/vision/image_embedder/image_embedder.cc index 494b075a7..c8e56d36d 100644 --- a/mediapipe/tasks/cc/vision/image_embedder/image_embedder.cc +++ b/mediapipe/tasks/cc/vision/image_embedder/image_embedder.cc @@ -151,7 +151,7 @@ absl::StatusOr ImageEmbedder::Embed( MediaPipeTasksStatus::kRunnerUnexpectedInputError); } ASSIGN_OR_RETURN(NormalizedRect norm_rect, - ConvertToNormalizedRect(image_processing_options)); + ConvertToNormalizedRect(image_processing_options, image)); ASSIGN_OR_RETURN( auto output_packets, ProcessImageData( @@ -172,7 +172,7 @@ absl::StatusOr ImageEmbedder::EmbedForVideo( MediaPipeTasksStatus::kRunnerUnexpectedInputError); } ASSIGN_OR_RETURN(NormalizedRect norm_rect, - ConvertToNormalizedRect(image_processing_options)); + ConvertToNormalizedRect(image_processing_options, image)); ASSIGN_OR_RETURN( auto output_packets, ProcessVideoData( @@ -196,7 +196,7 @@ absl::Status ImageEmbedder::EmbedAsync( MediaPipeTasksStatus::kRunnerUnexpectedInputError); } ASSIGN_OR_RETURN(NormalizedRect norm_rect, - ConvertToNormalizedRect(image_processing_options)); + ConvertToNormalizedRect(image_processing_options, image)); return SendLiveStreamData( {{kImageInStreamName, MakePacket(std::move(image)) diff --git 
a/mediapipe/tasks/cc/vision/image_embedder/image_embedder_test.cc b/mediapipe/tasks/cc/vision/image_embedder/image_embedder_test.cc index 6aa0b85bc..4024550f9 100644 --- a/mediapipe/tasks/cc/vision/image_embedder/image_embedder_test.cc +++ b/mediapipe/tasks/cc/vision/image_embedder/image_embedder_test.cc @@ -371,7 +371,7 @@ TEST_F(ImageModeTest, SucceedsWithRotation) { MP_ASSERT_OK_AND_ASSIGN(double similarity, ImageEmbedder::CosineSimilarity( image_result.embeddings[0], rotated_result.embeddings[0])); - double expected_similarity = 0.572265; + double expected_similarity = 0.98223; EXPECT_LE(abs(similarity - expected_similarity), kSimilarityTolerancy); } @@ -406,7 +406,7 @@ TEST_F(ImageModeTest, SucceedsWithRegionOfInterestAndRotation) { MP_ASSERT_OK_AND_ASSIGN(double similarity, ImageEmbedder::CosineSimilarity( crop_result.embeddings[0], rotated_result.embeddings[0])); - double expected_similarity = 0.62838; + double expected_similarity = 0.974683; EXPECT_LE(abs(similarity - expected_similarity), kSimilarityTolerancy); } diff --git a/mediapipe/tasks/cc/vision/image_segmenter/image_segmenter.cc b/mediapipe/tasks/cc/vision/image_segmenter/image_segmenter.cc index c12fe7f7e..ab1d3c84b 100644 --- a/mediapipe/tasks/cc/vision/image_segmenter/image_segmenter.cc +++ b/mediapipe/tasks/cc/vision/image_segmenter/image_segmenter.cc @@ -192,9 +192,9 @@ absl::StatusOr> ImageSegmenter::Segment( absl::StrCat("GPU input images are currently not supported."), MediaPipeTasksStatus::kRunnerUnexpectedInputError); } - ASSIGN_OR_RETURN( - NormalizedRect norm_rect, - ConvertToNormalizedRect(image_processing_options, /*roi_allowed=*/false)); + ASSIGN_OR_RETURN(NormalizedRect norm_rect, + ConvertToNormalizedRect(image_processing_options, image, + /*roi_allowed=*/false)); ASSIGN_OR_RETURN( auto output_packets, ProcessImageData( @@ -213,9 +213,9 @@ absl::StatusOr> ImageSegmenter::SegmentForVideo( absl::StrCat("GPU input images are currently not supported."), 
MediaPipeTasksStatus::kRunnerUnexpectedInputError); } - ASSIGN_OR_RETURN( - NormalizedRect norm_rect, - ConvertToNormalizedRect(image_processing_options, /*roi_allowed=*/false)); + ASSIGN_OR_RETURN(NormalizedRect norm_rect, + ConvertToNormalizedRect(image_processing_options, image, + /*roi_allowed=*/false)); ASSIGN_OR_RETURN( auto output_packets, ProcessVideoData( @@ -237,9 +237,9 @@ absl::Status ImageSegmenter::SegmentAsync( absl::StrCat("GPU input images are currently not supported."), MediaPipeTasksStatus::kRunnerUnexpectedInputError); } - ASSIGN_OR_RETURN( - NormalizedRect norm_rect, - ConvertToNormalizedRect(image_processing_options, /*roi_allowed=*/false)); + ASSIGN_OR_RETURN(NormalizedRect norm_rect, + ConvertToNormalizedRect(image_processing_options, image, + /*roi_allowed=*/false)); return SendLiveStreamData( {{kImageInStreamName, MakePacket(std::move(image)) diff --git a/mediapipe/tasks/cc/vision/interactive_segmenter/interactive_segmenter.cc b/mediapipe/tasks/cc/vision/interactive_segmenter/interactive_segmenter.cc index 4298d4a19..853baec29 100644 --- a/mediapipe/tasks/cc/vision/interactive_segmenter/interactive_segmenter.cc +++ b/mediapipe/tasks/cc/vision/interactive_segmenter/interactive_segmenter.cc @@ -142,9 +142,9 @@ absl::StatusOr> InteractiveSegmenter::Segment( absl::StrCat("GPU input images are currently not supported."), MediaPipeTasksStatus::kRunnerUnexpectedInputError); } - ASSIGN_OR_RETURN( - NormalizedRect norm_rect, - ConvertToNormalizedRect(image_processing_options, /*roi_allowed=*/false)); + ASSIGN_OR_RETURN(NormalizedRect norm_rect, + ConvertToNormalizedRect(image_processing_options, image, + /*roi_allowed=*/false)); ASSIGN_OR_RETURN(RenderData roi_as_render_data, ConvertRoiToRenderData(roi)); ASSIGN_OR_RETURN( auto output_packets, diff --git a/mediapipe/tasks/cc/vision/object_detector/object_detector.cc b/mediapipe/tasks/cc/vision/object_detector/object_detector.cc index 2477f8a44..9483d73c0 100644 --- 
a/mediapipe/tasks/cc/vision/object_detector/object_detector.cc +++ b/mediapipe/tasks/cc/vision/object_detector/object_detector.cc @@ -157,9 +157,9 @@ absl::StatusOr ObjectDetector::Detect( absl::StrCat("GPU input images are currently not supported."), MediaPipeTasksStatus::kRunnerUnexpectedInputError); } - ASSIGN_OR_RETURN( - NormalizedRect norm_rect, - ConvertToNormalizedRect(image_processing_options, /*roi_allowed=*/false)); + ASSIGN_OR_RETURN(NormalizedRect norm_rect, + ConvertToNormalizedRect(image_processing_options, image, + /*roi_allowed=*/false)); ASSIGN_OR_RETURN( auto output_packets, ProcessImageData( @@ -178,9 +178,9 @@ absl::StatusOr ObjectDetector::DetectForVideo( absl::StrCat("GPU input images are currently not supported."), MediaPipeTasksStatus::kRunnerUnexpectedInputError); } - ASSIGN_OR_RETURN( - NormalizedRect norm_rect, - ConvertToNormalizedRect(image_processing_options, /*roi_allowed=*/false)); + ASSIGN_OR_RETURN(NormalizedRect norm_rect, + ConvertToNormalizedRect(image_processing_options, image, + /*roi_allowed=*/false)); ASSIGN_OR_RETURN( auto output_packets, ProcessVideoData( @@ -203,9 +203,9 @@ absl::Status ObjectDetector::DetectAsync( absl::StrCat("GPU input images are currently not supported."), MediaPipeTasksStatus::kRunnerUnexpectedInputError); } - ASSIGN_OR_RETURN( - NormalizedRect norm_rect, - ConvertToNormalizedRect(image_processing_options, /*roi_allowed=*/false)); + ASSIGN_OR_RETURN(NormalizedRect norm_rect, + ConvertToNormalizedRect(image_processing_options, image, + /*roi_allowed=*/false)); return SendLiveStreamData( {{kImageInStreamName, MakePacket(std::move(image)) diff --git a/mediapipe/tasks/cc/vision/object_detector/object_detector_test.cc b/mediapipe/tasks/cc/vision/object_detector/object_detector_test.cc index cb37bbead..178f95168 100644 --- a/mediapipe/tasks/cc/vision/object_detector/object_detector_test.cc +++ b/mediapipe/tasks/cc/vision/object_detector/object_detector_test.cc @@ -575,7 +575,6 @@ TEST_F(ImageModeTest, 
SucceedsWithRotation) { "cats_and_dogs_rotated.jpg"))); auto options = std::make_unique(); options->max_results = 1; - options->category_allowlist.push_back("cat"); options->base_options.model_asset_path = JoinPath("./", kTestDataDirectory, kMobileSsdWithMetadata); MP_ASSERT_OK_AND_ASSIGN(std::unique_ptr object_detector, @@ -589,10 +588,10 @@ TEST_F(ImageModeTest, SucceedsWithRotation) { results, ConvertToDetectionResult({ParseTextProtoOrDie(R"pb( label: "cat" - score: 0.7109375 + score: 0.69921875 location_data { format: BOUNDING_BOX - bounding_box { xmin: 0 ymin: 622 width: 436 height: 276 } + bounding_box { xmin: 0 ymin: 608 width: 439 height: 387 } })pb")})); }