Internal change

PiperOrigin-RevId: 482906478
This commit is contained in:
MediaPipe Team 2022-10-21 15:47:13 -07:00 committed by Copybara-Service
parent d0437b7f91
commit 4a6c23a76a
14 changed files with 302 additions and 181 deletions

View File

@ -21,12 +21,23 @@ cc_library(
hdrs = ["running_mode.h"], hdrs = ["running_mode.h"],
) )
# Shared options struct used by vision tasks to configure image
# pre-processing (region-of-interest cropping and rotation).
cc_library(
    name = "image_processing_options",
    hdrs = ["image_processing_options.h"],
    deps = [
        "//mediapipe/tasks/cc/components/containers:rect",
    ],
)
cc_library( cc_library(
name = "base_vision_task_api", name = "base_vision_task_api",
hdrs = ["base_vision_task_api.h"], hdrs = ["base_vision_task_api.h"],
deps = [ deps = [
":image_processing_options",
":running_mode", ":running_mode",
"//mediapipe/calculators/core:flow_limiter_calculator", "//mediapipe/calculators/core:flow_limiter_calculator",
"//mediapipe/framework/formats:rect_cc_proto",
"//mediapipe/tasks/cc/components/containers:rect",
"//mediapipe/tasks/cc/core:base_task_api", "//mediapipe/tasks/cc/core:base_task_api",
"//mediapipe/tasks/cc/core:task_runner", "//mediapipe/tasks/cc/core:task_runner",
"@com_google_absl//absl/status", "@com_google_absl//absl/status",

View File

@ -16,15 +16,20 @@ limitations under the License.
#ifndef MEDIAPIPE_TASKS_CC_VISION_CORE_BASE_VISION_TASK_API_H_ #ifndef MEDIAPIPE_TASKS_CC_VISION_CORE_BASE_VISION_TASK_API_H_
#define MEDIAPIPE_TASKS_CC_VISION_CORE_BASE_VISION_TASK_API_H_ #define MEDIAPIPE_TASKS_CC_VISION_CORE_BASE_VISION_TASK_API_H_
#include <cmath>
#include <memory> #include <memory>
#include <optional>
#include <string> #include <string>
#include <utility> #include <utility>
#include "absl/status/status.h" #include "absl/status/status.h"
#include "absl/status/statusor.h" #include "absl/status/statusor.h"
#include "absl/strings/str_cat.h" #include "absl/strings/str_cat.h"
#include "mediapipe/framework/formats/rect.pb.h"
#include "mediapipe/tasks/cc/components/containers/rect.h"
#include "mediapipe/tasks/cc/core/base_task_api.h" #include "mediapipe/tasks/cc/core/base_task_api.h"
#include "mediapipe/tasks/cc/core/task_runner.h" #include "mediapipe/tasks/cc/core/task_runner.h"
#include "mediapipe/tasks/cc/vision/core/image_processing_options.h"
#include "mediapipe/tasks/cc/vision/core/running_mode.h" #include "mediapipe/tasks/cc/vision/core/running_mode.h"
namespace mediapipe { namespace mediapipe {
@ -87,6 +92,60 @@ class BaseVisionTaskApi : public tasks::core::BaseTaskApi {
return runner_->Send(std::move(inputs)); return runner_->Send(std::move(inputs));
} }
// Converts the given ImageProcessingOptions into a NormalizedRect, validating
// the fields along the way. When no options are provided, returns a default
// NormalizedRect covering the whole image with rotation set to 0. When
// 'roi_allowed' is false, an error is returned if the options have their
// 'region_of_interest' field set.
static absl::StatusOr<mediapipe::NormalizedRect> ConvertToNormalizedRect(
    std::optional<ImageProcessingOptions> options, bool roi_allowed = true) {
  // Start from a rect spanning the full image, unrotated.
  mediapipe::NormalizedRect rect;
  rect.set_rotation(0);
  rect.set_x_center(0.5);
  rect.set_y_center(0.5);
  rect.set_width(1.0);
  rect.set_height(1.0);
  if (!options.has_value()) {
    return rect;
  }
  // Only quarter-turn rotations (positive or negative) are supported.
  if (options->rotation_degrees % 90 != 0) {
    return CreateStatusWithPayload(
        absl::StatusCode::kInvalidArgument,
        "Expected rotation to be a multiple of 90°.",
        MediaPipeTasksStatus::kImageProcessingInvalidArgumentError);
  }
  // Degrees clockwise -> radians counter-clockwise.
  rect.set_rotation(-options->rotation_degrees * M_PI / 180.0);
  if (options->region_of_interest.has_value()) {
    if (!roi_allowed) {
      return CreateStatusWithPayload(
          absl::StatusCode::kInvalidArgument,
          "This task doesn't support region-of-interest.",
          MediaPipeTasksStatus::kImageProcessingInvalidArgumentError);
    }
    const auto& roi = *options->region_of_interest;
    // The ROI must be non-empty and fully contained in the unit square.
    if (roi.left >= roi.right || roi.top >= roi.bottom) {
      return CreateStatusWithPayload(
          absl::StatusCode::kInvalidArgument,
          "Expected Rect with left < right and top < bottom.",
          MediaPipeTasksStatus::kImageProcessingInvalidArgumentError);
    }
    if (roi.left < 0 || roi.top < 0 || roi.right > 1 || roi.bottom > 1) {
      return CreateStatusWithPayload(
          absl::StatusCode::kInvalidArgument,
          "Expected Rect values to be in [0,1].",
          MediaPipeTasksStatus::kImageProcessingInvalidArgumentError);
    }
    rect.set_x_center((roi.left + roi.right) / 2.0);
    rect.set_y_center((roi.top + roi.bottom) / 2.0);
    rect.set_width(roi.right - roi.left);
    rect.set_height(roi.bottom - roi.top);
  }
  return rect;
}
private: private:
RunningMode running_mode_; RunningMode running_mode_;
}; };

View File

@ -0,0 +1,52 @@
/* Copyright 2022 The MediaPipe Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef MEDIAPIPE_TASKS_CC_VISION_CORE_IMAGE_PROCESSING_OPTIONS_H_
#define MEDIAPIPE_TASKS_CC_VISION_CORE_IMAGE_PROCESSING_OPTIONS_H_
#include <optional>
#include "mediapipe/tasks/cc/components/containers/rect.h"
namespace mediapipe {
namespace tasks {
namespace vision {
namespace core {
// Options for image processing.
//
// If both region-of-interest and rotation are specified, the crop around the
// region-of-interest is extracted first, then the specified rotation is
// applied to the crop.
struct ImageProcessingOptions {
  // The optional region-of-interest to crop from the image. If not specified,
  // the full image is used.
  //
  // Coordinates must be in [0,1] with 'left' < 'right' and 'top' < 'bottom'.
  std::optional<components::containers::Rect> region_of_interest = std::nullopt;

  // The rotation to apply to the image (or cropped region-of-interest), in
  // degrees clockwise.
  //
  // The rotation must be a multiple (positive or negative) of 90°.
  int rotation_degrees = 0;
};
} // namespace core
} // namespace vision
} // namespace tasks
} // namespace mediapipe
#endif // MEDIAPIPE_TASKS_CC_VISION_CORE_IMAGE_PROCESSING_OPTIONS_H_

View File

@ -137,6 +137,7 @@ cc_library(
"//mediapipe/tasks/cc/core:utils", "//mediapipe/tasks/cc/core:utils",
"//mediapipe/tasks/cc/core/proto:inference_subgraph_cc_proto", "//mediapipe/tasks/cc/core/proto:inference_subgraph_cc_proto",
"//mediapipe/tasks/cc/vision/core:base_vision_task_api", "//mediapipe/tasks/cc/vision/core:base_vision_task_api",
"//mediapipe/tasks/cc/vision/core:image_processing_options",
"//mediapipe/tasks/cc/vision/core:running_mode", "//mediapipe/tasks/cc/vision/core:running_mode",
"//mediapipe/tasks/cc/vision/core:vision_task_api_factory", "//mediapipe/tasks/cc/vision/core:vision_task_api_factory",
"//mediapipe/tasks/cc/vision/gesture_recognizer/proto:gesture_recognizer_graph_options_cc_proto", "//mediapipe/tasks/cc/vision/gesture_recognizer/proto:gesture_recognizer_graph_options_cc_proto",

View File

@ -39,6 +39,7 @@ limitations under the License.
#include "mediapipe/tasks/cc/core/task_runner.h" #include "mediapipe/tasks/cc/core/task_runner.h"
#include "mediapipe/tasks/cc/core/utils.h" #include "mediapipe/tasks/cc/core/utils.h"
#include "mediapipe/tasks/cc/vision/core/base_vision_task_api.h" #include "mediapipe/tasks/cc/vision/core/base_vision_task_api.h"
#include "mediapipe/tasks/cc/vision/core/image_processing_options.h"
#include "mediapipe/tasks/cc/vision/core/vision_task_api_factory.h" #include "mediapipe/tasks/cc/vision/core/vision_task_api_factory.h"
#include "mediapipe/tasks/cc/vision/gesture_recognizer/proto/gesture_recognizer_graph_options.pb.h" #include "mediapipe/tasks/cc/vision/gesture_recognizer/proto/gesture_recognizer_graph_options.pb.h"
#include "mediapipe/tasks/cc/vision/gesture_recognizer/proto/hand_gesture_recognizer_graph_options.pb.h" #include "mediapipe/tasks/cc/vision/gesture_recognizer/proto/hand_gesture_recognizer_graph_options.pb.h"
@ -76,31 +77,6 @@ constexpr char kHandWorldLandmarksTag[] = "WORLD_LANDMARKS";
constexpr char kHandWorldLandmarksStreamName[] = "world_landmarks"; constexpr char kHandWorldLandmarksStreamName[] = "world_landmarks";
constexpr int kMicroSecondsPerMilliSecond = 1000; constexpr int kMicroSecondsPerMilliSecond = 1000;
// Builds a NormalizedRect filling the whole image. If an input rect is
// provided, its rotation is carried over into the result and an error is
// returned if it also specifies a region-of-interest. Otherwise, rotation is
// set to 0.
absl::StatusOr<NormalizedRect> FillNormalizedRect(
    std::optional<NormalizedRect> normalized_rect) {
  NormalizedRect rect = normalized_rect.value_or(NormalizedRect());
  // Any coordinate field being set means the caller tried to pass a
  // region-of-interest, which this task rejects.
  if (rect.has_x_center() || rect.has_y_center() || rect.has_width() ||
      rect.has_height()) {
    return CreateStatusWithPayload(
        absl::StatusCode::kInvalidArgument,
        "GestureRecognizer does not support region-of-interest.",
        MediaPipeTasksStatus::kInvalidArgumentError);
  }
  rect.set_x_center(0.5);
  rect.set_y_center(0.5);
  rect.set_width(1);
  rect.set_height(1);
  return rect;
}
// Creates a MediaPipe graph config that contains a subgraph node of // Creates a MediaPipe graph config that contains a subgraph node of
// "mediapipe.tasks.vision.GestureRecognizerGraph". If the task is running // "mediapipe.tasks.vision.GestureRecognizerGraph". If the task is running
// in the live stream mode, a "FlowLimiterCalculator" will be added to limit the // in the live stream mode, a "FlowLimiterCalculator" will be added to limit the
@ -248,15 +224,16 @@ absl::StatusOr<std::unique_ptr<GestureRecognizer>> GestureRecognizer::Create(
absl::StatusOr<GestureRecognitionResult> GestureRecognizer::Recognize( absl::StatusOr<GestureRecognitionResult> GestureRecognizer::Recognize(
mediapipe::Image image, mediapipe::Image image,
std::optional<mediapipe::NormalizedRect> image_processing_options) { std::optional<core::ImageProcessingOptions> image_processing_options) {
if (image.UsesGpu()) { if (image.UsesGpu()) {
return CreateStatusWithPayload( return CreateStatusWithPayload(
absl::StatusCode::kInvalidArgument, absl::StatusCode::kInvalidArgument,
"GPU input images are currently not supported.", "GPU input images are currently not supported.",
MediaPipeTasksStatus::kRunnerUnexpectedInputError); MediaPipeTasksStatus::kRunnerUnexpectedInputError);
} }
ASSIGN_OR_RETURN(NormalizedRect norm_rect, ASSIGN_OR_RETURN(
FillNormalizedRect(image_processing_options)); NormalizedRect norm_rect,
ConvertToNormalizedRect(image_processing_options, /*roi_allowed=*/false));
ASSIGN_OR_RETURN( ASSIGN_OR_RETURN(
auto output_packets, auto output_packets,
ProcessImageData( ProcessImageData(
@ -283,15 +260,16 @@ absl::StatusOr<GestureRecognitionResult> GestureRecognizer::Recognize(
absl::StatusOr<GestureRecognitionResult> GestureRecognizer::RecognizeForVideo( absl::StatusOr<GestureRecognitionResult> GestureRecognizer::RecognizeForVideo(
mediapipe::Image image, int64 timestamp_ms, mediapipe::Image image, int64 timestamp_ms,
std::optional<mediapipe::NormalizedRect> image_processing_options) { std::optional<core::ImageProcessingOptions> image_processing_options) {
if (image.UsesGpu()) { if (image.UsesGpu()) {
return CreateStatusWithPayload( return CreateStatusWithPayload(
absl::StatusCode::kInvalidArgument, absl::StatusCode::kInvalidArgument,
absl::StrCat("GPU input images are currently not supported."), absl::StrCat("GPU input images are currently not supported."),
MediaPipeTasksStatus::kRunnerUnexpectedInputError); MediaPipeTasksStatus::kRunnerUnexpectedInputError);
} }
ASSIGN_OR_RETURN(NormalizedRect norm_rect, ASSIGN_OR_RETURN(
FillNormalizedRect(image_processing_options)); NormalizedRect norm_rect,
ConvertToNormalizedRect(image_processing_options, /*roi_allowed=*/false));
ASSIGN_OR_RETURN( ASSIGN_OR_RETURN(
auto output_packets, auto output_packets,
ProcessVideoData( ProcessVideoData(
@ -321,15 +299,16 @@ absl::StatusOr<GestureRecognitionResult> GestureRecognizer::RecognizeForVideo(
absl::Status GestureRecognizer::RecognizeAsync( absl::Status GestureRecognizer::RecognizeAsync(
mediapipe::Image image, int64 timestamp_ms, mediapipe::Image image, int64 timestamp_ms,
std::optional<mediapipe::NormalizedRect> image_processing_options) { std::optional<core::ImageProcessingOptions> image_processing_options) {
if (image.UsesGpu()) { if (image.UsesGpu()) {
return CreateStatusWithPayload( return CreateStatusWithPayload(
absl::StatusCode::kInvalidArgument, absl::StatusCode::kInvalidArgument,
absl::StrCat("GPU input images are currently not supported."), absl::StrCat("GPU input images are currently not supported."),
MediaPipeTasksStatus::kRunnerUnexpectedInputError); MediaPipeTasksStatus::kRunnerUnexpectedInputError);
} }
ASSIGN_OR_RETURN(NormalizedRect norm_rect, ASSIGN_OR_RETURN(
FillNormalizedRect(image_processing_options)); NormalizedRect norm_rect,
ConvertToNormalizedRect(image_processing_options, /*roi_allowed=*/false));
return SendLiveStreamData( return SendLiveStreamData(
{{kImageInStreamName, {{kImageInStreamName,
MakePacket<Image>(std::move(image)) MakePacket<Image>(std::move(image))

View File

@ -23,10 +23,10 @@ limitations under the License.
#include "mediapipe/framework/formats/classification.pb.h" #include "mediapipe/framework/formats/classification.pb.h"
#include "mediapipe/framework/formats/image.h" #include "mediapipe/framework/formats/image.h"
#include "mediapipe/framework/formats/landmark.pb.h" #include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/formats/rect.pb.h"
#include "mediapipe/tasks/cc/components/containers/gesture_recognition_result.h" #include "mediapipe/tasks/cc/components/containers/gesture_recognition_result.h"
#include "mediapipe/tasks/cc/core/base_options.h" #include "mediapipe/tasks/cc/core/base_options.h"
#include "mediapipe/tasks/cc/vision/core/base_vision_task_api.h" #include "mediapipe/tasks/cc/vision/core/base_vision_task_api.h"
#include "mediapipe/tasks/cc/vision/core/image_processing_options.h"
#include "mediapipe/tasks/cc/vision/core/running_mode.h" #include "mediapipe/tasks/cc/vision/core/running_mode.h"
namespace mediapipe { namespace mediapipe {
@ -129,36 +129,36 @@ class GestureRecognizer : tasks::vision::core::BaseVisionTaskApi {
// Only use this method when the GestureRecognizer is created with the image // Only use this method when the GestureRecognizer is created with the image
// running mode. // running mode.
// //
// image - mediapipe::Image // The optional 'image_processing_options' parameter can be used to specify
// Image to perform hand gesture recognition on. // the rotation to apply to the image before performing recognition, by
// imageProcessingOptions - std::optional<NormalizedRect> // setting its 'rotation_degrees' field. Note that specifying a
// If provided, can be used to specify the rotation to apply to the image // region-of-interest using the 'region_of_interest' field is NOT supported
// before performing classification, by setting its 'rotation' field in // and will result in an invalid argument error being returned.
// radians (e.g. 'M_PI / 2' for a 90° anti-clockwise rotation). Note that
// specifying a region-of-interest using the 'x_center', 'y_center', 'width'
// and 'height' fields is NOT supported and will result in an invalid
// argument error being returned.
// //
// The image can be of any size with format RGB or RGBA. // The image can be of any size with format RGB or RGBA.
// TODO: Describes how the input image will be preprocessed // TODO: Describes how the input image will be preprocessed
// after the yuv support is implemented. // after the yuv support is implemented.
// TODO: use an ImageProcessingOptions struct instead of
// NormalizedRect.
absl::StatusOr<components::containers::GestureRecognitionResult> Recognize( absl::StatusOr<components::containers::GestureRecognitionResult> Recognize(
Image image, Image image,
std::optional<mediapipe::NormalizedRect> image_processing_options = std::optional<core::ImageProcessingOptions> image_processing_options =
std::nullopt); std::nullopt);
// Performs gesture recognition on the provided video frame. // Performs gesture recognition on the provided video frame.
// Only use this method when the GestureRecognizer is created with the video // Only use this method when the GestureRecognizer is created with the video
// running mode. // running mode.
// //
// The optional 'image_processing_options' parameter can be used to specify
// the rotation to apply to the image before performing recognition, by
// setting its 'rotation_degrees' field. Note that specifying a
// region-of-interest using the 'region_of_interest' field is NOT supported
// and will result in an invalid argument error being returned.
//
// The image can be of any size with format RGB or RGBA. It's required to // The image can be of any size with format RGB or RGBA. It's required to
// provide the video frame's timestamp (in milliseconds). The input timestamps // provide the video frame's timestamp (in milliseconds). The input timestamps
// must be monotonically increasing. // must be monotonically increasing.
absl::StatusOr<components::containers::GestureRecognitionResult> absl::StatusOr<components::containers::GestureRecognitionResult>
RecognizeForVideo(Image image, int64 timestamp_ms, RecognizeForVideo(Image image, int64 timestamp_ms,
std::optional<mediapipe::NormalizedRect> std::optional<core::ImageProcessingOptions>
image_processing_options = std::nullopt); image_processing_options = std::nullopt);
// Sends live image data to perform gesture recognition, and the results will // Sends live image data to perform gesture recognition, and the results will
@ -171,6 +171,12 @@ class GestureRecognizer : tasks::vision::core::BaseVisionTaskApi {
// sent to the gesture recognizer. The input timestamps must be monotonically // sent to the gesture recognizer. The input timestamps must be monotonically
// increasing. // increasing.
// //
// The optional 'image_processing_options' parameter can be used to specify
// the rotation to apply to the image before performing recognition, by
// setting its 'rotation_degrees' field. Note that specifying a
// region-of-interest using the 'region_of_interest' field is NOT supported
// and will result in an invalid argument error being returned.
//
// The "result_callback" provides // The "result_callback" provides
// - A vector of GestureRecognitionResult, each is the recognized results // - A vector of GestureRecognitionResult, each is the recognized results
// for a input frame. // for a input frame.
@ -180,7 +186,7 @@ class GestureRecognizer : tasks::vision::core::BaseVisionTaskApi {
// outside of the callback, callers need to make a copy of the image. // outside of the callback, callers need to make a copy of the image.
// - The input timestamp in milliseconds. // - The input timestamp in milliseconds.
absl::Status RecognizeAsync(Image image, int64 timestamp_ms, absl::Status RecognizeAsync(Image image, int64 timestamp_ms,
std::optional<mediapipe::NormalizedRect> std::optional<core::ImageProcessingOptions>
image_processing_options = std::nullopt); image_processing_options = std::nullopt);
// Shuts down the GestureRecognizer when all works are done. // Shuts down the GestureRecognizer when all works are done.

View File

@ -59,6 +59,7 @@ cc_library(
"//mediapipe/tasks/cc/core/proto:base_options_cc_proto", "//mediapipe/tasks/cc/core/proto:base_options_cc_proto",
"//mediapipe/tasks/cc/core/proto:inference_subgraph_cc_proto", "//mediapipe/tasks/cc/core/proto:inference_subgraph_cc_proto",
"//mediapipe/tasks/cc/vision/core:base_vision_task_api", "//mediapipe/tasks/cc/vision/core:base_vision_task_api",
"//mediapipe/tasks/cc/vision/core:image_processing_options",
"//mediapipe/tasks/cc/vision/core:running_mode", "//mediapipe/tasks/cc/vision/core:running_mode",
"//mediapipe/tasks/cc/vision/core:vision_task_api_factory", "//mediapipe/tasks/cc/vision/core:vision_task_api_factory",
"//mediapipe/tasks/cc/vision/image_classifier/proto:image_classifier_graph_options_cc_proto", "//mediapipe/tasks/cc/vision/image_classifier/proto:image_classifier_graph_options_cc_proto",

View File

@ -34,6 +34,7 @@ limitations under the License.
#include "mediapipe/tasks/cc/core/proto/inference_subgraph.pb.h" #include "mediapipe/tasks/cc/core/proto/inference_subgraph.pb.h"
#include "mediapipe/tasks/cc/core/task_runner.h" #include "mediapipe/tasks/cc/core/task_runner.h"
#include "mediapipe/tasks/cc/core/utils.h" #include "mediapipe/tasks/cc/core/utils.h"
#include "mediapipe/tasks/cc/vision/core/image_processing_options.h"
#include "mediapipe/tasks/cc/vision/core/running_mode.h" #include "mediapipe/tasks/cc/vision/core/running_mode.h"
#include "mediapipe/tasks/cc/vision/core/vision_task_api_factory.h" #include "mediapipe/tasks/cc/vision/core/vision_task_api_factory.h"
#include "mediapipe/tasks/cc/vision/image_classifier/proto/image_classifier_graph_options.pb.h" #include "mediapipe/tasks/cc/vision/image_classifier/proto/image_classifier_graph_options.pb.h"
@ -59,26 +60,6 @@ constexpr int kMicroSecondsPerMilliSecond = 1000;
using ::mediapipe::tasks::components::containers::proto::ClassificationResult; using ::mediapipe::tasks::components::containers::proto::ClassificationResult;
using ::mediapipe::tasks::core::PacketMap; using ::mediapipe::tasks::core::PacketMap;
// Returns a NormalizedRect covering the full image if input is not present.
// Otherwise, makes sure the x_center, y_center, width and height are set in
// case only a rotation was provided in the input.
NormalizedRect FillNormalizedRect(
    std::optional<NormalizedRect> normalized_rect) {
  NormalizedRect rect = normalized_rect.value_or(NormalizedRect());
  // If no coordinate field is set, the caller only provided a rotation (or
  // nothing at all): default to the full image.
  const bool has_coordinates = rect.has_x_center() || rect.has_y_center() ||
                               rect.has_width() || rect.has_height();
  if (!has_coordinates) {
    rect.set_x_center(0.5);
    rect.set_y_center(0.5);
    rect.set_width(1);
    rect.set_height(1);
  }
  return rect;
}
// Creates a MediaPipe graph config that contains a subgraph node of // Creates a MediaPipe graph config that contains a subgraph node of
// type "ImageClassifierGraph". If the task is running in the live stream mode, // type "ImageClassifierGraph". If the task is running in the live stream mode,
// a "FlowLimiterCalculator" will be added to limit the number of frames in // a "FlowLimiterCalculator" will be added to limit the number of frames in
@ -164,14 +145,16 @@ absl::StatusOr<std::unique_ptr<ImageClassifier>> ImageClassifier::Create(
} }
absl::StatusOr<ClassificationResult> ImageClassifier::Classify( absl::StatusOr<ClassificationResult> ImageClassifier::Classify(
Image image, std::optional<NormalizedRect> image_processing_options) { Image image,
std::optional<core::ImageProcessingOptions> image_processing_options) {
if (image.UsesGpu()) { if (image.UsesGpu()) {
return CreateStatusWithPayload( return CreateStatusWithPayload(
absl::StatusCode::kInvalidArgument, absl::StatusCode::kInvalidArgument,
"GPU input images are currently not supported.", "GPU input images are currently not supported.",
MediaPipeTasksStatus::kRunnerUnexpectedInputError); MediaPipeTasksStatus::kRunnerUnexpectedInputError);
} }
NormalizedRect norm_rect = FillNormalizedRect(image_processing_options); ASSIGN_OR_RETURN(NormalizedRect norm_rect,
ConvertToNormalizedRect(image_processing_options));
ASSIGN_OR_RETURN( ASSIGN_OR_RETURN(
auto output_packets, auto output_packets,
ProcessImageData( ProcessImageData(
@ -183,14 +166,15 @@ absl::StatusOr<ClassificationResult> ImageClassifier::Classify(
absl::StatusOr<ClassificationResult> ImageClassifier::ClassifyForVideo( absl::StatusOr<ClassificationResult> ImageClassifier::ClassifyForVideo(
Image image, int64 timestamp_ms, Image image, int64 timestamp_ms,
std::optional<NormalizedRect> image_processing_options) { std::optional<core::ImageProcessingOptions> image_processing_options) {
if (image.UsesGpu()) { if (image.UsesGpu()) {
return CreateStatusWithPayload( return CreateStatusWithPayload(
absl::StatusCode::kInvalidArgument, absl::StatusCode::kInvalidArgument,
"GPU input images are currently not supported.", "GPU input images are currently not supported.",
MediaPipeTasksStatus::kRunnerUnexpectedInputError); MediaPipeTasksStatus::kRunnerUnexpectedInputError);
} }
NormalizedRect norm_rect = FillNormalizedRect(image_processing_options); ASSIGN_OR_RETURN(NormalizedRect norm_rect,
ConvertToNormalizedRect(image_processing_options));
ASSIGN_OR_RETURN( ASSIGN_OR_RETURN(
auto output_packets, auto output_packets,
ProcessVideoData( ProcessVideoData(
@ -206,14 +190,15 @@ absl::StatusOr<ClassificationResult> ImageClassifier::ClassifyForVideo(
absl::Status ImageClassifier::ClassifyAsync( absl::Status ImageClassifier::ClassifyAsync(
Image image, int64 timestamp_ms, Image image, int64 timestamp_ms,
std::optional<NormalizedRect> image_processing_options) { std::optional<core::ImageProcessingOptions> image_processing_options) {
if (image.UsesGpu()) { if (image.UsesGpu()) {
return CreateStatusWithPayload( return CreateStatusWithPayload(
absl::StatusCode::kInvalidArgument, absl::StatusCode::kInvalidArgument,
"GPU input images are currently not supported.", "GPU input images are currently not supported.",
MediaPipeTasksStatus::kRunnerUnexpectedInputError); MediaPipeTasksStatus::kRunnerUnexpectedInputError);
} }
NormalizedRect norm_rect = FillNormalizedRect(image_processing_options); ASSIGN_OR_RETURN(NormalizedRect norm_rect,
ConvertToNormalizedRect(image_processing_options));
return SendLiveStreamData( return SendLiveStreamData(
{{kImageInStreamName, {{kImageInStreamName,
MakePacket<Image>(std::move(image)) MakePacket<Image>(std::move(image))

View File

@ -22,11 +22,11 @@ limitations under the License.
#include "absl/status/statusor.h" #include "absl/status/statusor.h"
#include "mediapipe/framework/formats/image.h" #include "mediapipe/framework/formats/image.h"
#include "mediapipe/framework/formats/rect.pb.h"
#include "mediapipe/tasks/cc/components/containers/proto/classifications.pb.h" #include "mediapipe/tasks/cc/components/containers/proto/classifications.pb.h"
#include "mediapipe/tasks/cc/components/processors/classifier_options.h" #include "mediapipe/tasks/cc/components/processors/classifier_options.h"
#include "mediapipe/tasks/cc/core/base_options.h" #include "mediapipe/tasks/cc/core/base_options.h"
#include "mediapipe/tasks/cc/vision/core/base_vision_task_api.h" #include "mediapipe/tasks/cc/vision/core/base_vision_task_api.h"
#include "mediapipe/tasks/cc/vision/core/image_processing_options.h"
#include "mediapipe/tasks/cc/vision/core/running_mode.h" #include "mediapipe/tasks/cc/vision/core/running_mode.h"
namespace mediapipe { namespace mediapipe {
@ -109,12 +109,10 @@ class ImageClassifier : tasks::vision::core::BaseVisionTaskApi {
// //
// The optional 'image_processing_options' parameter can be used to specify: // The optional 'image_processing_options' parameter can be used to specify:
// - the rotation to apply to the image before performing classification, by // - the rotation to apply to the image before performing classification, by
// setting its 'rotation' field in radians (e.g. 'M_PI / 2' for a 90° // setting its 'rotation_degrees' field.
// anti-clockwise rotation).
// and/or // and/or
// - the region-of-interest on which to perform classification, by setting its // - the region-of-interest on which to perform classification, by setting its
// 'x_center', 'y_center', 'width' and 'height' fields. If none of these is // 'region_of_interest' field. If not specified, the full image is used.
// set, they will automatically be set to cover the full image.
// If both are specified, the crop around the region-of-interest is extracted // If both are specified, the crop around the region-of-interest is extracted
// first, then the specified rotation is applied to the crop. // first, then the specified rotation is applied to the crop.
// //
@ -126,19 +124,17 @@ class ImageClassifier : tasks::vision::core::BaseVisionTaskApi {
// YUVToImageCalculator is integrated. // YUVToImageCalculator is integrated.
absl::StatusOr<components::containers::proto::ClassificationResult> Classify( absl::StatusOr<components::containers::proto::ClassificationResult> Classify(
mediapipe::Image image, mediapipe::Image image,
std::optional<mediapipe::NormalizedRect> image_processing_options = std::optional<core::ImageProcessingOptions> image_processing_options =
std::nullopt); std::nullopt);
// Performs image classification on the provided video frame. // Performs image classification on the provided video frame.
// //
// The optional 'image_processing_options' parameter can be used to specify: // The optional 'image_processing_options' parameter can be used to specify:
// - the rotation to apply to the image before performing classification, by // - the rotation to apply to the image before performing classification, by
// setting its 'rotation' field in radians (e.g. 'M_PI / 2' for a 90° // setting its 'rotation_degrees' field.
// anti-clockwise rotation).
// and/or // and/or
// - the region-of-interest on which to perform classification, by setting its // - the region-of-interest on which to perform classification, by setting its
// 'x_center', 'y_center', 'width' and 'height' fields. If none of these is // 'region_of_interest' field. If not specified, the full image is used.
// set, they will automatically be set to cover the full image.
// If both are specified, the crop around the region-of-interest is extracted // If both are specified, the crop around the region-of-interest is extracted
// first, then the specified rotation is applied to the crop. // first, then the specified rotation is applied to the crop.
// //
@ -150,7 +146,7 @@ class ImageClassifier : tasks::vision::core::BaseVisionTaskApi {
// must be monotonically increasing. // must be monotonically increasing.
absl::StatusOr<components::containers::proto::ClassificationResult> absl::StatusOr<components::containers::proto::ClassificationResult>
ClassifyForVideo(mediapipe::Image image, int64 timestamp_ms, ClassifyForVideo(mediapipe::Image image, int64 timestamp_ms,
std::optional<mediapipe::NormalizedRect> std::optional<core::ImageProcessingOptions>
image_processing_options = std::nullopt); image_processing_options = std::nullopt);
// Sends live image data to image classification, and the results will be // Sends live image data to image classification, and the results will be
@ -158,12 +154,10 @@ class ImageClassifier : tasks::vision::core::BaseVisionTaskApi {
// //
// The optional 'image_processing_options' parameter can be used to specify: // The optional 'image_processing_options' parameter can be used to specify:
// - the rotation to apply to the image before performing classification, by // - the rotation to apply to the image before performing classification, by
// setting its 'rotation' field in radians (e.g. 'M_PI / 2' for a 90° // setting its 'rotation_degrees' field.
// anti-clockwise rotation).
// and/or // and/or
// - the region-of-interest on which to perform classification, by setting its // - the region-of-interest on which to perform classification, by setting its
// 'x_center', 'y_center', 'width' and 'height' fields. If none of these is // 'region_of_interest' field. If not specified, the full image is used.
// set, they will automatically be set to cover the full image.
// If both are specified, the crop around the region-of-interest is extracted // If both are specified, the crop around the region-of-interest is extracted
// first, then the specified rotation is applied to the crop. // first, then the specified rotation is applied to the crop.
// //
@ -175,7 +169,7 @@ class ImageClassifier : tasks::vision::core::BaseVisionTaskApi {
// sent to the object detector. The input timestamps must be monotonically // sent to the object detector. The input timestamps must be monotonically
// increasing. // increasing.
// //
// The "result_callback" prvoides // The "result_callback" provides:
// - The classification results as a ClassificationResult object. // - The classification results as a ClassificationResult object.
// - The const reference to the corresponding input image that the image // - The const reference to the corresponding input image that the image
// classifier runs on. Note that the const reference to the image will no // classifier runs on. Note that the const reference to the image will no
@ -183,12 +177,9 @@ class ImageClassifier : tasks::vision::core::BaseVisionTaskApi {
// outside of the callback, callers need to make a copy of the image. // outside of the callback, callers need to make a copy of the image.
// - The input timestamp in milliseconds. // - The input timestamp in milliseconds.
absl::Status ClassifyAsync(mediapipe::Image image, int64 timestamp_ms, absl::Status ClassifyAsync(mediapipe::Image image, int64 timestamp_ms,
std::optional<mediapipe::NormalizedRect> std::optional<core::ImageProcessingOptions>
image_processing_options = std::nullopt); image_processing_options = std::nullopt);
// TODO: add Classify() variants taking a region of interest as
// additional argument.
// Shuts down the ImageClassifier when all works are done. // Shuts down the ImageClassifier when all works are done.
absl::Status Close() { return runner_->Close(); } absl::Status Close() { return runner_->Close(); }
}; };

View File

@ -27,7 +27,6 @@ limitations under the License.
#include "absl/strings/str_format.h" #include "absl/strings/str_format.h"
#include "mediapipe/framework/deps/file_path.h" #include "mediapipe/framework/deps/file_path.h"
#include "mediapipe/framework/formats/image.h" #include "mediapipe/framework/formats/image.h"
#include "mediapipe/framework/formats/rect.pb.h"
#include "mediapipe/framework/port/gmock.h" #include "mediapipe/framework/port/gmock.h"
#include "mediapipe/framework/port/gtest.h" #include "mediapipe/framework/port/gtest.h"
#include "mediapipe/framework/port/parse_text_proto.h" #include "mediapipe/framework/port/parse_text_proto.h"
@ -35,6 +34,8 @@ limitations under the License.
#include "mediapipe/tasks/cc/common.h" #include "mediapipe/tasks/cc/common.h"
#include "mediapipe/tasks/cc/components/containers/proto/category.pb.h" #include "mediapipe/tasks/cc/components/containers/proto/category.pb.h"
#include "mediapipe/tasks/cc/components/containers/proto/classifications.pb.h" #include "mediapipe/tasks/cc/components/containers/proto/classifications.pb.h"
#include "mediapipe/tasks/cc/components/containers/rect.h"
#include "mediapipe/tasks/cc/vision/core/image_processing_options.h"
#include "mediapipe/tasks/cc/vision/core/running_mode.h" #include "mediapipe/tasks/cc/vision/core/running_mode.h"
#include "mediapipe/tasks/cc/vision/utils/image_utils.h" #include "mediapipe/tasks/cc/vision/utils/image_utils.h"
#include "tensorflow/lite/core/api/op_resolver.h" #include "tensorflow/lite/core/api/op_resolver.h"
@ -49,9 +50,11 @@ namespace image_classifier {
namespace { namespace {
using ::mediapipe::file::JoinPath; using ::mediapipe::file::JoinPath;
using ::mediapipe::tasks::components::containers::Rect;
using ::mediapipe::tasks::components::containers::proto::ClassificationEntry; using ::mediapipe::tasks::components::containers::proto::ClassificationEntry;
using ::mediapipe::tasks::components::containers::proto::ClassificationResult; using ::mediapipe::tasks::components::containers::proto::ClassificationResult;
using ::mediapipe::tasks::components::containers::proto::Classifications; using ::mediapipe::tasks::components::containers::proto::Classifications;
using ::mediapipe::tasks::vision::core::ImageProcessingOptions;
using ::testing::HasSubstr; using ::testing::HasSubstr;
using ::testing::Optional; using ::testing::Optional;
@ -547,12 +550,9 @@ TEST_F(ImageModeTest, SucceedsWithRegionOfInterest) {
options->classifier_options.max_results = 1; options->classifier_options.max_results = 1;
MP_ASSERT_OK_AND_ASSIGN(std::unique_ptr<ImageClassifier> image_classifier, MP_ASSERT_OK_AND_ASSIGN(std::unique_ptr<ImageClassifier> image_classifier,
ImageClassifier::Create(std::move(options))); ImageClassifier::Create(std::move(options)));
// Crop around the soccer ball. // Region-of-interest around the soccer ball.
NormalizedRect image_processing_options; Rect roi{/*left=*/0.45, /*top=*/0.3075, /*right=*/0.614, /*bottom=*/0.7345};
image_processing_options.set_x_center(0.532); ImageProcessingOptions image_processing_options{roi, /*rotation_degrees=*/0};
image_processing_options.set_y_center(0.521);
image_processing_options.set_width(0.164);
image_processing_options.set_height(0.427);
MP_ASSERT_OK_AND_ASSIGN(auto results, image_classifier->Classify( MP_ASSERT_OK_AND_ASSIGN(auto results, image_classifier->Classify(
image, image_processing_options)); image, image_processing_options));
@ -572,8 +572,8 @@ TEST_F(ImageModeTest, SucceedsWithRotation) {
ImageClassifier::Create(std::move(options))); ImageClassifier::Create(std::move(options)));
// Specify a 90° anti-clockwise rotation. // Specify a 90° anti-clockwise rotation.
NormalizedRect image_processing_options; ImageProcessingOptions image_processing_options;
image_processing_options.set_rotation(M_PI / 2.0); image_processing_options.rotation_degrees = -90;
MP_ASSERT_OK_AND_ASSIGN(auto results, image_classifier->Classify( MP_ASSERT_OK_AND_ASSIGN(auto results, image_classifier->Classify(
image, image_processing_options)); image, image_processing_options));
@ -616,13 +616,10 @@ TEST_F(ImageModeTest, SucceedsWithRegionOfInterestAndRotation) {
options->classifier_options.max_results = 1; options->classifier_options.max_results = 1;
MP_ASSERT_OK_AND_ASSIGN(std::unique_ptr<ImageClassifier> image_classifier, MP_ASSERT_OK_AND_ASSIGN(std::unique_ptr<ImageClassifier> image_classifier,
ImageClassifier::Create(std::move(options))); ImageClassifier::Create(std::move(options)));
// Crop around the chair, with 90° anti-clockwise rotation. // Region-of-interest around the chair, with 90° anti-clockwise rotation.
NormalizedRect image_processing_options; Rect roi{/*left=*/0.006, /*top=*/0.1763, /*right=*/0.5702, /*bottom=*/0.3049};
image_processing_options.set_x_center(0.2821); ImageProcessingOptions image_processing_options{roi,
image_processing_options.set_y_center(0.2406); /*rotation_degrees=*/-90};
image_processing_options.set_width(0.5642);
image_processing_options.set_height(0.1286);
image_processing_options.set_rotation(M_PI / 2.0);
MP_ASSERT_OK_AND_ASSIGN(auto results, image_classifier->Classify( MP_ASSERT_OK_AND_ASSIGN(auto results, image_classifier->Classify(
image, image_processing_options)); image, image_processing_options));
@ -633,7 +630,7 @@ TEST_F(ImageModeTest, SucceedsWithRegionOfInterestAndRotation) {
entries { entries {
categories { categories {
index: 560 index: 560
score: 0.6800408 score: 0.6522213
category_name: "folding chair" category_name: "folding chair"
} }
timestamp_ms: 0 timestamp_ms: 0
@ -643,6 +640,69 @@ TEST_F(ImageModeTest, SucceedsWithRegionOfInterestAndRotation) {
})pb")); })pb"));
} }
// Testing all these once with ImageClassifier.
TEST_F(ImageModeTest, FailsWithInvalidImageProcessingOptions) {
MP_ASSERT_OK_AND_ASSIGN(Image image,
DecodeImageFromFile(JoinPath("./", kTestDataDirectory,
"multi_objects.jpg")));
auto options = std::make_unique<ImageClassifierOptions>();
options->base_options.model_asset_path =
JoinPath("./", kTestDataDirectory, kMobileNetFloatWithMetadata);
MP_ASSERT_OK_AND_ASSIGN(std::unique_ptr<ImageClassifier> image_classifier,
ImageClassifier::Create(std::move(options)));
// Invalid: left > right.
Rect roi{/*left=*/0.9, /*top=*/0, /*right=*/0.1, /*bottom=*/1};
ImageProcessingOptions image_processing_options{roi,
/*rotation_degrees=*/0};
auto results = image_classifier->Classify(image, image_processing_options);
EXPECT_EQ(results.status().code(), absl::StatusCode::kInvalidArgument);
EXPECT_THAT(results.status().message(),
HasSubstr("Expected Rect with left < right and top < bottom"));
EXPECT_THAT(
results.status().GetPayload(kMediaPipeTasksPayload),
Optional(absl::Cord(absl::StrCat(
MediaPipeTasksStatus::kImageProcessingInvalidArgumentError))));
// Invalid: top > bottom.
roi = {/*left=*/0, /*top=*/0.9, /*right=*/1, /*bottom=*/0.1};
image_processing_options = {roi,
/*rotation_degrees=*/0};
results = image_classifier->Classify(image, image_processing_options);
EXPECT_EQ(results.status().code(), absl::StatusCode::kInvalidArgument);
EXPECT_THAT(results.status().message(),
HasSubstr("Expected Rect with left < right and top < bottom"));
EXPECT_THAT(
results.status().GetPayload(kMediaPipeTasksPayload),
Optional(absl::Cord(absl::StrCat(
MediaPipeTasksStatus::kImageProcessingInvalidArgumentError))));
// Invalid: coordinates out of [0,1] range.
roi = {/*left=*/-0.1, /*top=*/0, /*right=*/1, /*bottom=*/1};
image_processing_options = {roi,
/*rotation_degrees=*/0};
results = image_classifier->Classify(image, image_processing_options);
EXPECT_EQ(results.status().code(), absl::StatusCode::kInvalidArgument);
EXPECT_THAT(results.status().message(),
HasSubstr("Expected Rect values to be in [0,1]"));
EXPECT_THAT(
results.status().GetPayload(kMediaPipeTasksPayload),
Optional(absl::Cord(absl::StrCat(
MediaPipeTasksStatus::kImageProcessingInvalidArgumentError))));
// Invalid: rotation not a multiple of 90°.
image_processing_options = {/*region_of_interest=*/std::nullopt,
/*rotation_degrees=*/1};
results = image_classifier->Classify(image, image_processing_options);
EXPECT_EQ(results.status().code(), absl::StatusCode::kInvalidArgument);
EXPECT_THAT(results.status().message(),
HasSubstr("Expected rotation to be a multiple of 90°"));
EXPECT_THAT(
results.status().GetPayload(kMediaPipeTasksPayload),
Optional(absl::Cord(absl::StrCat(
MediaPipeTasksStatus::kImageProcessingInvalidArgumentError))));
}
class VideoModeTest : public tflite_shims::testing::Test {}; class VideoModeTest : public tflite_shims::testing::Test {};
TEST_F(VideoModeTest, FailsWithCallingWrongMethod) { TEST_F(VideoModeTest, FailsWithCallingWrongMethod) {
@ -732,11 +792,9 @@ TEST_F(VideoModeTest, SucceedsWithRegionOfInterest) {
MP_ASSERT_OK_AND_ASSIGN(std::unique_ptr<ImageClassifier> image_classifier, MP_ASSERT_OK_AND_ASSIGN(std::unique_ptr<ImageClassifier> image_classifier,
ImageClassifier::Create(std::move(options))); ImageClassifier::Create(std::move(options)));
// Crop around the soccer ball. // Crop around the soccer ball.
NormalizedRect image_processing_options; // Region-of-interest around the soccer ball.
image_processing_options.set_x_center(0.532); Rect roi{/*left=*/0.45, /*top=*/0.3075, /*right=*/0.614, /*bottom=*/0.7345};
image_processing_options.set_y_center(0.521); ImageProcessingOptions image_processing_options{roi, /*rotation_degrees=*/0};
image_processing_options.set_width(0.164);
image_processing_options.set_height(0.427);
for (int i = 0; i < iterations; ++i) { for (int i = 0; i < iterations; ++i) {
MP_ASSERT_OK_AND_ASSIGN( MP_ASSERT_OK_AND_ASSIGN(
@ -877,11 +935,8 @@ TEST_F(LiveStreamModeTest, SucceedsWithRegionOfInterest) {
MP_ASSERT_OK_AND_ASSIGN(std::unique_ptr<ImageClassifier> image_classifier, MP_ASSERT_OK_AND_ASSIGN(std::unique_ptr<ImageClassifier> image_classifier,
ImageClassifier::Create(std::move(options))); ImageClassifier::Create(std::move(options)));
// Crop around the soccer ball. // Crop around the soccer ball.
NormalizedRect image_processing_options; Rect roi{/*left=*/0.45, /*top=*/0.3075, /*right=*/0.614, /*bottom=*/0.7345};
image_processing_options.set_x_center(0.532); ImageProcessingOptions image_processing_options{roi, /*rotation_degrees=*/0};
image_processing_options.set_y_center(0.521);
image_processing_options.set_width(0.164);
image_processing_options.set_height(0.427);
for (int i = 0; i < iterations; ++i) { for (int i = 0; i < iterations; ++i) {
MP_ASSERT_OK( MP_ASSERT_OK(

View File

@ -75,6 +75,7 @@ cc_library(
"//mediapipe/tasks/cc/core/proto:base_options_cc_proto", "//mediapipe/tasks/cc/core/proto:base_options_cc_proto",
"//mediapipe/tasks/cc/core/proto:inference_subgraph_cc_proto", "//mediapipe/tasks/cc/core/proto:inference_subgraph_cc_proto",
"//mediapipe/tasks/cc/vision/core:base_vision_task_api", "//mediapipe/tasks/cc/vision/core:base_vision_task_api",
"//mediapipe/tasks/cc/vision/core:image_processing_options",
"//mediapipe/tasks/cc/vision/core:running_mode", "//mediapipe/tasks/cc/vision/core:running_mode",
"//mediapipe/tasks/cc/vision/core:vision_task_api_factory", "//mediapipe/tasks/cc/vision/core:vision_task_api_factory",
"//mediapipe/tasks/cc/vision/object_detector/proto:object_detector_options_cc_proto", "//mediapipe/tasks/cc/vision/object_detector/proto:object_detector_options_cc_proto",

View File

@ -34,6 +34,7 @@ limitations under the License.
#include "mediapipe/tasks/cc/core/proto/base_options.pb.h" #include "mediapipe/tasks/cc/core/proto/base_options.pb.h"
#include "mediapipe/tasks/cc/core/proto/inference_subgraph.pb.h" #include "mediapipe/tasks/cc/core/proto/inference_subgraph.pb.h"
#include "mediapipe/tasks/cc/core/utils.h" #include "mediapipe/tasks/cc/core/utils.h"
#include "mediapipe/tasks/cc/vision/core/image_processing_options.h"
#include "mediapipe/tasks/cc/vision/core/running_mode.h" #include "mediapipe/tasks/cc/vision/core/running_mode.h"
#include "mediapipe/tasks/cc/vision/core/vision_task_api_factory.h" #include "mediapipe/tasks/cc/vision/core/vision_task_api_factory.h"
#include "mediapipe/tasks/cc/vision/object_detector/proto/object_detector_options.pb.h" #include "mediapipe/tasks/cc/vision/object_detector/proto/object_detector_options.pb.h"
@ -58,31 +59,6 @@ constexpr int kMicroSecondsPerMilliSecond = 1000;
using ObjectDetectorOptionsProto = using ObjectDetectorOptionsProto =
object_detector::proto::ObjectDetectorOptions; object_detector::proto::ObjectDetectorOptions;
// Returns a NormalizedRect filling the whole image. If input is present, its
// rotation is set in the returned NormalizedRect and a check is performed to
// make sure no region-of-interest was provided. Otherwise, rotation is set to
// 0.
absl::StatusOr<NormalizedRect> FillNormalizedRect(
std::optional<NormalizedRect> normalized_rect) {
NormalizedRect result;
if (normalized_rect.has_value()) {
result = *normalized_rect;
}
bool has_coordinates = result.has_x_center() || result.has_y_center() ||
result.has_width() || result.has_height();
if (has_coordinates) {
return CreateStatusWithPayload(
absl::StatusCode::kInvalidArgument,
"ObjectDetector does not support region-of-interest.",
MediaPipeTasksStatus::kInvalidArgumentError);
}
result.set_x_center(0.5);
result.set_y_center(0.5);
result.set_width(1);
result.set_height(1);
return result;
}
// Creates a MediaPipe graph config that contains a subgraph node of // Creates a MediaPipe graph config that contains a subgraph node of
// "mediapipe.tasks.vision.ObjectDetectorGraph". If the task is running in the // "mediapipe.tasks.vision.ObjectDetectorGraph". If the task is running in the
// live stream mode, a "FlowLimiterCalculator" will be added to limit the // live stream mode, a "FlowLimiterCalculator" will be added to limit the
@ -170,15 +146,16 @@ absl::StatusOr<std::unique_ptr<ObjectDetector>> ObjectDetector::Create(
absl::StatusOr<std::vector<Detection>> ObjectDetector::Detect( absl::StatusOr<std::vector<Detection>> ObjectDetector::Detect(
mediapipe::Image image, mediapipe::Image image,
std::optional<mediapipe::NormalizedRect> image_processing_options) { std::optional<core::ImageProcessingOptions> image_processing_options) {
if (image.UsesGpu()) { if (image.UsesGpu()) {
return CreateStatusWithPayload( return CreateStatusWithPayload(
absl::StatusCode::kInvalidArgument, absl::StatusCode::kInvalidArgument,
absl::StrCat("GPU input images are currently not supported."), absl::StrCat("GPU input images are currently not supported."),
MediaPipeTasksStatus::kRunnerUnexpectedInputError); MediaPipeTasksStatus::kRunnerUnexpectedInputError);
} }
ASSIGN_OR_RETURN(NormalizedRect norm_rect, ASSIGN_OR_RETURN(
FillNormalizedRect(image_processing_options)); NormalizedRect norm_rect,
ConvertToNormalizedRect(image_processing_options, /*roi_allowed=*/false));
ASSIGN_OR_RETURN( ASSIGN_OR_RETURN(
auto output_packets, auto output_packets,
ProcessImageData( ProcessImageData(
@ -189,15 +166,16 @@ absl::StatusOr<std::vector<Detection>> ObjectDetector::Detect(
absl::StatusOr<std::vector<Detection>> ObjectDetector::DetectForVideo( absl::StatusOr<std::vector<Detection>> ObjectDetector::DetectForVideo(
mediapipe::Image image, int64 timestamp_ms, mediapipe::Image image, int64 timestamp_ms,
std::optional<mediapipe::NormalizedRect> image_processing_options) { std::optional<core::ImageProcessingOptions> image_processing_options) {
if (image.UsesGpu()) { if (image.UsesGpu()) {
return CreateStatusWithPayload( return CreateStatusWithPayload(
absl::StatusCode::kInvalidArgument, absl::StatusCode::kInvalidArgument,
absl::StrCat("GPU input images are currently not supported."), absl::StrCat("GPU input images are currently not supported."),
MediaPipeTasksStatus::kRunnerUnexpectedInputError); MediaPipeTasksStatus::kRunnerUnexpectedInputError);
} }
ASSIGN_OR_RETURN(NormalizedRect norm_rect, ASSIGN_OR_RETURN(
FillNormalizedRect(image_processing_options)); NormalizedRect norm_rect,
ConvertToNormalizedRect(image_processing_options, /*roi_allowed=*/false));
ASSIGN_OR_RETURN( ASSIGN_OR_RETURN(
auto output_packets, auto output_packets,
ProcessVideoData( ProcessVideoData(
@ -212,15 +190,16 @@ absl::StatusOr<std::vector<Detection>> ObjectDetector::DetectForVideo(
absl::Status ObjectDetector::DetectAsync( absl::Status ObjectDetector::DetectAsync(
Image image, int64 timestamp_ms, Image image, int64 timestamp_ms,
std::optional<mediapipe::NormalizedRect> image_processing_options) { std::optional<core::ImageProcessingOptions> image_processing_options) {
if (image.UsesGpu()) { if (image.UsesGpu()) {
return CreateStatusWithPayload( return CreateStatusWithPayload(
absl::StatusCode::kInvalidArgument, absl::StatusCode::kInvalidArgument,
absl::StrCat("GPU input images are currently not supported."), absl::StrCat("GPU input images are currently not supported."),
MediaPipeTasksStatus::kRunnerUnexpectedInputError); MediaPipeTasksStatus::kRunnerUnexpectedInputError);
} }
ASSIGN_OR_RETURN(NormalizedRect norm_rect, ASSIGN_OR_RETURN(
FillNormalizedRect(image_processing_options)); NormalizedRect norm_rect,
ConvertToNormalizedRect(image_processing_options, /*roi_allowed=*/false));
return SendLiveStreamData( return SendLiveStreamData(
{{kImageInStreamName, {{kImageInStreamName,
MakePacket<Image>(std::move(image)) MakePacket<Image>(std::move(image))

View File

@ -27,9 +27,9 @@ limitations under the License.
#include "absl/status/statusor.h" #include "absl/status/statusor.h"
#include "mediapipe/framework/formats/detection.pb.h" #include "mediapipe/framework/formats/detection.pb.h"
#include "mediapipe/framework/formats/image.h" #include "mediapipe/framework/formats/image.h"
#include "mediapipe/framework/formats/rect.pb.h"
#include "mediapipe/tasks/cc/core/base_options.h" #include "mediapipe/tasks/cc/core/base_options.h"
#include "mediapipe/tasks/cc/vision/core/base_vision_task_api.h" #include "mediapipe/tasks/cc/vision/core/base_vision_task_api.h"
#include "mediapipe/tasks/cc/vision/core/image_processing_options.h"
#include "mediapipe/tasks/cc/vision/core/running_mode.h" #include "mediapipe/tasks/cc/vision/core/running_mode.h"
namespace mediapipe { namespace mediapipe {
@ -154,10 +154,9 @@ class ObjectDetector : tasks::vision::core::BaseVisionTaskApi {
// after the yuv support is implemented. // after the yuv support is implemented.
// //
// The optional 'image_processing_options' parameter can be used to specify // The optional 'image_processing_options' parameter can be used to specify
// the rotation to apply to the image before performing classification, by // the rotation to apply to the image before performing detection, by
// setting its 'rotation' field in radians (e.g. 'M_PI / 2' for a 90° // setting its 'rotation_degrees' field. Note that specifying a
// anti-clockwise rotation). Note that specifying a region-of-interest using // region-of-interest using the 'region_of_interest' field is NOT supported
// the 'x_center', 'y_center', 'width' and 'height' fields is NOT supported
// and will result in an invalid argument error being returned. // and will result in an invalid argument error being returned.
// //
// For CPU images, the returned bounding boxes are expressed in the // For CPU images, the returned bounding boxes are expressed in the
@ -168,7 +167,7 @@ class ObjectDetector : tasks::vision::core::BaseVisionTaskApi {
// images after enabling the gpu support in MediaPipe Tasks. // images after enabling the gpu support in MediaPipe Tasks.
absl::StatusOr<std::vector<mediapipe::Detection>> Detect( absl::StatusOr<std::vector<mediapipe::Detection>> Detect(
mediapipe::Image image, mediapipe::Image image,
std::optional<mediapipe::NormalizedRect> image_processing_options = std::optional<core::ImageProcessingOptions> image_processing_options =
std::nullopt); std::nullopt);
// Performs object detection on the provided video frame. // Performs object detection on the provided video frame.
@ -180,10 +179,9 @@ class ObjectDetector : tasks::vision::core::BaseVisionTaskApi {
// must be monotonically increasing. // must be monotonically increasing.
// //
// The optional 'image_processing_options' parameter can be used to specify // The optional 'image_processing_options' parameter can be used to specify
// the rotation to apply to the image before performing classification, by // the rotation to apply to the image before performing detection, by
// setting its 'rotation' field in radians (e.g. 'M_PI / 2' for a 90° // setting its 'rotation_degrees' field. Note that specifying a
// anti-clockwise rotation). Note that specifying a region-of-interest using // region-of-interest using the 'region_of_interest' field is NOT supported
// the 'x_center', 'y_center', 'width' and 'height' fields is NOT supported
// and will result in an invalid argument error being returned. // and will result in an invalid argument error being returned.
// //
// For CPU images, the returned bounding boxes are expressed in the // For CPU images, the returned bounding boxes are expressed in the
@ -192,7 +190,7 @@ class ObjectDetector : tasks::vision::core::BaseVisionTaskApi {
// underlying image data. // underlying image data.
absl::StatusOr<std::vector<mediapipe::Detection>> DetectForVideo( absl::StatusOr<std::vector<mediapipe::Detection>> DetectForVideo(
mediapipe::Image image, int64 timestamp_ms, mediapipe::Image image, int64 timestamp_ms,
std::optional<mediapipe::NormalizedRect> image_processing_options = std::optional<core::ImageProcessingOptions> image_processing_options =
std::nullopt); std::nullopt);
// Sends live image data to perform object detection, and the results will be // Sends live image data to perform object detection, and the results will be
@ -206,10 +204,9 @@ class ObjectDetector : tasks::vision::core::BaseVisionTaskApi {
// increasing. // increasing.
// //
// The optional 'image_processing_options' parameter can be used to specify // The optional 'image_processing_options' parameter can be used to specify
// the rotation to apply to the image before performing classification, by // the rotation to apply to the image before performing detection, by
// setting its 'rotation' field in radians (e.g. 'M_PI / 2' for a 90° // setting its 'rotation_degrees' field. Note that specifying a
// anti-clockwise rotation). Note that specifying a region-of-interest using // region-of-interest using the 'region_of_interest' field is NOT supported
// the 'x_center', 'y_center', 'width' and 'height' fields is NOT supported
// and will result in an invalid argument error being returned. // and will result in an invalid argument error being returned.
// //
// The "result_callback" provides // The "result_callback" provides
@ -223,7 +220,7 @@ class ObjectDetector : tasks::vision::core::BaseVisionTaskApi {
// outside of the callback, callers need to make a copy of the image. // outside of the callback, callers need to make a copy of the image.
// - The input timestamp in milliseconds. // - The input timestamp in milliseconds.
absl::Status DetectAsync(mediapipe::Image image, int64 timestamp_ms, absl::Status DetectAsync(mediapipe::Image image, int64 timestamp_ms,
std::optional<mediapipe::NormalizedRect> std::optional<core::ImageProcessingOptions>
image_processing_options = std::nullopt); image_processing_options = std::nullopt);
// Shuts down the ObjectDetector when all works are done. // Shuts down the ObjectDetector when all works are done.

View File

@ -31,11 +31,12 @@ limitations under the License.
#include "mediapipe/framework/deps/file_path.h" #include "mediapipe/framework/deps/file_path.h"
#include "mediapipe/framework/formats/image.h" #include "mediapipe/framework/formats/image.h"
#include "mediapipe/framework/formats/location_data.pb.h" #include "mediapipe/framework/formats/location_data.pb.h"
#include "mediapipe/framework/formats/rect.pb.h"
#include "mediapipe/framework/port/gmock.h" #include "mediapipe/framework/port/gmock.h"
#include "mediapipe/framework/port/gtest.h" #include "mediapipe/framework/port/gtest.h"
#include "mediapipe/framework/port/parse_text_proto.h" #include "mediapipe/framework/port/parse_text_proto.h"
#include "mediapipe/framework/port/status_matchers.h" #include "mediapipe/framework/port/status_matchers.h"
#include "mediapipe/tasks/cc/components/containers/rect.h"
#include "mediapipe/tasks/cc/vision/core/image_processing_options.h"
#include "mediapipe/tasks/cc/vision/core/running_mode.h" #include "mediapipe/tasks/cc/vision/core/running_mode.h"
#include "mediapipe/tasks/cc/vision/utils/image_utils.h" #include "mediapipe/tasks/cc/vision/utils/image_utils.h"
#include "tensorflow/lite/c/common.h" #include "tensorflow/lite/c/common.h"
@ -64,6 +65,8 @@ namespace vision {
namespace { namespace {
using ::mediapipe::file::JoinPath; using ::mediapipe::file::JoinPath;
using ::mediapipe::tasks::components::containers::Rect;
using ::mediapipe::tasks::vision::core::ImageProcessingOptions;
using ::testing::HasSubstr; using ::testing::HasSubstr;
using ::testing::Optional; using ::testing::Optional;
@ -532,8 +535,8 @@ TEST_F(ImageModeTest, SucceedsWithRotation) {
JoinPath("./", kTestDataDirectory, kMobileSsdWithMetadata); JoinPath("./", kTestDataDirectory, kMobileSsdWithMetadata);
MP_ASSERT_OK_AND_ASSIGN(std::unique_ptr<ObjectDetector> object_detector, MP_ASSERT_OK_AND_ASSIGN(std::unique_ptr<ObjectDetector> object_detector,
ObjectDetector::Create(std::move(options))); ObjectDetector::Create(std::move(options)));
NormalizedRect image_processing_options; ImageProcessingOptions image_processing_options;
image_processing_options.set_rotation(M_PI / 2.0); image_processing_options.rotation_degrees = -90;
MP_ASSERT_OK_AND_ASSIGN( MP_ASSERT_OK_AND_ASSIGN(
auto results, object_detector->Detect(image, image_processing_options)); auto results, object_detector->Detect(image, image_processing_options));
MP_ASSERT_OK(object_detector->Close()); MP_ASSERT_OK(object_detector->Close());
@ -557,16 +560,17 @@ TEST_F(ImageModeTest, FailsWithRegionOfInterest) {
JoinPath("./", kTestDataDirectory, kMobileSsdWithMetadata); JoinPath("./", kTestDataDirectory, kMobileSsdWithMetadata);
MP_ASSERT_OK_AND_ASSIGN(std::unique_ptr<ObjectDetector> object_detector, MP_ASSERT_OK_AND_ASSIGN(std::unique_ptr<ObjectDetector> object_detector,
ObjectDetector::Create(std::move(options))); ObjectDetector::Create(std::move(options)));
NormalizedRect image_processing_options; Rect roi{/*left=*/0.1, /*top=*/0, /*right=*/0.9, /*bottom=*/1};
image_processing_options.set_x_center(0.5); ImageProcessingOptions image_processing_options{roi, /*rotation_degrees=*/0};
image_processing_options.set_y_center(0.5);
image_processing_options.set_width(1.0);
image_processing_options.set_height(1.0);
auto results = object_detector->Detect(image, image_processing_options); auto results = object_detector->Detect(image, image_processing_options);
EXPECT_EQ(results.status().code(), absl::StatusCode::kInvalidArgument); EXPECT_EQ(results.status().code(), absl::StatusCode::kInvalidArgument);
EXPECT_THAT(results.status().message(), EXPECT_THAT(results.status().message(),
HasSubstr("ObjectDetector does not support region-of-interest")); HasSubstr("This task doesn't support region-of-interest"));
EXPECT_THAT(
results.status().GetPayload(kMediaPipeTasksPayload),
Optional(absl::Cord(absl::StrCat(
MediaPipeTasksStatus::kImageProcessingInvalidArgumentError))));
} }
class VideoModeTest : public tflite_shims::testing::Test {}; class VideoModeTest : public tflite_shims::testing::Test {};