Internal change
PiperOrigin-RevId: 482906478
This commit is contained in:
parent
d0437b7f91
commit
4a6c23a76a
|
@ -21,12 +21,23 @@ cc_library(
|
|||
hdrs = ["running_mode.h"],
|
||||
)
|
||||
|
||||
# Header-only target exposing the ImageProcessingOptions struct (optional
# region-of-interest plus rotation in degrees) to the vision task APIs.
cc_library(
|
||||
name = "image_processing_options",
|
||||
hdrs = ["image_processing_options.h"],
|
||||
deps = [
|
||||
"//mediapipe/tasks/cc/components/containers:rect",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "base_vision_task_api",
|
||||
hdrs = ["base_vision_task_api.h"],
|
||||
deps = [
|
||||
":image_processing_options",
|
||||
":running_mode",
|
||||
"//mediapipe/calculators/core:flow_limiter_calculator",
|
||||
"//mediapipe/framework/formats:rect_cc_proto",
|
||||
"//mediapipe/tasks/cc/components/containers:rect",
|
||||
"//mediapipe/tasks/cc/core:base_task_api",
|
||||
"//mediapipe/tasks/cc/core:task_runner",
|
||||
"@com_google_absl//absl/status",
|
||||
|
|
|
@ -16,15 +16,20 @@ limitations under the License.
|
|||
#ifndef MEDIAPIPE_TASKS_CC_VISION_CORE_BASE_VISION_TASK_API_H_
|
||||
#define MEDIAPIPE_TASKS_CC_VISION_CORE_BASE_VISION_TASK_API_H_
|
||||
|
||||
#include <cmath>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
|
||||
#include "absl/status/status.h"
|
||||
#include "absl/status/statusor.h"
|
||||
#include "absl/strings/str_cat.h"
|
||||
#include "mediapipe/framework/formats/rect.pb.h"
|
||||
#include "mediapipe/tasks/cc/components/containers/rect.h"
|
||||
#include "mediapipe/tasks/cc/core/base_task_api.h"
|
||||
#include "mediapipe/tasks/cc/core/task_runner.h"
|
||||
#include "mediapipe/tasks/cc/vision/core/image_processing_options.h"
|
||||
#include "mediapipe/tasks/cc/vision/core/running_mode.h"
|
||||
|
||||
namespace mediapipe {
|
||||
|
@ -87,6 +92,60 @@ class BaseVisionTaskApi : public tasks::core::BaseTaskApi {
|
|||
return runner_->Send(std::move(inputs));
|
||||
}
|
||||
|
||||
// Convert from ImageProcessingOptions to NormalizedRect, performing sanity
|
||||
// checks on-the-fly. If the input ImageProcessingOptions is not present,
|
||||
// returns a default NormalizedRect covering the whole image with rotation set
|
||||
// to 0. If 'roi_allowed' is false, an error will be returned if the input
|
||||
// ImageProcessingOptions has its 'region_of_interest' field set.
//
// Every validation failure is returned as an absl::StatusCode::kInvalidArgument
// status created with the
// MediaPipeTasksStatus::kImageProcessingInvalidArgumentError payload. This
// happens when:
//   - 'rotation_degrees' is not a multiple of 90,
//   - a region-of-interest is set while 'roi_allowed' is false,
//   - the region-of-interest does not satisfy left < right and top < bottom,
//   - any region-of-interest coordinate lies outside [0,1].
|
||||
static absl::StatusOr<mediapipe::NormalizedRect> ConvertToNormalizedRect(
|
||||
std::optional<ImageProcessingOptions> options, bool roi_allowed = true) {
|
||||
// Start from the default: no rotation, rectangle centered on the image and
// spanning its full normalized width and height.
mediapipe::NormalizedRect normalized_rect;
|
||||
normalized_rect.set_rotation(0);
|
||||
normalized_rect.set_x_center(0.5);
|
||||
normalized_rect.set_y_center(0.5);
|
||||
normalized_rect.set_width(1.0);
|
||||
normalized_rect.set_height(1.0);
|
||||
// No options provided: the default rectangle is the answer.
if (!options.has_value()) {
|
||||
return normalized_rect;
|
||||
}
|
||||
|
||||
// Only multiples of 90° (positive or negative) are accepted.
if (options->rotation_degrees % 90 != 0) {
|
||||
return CreateStatusWithPayload(
|
||||
absl::StatusCode::kInvalidArgument,
|
||||
"Expected rotation to be a multiple of 90°.",
|
||||
MediaPipeTasksStatus::kImageProcessingInvalidArgumentError);
|
||||
}
|
||||
// Convert to radians counter-clockwise. 'rotation_degrees' is expressed
// clockwise, hence the sign flip.
|
||||
normalized_rect.set_rotation(-options->rotation_degrees * M_PI / 180.0);
|
||||
|
||||
if (options->region_of_interest.has_value()) {
|
||||
// Tasks that cannot crop reject any region-of-interest outright.
if (!roi_allowed) {
|
||||
return CreateStatusWithPayload(
|
||||
absl::StatusCode::kInvalidArgument,
|
||||
"This task doesn't support region-of-interest.",
|
||||
MediaPipeTasksStatus::kImageProcessingInvalidArgumentError);
|
||||
}
|
||||
auto& roi = *options->region_of_interest;
|
||||
// Reject empty or inverted rectangles.
if (roi.left >= roi.right || roi.top >= roi.bottom) {
|
||||
return CreateStatusWithPayload(
|
||||
absl::StatusCode::kInvalidArgument,
|
||||
"Expected Rect with left < right and top < bottom.",
|
||||
MediaPipeTasksStatus::kImageProcessingInvalidArgumentError);
|
||||
}
|
||||
// Reject rectangles that extend outside the normalized [0,1] range.
if (roi.left < 0 || roi.top < 0 || roi.right > 1 || roi.bottom > 1) {
|
||||
return CreateStatusWithPayload(
|
||||
absl::StatusCode::kInvalidArgument,
|
||||
"Expected Rect values to be in [0,1].",
|
||||
MediaPipeTasksStatus::kImageProcessingInvalidArgumentError);
|
||||
}
|
||||
// Translate the edge-based Rect into the center/size form used by
// NormalizedRect.
normalized_rect.set_x_center((roi.left + roi.right) / 2.0);
|
||||
normalized_rect.set_y_center((roi.top + roi.bottom) / 2.0);
|
||||
normalized_rect.set_width(roi.right - roi.left);
|
||||
normalized_rect.set_height(roi.bottom - roi.top);
|
||||
}
|
||||
return normalized_rect;
|
||||
}
|
||||
|
||||
private:
|
||||
RunningMode running_mode_;
|
||||
};
|
||||
|
|
52
mediapipe/tasks/cc/vision/core/image_processing_options.h
Normal file
52
mediapipe/tasks/cc/vision/core/image_processing_options.h
Normal file
|
@ -0,0 +1,52 @@
|
|||
/* Copyright 2022 The MediaPipe Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef MEDIAPIPE_TASKS_CC_VISION_CORE_IMAGE_PROCESSING_OPTIONS_H_
|
||||
#define MEDIAPIPE_TASKS_CC_VISION_CORE_IMAGE_PROCESSING_OPTIONS_H_
|
||||
|
||||
#include <optional>
|
||||
|
||||
#include "mediapipe/tasks/cc/components/containers/rect.h"
|
||||
|
||||
namespace mediapipe {
|
||||
namespace tasks {
|
||||
namespace vision {
|
||||
namespace core {
|
||||
|
||||
// Options for image processing.
|
||||
//
|
||||
// If both region-of-interest and rotation are specified, the crop around the
|
||||
// region-of-interest is extracted first, then the specified rotation is applied
|
||||
// to the crop.
|
||||
struct ImageProcessingOptions {
|
||||
// The optional region-of-interest to crop from the image. If not specified,
|
||||
// the full image is used.
|
||||
//
|
||||
// Coordinates must be in [0,1] with 'left' < 'right' and 'top' < 'bottom'.
|
||||
std::optional<components::containers::Rect> region_of_interest = std::nullopt;
|
||||
|
||||
// The rotation to apply to the image (or cropped region-of-interest), in
|
||||
// degrees clockwise. A negative value rotates counter-clockwise (e.g. -90
// for a 90° anti-clockwise rotation).
|
||||
//
|
||||
// The rotation must be a multiple (positive or negative) of 90°.
|
||||
int rotation_degrees = 0;
|
||||
};
|
||||
|
||||
} // namespace core
|
||||
} // namespace vision
|
||||
} // namespace tasks
|
||||
} // namespace mediapipe
|
||||
|
||||
#endif // MEDIAPIPE_TASKS_CC_VISION_CORE_IMAGE_PROCESSING_OPTIONS_H_
|
|
@ -137,6 +137,7 @@ cc_library(
|
|||
"//mediapipe/tasks/cc/core:utils",
|
||||
"//mediapipe/tasks/cc/core/proto:inference_subgraph_cc_proto",
|
||||
"//mediapipe/tasks/cc/vision/core:base_vision_task_api",
|
||||
"//mediapipe/tasks/cc/vision/core:image_processing_options",
|
||||
"//mediapipe/tasks/cc/vision/core:running_mode",
|
||||
"//mediapipe/tasks/cc/vision/core:vision_task_api_factory",
|
||||
"//mediapipe/tasks/cc/vision/gesture_recognizer/proto:gesture_recognizer_graph_options_cc_proto",
|
||||
|
|
|
@ -39,6 +39,7 @@ limitations under the License.
|
|||
#include "mediapipe/tasks/cc/core/task_runner.h"
|
||||
#include "mediapipe/tasks/cc/core/utils.h"
|
||||
#include "mediapipe/tasks/cc/vision/core/base_vision_task_api.h"
|
||||
#include "mediapipe/tasks/cc/vision/core/image_processing_options.h"
|
||||
#include "mediapipe/tasks/cc/vision/core/vision_task_api_factory.h"
|
||||
#include "mediapipe/tasks/cc/vision/gesture_recognizer/proto/gesture_recognizer_graph_options.pb.h"
|
||||
#include "mediapipe/tasks/cc/vision/gesture_recognizer/proto/hand_gesture_recognizer_graph_options.pb.h"
|
||||
|
@ -76,31 +77,6 @@ constexpr char kHandWorldLandmarksTag[] = "WORLD_LANDMARKS";
|
|||
constexpr char kHandWorldLandmarksStreamName[] = "world_landmarks";
|
||||
constexpr int kMicroSecondsPerMilliSecond = 1000;
|
||||
|
||||
// Returns a NormalizedRect filling the whole image. If input is present, its
|
||||
// rotation is set in the returned NormalizedRect and a check is performed to
|
||||
// make sure no region-of-interest was provided. Otherwise, rotation is set to
|
||||
// 0.
|
||||
absl::StatusOr<NormalizedRect> FillNormalizedRect(
|
||||
std::optional<NormalizedRect> normalized_rect) {
|
||||
NormalizedRect result;
|
||||
if (normalized_rect.has_value()) {
|
||||
result = *normalized_rect;
|
||||
}
|
||||
bool has_coordinates = result.has_x_center() || result.has_y_center() ||
|
||||
result.has_width() || result.has_height();
|
||||
if (has_coordinates) {
|
||||
return CreateStatusWithPayload(
|
||||
absl::StatusCode::kInvalidArgument,
|
||||
"GestureRecognizer does not support region-of-interest.",
|
||||
MediaPipeTasksStatus::kInvalidArgumentError);
|
||||
}
|
||||
result.set_x_center(0.5);
|
||||
result.set_y_center(0.5);
|
||||
result.set_width(1);
|
||||
result.set_height(1);
|
||||
return result;
|
||||
}
|
||||
|
||||
// Creates a MediaPipe graph config that contains a subgraph node of
|
||||
// "mediapipe.tasks.vision.GestureRecognizerGraph". If the task is running
|
||||
// in the live stream mode, a "FlowLimiterCalculator" will be added to limit the
|
||||
|
@ -248,15 +224,16 @@ absl::StatusOr<std::unique_ptr<GestureRecognizer>> GestureRecognizer::Create(
|
|||
|
||||
absl::StatusOr<GestureRecognitionResult> GestureRecognizer::Recognize(
|
||||
mediapipe::Image image,
|
||||
std::optional<mediapipe::NormalizedRect> image_processing_options) {
|
||||
std::optional<core::ImageProcessingOptions> image_processing_options) {
|
||||
if (image.UsesGpu()) {
|
||||
return CreateStatusWithPayload(
|
||||
absl::StatusCode::kInvalidArgument,
|
||||
"GPU input images are currently not supported.",
|
||||
MediaPipeTasksStatus::kRunnerUnexpectedInputError);
|
||||
}
|
||||
ASSIGN_OR_RETURN(NormalizedRect norm_rect,
|
||||
FillNormalizedRect(image_processing_options));
|
||||
ASSIGN_OR_RETURN(
|
||||
NormalizedRect norm_rect,
|
||||
ConvertToNormalizedRect(image_processing_options, /*roi_allowed=*/false));
|
||||
ASSIGN_OR_RETURN(
|
||||
auto output_packets,
|
||||
ProcessImageData(
|
||||
|
@ -283,15 +260,16 @@ absl::StatusOr<GestureRecognitionResult> GestureRecognizer::Recognize(
|
|||
|
||||
absl::StatusOr<GestureRecognitionResult> GestureRecognizer::RecognizeForVideo(
|
||||
mediapipe::Image image, int64 timestamp_ms,
|
||||
std::optional<mediapipe::NormalizedRect> image_processing_options) {
|
||||
std::optional<core::ImageProcessingOptions> image_processing_options) {
|
||||
if (image.UsesGpu()) {
|
||||
return CreateStatusWithPayload(
|
||||
absl::StatusCode::kInvalidArgument,
|
||||
absl::StrCat("GPU input images are currently not supported."),
|
||||
MediaPipeTasksStatus::kRunnerUnexpectedInputError);
|
||||
}
|
||||
ASSIGN_OR_RETURN(NormalizedRect norm_rect,
|
||||
FillNormalizedRect(image_processing_options));
|
||||
ASSIGN_OR_RETURN(
|
||||
NormalizedRect norm_rect,
|
||||
ConvertToNormalizedRect(image_processing_options, /*roi_allowed=*/false));
|
||||
ASSIGN_OR_RETURN(
|
||||
auto output_packets,
|
||||
ProcessVideoData(
|
||||
|
@ -321,15 +299,16 @@ absl::StatusOr<GestureRecognitionResult> GestureRecognizer::RecognizeForVideo(
|
|||
|
||||
absl::Status GestureRecognizer::RecognizeAsync(
|
||||
mediapipe::Image image, int64 timestamp_ms,
|
||||
std::optional<mediapipe::NormalizedRect> image_processing_options) {
|
||||
std::optional<core::ImageProcessingOptions> image_processing_options) {
|
||||
if (image.UsesGpu()) {
|
||||
return CreateStatusWithPayload(
|
||||
absl::StatusCode::kInvalidArgument,
|
||||
absl::StrCat("GPU input images are currently not supported."),
|
||||
MediaPipeTasksStatus::kRunnerUnexpectedInputError);
|
||||
}
|
||||
ASSIGN_OR_RETURN(NormalizedRect norm_rect,
|
||||
FillNormalizedRect(image_processing_options));
|
||||
ASSIGN_OR_RETURN(
|
||||
NormalizedRect norm_rect,
|
||||
ConvertToNormalizedRect(image_processing_options, /*roi_allowed=*/false));
|
||||
return SendLiveStreamData(
|
||||
{{kImageInStreamName,
|
||||
MakePacket<Image>(std::move(image))
|
||||
|
|
|
@ -23,10 +23,10 @@ limitations under the License.
|
|||
#include "mediapipe/framework/formats/classification.pb.h"
|
||||
#include "mediapipe/framework/formats/image.h"
|
||||
#include "mediapipe/framework/formats/landmark.pb.h"
|
||||
#include "mediapipe/framework/formats/rect.pb.h"
|
||||
#include "mediapipe/tasks/cc/components/containers/gesture_recognition_result.h"
|
||||
#include "mediapipe/tasks/cc/core/base_options.h"
|
||||
#include "mediapipe/tasks/cc/vision/core/base_vision_task_api.h"
|
||||
#include "mediapipe/tasks/cc/vision/core/image_processing_options.h"
|
||||
#include "mediapipe/tasks/cc/vision/core/running_mode.h"
|
||||
|
||||
namespace mediapipe {
|
||||
|
@ -129,36 +129,36 @@ class GestureRecognizer : tasks::vision::core::BaseVisionTaskApi {
|
|||
// Only use this method when the GestureRecognizer is created with the image
|
||||
// running mode.
|
||||
//
|
||||
// image - mediapipe::Image
|
||||
// Image to perform hand gesture recognition on.
|
||||
// imageProcessingOptions - std::optional<NormalizedRect>
|
||||
// If provided, can be used to specify the rotation to apply to the image
|
||||
// before performing classification, by setting its 'rotation' field in
|
||||
// radians (e.g. 'M_PI / 2' for a 90° anti-clockwise rotation). Note that
|
||||
// specifying a region-of-interest using the 'x_center', 'y_center', 'width'
|
||||
// and 'height' fields is NOT supported and will result in an invalid
|
||||
// argument error being returned.
|
||||
// The optional 'image_processing_options' parameter can be used to specify
|
||||
// the rotation to apply to the image before performing recognition, by
|
||||
// setting its 'rotation_degrees' field. Note that specifying a
|
||||
// region-of-interest using the 'region_of_interest' field is NOT supported
|
||||
// and will result in an invalid argument error being returned.
|
||||
//
|
||||
// The image can be of any size with format RGB or RGBA.
|
||||
// TODO: Describes how the input image will be preprocessed
|
||||
// after the yuv support is implemented.
|
||||
// TODO: use an ImageProcessingOptions struct instead of
|
||||
// NormalizedRect.
|
||||
absl::StatusOr<components::containers::GestureRecognitionResult> Recognize(
|
||||
Image image,
|
||||
std::optional<mediapipe::NormalizedRect> image_processing_options =
|
||||
std::optional<core::ImageProcessingOptions> image_processing_options =
|
||||
std::nullopt);
|
||||
|
||||
// Performs gesture recognition on the provided video frame.
|
||||
// Only use this method when the GestureRecognizer is created with the video
|
||||
// running mode.
|
||||
//
|
||||
// The optional 'image_processing_options' parameter can be used to specify
|
||||
// the rotation to apply to the image before performing recognition, by
|
||||
// setting its 'rotation_degrees' field. Note that specifying a
|
||||
// region-of-interest using the 'region_of_interest' field is NOT supported
|
||||
// and will result in an invalid argument error being returned.
|
||||
//
|
||||
// The image can be of any size with format RGB or RGBA. It's required to
|
||||
// provide the video frame's timestamp (in milliseconds). The input timestamps
|
||||
// must be monotonically increasing.
|
||||
absl::StatusOr<components::containers::GestureRecognitionResult>
|
||||
RecognizeForVideo(Image image, int64 timestamp_ms,
|
||||
std::optional<mediapipe::NormalizedRect>
|
||||
std::optional<core::ImageProcessingOptions>
|
||||
image_processing_options = std::nullopt);
|
||||
|
||||
// Sends live image data to perform gesture recognition, and the results will
|
||||
|
@ -171,6 +171,12 @@ class GestureRecognizer : tasks::vision::core::BaseVisionTaskApi {
|
|||
// sent to the gesture recognizer. The input timestamps must be monotonically
|
||||
// increasing.
|
||||
//
|
||||
// The optional 'image_processing_options' parameter can be used to specify
|
||||
// the rotation to apply to the image before performing recognition, by
|
||||
// setting its 'rotation_degrees' field. Note that specifying a
|
||||
// region-of-interest using the 'region_of_interest' field is NOT supported
|
||||
// and will result in an invalid argument error being returned.
|
||||
//
|
||||
// The "result_callback" provides
|
||||
// - A vector of GestureRecognitionResult, each is the recognized results
|
||||
// for an input frame.
|
||||
|
@ -180,7 +186,7 @@ class GestureRecognizer : tasks::vision::core::BaseVisionTaskApi {
|
|||
// outside of the callback, callers need to make a copy of the image.
|
||||
// - The input timestamp in milliseconds.
|
||||
absl::Status RecognizeAsync(Image image, int64 timestamp_ms,
|
||||
std::optional<mediapipe::NormalizedRect>
|
||||
std::optional<core::ImageProcessingOptions>
|
||||
image_processing_options = std::nullopt);
|
||||
|
||||
// Shuts down the GestureRecognizer when all works are done.
|
||||
|
|
|
@ -59,6 +59,7 @@ cc_library(
|
|||
"//mediapipe/tasks/cc/core/proto:base_options_cc_proto",
|
||||
"//mediapipe/tasks/cc/core/proto:inference_subgraph_cc_proto",
|
||||
"//mediapipe/tasks/cc/vision/core:base_vision_task_api",
|
||||
"//mediapipe/tasks/cc/vision/core:image_processing_options",
|
||||
"//mediapipe/tasks/cc/vision/core:running_mode",
|
||||
"//mediapipe/tasks/cc/vision/core:vision_task_api_factory",
|
||||
"//mediapipe/tasks/cc/vision/image_classifier/proto:image_classifier_graph_options_cc_proto",
|
||||
|
|
|
@ -34,6 +34,7 @@ limitations under the License.
|
|||
#include "mediapipe/tasks/cc/core/proto/inference_subgraph.pb.h"
|
||||
#include "mediapipe/tasks/cc/core/task_runner.h"
|
||||
#include "mediapipe/tasks/cc/core/utils.h"
|
||||
#include "mediapipe/tasks/cc/vision/core/image_processing_options.h"
|
||||
#include "mediapipe/tasks/cc/vision/core/running_mode.h"
|
||||
#include "mediapipe/tasks/cc/vision/core/vision_task_api_factory.h"
|
||||
#include "mediapipe/tasks/cc/vision/image_classifier/proto/image_classifier_graph_options.pb.h"
|
||||
|
@ -59,26 +60,6 @@ constexpr int kMicroSecondsPerMilliSecond = 1000;
|
|||
using ::mediapipe::tasks::components::containers::proto::ClassificationResult;
|
||||
using ::mediapipe::tasks::core::PacketMap;
|
||||
|
||||
// Returns a NormalizedRect covering the full image if input is not present.
|
||||
// Otherwise, makes sure the x_center, y_center, width and height are set in
|
||||
// case only a rotation was provided in the input.
|
||||
NormalizedRect FillNormalizedRect(
|
||||
std::optional<NormalizedRect> normalized_rect) {
|
||||
NormalizedRect result;
|
||||
if (normalized_rect.has_value()) {
|
||||
result = *normalized_rect;
|
||||
}
|
||||
bool has_coordinates = result.has_x_center() || result.has_y_center() ||
|
||||
result.has_width() || result.has_height();
|
||||
if (!has_coordinates) {
|
||||
result.set_x_center(0.5);
|
||||
result.set_y_center(0.5);
|
||||
result.set_width(1);
|
||||
result.set_height(1);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// Creates a MediaPipe graph config that contains a subgraph node of
|
||||
// type "ImageClassifierGraph". If the task is running in the live stream mode,
|
||||
// a "FlowLimiterCalculator" will be added to limit the number of frames in
|
||||
|
@ -164,14 +145,16 @@ absl::StatusOr<std::unique_ptr<ImageClassifier>> ImageClassifier::Create(
|
|||
}
|
||||
|
||||
absl::StatusOr<ClassificationResult> ImageClassifier::Classify(
|
||||
Image image, std::optional<NormalizedRect> image_processing_options) {
|
||||
Image image,
|
||||
std::optional<core::ImageProcessingOptions> image_processing_options) {
|
||||
if (image.UsesGpu()) {
|
||||
return CreateStatusWithPayload(
|
||||
absl::StatusCode::kInvalidArgument,
|
||||
"GPU input images are currently not supported.",
|
||||
MediaPipeTasksStatus::kRunnerUnexpectedInputError);
|
||||
}
|
||||
NormalizedRect norm_rect = FillNormalizedRect(image_processing_options);
|
||||
ASSIGN_OR_RETURN(NormalizedRect norm_rect,
|
||||
ConvertToNormalizedRect(image_processing_options));
|
||||
ASSIGN_OR_RETURN(
|
||||
auto output_packets,
|
||||
ProcessImageData(
|
||||
|
@ -183,14 +166,15 @@ absl::StatusOr<ClassificationResult> ImageClassifier::Classify(
|
|||
|
||||
absl::StatusOr<ClassificationResult> ImageClassifier::ClassifyForVideo(
|
||||
Image image, int64 timestamp_ms,
|
||||
std::optional<NormalizedRect> image_processing_options) {
|
||||
std::optional<core::ImageProcessingOptions> image_processing_options) {
|
||||
if (image.UsesGpu()) {
|
||||
return CreateStatusWithPayload(
|
||||
absl::StatusCode::kInvalidArgument,
|
||||
"GPU input images are currently not supported.",
|
||||
MediaPipeTasksStatus::kRunnerUnexpectedInputError);
|
||||
}
|
||||
NormalizedRect norm_rect = FillNormalizedRect(image_processing_options);
|
||||
ASSIGN_OR_RETURN(NormalizedRect norm_rect,
|
||||
ConvertToNormalizedRect(image_processing_options));
|
||||
ASSIGN_OR_RETURN(
|
||||
auto output_packets,
|
||||
ProcessVideoData(
|
||||
|
@ -206,14 +190,15 @@ absl::StatusOr<ClassificationResult> ImageClassifier::ClassifyForVideo(
|
|||
|
||||
absl::Status ImageClassifier::ClassifyAsync(
|
||||
Image image, int64 timestamp_ms,
|
||||
std::optional<NormalizedRect> image_processing_options) {
|
||||
std::optional<core::ImageProcessingOptions> image_processing_options) {
|
||||
if (image.UsesGpu()) {
|
||||
return CreateStatusWithPayload(
|
||||
absl::StatusCode::kInvalidArgument,
|
||||
"GPU input images are currently not supported.",
|
||||
MediaPipeTasksStatus::kRunnerUnexpectedInputError);
|
||||
}
|
||||
NormalizedRect norm_rect = FillNormalizedRect(image_processing_options);
|
||||
ASSIGN_OR_RETURN(NormalizedRect norm_rect,
|
||||
ConvertToNormalizedRect(image_processing_options));
|
||||
return SendLiveStreamData(
|
||||
{{kImageInStreamName,
|
||||
MakePacket<Image>(std::move(image))
|
||||
|
|
|
@ -22,11 +22,11 @@ limitations under the License.
|
|||
|
||||
#include "absl/status/statusor.h"
|
||||
#include "mediapipe/framework/formats/image.h"
|
||||
#include "mediapipe/framework/formats/rect.pb.h"
|
||||
#include "mediapipe/tasks/cc/components/containers/proto/classifications.pb.h"
|
||||
#include "mediapipe/tasks/cc/components/processors/classifier_options.h"
|
||||
#include "mediapipe/tasks/cc/core/base_options.h"
|
||||
#include "mediapipe/tasks/cc/vision/core/base_vision_task_api.h"
|
||||
#include "mediapipe/tasks/cc/vision/core/image_processing_options.h"
|
||||
#include "mediapipe/tasks/cc/vision/core/running_mode.h"
|
||||
|
||||
namespace mediapipe {
|
||||
|
@ -109,12 +109,10 @@ class ImageClassifier : tasks::vision::core::BaseVisionTaskApi {
|
|||
//
|
||||
// The optional 'image_processing_options' parameter can be used to specify:
|
||||
// - the rotation to apply to the image before performing classification, by
|
||||
// setting its 'rotation' field in radians (e.g. 'M_PI / 2' for a 90°
|
||||
// anti-clockwise rotation).
|
||||
// setting its 'rotation_degrees' field.
|
||||
// and/or
|
||||
// - the region-of-interest on which to perform classification, by setting its
|
||||
// 'x_center', 'y_center', 'width' and 'height' fields. If none of these is
|
||||
// set, they will automatically be set to cover the full image.
|
||||
// 'region_of_interest' field. If not specified, the full image is used.
|
||||
// If both are specified, the crop around the region-of-interest is extracted
|
||||
// first, then the specified rotation is applied to the crop.
|
||||
//
|
||||
|
@ -126,19 +124,17 @@ class ImageClassifier : tasks::vision::core::BaseVisionTaskApi {
|
|||
// YUVToImageCalculator is integrated.
|
||||
absl::StatusOr<components::containers::proto::ClassificationResult> Classify(
|
||||
mediapipe::Image image,
|
||||
std::optional<mediapipe::NormalizedRect> image_processing_options =
|
||||
std::optional<core::ImageProcessingOptions> image_processing_options =
|
||||
std::nullopt);
|
||||
|
||||
// Performs image classification on the provided video frame.
|
||||
//
|
||||
// The optional 'image_processing_options' parameter can be used to specify:
|
||||
// - the rotation to apply to the image before performing classification, by
|
||||
// setting its 'rotation' field in radians (e.g. 'M_PI / 2' for a 90°
|
||||
// anti-clockwise rotation).
|
||||
// setting its 'rotation_degrees' field.
|
||||
// and/or
|
||||
// - the region-of-interest on which to perform classification, by setting its
|
||||
// 'x_center', 'y_center', 'width' and 'height' fields. If none of these is
|
||||
// set, they will automatically be set to cover the full image.
|
||||
// 'region_of_interest' field. If not specified, the full image is used.
|
||||
// If both are specified, the crop around the region-of-interest is extracted
|
||||
// first, then the specified rotation is applied to the crop.
|
||||
//
|
||||
|
@ -150,7 +146,7 @@ class ImageClassifier : tasks::vision::core::BaseVisionTaskApi {
|
|||
// must be monotonically increasing.
|
||||
absl::StatusOr<components::containers::proto::ClassificationResult>
|
||||
ClassifyForVideo(mediapipe::Image image, int64 timestamp_ms,
|
||||
std::optional<mediapipe::NormalizedRect>
|
||||
std::optional<core::ImageProcessingOptions>
|
||||
image_processing_options = std::nullopt);
|
||||
|
||||
// Sends live image data to image classification, and the results will be
|
||||
|
@ -158,12 +154,10 @@ class ImageClassifier : tasks::vision::core::BaseVisionTaskApi {
|
|||
//
|
||||
// The optional 'image_processing_options' parameter can be used to specify:
|
||||
// - the rotation to apply to the image before performing classification, by
|
||||
// setting its 'rotation' field in radians (e.g. 'M_PI / 2' for a 90°
|
||||
// anti-clockwise rotation).
|
||||
// setting its 'rotation_degrees' field.
|
||||
// and/or
|
||||
// - the region-of-interest on which to perform classification, by setting its
|
||||
// 'x_center', 'y_center', 'width' and 'height' fields. If none of these is
|
||||
// set, they will automatically be set to cover the full image.
|
||||
// 'region_of_interest' field. If not specified, the full image is used.
|
||||
// If both are specified, the crop around the region-of-interest is extracted
|
||||
// first, then the specified rotation is applied to the crop.
|
||||
//
|
||||
|
@ -175,7 +169,7 @@ class ImageClassifier : tasks::vision::core::BaseVisionTaskApi {
|
|||
// sent to the image classifier. The input timestamps must be monotonically
|
||||
// increasing.
|
||||
//
|
||||
// The "result_callback" prvoides
|
||||
// The "result_callback" provides:
|
||||
// - The classification results as a ClassificationResult object.
|
||||
// - The const reference to the corresponding input image that the image
|
||||
// classifier runs on. Note that the const reference to the image will no
|
||||
|
@ -183,12 +177,9 @@ class ImageClassifier : tasks::vision::core::BaseVisionTaskApi {
|
|||
// outside of the callback, callers need to make a copy of the image.
|
||||
// - The input timestamp in milliseconds.
|
||||
absl::Status ClassifyAsync(mediapipe::Image image, int64 timestamp_ms,
|
||||
std::optional<mediapipe::NormalizedRect>
|
||||
std::optional<core::ImageProcessingOptions>
|
||||
image_processing_options = std::nullopt);
|
||||
|
||||
// TODO: add Classify() variants taking a region of interest as
|
||||
// additional argument.
|
||||
|
||||
// Shuts down the ImageClassifier when all works are done.
|
||||
absl::Status Close() { return runner_->Close(); }
|
||||
};
|
||||
|
|
|
@ -27,7 +27,6 @@ limitations under the License.
|
|||
#include "absl/strings/str_format.h"
|
||||
#include "mediapipe/framework/deps/file_path.h"
|
||||
#include "mediapipe/framework/formats/image.h"
|
||||
#include "mediapipe/framework/formats/rect.pb.h"
|
||||
#include "mediapipe/framework/port/gmock.h"
|
||||
#include "mediapipe/framework/port/gtest.h"
|
||||
#include "mediapipe/framework/port/parse_text_proto.h"
|
||||
|
@ -35,6 +34,8 @@ limitations under the License.
|
|||
#include "mediapipe/tasks/cc/common.h"
|
||||
#include "mediapipe/tasks/cc/components/containers/proto/category.pb.h"
|
||||
#include "mediapipe/tasks/cc/components/containers/proto/classifications.pb.h"
|
||||
#include "mediapipe/tasks/cc/components/containers/rect.h"
|
||||
#include "mediapipe/tasks/cc/vision/core/image_processing_options.h"
|
||||
#include "mediapipe/tasks/cc/vision/core/running_mode.h"
|
||||
#include "mediapipe/tasks/cc/vision/utils/image_utils.h"
|
||||
#include "tensorflow/lite/core/api/op_resolver.h"
|
||||
|
@ -49,9 +50,11 @@ namespace image_classifier {
|
|||
namespace {
|
||||
|
||||
using ::mediapipe::file::JoinPath;
|
||||
using ::mediapipe::tasks::components::containers::Rect;
|
||||
using ::mediapipe::tasks::components::containers::proto::ClassificationEntry;
|
||||
using ::mediapipe::tasks::components::containers::proto::ClassificationResult;
|
||||
using ::mediapipe::tasks::components::containers::proto::Classifications;
|
||||
using ::mediapipe::tasks::vision::core::ImageProcessingOptions;
|
||||
using ::testing::HasSubstr;
|
||||
using ::testing::Optional;
|
||||
|
||||
|
@ -547,12 +550,9 @@ TEST_F(ImageModeTest, SucceedsWithRegionOfInterest) {
|
|||
options->classifier_options.max_results = 1;
|
||||
MP_ASSERT_OK_AND_ASSIGN(std::unique_ptr<ImageClassifier> image_classifier,
|
||||
ImageClassifier::Create(std::move(options)));
|
||||
// Crop around the soccer ball.
|
||||
NormalizedRect image_processing_options;
|
||||
image_processing_options.set_x_center(0.532);
|
||||
image_processing_options.set_y_center(0.521);
|
||||
image_processing_options.set_width(0.164);
|
||||
image_processing_options.set_height(0.427);
|
||||
// Region-of-interest around the soccer ball.
|
||||
Rect roi{/*left=*/0.45, /*top=*/0.3075, /*right=*/0.614, /*bottom=*/0.7345};
|
||||
ImageProcessingOptions image_processing_options{roi, /*rotation_degrees=*/0};
|
||||
|
||||
MP_ASSERT_OK_AND_ASSIGN(auto results, image_classifier->Classify(
|
||||
image, image_processing_options));
|
||||
|
@ -572,8 +572,8 @@ TEST_F(ImageModeTest, SucceedsWithRotation) {
|
|||
ImageClassifier::Create(std::move(options)));
|
||||
|
||||
// Specify a 90° anti-clockwise rotation.
|
||||
NormalizedRect image_processing_options;
|
||||
image_processing_options.set_rotation(M_PI / 2.0);
|
||||
ImageProcessingOptions image_processing_options;
|
||||
image_processing_options.rotation_degrees = -90;
|
||||
|
||||
MP_ASSERT_OK_AND_ASSIGN(auto results, image_classifier->Classify(
|
||||
image, image_processing_options));
|
||||
|
@ -616,13 +616,10 @@ TEST_F(ImageModeTest, SucceedsWithRegionOfInterestAndRotation) {
|
|||
options->classifier_options.max_results = 1;
|
||||
MP_ASSERT_OK_AND_ASSIGN(std::unique_ptr<ImageClassifier> image_classifier,
|
||||
ImageClassifier::Create(std::move(options)));
|
||||
// Crop around the chair, with 90° anti-clockwise rotation.
|
||||
NormalizedRect image_processing_options;
|
||||
image_processing_options.set_x_center(0.2821);
|
||||
image_processing_options.set_y_center(0.2406);
|
||||
image_processing_options.set_width(0.5642);
|
||||
image_processing_options.set_height(0.1286);
|
||||
image_processing_options.set_rotation(M_PI / 2.0);
|
||||
// Region-of-interest around the chair, with 90° anti-clockwise rotation.
|
||||
Rect roi{/*left=*/0.006, /*top=*/0.1763, /*right=*/0.5702, /*bottom=*/0.3049};
|
||||
ImageProcessingOptions image_processing_options{roi,
|
||||
/*rotation_degrees=*/-90};
|
||||
|
||||
MP_ASSERT_OK_AND_ASSIGN(auto results, image_classifier->Classify(
|
||||
image, image_processing_options));
|
||||
|
@ -633,7 +630,7 @@ TEST_F(ImageModeTest, SucceedsWithRegionOfInterestAndRotation) {
|
|||
entries {
|
||||
categories {
|
||||
index: 560
|
||||
score: 0.6800408
|
||||
score: 0.6522213
|
||||
category_name: "folding chair"
|
||||
}
|
||||
timestamp_ms: 0
|
||||
|
@ -643,6 +640,69 @@ TEST_F(ImageModeTest, SucceedsWithRegionOfInterestAndRotation) {
|
|||
})pb"));
|
||||
}
|
||||
|
||||
// Testing all these once with ImageClassifier.
|
||||
TEST_F(ImageModeTest, FailsWithInvalidImageProcessingOptions) {
|
||||
MP_ASSERT_OK_AND_ASSIGN(Image image,
|
||||
DecodeImageFromFile(JoinPath("./", kTestDataDirectory,
|
||||
"multi_objects.jpg")));
|
||||
auto options = std::make_unique<ImageClassifierOptions>();
|
||||
options->base_options.model_asset_path =
|
||||
JoinPath("./", kTestDataDirectory, kMobileNetFloatWithMetadata);
|
||||
MP_ASSERT_OK_AND_ASSIGN(std::unique_ptr<ImageClassifier> image_classifier,
|
||||
ImageClassifier::Create(std::move(options)));
|
||||
|
||||
// Invalid: left > right.
|
||||
Rect roi{/*left=*/0.9, /*top=*/0, /*right=*/0.1, /*bottom=*/1};
|
||||
ImageProcessingOptions image_processing_options{roi,
|
||||
/*rotation_degrees=*/0};
|
||||
auto results = image_classifier->Classify(image, image_processing_options);
|
||||
EXPECT_EQ(results.status().code(), absl::StatusCode::kInvalidArgument);
|
||||
EXPECT_THAT(results.status().message(),
|
||||
HasSubstr("Expected Rect with left < right and top < bottom"));
|
||||
EXPECT_THAT(
|
||||
results.status().GetPayload(kMediaPipeTasksPayload),
|
||||
Optional(absl::Cord(absl::StrCat(
|
||||
MediaPipeTasksStatus::kImageProcessingInvalidArgumentError))));
|
||||
|
||||
// Invalid: top > bottom.
|
||||
roi = {/*left=*/0, /*top=*/0.9, /*right=*/1, /*bottom=*/0.1};
|
||||
image_processing_options = {roi,
|
||||
/*rotation_degrees=*/0};
|
||||
results = image_classifier->Classify(image, image_processing_options);
|
||||
EXPECT_EQ(results.status().code(), absl::StatusCode::kInvalidArgument);
|
||||
EXPECT_THAT(results.status().message(),
|
||||
HasSubstr("Expected Rect with left < right and top < bottom"));
|
||||
EXPECT_THAT(
|
||||
results.status().GetPayload(kMediaPipeTasksPayload),
|
||||
Optional(absl::Cord(absl::StrCat(
|
||||
MediaPipeTasksStatus::kImageProcessingInvalidArgumentError))));
|
||||
|
||||
// Invalid: coordinates out of [0,1] range.
|
||||
roi = {/*left=*/-0.1, /*top=*/0, /*right=*/1, /*bottom=*/1};
|
||||
image_processing_options = {roi,
|
||||
/*rotation_degrees=*/0};
|
||||
results = image_classifier->Classify(image, image_processing_options);
|
||||
EXPECT_EQ(results.status().code(), absl::StatusCode::kInvalidArgument);
|
||||
EXPECT_THAT(results.status().message(),
|
||||
HasSubstr("Expected Rect values to be in [0,1]"));
|
||||
EXPECT_THAT(
|
||||
results.status().GetPayload(kMediaPipeTasksPayload),
|
||||
Optional(absl::Cord(absl::StrCat(
|
||||
MediaPipeTasksStatus::kImageProcessingInvalidArgumentError))));
|
||||
|
||||
// Invalid: rotation not a multiple of 90°.
|
||||
image_processing_options = {/*region_of_interest=*/std::nullopt,
|
||||
/*rotation_degrees=*/1};
|
||||
results = image_classifier->Classify(image, image_processing_options);
|
||||
EXPECT_EQ(results.status().code(), absl::StatusCode::kInvalidArgument);
|
||||
EXPECT_THAT(results.status().message(),
|
||||
HasSubstr("Expected rotation to be a multiple of 90°"));
|
||||
EXPECT_THAT(
|
||||
results.status().GetPayload(kMediaPipeTasksPayload),
|
||||
Optional(absl::Cord(absl::StrCat(
|
||||
MediaPipeTasksStatus::kImageProcessingInvalidArgumentError))));
|
||||
}
|
||||
|
||||
class VideoModeTest : public tflite_shims::testing::Test {};
|
||||
|
||||
TEST_F(VideoModeTest, FailsWithCallingWrongMethod) {
|
||||
|
@ -732,11 +792,9 @@ TEST_F(VideoModeTest, SucceedsWithRegionOfInterest) {
|
|||
MP_ASSERT_OK_AND_ASSIGN(std::unique_ptr<ImageClassifier> image_classifier,
|
||||
ImageClassifier::Create(std::move(options)));
|
||||
// Crop around the soccer ball.
|
||||
NormalizedRect image_processing_options;
|
||||
image_processing_options.set_x_center(0.532);
|
||||
image_processing_options.set_y_center(0.521);
|
||||
image_processing_options.set_width(0.164);
|
||||
image_processing_options.set_height(0.427);
|
||||
// Region-of-interest around the soccer ball.
|
||||
Rect roi{/*left=*/0.45, /*top=*/0.3075, /*right=*/0.614, /*bottom=*/0.7345};
|
||||
ImageProcessingOptions image_processing_options{roi, /*rotation_degrees=*/0};
|
||||
|
||||
for (int i = 0; i < iterations; ++i) {
|
||||
MP_ASSERT_OK_AND_ASSIGN(
|
||||
|
@ -877,11 +935,8 @@ TEST_F(LiveStreamModeTest, SucceedsWithRegionOfInterest) {
|
|||
MP_ASSERT_OK_AND_ASSIGN(std::unique_ptr<ImageClassifier> image_classifier,
|
||||
ImageClassifier::Create(std::move(options)));
|
||||
// Crop around the soccer ball.
|
||||
NormalizedRect image_processing_options;
|
||||
image_processing_options.set_x_center(0.532);
|
||||
image_processing_options.set_y_center(0.521);
|
||||
image_processing_options.set_width(0.164);
|
||||
image_processing_options.set_height(0.427);
|
||||
Rect roi{/*left=*/0.45, /*top=*/0.3075, /*right=*/0.614, /*bottom=*/0.7345};
|
||||
ImageProcessingOptions image_processing_options{roi, /*rotation_degrees=*/0};
|
||||
|
||||
for (int i = 0; i < iterations; ++i) {
|
||||
MP_ASSERT_OK(
|
||||
|
|
|
@ -75,6 +75,7 @@ cc_library(
|
|||
"//mediapipe/tasks/cc/core/proto:base_options_cc_proto",
|
||||
"//mediapipe/tasks/cc/core/proto:inference_subgraph_cc_proto",
|
||||
"//mediapipe/tasks/cc/vision/core:base_vision_task_api",
|
||||
"//mediapipe/tasks/cc/vision/core:image_processing_options",
|
||||
"//mediapipe/tasks/cc/vision/core:running_mode",
|
||||
"//mediapipe/tasks/cc/vision/core:vision_task_api_factory",
|
||||
"//mediapipe/tasks/cc/vision/object_detector/proto:object_detector_options_cc_proto",
|
||||
|
|
|
@ -34,6 +34,7 @@ limitations under the License.
|
|||
#include "mediapipe/tasks/cc/core/proto/base_options.pb.h"
|
||||
#include "mediapipe/tasks/cc/core/proto/inference_subgraph.pb.h"
|
||||
#include "mediapipe/tasks/cc/core/utils.h"
|
||||
#include "mediapipe/tasks/cc/vision/core/image_processing_options.h"
|
||||
#include "mediapipe/tasks/cc/vision/core/running_mode.h"
|
||||
#include "mediapipe/tasks/cc/vision/core/vision_task_api_factory.h"
|
||||
#include "mediapipe/tasks/cc/vision/object_detector/proto/object_detector_options.pb.h"
|
||||
|
@ -58,31 +59,6 @@ constexpr int kMicroSecondsPerMilliSecond = 1000;
|
|||
using ObjectDetectorOptionsProto =
|
||||
object_detector::proto::ObjectDetectorOptions;
|
||||
|
||||
// Returns a NormalizedRect filling the whole image. If input is present, its
|
||||
// rotation is set in the returned NormalizedRect and a check is performed to
|
||||
// make sure no region-of-interest was provided. Otherwise, rotation is set to
|
||||
// 0.
|
||||
absl::StatusOr<NormalizedRect> FillNormalizedRect(
|
||||
std::optional<NormalizedRect> normalized_rect) {
|
||||
NormalizedRect result;
|
||||
if (normalized_rect.has_value()) {
|
||||
result = *normalized_rect;
|
||||
}
|
||||
bool has_coordinates = result.has_x_center() || result.has_y_center() ||
|
||||
result.has_width() || result.has_height();
|
||||
if (has_coordinates) {
|
||||
return CreateStatusWithPayload(
|
||||
absl::StatusCode::kInvalidArgument,
|
||||
"ObjectDetector does not support region-of-interest.",
|
||||
MediaPipeTasksStatus::kInvalidArgumentError);
|
||||
}
|
||||
result.set_x_center(0.5);
|
||||
result.set_y_center(0.5);
|
||||
result.set_width(1);
|
||||
result.set_height(1);
|
||||
return result;
|
||||
}
|
||||
|
||||
// Creates a MediaPipe graph config that contains a subgraph node of
|
||||
// "mediapipe.tasks.vision.ObjectDetectorGraph". If the task is running in the
|
||||
// live stream mode, a "FlowLimiterCalculator" will be added to limit the
|
||||
|
@ -170,15 +146,16 @@ absl::StatusOr<std::unique_ptr<ObjectDetector>> ObjectDetector::Create(
|
|||
|
||||
absl::StatusOr<std::vector<Detection>> ObjectDetector::Detect(
|
||||
mediapipe::Image image,
|
||||
std::optional<mediapipe::NormalizedRect> image_processing_options) {
|
||||
std::optional<core::ImageProcessingOptions> image_processing_options) {
|
||||
if (image.UsesGpu()) {
|
||||
return CreateStatusWithPayload(
|
||||
absl::StatusCode::kInvalidArgument,
|
||||
absl::StrCat("GPU input images are currently not supported."),
|
||||
MediaPipeTasksStatus::kRunnerUnexpectedInputError);
|
||||
}
|
||||
ASSIGN_OR_RETURN(NormalizedRect norm_rect,
|
||||
FillNormalizedRect(image_processing_options));
|
||||
ASSIGN_OR_RETURN(
|
||||
NormalizedRect norm_rect,
|
||||
ConvertToNormalizedRect(image_processing_options, /*roi_allowed=*/false));
|
||||
ASSIGN_OR_RETURN(
|
||||
auto output_packets,
|
||||
ProcessImageData(
|
||||
|
@ -189,15 +166,16 @@ absl::StatusOr<std::vector<Detection>> ObjectDetector::Detect(
|
|||
|
||||
absl::StatusOr<std::vector<Detection>> ObjectDetector::DetectForVideo(
|
||||
mediapipe::Image image, int64 timestamp_ms,
|
||||
std::optional<mediapipe::NormalizedRect> image_processing_options) {
|
||||
std::optional<core::ImageProcessingOptions> image_processing_options) {
|
||||
if (image.UsesGpu()) {
|
||||
return CreateStatusWithPayload(
|
||||
absl::StatusCode::kInvalidArgument,
|
||||
absl::StrCat("GPU input images are currently not supported."),
|
||||
MediaPipeTasksStatus::kRunnerUnexpectedInputError);
|
||||
}
|
||||
ASSIGN_OR_RETURN(NormalizedRect norm_rect,
|
||||
FillNormalizedRect(image_processing_options));
|
||||
ASSIGN_OR_RETURN(
|
||||
NormalizedRect norm_rect,
|
||||
ConvertToNormalizedRect(image_processing_options, /*roi_allowed=*/false));
|
||||
ASSIGN_OR_RETURN(
|
||||
auto output_packets,
|
||||
ProcessVideoData(
|
||||
|
@ -212,15 +190,16 @@ absl::StatusOr<std::vector<Detection>> ObjectDetector::DetectForVideo(
|
|||
|
||||
absl::Status ObjectDetector::DetectAsync(
|
||||
Image image, int64 timestamp_ms,
|
||||
std::optional<mediapipe::NormalizedRect> image_processing_options) {
|
||||
std::optional<core::ImageProcessingOptions> image_processing_options) {
|
||||
if (image.UsesGpu()) {
|
||||
return CreateStatusWithPayload(
|
||||
absl::StatusCode::kInvalidArgument,
|
||||
absl::StrCat("GPU input images are currently not supported."),
|
||||
MediaPipeTasksStatus::kRunnerUnexpectedInputError);
|
||||
}
|
||||
ASSIGN_OR_RETURN(NormalizedRect norm_rect,
|
||||
FillNormalizedRect(image_processing_options));
|
||||
ASSIGN_OR_RETURN(
|
||||
NormalizedRect norm_rect,
|
||||
ConvertToNormalizedRect(image_processing_options, /*roi_allowed=*/false));
|
||||
return SendLiveStreamData(
|
||||
{{kImageInStreamName,
|
||||
MakePacket<Image>(std::move(image))
|
||||
|
|
|
@ -27,9 +27,9 @@ limitations under the License.
|
|||
#include "absl/status/statusor.h"
|
||||
#include "mediapipe/framework/formats/detection.pb.h"
|
||||
#include "mediapipe/framework/formats/image.h"
|
||||
#include "mediapipe/framework/formats/rect.pb.h"
|
||||
#include "mediapipe/tasks/cc/core/base_options.h"
|
||||
#include "mediapipe/tasks/cc/vision/core/base_vision_task_api.h"
|
||||
#include "mediapipe/tasks/cc/vision/core/image_processing_options.h"
|
||||
#include "mediapipe/tasks/cc/vision/core/running_mode.h"
|
||||
|
||||
namespace mediapipe {
|
||||
|
@ -154,10 +154,9 @@ class ObjectDetector : tasks::vision::core::BaseVisionTaskApi {
|
|||
// after the yuv support is implemented.
|
||||
//
|
||||
// The optional 'image_processing_options' parameter can be used to specify
|
||||
// the rotation to apply to the image before performing classification, by
|
||||
// setting its 'rotation' field in radians (e.g. 'M_PI / 2' for a 90°
|
||||
// anti-clockwise rotation). Note that specifying a region-of-interest using
|
||||
// the 'x_center', 'y_center', 'width' and 'height' fields is NOT supported
|
||||
// the rotation to apply to the image before performing detection, by
|
||||
// setting its 'rotation_degrees' field. Note that specifying a
|
||||
// region-of-interest using the 'region_of_interest' field is NOT supported
|
||||
// and will result in an invalid argument error being returned.
|
||||
//
|
||||
// For CPU images, the returned bounding boxes are expressed in the
|
||||
|
@ -168,7 +167,7 @@ class ObjectDetector : tasks::vision::core::BaseVisionTaskApi {
|
|||
// images after enabling the gpu support in MediaPipe Tasks.
|
||||
absl::StatusOr<std::vector<mediapipe::Detection>> Detect(
|
||||
mediapipe::Image image,
|
||||
std::optional<mediapipe::NormalizedRect> image_processing_options =
|
||||
std::optional<core::ImageProcessingOptions> image_processing_options =
|
||||
std::nullopt);
|
||||
|
||||
// Performs object detection on the provided video frame.
|
||||
|
@ -180,10 +179,9 @@ class ObjectDetector : tasks::vision::core::BaseVisionTaskApi {
|
|||
// must be monotonically increasing.
|
||||
//
|
||||
// The optional 'image_processing_options' parameter can be used to specify
|
||||
// the rotation to apply to the image before performing classification, by
|
||||
// setting its 'rotation' field in radians (e.g. 'M_PI / 2' for a 90°
|
||||
// anti-clockwise rotation). Note that specifying a region-of-interest using
|
||||
// the 'x_center', 'y_center', 'width' and 'height' fields is NOT supported
|
||||
// the rotation to apply to the image before performing detection, by
|
||||
// setting its 'rotation_degrees' field. Note that specifying a
|
||||
// region-of-interest using the 'region_of_interest' field is NOT supported
|
||||
// and will result in an invalid argument error being returned.
|
||||
//
|
||||
// For CPU images, the returned bounding boxes are expressed in the
|
||||
|
@ -192,7 +190,7 @@ class ObjectDetector : tasks::vision::core::BaseVisionTaskApi {
|
|||
// underlying image data.
|
||||
absl::StatusOr<std::vector<mediapipe::Detection>> DetectForVideo(
|
||||
mediapipe::Image image, int64 timestamp_ms,
|
||||
std::optional<mediapipe::NormalizedRect> image_processing_options =
|
||||
std::optional<core::ImageProcessingOptions> image_processing_options =
|
||||
std::nullopt);
|
||||
|
||||
// Sends live image data to perform object detection, and the results will be
|
||||
|
@ -206,10 +204,9 @@ class ObjectDetector : tasks::vision::core::BaseVisionTaskApi {
|
|||
// increasing.
|
||||
//
|
||||
// The optional 'image_processing_options' parameter can be used to specify
|
||||
// the rotation to apply to the image before performing classification, by
|
||||
// setting its 'rotation' field in radians (e.g. 'M_PI / 2' for a 90°
|
||||
// anti-clockwise rotation). Note that specifying a region-of-interest using
|
||||
// the 'x_center', 'y_center', 'width' and 'height' fields is NOT supported
|
||||
// the rotation to apply to the image before performing detection, by
|
||||
// setting its 'rotation_degrees' field. Note that specifying a
|
||||
// region-of-interest using the 'region_of_interest' field is NOT supported
|
||||
// and will result in an invalid argument error being returned.
|
||||
//
|
||||
// The "result_callback" provides
|
||||
|
@ -223,7 +220,7 @@ class ObjectDetector : tasks::vision::core::BaseVisionTaskApi {
|
|||
// outside of the callback, callers need to make a copy of the image.
|
||||
// - The input timestamp in milliseconds.
|
||||
absl::Status DetectAsync(mediapipe::Image image, int64 timestamp_ms,
|
||||
std::optional<mediapipe::NormalizedRect>
|
||||
std::optional<core::ImageProcessingOptions>
|
||||
image_processing_options = std::nullopt);
|
||||
|
||||
// Shuts down the ObjectDetector when all works are done.
|
||||
|
|
|
@ -31,11 +31,12 @@ limitations under the License.
|
|||
#include "mediapipe/framework/deps/file_path.h"
|
||||
#include "mediapipe/framework/formats/image.h"
|
||||
#include "mediapipe/framework/formats/location_data.pb.h"
|
||||
#include "mediapipe/framework/formats/rect.pb.h"
|
||||
#include "mediapipe/framework/port/gmock.h"
|
||||
#include "mediapipe/framework/port/gtest.h"
|
||||
#include "mediapipe/framework/port/parse_text_proto.h"
|
||||
#include "mediapipe/framework/port/status_matchers.h"
|
||||
#include "mediapipe/tasks/cc/components/containers/rect.h"
|
||||
#include "mediapipe/tasks/cc/vision/core/image_processing_options.h"
|
||||
#include "mediapipe/tasks/cc/vision/core/running_mode.h"
|
||||
#include "mediapipe/tasks/cc/vision/utils/image_utils.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
|
@ -64,6 +65,8 @@ namespace vision {
|
|||
namespace {
|
||||
|
||||
using ::mediapipe::file::JoinPath;
|
||||
using ::mediapipe::tasks::components::containers::Rect;
|
||||
using ::mediapipe::tasks::vision::core::ImageProcessingOptions;
|
||||
using ::testing::HasSubstr;
|
||||
using ::testing::Optional;
|
||||
|
||||
|
@ -532,8 +535,8 @@ TEST_F(ImageModeTest, SucceedsWithRotation) {
|
|||
JoinPath("./", kTestDataDirectory, kMobileSsdWithMetadata);
|
||||
MP_ASSERT_OK_AND_ASSIGN(std::unique_ptr<ObjectDetector> object_detector,
|
||||
ObjectDetector::Create(std::move(options)));
|
||||
NormalizedRect image_processing_options;
|
||||
image_processing_options.set_rotation(M_PI / 2.0);
|
||||
ImageProcessingOptions image_processing_options;
|
||||
image_processing_options.rotation_degrees = -90;
|
||||
MP_ASSERT_OK_AND_ASSIGN(
|
||||
auto results, object_detector->Detect(image, image_processing_options));
|
||||
MP_ASSERT_OK(object_detector->Close());
|
||||
|
@ -557,16 +560,17 @@ TEST_F(ImageModeTest, FailsWithRegionOfInterest) {
|
|||
JoinPath("./", kTestDataDirectory, kMobileSsdWithMetadata);
|
||||
MP_ASSERT_OK_AND_ASSIGN(std::unique_ptr<ObjectDetector> object_detector,
|
||||
ObjectDetector::Create(std::move(options)));
|
||||
NormalizedRect image_processing_options;
|
||||
image_processing_options.set_x_center(0.5);
|
||||
image_processing_options.set_y_center(0.5);
|
||||
image_processing_options.set_width(1.0);
|
||||
image_processing_options.set_height(1.0);
|
||||
Rect roi{/*left=*/0.1, /*top=*/0, /*right=*/0.9, /*bottom=*/1};
|
||||
ImageProcessingOptions image_processing_options{roi, /*rotation_degrees=*/0};
|
||||
|
||||
auto results = object_detector->Detect(image, image_processing_options);
|
||||
EXPECT_EQ(results.status().code(), absl::StatusCode::kInvalidArgument);
|
||||
EXPECT_THAT(results.status().message(),
|
||||
HasSubstr("ObjectDetector does not support region-of-interest"));
|
||||
HasSubstr("This task doesn't support region-of-interest"));
|
||||
EXPECT_THAT(
|
||||
results.status().GetPayload(kMediaPipeTasksPayload),
|
||||
Optional(absl::Cord(absl::StrCat(
|
||||
MediaPipeTasksStatus::kImageProcessingInvalidArgumentError))));
|
||||
}
|
||||
|
||||
class VideoModeTest : public tflite_shims::testing::Test {};
|
||||
|
|
Loading…
Reference in New Issue
Block a user