From 626f92caea38082bd64f4994635d04b0f643d3ba Mon Sep 17 00:00:00 2001
From: MediaPipe Team
Date: Fri, 10 Feb 2023 21:54:49 -0800
Subject: [PATCH] Move BuildInputImageTensorSpecs to utils

PiperOrigin-RevId: 508829724
---
 .../processors/image_preprocessing_graph.cc  | 28 +------------------
 .../hand_landmarks_detector_graph.cc         | 28 +------------------
 mediapipe/tasks/cc/vision/utils/BUILD        |  9 ++++--
 .../cc/vision/utils/image_tensor_specs.cc    | 26 +++++++++++++++++
 .../cc/vision/utils/image_tensor_specs.h     |  6 ++++
 .../vision/utils/image_tensor_specs_test.cc  | 22 +++++++++++++++
 6 files changed, 62 insertions(+), 57 deletions(-)

diff --git a/mediapipe/tasks/cc/components/processors/image_preprocessing_graph.cc b/mediapipe/tasks/cc/components/processors/image_preprocessing_graph.cc
index fefc1ec52..7093d96fe 100644
--- a/mediapipe/tasks/cc/components/processors/image_preprocessing_graph.cc
+++ b/mediapipe/tasks/cc/components/processors/image_preprocessing_graph.cc
@@ -71,32 +71,6 @@ struct ImagePreprocessingOutputStreams {
   Source<Image> image;
 };
 
-// Builds an ImageTensorSpecs for configuring the preprocessing calculators.
-absl::StatusOr<ImageTensorSpecs> BuildImageTensorSpecs(
-    const ModelResources& model_resources) {
-  const tflite::Model& model = *model_resources.GetTfLiteModel();
-  if (model.subgraphs()->size() != 1) {
-    return CreateStatusWithPayload(
-        absl::StatusCode::kInvalidArgument,
-        "Image tflite models are assumed to have a single subgraph.",
-        MediaPipeTasksStatus::kInvalidArgumentError);
-  }
-  const auto* primary_subgraph = (*model.subgraphs())[0];
-  if (primary_subgraph->inputs()->size() != 1) {
-    return CreateStatusWithPayload(
-        absl::StatusCode::kInvalidArgument,
-        "Image tflite models are assumed to have a single input.",
-        MediaPipeTasksStatus::kInvalidArgumentError);
-  }
-  const auto* input_tensor =
-      (*primary_subgraph->tensors())[(*primary_subgraph->inputs())[0]];
-  ASSIGN_OR_RETURN(const auto* image_tensor_metadata,
-                   vision::GetImageTensorMetadataIfAny(
-                       *model_resources.GetMetadataExtractor(), 0));
-  return vision::BuildInputImageTensorSpecs(*input_tensor,
-                                            image_tensor_metadata);
-}
-
 // Fills in the ImageToTensorCalculatorOptions based on the ImageTensorSpecs.
 absl::Status ConfigureImageToTensorCalculator(
     const ImageTensorSpecs& image_tensor_specs,
@@ -150,7 +124,7 @@ absl::Status ConfigureImagePreprocessingGraph(
     const ModelResources& model_resources, bool use_gpu,
     proto::ImagePreprocessingGraphOptions* options) {
   ASSIGN_OR_RETURN(auto image_tensor_specs,
-                   BuildImageTensorSpecs(model_resources));
+                   vision::BuildInputImageTensorSpecs(model_resources));
   MP_RETURN_IF_ERROR(ConfigureImageToTensorCalculator(
       image_tensor_specs, options->mutable_image_to_tensor_options()));
   // The GPU backend isn't able to process int data. If the input tensor is
diff --git a/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarks_detector_graph.cc b/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarks_detector_graph.cc
index 914bc30fc..6d232d3f1 100644
--- a/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarks_detector_graph.cc
+++ b/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarks_detector_graph.cc
@@ -109,32 +109,6 @@ absl::Status SanityCheckOptions(
   return absl::OkStatus();
 }
 
-// Builds an ImageTensorSpecs for configuring the image preprocessing subgraph.
-absl::StatusOr<ImageTensorSpecs> BuildImageTensorSpecs(
-    const ModelResources& model_resources) {
-  const tflite::Model& model = *model_resources.GetTfLiteModel();
-  if (model.subgraphs()->size() != 1) {
-    return CreateStatusWithPayload(
-        absl::StatusCode::kInvalidArgument,
-        "Hand landmark model is assumed to have a single subgraph.",
-        MediaPipeTasksStatus::kInvalidArgumentError);
-  }
-  const auto* primary_subgraph = (*model.subgraphs())[0];
-  if (primary_subgraph->inputs()->size() != 1) {
-    return CreateStatusWithPayload(
-        absl::StatusCode::kInvalidArgument,
-        "Hand landmark model is assumed to have a single input.",
-        MediaPipeTasksStatus::kInvalidArgumentError);
-  }
-  const auto* input_tensor =
-      (*primary_subgraph->tensors())[(*primary_subgraph->inputs())[0]];
-  ASSIGN_OR_RETURN(const auto* image_tensor_metadata,
-                   vision::GetImageTensorMetadataIfAny(
-                       *model_resources.GetMetadataExtractor(), 0));
-  return vision::BuildInputImageTensorSpecs(*input_tensor,
-                                            image_tensor_metadata);
-}
-
 // Split hand landmark detection model output tensor into four parts,
 // representing landmarks, presence scores, handedness, and world landmarks,
 // respectively.
@@ -297,7 +271,7 @@ class SingleHandLandmarksDetectorGraph : public core::ModelTaskGraph {
     auto image_size = preprocessing[Output<std::pair<int, int>>("IMAGE_SIZE")];
 
     ASSIGN_OR_RETURN(auto image_tensor_specs,
-                     BuildImageTensorSpecs(model_resources));
+                     BuildInputImageTensorSpecs(model_resources));
 
     auto& inference = AddInference(
         model_resources, subgraph_options.base_options().acceleration(), graph);
diff --git a/mediapipe/tasks/cc/vision/utils/BUILD b/mediapipe/tasks/cc/vision/utils/BUILD
index fda33bea5..e2d4c5651 100644
--- a/mediapipe/tasks/cc/vision/utils/BUILD
+++ b/mediapipe/tasks/cc/vision/utils/BUILD
@@ -12,16 +12,19 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-load("@org_tensorflow//tensorflow/lite/core/shims:cc_library_with_tflite.bzl", "cc_test_with_tflite")
+load("@org_tensorflow//tensorflow/lite/core/shims:cc_library_with_tflite.bzl", "cc_library_with_tflite", "cc_test_with_tflite")
 
 package(default_visibility = ["//mediapipe/tasks:internal"])
 
 licenses(["notice"])
 
-cc_library(
+cc_library_with_tflite(
     name = "image_tensor_specs",
     srcs = ["image_tensor_specs.cc"],
     hdrs = ["image_tensor_specs.h"],
+    tflite_deps = [
+        "//mediapipe/tasks/cc/core:model_resources",
+    ],
     deps = [
         "//mediapipe/framework/port:integral_types",
         "//mediapipe/framework/port:status",
@@ -44,11 +47,11 @@ cc_test_with_tflite(
     srcs = ["image_tensor_specs_test.cc"],
     data = ["//mediapipe/tasks/testdata/vision:test_models"],
     tflite_deps = [
+        ":image_tensor_specs",
         "//mediapipe/tasks/cc/core:model_resources",
         "@org_tensorflow//tensorflow/lite/core/shims:cc_shims_test_util",
     ],
     deps = [
-        ":image_tensor_specs",
         "//mediapipe/framework/deps:file_path",
         "//mediapipe/framework/port:gtest_main",
         "//mediapipe/tasks/cc:common",
diff --git a/mediapipe/tasks/cc/vision/utils/image_tensor_specs.cc b/mediapipe/tasks/cc/vision/utils/image_tensor_specs.cc
index c8b147b0f..6b67047c5 100644
--- a/mediapipe/tasks/cc/vision/utils/image_tensor_specs.cc
+++ b/mediapipe/tasks/cc/vision/utils/image_tensor_specs.cc
@@ -236,6 +236,32 @@ absl::StatusOr<ImageTensorSpecs> BuildInputImageTensorSpecs(
   return result;
 }
 
+// Builds an ImageTensorSpecs for configuring the preprocessing calculators.
+absl::StatusOr<ImageTensorSpecs> BuildInputImageTensorSpecs(
+    const core::ModelResources& model_resources) {
+  const tflite::Model& model = *model_resources.GetTfLiteModel();
+  if (model.subgraphs()->size() != 1) {
+    return CreateStatusWithPayload(
+        absl::StatusCode::kInvalidArgument,
+        "Image tflite models are assumed to have a single subgraph.",
+        MediaPipeTasksStatus::kInvalidArgumentError);
+  }
+  const auto* primary_subgraph = (*model.subgraphs())[0];
+  if (primary_subgraph->inputs()->size() != 1) {
+    return CreateStatusWithPayload(
+        absl::StatusCode::kInvalidArgument,
+        "Image tflite models are assumed to have a single input.",
+        MediaPipeTasksStatus::kInvalidArgumentError);
+  }
+  const auto* input_tensor =
+      (*primary_subgraph->tensors())[(*primary_subgraph->inputs())[0]];
+  ASSIGN_OR_RETURN(const auto* image_tensor_metadata,
+                   vision::GetImageTensorMetadataIfAny(
+                       *model_resources.GetMetadataExtractor(), 0));
+  return vision::BuildInputImageTensorSpecs(*input_tensor,
+                                            image_tensor_metadata);
+}
+
 }  // namespace vision
 }  // namespace tasks
 }  // namespace mediapipe
diff --git a/mediapipe/tasks/cc/vision/utils/image_tensor_specs.h b/mediapipe/tasks/cc/vision/utils/image_tensor_specs.h
index bc8ff95d4..0c0d4c81a 100644
--- a/mediapipe/tasks/cc/vision/utils/image_tensor_specs.h
+++ b/mediapipe/tasks/cc/vision/utils/image_tensor_specs.h
@@ -20,6 +20,7 @@ limitations under the License.
 
 #include "absl/status/statusor.h"
 #include "absl/types/optional.h"
+#include "mediapipe/tasks/cc/core/model_resources.h"
 #include "mediapipe/tasks/cc/metadata/metadata_extractor.h"
 #include "mediapipe/tasks/metadata/metadata_schema_generated.h"
 
@@ -90,6 +91,11 @@ absl::StatusOr<ImageTensorSpecs> BuildInputImageTensorSpecs(
     const tflite::Tensor& image_tensor,
     const tflite::TensorMetadata* image_tensor_metadata);
 
+// Build ImageTensorSpec from model resources. The tflite model must contain
+// single subgraph with single input tensor.
+absl::StatusOr<ImageTensorSpecs> BuildInputImageTensorSpecs(
+    const core::ModelResources& model_resources);
+
 }  // namespace vision
 }  // namespace tasks
 }  // namespace mediapipe
diff --git a/mediapipe/tasks/cc/vision/utils/image_tensor_specs_test.cc b/mediapipe/tasks/cc/vision/utils/image_tensor_specs_test.cc
index f9289dc90..5d6fcf98c 100644
--- a/mediapipe/tasks/cc/vision/utils/image_tensor_specs_test.cc
+++ b/mediapipe/tasks/cc/vision/utils/image_tensor_specs_test.cc
@@ -171,6 +171,28 @@ TEST_F(ImageTensorSpecsTest,
   EXPECT_EQ(input_specs.normalization_options, absl::nullopt);
 }
 
+TEST_F(ImageTensorSpecsTest, BuildInputImageTensorSpecsFromModelResources) {
+  auto model_file = std::make_unique<core::proto::ExternalFile>();
+  model_file->set_file_name(
+      JoinPath("./", kTestDataDirectory, kMobileNetQuantizedPartialMetadata));
+  MP_ASSERT_OK_AND_ASSIGN(auto model_resources,
+                          core::ModelResources::Create(kTestModelResourcesTag,
+                                                       std::move(model_file)));
+  const tflite::Model* model = model_resources->GetTfLiteModel();
+  CHECK(model != nullptr);
+  absl::StatusOr<ImageTensorSpecs> input_specs_or =
+      BuildInputImageTensorSpecs(*model_resources);
+  MP_ASSERT_OK(input_specs_or);
+
+  const ImageTensorSpecs& input_specs = input_specs_or.value();
+  EXPECT_EQ(input_specs.image_width, 224);
+  EXPECT_EQ(input_specs.image_height, 224);
+  EXPECT_EQ(input_specs.color_space, ColorSpaceType_RGB);
+  EXPECT_STREQ(EnumNameTensorType(input_specs.tensor_type),
+               EnumNameTensorType(tflite::TensorType_UINT8));
+  EXPECT_EQ(input_specs.normalization_options, absl::nullopt);
+}
+
 }  // namespace
 }  // namespace vision
 }  // namespace tasks
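
For reference, below is a minimal sketch (not part of the patch) of how the new ModelResources-based overload of BuildInputImageTensorSpecs might be called from task code, mirroring the test added above. The helper name GetImageSpecsForModel, the "image_specs_example" resource tag, and the model path are illustrative assumptions.

#include <memory>
#include <string>
#include <utility>

#include "absl/status/statusor.h"
#include "mediapipe/framework/port/status_macros.h"
#include "mediapipe/tasks/cc/core/model_resources.h"
#include "mediapipe/tasks/cc/core/proto/external_file.pb.h"
#include "mediapipe/tasks/cc/vision/utils/image_tensor_specs.h"

namespace {

// Loads a single-input image model and derives its input tensor specs via the
// new ModelResources-based overload of BuildInputImageTensorSpecs.
absl::StatusOr<mediapipe::tasks::vision::ImageTensorSpecs>
GetImageSpecsForModel(const std::string& model_path) {
  auto model_file =
      std::make_unique<mediapipe::tasks::core::proto::ExternalFile>();
  // model_path points at an image .tflite model with (optional) metadata.
  model_file->set_file_name(model_path);
  ASSIGN_OR_RETURN(
      auto model_resources,
      mediapipe::tasks::core::ModelResources::Create("image_specs_example",
                                                     std::move(model_file)));
  // Returns kInvalidArgument unless the model has exactly one subgraph with
  // exactly one input tensor, per the checks added in image_tensor_specs.cc.
  return mediapipe::tasks::vision::BuildInputImageTensorSpecs(*model_resources);
}

}  // namespace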