/* Copyright 2022 The MediaPipe Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
|
|
#include "mediapipe/tasks/cc/components/image_preprocessing.h"

#include <array>
#include <cmath>
#include <complex>
#include <limits>
#include <utility>
#include <vector>

#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "mediapipe/calculators/image/image_clone_calculator.pb.h"
#include "mediapipe/calculators/tensor/image_to_tensor_calculator.pb.h"
#include "mediapipe/framework/api2/builder.h"
#include "mediapipe/framework/api2/port.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/image.h"
#include "mediapipe/framework/formats/rect.pb.h"
#include "mediapipe/framework/formats/tensor.h"
#include "mediapipe/gpu/gpu_origin.pb.h"
#include "mediapipe/tasks/cc/common.h"
#include "mediapipe/tasks/cc/components/image_preprocessing_options.pb.h"
#include "mediapipe/tasks/cc/core/model_resources.h"
#include "mediapipe/tasks/cc/core/proto/acceleration.pb.h"
#include "mediapipe/tasks/cc/vision/utils/image_tensor_specs.h"
#include "tensorflow/lite/schema/schema_generated.h"
|
|
|
|
namespace mediapipe {
|
|
namespace tasks {
|
|
namespace components {
|
|
namespace {
|
|
|
|
using ::mediapipe::Tensor;
|
|
using ::mediapipe::api2::Input;
|
|
using ::mediapipe::api2::Output;
|
|
using ::mediapipe::api2::builder::Graph;
|
|
using ::mediapipe::api2::builder::Source;
|
|
using ::mediapipe::tasks::core::ModelResources;
|
|
using ::mediapipe::tasks::vision::ImageTensorSpecs;
|
|
|
|
// Stream tags used when wiring the preprocessing subgraph.

// Tags of the subgraph input streams. `kImageTag` is also used for the image
// output stream and for the image inputs of the internal calculators.
constexpr char kImageTag[] = "IMAGE";
constexpr char kNormRectTag[] = "NORM_RECT";

// Tags of the subgraph output streams.
constexpr char kTensorsTag[] = "TENSORS";
constexpr char kMatrixTag[] = "MATRIX";
constexpr char kLetterboxPaddingTag[] = "LETTERBOX_PADDING";
constexpr char kImageSizeTag[] = "IMAGE_SIZE";

// Tag of the size output read from the ImagePropertiesCalculator node.
constexpr char kSizeTag[] = "SIZE";
|
|
|
|
// Struct holding the different output streams produced by the subgraph.
|
|
struct ImagePreprocessingOutputStreams {
|
|
Source<std::vector<Tensor>> tensors;
|
|
Source<std::array<float, 16>> matrix;
|
|
Source<std::array<float, 4>> letterbox_padding;
|
|
Source<std::pair<int, int>> image_size;
|
|
Source<Image> image;
|
|
};
|
|
|
|
// Builds an ImageTensorSpecs for configuring the preprocessing calculators.
|
|
absl::StatusOr<ImageTensorSpecs> BuildImageTensorSpecs(
|
|
const ModelResources& model_resources) {
|
|
const tflite::Model& model = *model_resources.GetTfLiteModel();
|
|
if (model.subgraphs()->size() != 1) {
|
|
return CreateStatusWithPayload(
|
|
absl::StatusCode::kInvalidArgument,
|
|
"Image tflite models are assumed to have a single subgraph.",
|
|
MediaPipeTasksStatus::kInvalidArgumentError);
|
|
}
|
|
const auto* primary_subgraph = (*model.subgraphs())[0];
|
|
if (primary_subgraph->inputs()->size() != 1) {
|
|
return CreateStatusWithPayload(
|
|
absl::StatusCode::kInvalidArgument,
|
|
"Image tflite models are assumed to have a single input.",
|
|
MediaPipeTasksStatus::kInvalidArgumentError);
|
|
}
|
|
const auto* input_tensor =
|
|
(*primary_subgraph->tensors())[(*primary_subgraph->inputs())[0]];
|
|
ASSIGN_OR_RETURN(const auto* image_tensor_metadata,
|
|
vision::GetImageTensorMetadataIfAny(
|
|
*model_resources.GetMetadataExtractor(), 0));
|
|
return vision::BuildInputImageTensorSpecs(*input_tensor,
|
|
image_tensor_metadata);
|
|
}
|
|
|
|
// Fills in the ImageToTensorCalculatorOptions based on the ImageTensorSpecs.
|
|
absl::Status ConfigureImageToTensorCalculator(
|
|
const ImageTensorSpecs& image_tensor_specs,
|
|
mediapipe::ImageToTensorCalculatorOptions* options) {
|
|
options->set_output_tensor_width(image_tensor_specs.image_width);
|
|
options->set_output_tensor_height(image_tensor_specs.image_height);
|
|
if (image_tensor_specs.tensor_type == tflite::TensorType_UINT8) {
|
|
options->mutable_output_tensor_uint_range()->set_min(0);
|
|
options->mutable_output_tensor_uint_range()->set_max(255);
|
|
} else {
|
|
const auto& normalization_options =
|
|
image_tensor_specs.normalization_options;
|
|
float mean = normalization_options->mean_values[0];
|
|
float std = normalization_options->std_values[0];
|
|
// TODO: Add support for per-channel normalization values.
|
|
for (int i = 1; i < normalization_options->num_values; ++i) {
|
|
if (normalization_options->mean_values[i] != mean ||
|
|
normalization_options->std_values[i] != std) {
|
|
return CreateStatusWithPayload(
|
|
absl::StatusCode::kUnimplemented,
|
|
"Per-channel image normalization is not available.");
|
|
}
|
|
}
|
|
if (std::abs(std) < std::numeric_limits<float>::epsilon()) {
|
|
return CreateStatusWithPayload(
|
|
absl::StatusCode::kInternal,
|
|
"NormalizationOptions.std_values can't be 0. Please check if the "
|
|
"tensor metadata has been populated correctly.");
|
|
}
|
|
// Deduce min and max range from normalization options by applying the
|
|
// normalization formula to the numerical limits of uint8, i.e:
|
|
// output = (input - mean) / std
|
|
options->mutable_output_tensor_float_range()->set_min((0.0f - mean) / std);
|
|
options->mutable_output_tensor_float_range()->set_max((255.0f - mean) /
|
|
std);
|
|
}
|
|
// TODO: need to support different GPU origin on differnt
|
|
// platforms or applications.
|
|
options->set_gpu_origin(mediapipe::GpuOrigin::TOP_LEFT);
|
|
return absl::OkStatus();
|
|
}
|
|
|
|
} // namespace
|
|
|
|
bool DetermineImagePreprocessingGpuBackend(
|
|
const core::proto::Acceleration& acceleration) {
|
|
return acceleration.has_gpu();
|
|
}
|
|
|
|
// Populates `options` from the model held by `model_resources`:
//   - the image-to-tensor options are derived from the model's input tensor
//     specs,
//   - the backend is set to GPU only if `use_gpu` is true and the input tensor
//     is not of type UINT8; it is set to CPU otherwise.
// Any error raised while building the specs or configuring the image-to-tensor
// options is returned to the caller.
absl::Status ConfigureImagePreprocessing(const ModelResources& model_resources,
                                         bool use_gpu,
                                         ImagePreprocessingOptions* options) {
  ASSIGN_OR_RETURN(auto specs, BuildImageTensorSpecs(model_resources));
  MP_RETURN_IF_ERROR(ConfigureImageToTensorCalculator(
      specs, options->mutable_image_to_tensor_options()));

  // The GPU backend isn't able to process int data, so a quantized input
  // tensor forces the image preprocessing graph onto the CPU backend.
  const bool is_quantized = specs.tensor_type == tflite::TensorType_UINT8;
  options->set_backend(use_gpu && !is_quantized
                           ? ImagePreprocessingOptions::GPU_BACKEND
                           : ImagePreprocessingOptions::CPU_BACKEND);
  return absl::OkStatus();
}
|
|
|
|
// Adds an "ImageCloneCalculator" node to `graph`, feeds it with `image_in` and
// returns the node's output image stream. `output_on_gpu` is forwarded to the
// calculator's `output_on_gpu` option.
Source<Image> AddDataConverter(Source<Image> image_in, Graph& graph,
                               bool output_on_gpu) {
  auto& clone_node = graph.AddNode("ImageCloneCalculator");
  auto& clone_options =
      clone_node.GetOptions<mediapipe::ImageCloneCalculatorOptions>();
  clone_options.set_output_on_gpu(output_on_gpu);
  image_in >> clone_node.In("");
  return clone_node[Output<Image>("")];
}
|
|
|
|
// A "mediapipe.tasks.components.ImagePreprocessingSubgraph" performs image
|
|
// preprocessing.
|
|
// - Accepts CPU input images and outputs CPU tensors.
|
|
//
|
|
// Inputs:
|
|
// IMAGE - Image
|
|
// The image to preprocess.
|
|
// NORM_RECT - NormalizedRect @Optional
|
|
// Describes region of image to extract.
|
|
// @Optional: rect covering the whole image is used if not specified.
|
|
// Outputs:
|
|
// TENSORS - std::vector<Tensor>
|
|
// Vector containing a single Tensor populated with the converted and
|
|
// preprocessed image.
|
|
// MATRIX - std::array<float,16> @Optional
|
|
// An std::array<float, 16> representing a 4x4 row-major-order matrix that
|
|
// maps a point on the input image to a point on the output tensor, and
|
|
// can be used to reverse the mapping by inverting the matrix.
|
|
// LETTERBOX_PADDING - std::array<float, 4> @Optional
|
|
// An std::array<float, 4> representing the letterbox padding from the 4
|
|
// sides ([left, top, right, bottom]) of the output image, normalized to
|
|
// [0.f, 1.f] by the output dimensions. The padding values are non-zero only
|
|
// when the "keep_aspect_ratio" is true in ImagePreprocessingOptions.
|
|
// IMAGE_SIZE - std::pair<int,int> @Optional
|
|
// The size of the original input image as a <width, height> pair.
|
|
// IMAGE - Image @Optional
|
|
// The image that has the pixel data stored on the target storage (CPU vs
|
|
// GPU).
|
|
//
|
|
// The recommended way of using this subgraph is through the GraphBuilder API
|
|
// using the 'ConfigureImagePreprocessing()' function. See header file for more
|
|
// details.
|
|
class ImagePreprocessingSubgraph : public Subgraph {
|
|
public:
|
|
absl::StatusOr<CalculatorGraphConfig> GetConfig(
|
|
SubgraphContext* sc) override {
|
|
Graph graph;
|
|
auto output_streams = BuildImagePreprocessing(
|
|
sc->Options<ImagePreprocessingOptions>(),
|
|
graph[Input<Image>(kImageTag)],
|
|
graph[Input<NormalizedRect>::Optional(kNormRectTag)], graph);
|
|
output_streams.tensors >> graph[Output<std::vector<Tensor>>(kTensorsTag)];
|
|
output_streams.matrix >> graph[Output<std::array<float, 16>>(kMatrixTag)];
|
|
output_streams.letterbox_padding >>
|
|
graph[Output<std::array<float, 4>>(kLetterboxPaddingTag)];
|
|
output_streams.image_size >>
|
|
graph[Output<std::pair<int, int>>(kImageSizeTag)];
|
|
output_streams.image >> graph[Output<Image>(kImageTag)];
|
|
return graph.GetConfig();
|
|
}
|
|
|
|
private:
|
|
// Adds a mediapipe image preprocessing subgraph into the provided
|
|
// builder::Graph instance. The image preprocessing subgraph takes images
|
|
// (mediapipe::Image) and region of interest (mediapipe::NormalizedRect) as
|
|
// inputs and returns 5 output streams:
|
|
// - the converted tensor (mediapipe::Tensor),
|
|
// - the transformation matrix (std::array<float, 16>),
|
|
// - the letterbox padding (std::array<float, 4>>),
|
|
// - the original image size (std::pair<int, int>),
|
|
// - the image that has pixel data stored on the target storage
|
|
// (mediapipe::Image).
|
|
//
|
|
// options: the mediapipe tasks ImagePreprocessingOptions.
|
|
// image_in: (mediapipe::Image) stream to preprocess.
|
|
// graph: the mediapipe builder::Graph instance to be updated.
|
|
ImagePreprocessingOutputStreams BuildImagePreprocessing(
|
|
const ImagePreprocessingOptions& options, Source<Image> image_in,
|
|
Source<NormalizedRect> norm_rect_in, Graph& graph) {
|
|
// Convert image to tensor.
|
|
auto& image_to_tensor = graph.AddNode("ImageToTensorCalculator");
|
|
image_to_tensor.GetOptions<mediapipe::ImageToTensorCalculatorOptions>()
|
|
.CopyFrom(options.image_to_tensor_options());
|
|
switch (options.backend()) {
|
|
case ImagePreprocessingOptions::CPU_BACKEND: {
|
|
auto cpu_image =
|
|
AddDataConverter(image_in, graph, /*output_on_gpu=*/false);
|
|
cpu_image >> image_to_tensor.In(kImageTag);
|
|
break;
|
|
}
|
|
case ImagePreprocessingOptions::GPU_BACKEND: {
|
|
auto gpu_image =
|
|
AddDataConverter(image_in, graph, /*output_on_gpu=*/true);
|
|
gpu_image >> image_to_tensor.In(kImageTag);
|
|
break;
|
|
}
|
|
default:
|
|
image_in >> image_to_tensor.In(kImageTag);
|
|
}
|
|
norm_rect_in >> image_to_tensor.In(kNormRectTag);
|
|
|
|
// Extract optional image properties.
|
|
auto& image_size = graph.AddNode("ImagePropertiesCalculator");
|
|
image_in >> image_size.In(kImageTag);
|
|
|
|
// TODO: Replace PassThroughCalculator with a calculator that
|
|
// converts the pixel data to be stored on the target storage (CPU vs GPU).
|
|
auto& pass_through = graph.AddNode("PassThroughCalculator");
|
|
image_in >> pass_through.In("");
|
|
|
|
// Connect outputs.
|
|
return {
|
|
/* tensors= */ image_to_tensor[Output<std::vector<Tensor>>(
|
|
kTensorsTag)],
|
|
/* matrix= */
|
|
image_to_tensor[Output<std::array<float, 16>>(kMatrixTag)],
|
|
/* letterbox_padding= */
|
|
image_to_tensor[Output<std::array<float, 4>>(kLetterboxPaddingTag)],
|
|
/* image_size= */ image_size[Output<std::pair<int, int>>(kSizeTag)],
|
|
/* image= */ pass_through[Output<Image>("")],
|
|
};
|
|
}
|
|
};
|
|
REGISTER_MEDIAPIPE_GRAPH(
|
|
::mediapipe::tasks::components::ImagePreprocessingSubgraph);
|
|
|
|
} // namespace components
|
|
} // namespace tasks
|
|
} // namespace mediapipe
|