/* Copyright 2022 The MediaPipe Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include <memory>
#include <optional>
#include <type_traits>
#include <vector>

#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "absl/strings/str_format.h"
#include "mediapipe/calculators/image/image_clone_calculator.pb.h"
#include "mediapipe/calculators/image/image_transformation_calculator.pb.h"
#include "mediapipe/calculators/tensor/tensor_converter_calculator.pb.h"
#include "mediapipe/framework/api2/builder.h"
#include "mediapipe/framework/api2/port.h"
#include "mediapipe/framework/formats/image.h"
#include "mediapipe/framework/formats/rect.pb.h"
#include "mediapipe/framework/formats/tensor.h"
#include "mediapipe/framework/port/status_macros.h"
#include "mediapipe/tasks/cc/common.h"
#include "mediapipe/tasks/cc/components/processors/image_preprocessing_graph.h"
#include "mediapipe/tasks/cc/components/processors/proto/image_preprocessing_graph_options.pb.h"
#include "mediapipe/tasks/cc/core/model_resources.h"
#include "mediapipe/tasks/cc/core/model_task_graph.h"
#include "mediapipe/tasks/cc/core/proto/inference_subgraph.pb.h"
#include "mediapipe/tasks/cc/metadata/metadata_extractor.h"
#include "mediapipe/tasks/cc/vision/image_segmenter/calculators/tensors_to_segmentation_calculator.pb.h"
#include "mediapipe/tasks/cc/vision/image_segmenter/proto/image_segmenter_graph_options.pb.h"
#include "mediapipe/tasks/cc/vision/image_segmenter/proto/segmenter_options.pb.h"
#include "mediapipe/tasks/cc/vision/utils/image_tensor_specs.h"
#include "mediapipe/tasks/metadata/image_segmenter_metadata_schema_generated.h"
#include "mediapipe/tasks/metadata/metadata_schema_generated.h"
#include "mediapipe/util/graph_builder_utils.h"
#include "mediapipe/util/label_map.pb.h"
#include "mediapipe/util/label_map_util.h"
#include "tensorflow/lite/schema/schema_generated.h"

namespace mediapipe {
namespace tasks {
namespace vision {
namespace image_segmenter {

namespace {

using ::mediapipe::Image;
using ::mediapipe::NormalizedRect;
using ::mediapipe::api2::Input;
using ::mediapipe::api2::Output;
using ::mediapipe::api2::builder::Graph;
using ::mediapipe::api2::builder::MultiSource;
using ::mediapipe::api2::builder::Source;
using ::mediapipe::tasks::metadata::ModelMetadataExtractor;
using ::mediapipe::tasks::vision::image_segmenter::proto::
    ImageSegmenterGraphOptions;
using ::mediapipe::tasks::vision::image_segmenter::proto::SegmenterOptions;
using ::tflite::TensorMetadata;
using LabelItems = mediapipe::proto_ns::Map<int64_t, ::mediapipe::LabelMapItem>;

constexpr char kSegmentationTag[] = "SEGMENTATION";
constexpr char kGroupedSegmentationTag[] = "GROUPED_SEGMENTATION";
constexpr char kConfidenceMaskTag[] = "CONFIDENCE_MASK";
constexpr char kConfidenceMasksTag[] = "CONFIDENCE_MASKS";
constexpr char kCategoryMaskTag[] = "CATEGORY_MASK";
constexpr char kImageTag[] = "IMAGE";
constexpr char kImageCpuTag[] = "IMAGE_CPU";
constexpr char kImageGpuTag[] = "IMAGE_GPU";
constexpr char kNormRectTag[] = "NORM_RECT";
constexpr char kTensorsTag[] = "TENSORS";
constexpr char kOutputSizeTag[] = "OUTPUT_SIZE";
constexpr char kSegmentationMetadataName[] = "SEGMENTER_METADATA";

// Struct holding the different output streams produced by the image segmenter
// subgraph.
struct ImageSegmenterOutputs {
  std::optional<std::vector<Source<Image>>> segmented_masks;
  std::optional<std::vector<Source<Image>>> confidence_masks;
  std::optional<Source<Image>> category_mask;
  // The same as the input image, mainly used for live stream mode.
  Source<Image> image;
};

// Struct holding the image and input tensors after image preprocessing, both
// transferred to the requested device.
struct ImageAndTensorsOnDevice {
  Source<Image> image;
  Source<std::vector<Tensor>> tensors;
};

}  // namespace

absl::Status SanityCheckOptions(const ImageSegmenterGraphOptions& options) {
  // TODO: remove deprecated output type support.
  if (options.segmenter_options().has_output_type() &&
      options.segmenter_options().output_type() ==
          SegmenterOptions::UNSPECIFIED) {
    return CreateStatusWithPayload(absl::StatusCode::kInvalidArgument,
                                   "`output_type` must not be UNSPECIFIED",
                                   MediaPipeTasksStatus::kInvalidArgumentError);
  }
  return absl::OkStatus();
}

absl::StatusOr<LabelItems> GetLabelItemsIfAny(
    const ModelMetadataExtractor& metadata_extractor,
    const TensorMetadata& tensor_metadata, absl::string_view locale) {
  const std::string labels_filename =
      ModelMetadataExtractor::FindFirstAssociatedFileName(
          tensor_metadata, tflite::AssociatedFileType_TENSOR_AXIS_LABELS);
  if (labels_filename.empty()) {
    LabelItems empty_label_items;
    return empty_label_items;
  }
  ASSIGN_OR_RETURN(absl::string_view labels_file,
                   metadata_extractor.GetAssociatedFile(labels_filename));
  const std::string display_names_filename =
      ModelMetadataExtractor::FindFirstAssociatedFileName(
          tensor_metadata, tflite::AssociatedFileType_TENSOR_AXIS_LABELS,
          locale);
  absl::string_view display_names_file;
  if (!display_names_filename.empty()) {
    ASSIGN_OR_RETURN(display_names_file, metadata_extractor.GetAssociatedFile(
                                             display_names_filename));
  }
  return mediapipe::BuildLabelMapFromFiles(labels_file, display_names_file);
}
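
// For illustration only: with a PASCAL VOC style label file in the metadata
// (an assumed label set; the actual entries come entirely from the model's
// associated files), the returned LabelItems would map class indices to
// label names, e.g. {0: "background", 1: "aeroplane", ..., 20: "tv"}.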

absl::Status ConfigureTensorsToSegmentationCalculator(
    const ImageSegmenterGraphOptions& segmenter_option,
    const core::ModelResources& model_resources,
    TensorsToSegmentationCalculatorOptions* options) {
  // Set the default activation function to NONE.
  options->mutable_segmenter_options()->CopyFrom(
      segmenter_option.segmenter_options());
  // Find the custom metadata of ImageSegmenterOptions type in model metadata.
  const auto* metadata_extractor = model_resources.GetMetadataExtractor();
  bool found_activation_in_metadata = false;
  if (metadata_extractor->GetCustomMetadataList() != nullptr &&
      metadata_extractor->GetCustomMetadataList()->size() > 0) {
    for (const auto& custom_metadata :
         *metadata_extractor->GetCustomMetadataList()) {
      if (custom_metadata->name()->str() == kSegmentationMetadataName) {
        found_activation_in_metadata = true;
        auto activation_fb =
            GetImageSegmenterOptions(custom_metadata->data()->data())
                ->activation();
        switch (activation_fb) {
          case Activation_NONE:
            options->mutable_segmenter_options()->set_activation(
                SegmenterOptions::NONE);
            break;
          case Activation_SIGMOID:
            options->mutable_segmenter_options()->set_activation(
                SegmenterOptions::SIGMOID);
            break;
          case Activation_SOFTMAX:
            options->mutable_segmenter_options()->set_activation(
                SegmenterOptions::SOFTMAX);
            break;
          default:
            return CreateStatusWithPayload(
                absl::StatusCode::kInvalidArgument,
                "Invalid activation type found in CustomMetadata of "
                "ImageSegmenterOptions type.");
        }
      }
    }
  }
  if (!found_activation_in_metadata) {
    LOG(WARNING)
        << "No activation type found in model metadata. Using NONE for "
           "ImageSegmenterGraph.";
  }
  const tflite::Model& model = *model_resources.GetTfLiteModel();
  if (model.subgraphs()->size() != 1) {
    return CreateStatusWithPayload(
        absl::StatusCode::kInvalidArgument,
        "Segmentation tflite models are assumed to have a single subgraph.",
        MediaPipeTasksStatus::kInvalidArgumentError);
  }
  const auto* primary_subgraph = (*model.subgraphs())[0];
  if (primary_subgraph->outputs()->size() != 1) {
    return CreateStatusWithPayload(
        absl::StatusCode::kInvalidArgument,
        "Segmentation tflite models are assumed to have a single output.",
        MediaPipeTasksStatus::kInvalidArgumentError);
  }

  ASSIGN_OR_RETURN(
      *options->mutable_label_items(),
      GetLabelItemsIfAny(*metadata_extractor,
                         *metadata_extractor->GetOutputTensorMetadata()->Get(0),
                         segmenter_option.display_names_locale()));
  return absl::OkStatus();
}

// Gets the output tensor from the tflite model of the given model resources.
absl::StatusOr<const tflite::Tensor*> GetOutputTensor(
    const core::ModelResources& model_resources) {
  const tflite::Model& model = *model_resources.GetTfLiteModel();
  const auto* primary_subgraph = (*model.subgraphs())[0];
  const auto* output_tensor =
      (*primary_subgraph->tensors())[(*primary_subgraph->outputs())[0]];
  return output_tensor;
}

// Gets the input tensor from the tflite model of the given model resources.
absl::StatusOr<const tflite::Tensor*> GetInputTensor(
    const core::ModelResources& model_resources) {
  const tflite::Model& model = *model_resources.GetTfLiteModel();
  const auto* primary_subgraph = (*model.subgraphs())[0];
  const auto* input_tensor =
      (*primary_subgraph->tensors())[(*primary_subgraph->inputs())[0]];
  return input_tensor;
}

// Configure the ImageTransformationCalculator according to the input tensor.
void ConfigureImageTransformationCalculator(
    const tflite::Tensor& tflite_input_tensor,
    mediapipe::ImageTransformationCalculatorOptions& options) {
  options.set_output_height(tflite_input_tensor.shape()->data()[1]);
  options.set_output_width(tflite_input_tensor.shape()->data()[2]);
}
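
// Note on the indexing above: the model input tensor is assumed to be NHWC
// (batch, height, width, channels), so shape()->data()[1] is the height and
// shape()->data()[2] the width; ConvertImageToTensors below checks that the
// tensor is 4-dim and reads the channel count from index 3.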

// Configure the TensorConverterCalculator to convert the image to a tensor.
void ConfigureTensorConverterCalculator(
    const ImageTensorSpecs& image_tensor_specs,
    mediapipe::TensorConverterCalculatorOptions& options) {
  float mean = image_tensor_specs.normalization_options->mean_values[0];
  float std = image_tensor_specs.normalization_options->std_values[0];
  options.set_max_num_channels(4);
  options.mutable_output_tensor_float_range()->set_min((0.0f - mean) / std);
  options.mutable_output_tensor_float_range()->set_max((255.0f - mean) / std);
}
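
// Worked example with illustrative values (not taken from any real model):
// if the metadata specifies mean = 127.5 and std = 127.5, the range set
// above becomes
//   min = (0   - 127.5) / 127.5 = -1.0
//   max = (255 - 127.5) / 127.5 =  1.0
// so uint8 pixels in [0, 255] are linearly rescaled to floats in [-1, 1].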

// Image preprocessing step to convert the given image to the input tensors
// for the tflite model.
absl::StatusOr<ImageAndTensorsOnDevice> ConvertImageToTensors(
    Source<Image> image_in, Source<NormalizedRect> norm_rect_in, bool use_gpu,
    const core::ModelResources& model_resources, Graph& graph) {
  ASSIGN_OR_RETURN(const tflite::Tensor* tflite_input_tensor,
                   GetInputTensor(model_resources));
  if (tflite_input_tensor->shape()->size() != 4) {
    return absl::InvalidArgumentError(
        absl::StrFormat("Expect segmentation model to have a 4-dim input "
                        "image tensor. Got input tensor with dims: %d",
                        tflite_input_tensor->shape()->size()));
  }
  const int input_tensor_channel = tflite_input_tensor->shape()->data()[3];
  if (input_tensor_channel != 3 && input_tensor_channel != 4) {
    return absl::InvalidArgumentError(absl::StrFormat(
        "Expect segmentation model to have an input image tensor with 3 or 4 "
        "channels. Got channels = %d",
        tflite_input_tensor->shape()->data()[3]));
  } else if (input_tensor_channel == 3) {
    // ImagePreprocessingGraph is backed by ImageToTensorCalculator, which
    // only supports Tensors with channel = 3.
    auto& preprocessing = graph.AddNode(
        "mediapipe.tasks.components.processors.ImagePreprocessingGraph");
    MP_RETURN_IF_ERROR(components::processors::ConfigureImagePreprocessingGraph(
        model_resources, use_gpu,
        &preprocessing.GetOptions<tasks::components::processors::proto::
                                      ImagePreprocessingGraphOptions>()));
    image_in >> preprocessing.In(kImageTag);
    norm_rect_in >> preprocessing.In(kNormRectTag);
    return {{preprocessing.Out(kImageTag).Cast<Image>(),
             preprocessing.Out(kTensorsTag).Cast<std::vector<Tensor>>()}};
  } else {
    // TODO Remove legacy preprocessing calculators.
    // For a segmentation model with an input Tensor with channel = 4, use
    // the legacy TfLite preprocessing calculators.

    // Upload the image to GPU if requested to use gpu.
    auto& image_clone = graph.AddNode("ImageCloneCalculator");
    image_clone.GetOptions<mediapipe::ImageCloneCalculatorOptions>()
        .set_output_on_gpu(use_gpu);
    image_in >> image_clone.In("");
    Source<Image> image_on_device = image_clone.Out("").Cast<Image>();

    // Convert from Image to legacy ImageFrame or GpuBuffer.
    auto& from_image = graph.AddNode("FromImageCalculator");
    image_on_device >> from_image.In(kImageTag);
    auto image_cpu_or_gpu =
        from_image.Out(use_gpu ? kImageGpuTag : kImageCpuTag);

    // Resize the input image to the model input size.
    auto& image_transformation =
        graph.AddNode("ImageTransformationCalculator");
    ConfigureImageTransformationCalculator(
        *tflite_input_tensor,
        image_transformation
            .GetOptions<mediapipe::ImageTransformationCalculatorOptions>());
    const absl::string_view image_or_image_gpu_tag =
        use_gpu ? kImageGpuTag : kImageTag;
    image_cpu_or_gpu >> image_transformation.In(image_or_image_gpu_tag);
    auto transformed_image = image_transformation.Out(image_or_image_gpu_tag);

    // Convert the image to a mediapipe tensor.
    auto& tensor_converter = graph.AddNode("TensorConverterCalculator");
    ASSIGN_OR_RETURN(auto image_tensor_specs,
                     vision::BuildInputImageTensorSpecs(model_resources));
    ConfigureTensorConverterCalculator(
        image_tensor_specs,
        tensor_converter
            .GetOptions<mediapipe::TensorConverterCalculatorOptions>());

    transformed_image >> tensor_converter.In(image_or_image_gpu_tag);
    auto tensors =
        tensor_converter.Out(kTensorsTag).Cast<std::vector<Tensor>>();

    return {{image_on_device, tensors}};
  }
}
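
// To summarize the two preprocessing paths above:
//   channels == 3: ImagePreprocessingGraph (backed by
//                  ImageToTensorCalculator);
//   channels == 4: ImageCloneCalculator -> FromImageCalculator ->
//                  ImageTransformationCalculator -> TensorConverterCalculator
//                  (the legacy TfLite preprocessing path).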

// A "mediapipe.tasks.vision.image_segmenter.ImageSegmenterGraph" performs
// semantic segmentation. The graph can output optional confidence masks if
// CONFIDENCE_MASKS is connected, and an optional category mask if
// CATEGORY_MASK is connected. At least one of CONFIDENCE_MASK,
// CONFIDENCE_MASKS and CATEGORY_MASK must be connected.
//
// Two kinds of confidence mask outputs are provided: CONFIDENCE_MASK and
// CONFIDENCE_MASKS. Users can retrieve the segmented mask of a particular
// category/channel from CONFIDENCE_MASK, and can get all segmented
// confidence masks at once from CONFIDENCE_MASKS.
// - Accepts CPU input images and outputs segmented masks on CPU.
//
// Inputs:
//   IMAGE - Image
//     Image to perform segmentation on.
//   NORM_RECT - NormalizedRect @Optional
//     Describes the image rotation and the region of the image to perform
//     segmentation on.
//     @Optional: a rect covering the whole image is used if not specified.
//
// Outputs:
//   CONFIDENCE_MASK - mediapipe::Image @Multiple
//     Confidence masks for individual categories. The confidence mask of a
//     single category can be accessed by an index-based output stream.
//   CONFIDENCE_MASKS - std::vector<mediapipe::Image> @Optional
//     The output confidence masks grouped in a vector.
//   CATEGORY_MASK - mediapipe::Image @Optional
//     Optional category mask.
//   IMAGE - mediapipe::Image
//     The image that the image segmenter runs on.
//
// Example:
// node {
//   calculator: "mediapipe.tasks.vision.image_segmenter.ImageSegmenterGraph"
//   input_stream: "IMAGE:image"
//   output_stream: "SEGMENTATION:segmented_masks"
//   options {
//     [mediapipe.tasks.vision.image_segmenter.proto.ImageSegmenterGraphOptions.ext]
//     {
//       base_options {
//         model_asset {
//           file_name: "/path/to/model.tflite"
//         }
//       }
//       segmenter_options {
//         output_type: CONFIDENCE_MASK
//         activation: SOFTMAX
//       }
//     }
//   }
// }
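//
// A sketch of the non-deprecated usage, with the grouped confidence masks
// and the category mask connected directly instead of SEGMENTATION (stream
// names are illustrative):
// node {
//   calculator: "mediapipe.tasks.vision.image_segmenter.ImageSegmenterGraph"
//   input_stream: "IMAGE:image"
//   output_stream: "CONFIDENCE_MASKS:confidence_masks"
//   output_stream: "CATEGORY_MASK:category_mask"
//   options {
//     [mediapipe.tasks.vision.image_segmenter.proto.ImageSegmenterGraphOptions.ext]
//     {
//       base_options {
//         model_asset {
//           file_name: "/path/to/model.tflite"
//         }
//       }
//     }
//   }
// }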
class ImageSegmenterGraph : public core::ModelTaskGraph {
 public:
  absl::StatusOr<mediapipe::CalculatorGraphConfig> GetConfig(
      mediapipe::SubgraphContext* sc) override {
    ASSIGN_OR_RETURN(const auto* model_resources,
                     CreateModelResources<ImageSegmenterGraphOptions>(sc));
    Graph graph;
    const auto& options = sc->Options<ImageSegmenterGraphOptions>();
    // TODO: remove deprecated output type support.
    if (!options.segmenter_options().has_output_type()) {
      MP_RETURN_IF_ERROR(SanityCheck(sc));
    }
    ASSIGN_OR_RETURN(
        auto output_streams,
        BuildSegmentationTask(
            options, *model_resources, graph[Input<Image>(kImageTag)],
            graph[Input<NormalizedRect>::Optional(kNormRectTag)], graph));

    // TODO: remove deprecated output type support.
    if (options.segmenter_options().has_output_type()) {
      auto& merge_images_to_vector =
          graph.AddNode("MergeImagesToVectorCalculator");
      for (int i = 0; i < output_streams.segmented_masks->size(); ++i) {
        output_streams.segmented_masks->at(i) >>
            merge_images_to_vector[Input<Image>::Multiple("")][i];
        output_streams.segmented_masks->at(i) >>
            graph[Output<Image>::Multiple(kSegmentationTag)][i];
      }
      merge_images_to_vector.Out("") >>
          graph[Output<std::vector<Image>>(kGroupedSegmentationTag)];
    } else {
      if (output_streams.confidence_masks) {
        auto& merge_images_to_vector =
            graph.AddNode("MergeImagesToVectorCalculator");
        for (int i = 0; i < output_streams.confidence_masks->size(); ++i) {
          output_streams.confidence_masks->at(i) >>
              merge_images_to_vector[Input<Image>::Multiple("")][i];
          output_streams.confidence_masks->at(i) >>
              graph[Output<Image>::Multiple(kConfidenceMaskTag)][i];
        }
        merge_images_to_vector.Out("") >>
            graph[Output<std::vector<Image>>::Optional(kConfidenceMasksTag)];
      }
      if (output_streams.category_mask) {
        *output_streams.category_mask >>
            graph[Output<Image>(kCategoryMaskTag)];
      }
    }
    output_streams.image >> graph[Output<Image>(kImageTag)];
    return graph.GetConfig();
  }

 private:
  absl::Status SanityCheck(mediapipe::SubgraphContext* sc) {
    const auto& node = sc->OriginalNode();
    output_confidence_masks_ = HasOutput(node, kConfidenceMaskTag) ||
                               HasOutput(node, kConfidenceMasksTag);
    output_category_mask_ = HasOutput(node, kCategoryMaskTag);
    if (!output_confidence_masks_ && !output_category_mask_) {
      return absl::InvalidArgumentError(
          "At least one of CONFIDENCE_MASK, CONFIDENCE_MASKS and "
          "CATEGORY_MASK must be connected.");
    }
    return absl::OkStatus();
  }

  // Adds a mediapipe image segmentation task pipeline graph into the provided
  // builder::Graph instance. The segmentation pipeline takes images
  // (mediapipe::Image) as input and returns a segmented image mask as output.
  //
  // task_options: the mediapipe tasks ImageSegmenterGraphOptions proto.
  // model_resources: the ModelResources object initialized from a
  //   segmentation model file with model metadata.
  // image_in: (mediapipe::Image) stream to run segmentation on.
  // graph: the mediapipe builder::Graph instance to be updated.
  absl::StatusOr<ImageSegmenterOutputs> BuildSegmentationTask(
      const ImageSegmenterGraphOptions& task_options,
      const core::ModelResources& model_resources, Source<Image> image_in,
      Source<NormalizedRect> norm_rect_in, Graph& graph) {
    MP_RETURN_IF_ERROR(SanityCheckOptions(task_options));

    // Adds preprocessing calculators and connects them to the graph input
    // image stream.
    bool use_gpu =
        components::processors::DetermineImagePreprocessingGpuBackend(
            task_options.base_options().acceleration());
    ASSIGN_OR_RETURN(auto image_and_tensors,
                     ConvertImageToTensors(image_in, norm_rect_in, use_gpu,
                                           model_resources, graph));
    // Adds the inference subgraph and connects its input stream to the
    // output tensors produced by the ImageToTensorCalculator.
    auto& inference = AddInference(
        model_resources, task_options.base_options().acceleration(), graph);
    image_and_tensors.tensors >> inference.In(kTensorsTag);

    // Adds segmentation calculators for output streams.
    auto& tensor_to_images =
        graph.AddNode("mediapipe.tasks.TensorsToSegmentationCalculator");
    RET_CHECK_OK(ConfigureTensorsToSegmentationCalculator(
        task_options, model_resources,
        &tensor_to_images
             .GetOptions<TensorsToSegmentationCalculatorOptions>()));
    inference.Out(kTensorsTag) >> tensor_to_images.In(kTensorsTag);

    // Adds an image property calculator for the output size.
    auto& image_properties = graph.AddNode("ImagePropertiesCalculator");
    image_in >> image_properties.In("IMAGE");
    image_properties.Out("SIZE") >> tensor_to_images.In(kOutputSizeTag);

    // Exports multiple segmented masks.
    // TODO: remove deprecated output type support.
    if (task_options.segmenter_options().has_output_type()) {
      std::vector<Source<Image>> segmented_masks;
      if (task_options.segmenter_options().output_type() ==
          SegmenterOptions::CATEGORY_MASK) {
        segmented_masks.push_back(
            Source<Image>(tensor_to_images[Output<Image>(kSegmentationTag)]));
      } else {
        ASSIGN_OR_RETURN(const tflite::Tensor* output_tensor,
                         GetOutputTensor(model_resources));
        int segmentation_streams_num = *output_tensor->shape()->rbegin();
        for (int i = 0; i < segmentation_streams_num; ++i) {
          segmented_masks.push_back(Source<Image>(
              tensor_to_images[Output<Image>::Multiple(kSegmentationTag)][i]));
        }
      }
      return ImageSegmenterOutputs{/*segmented_masks=*/segmented_masks,
                                   /*confidence_masks=*/std::nullopt,
                                   /*category_mask=*/std::nullopt,
                                   /*image=*/image_and_tensors.image};
    } else {
      std::optional<std::vector<Source<Image>>> confidence_masks;
      if (output_confidence_masks_) {
        ASSIGN_OR_RETURN(const tflite::Tensor* output_tensor,
                         GetOutputTensor(model_resources));
        int segmentation_streams_num = *output_tensor->shape()->rbegin();
        confidence_masks = std::vector<Source<Image>>();
        confidence_masks->reserve(segmentation_streams_num);
        for (int i = 0; i < segmentation_streams_num; ++i) {
          confidence_masks->push_back(Source<Image>(
              tensor_to_images[Output<Image>::Multiple(kConfidenceMaskTag)]
                              [i]));
        }
      }
      std::optional<Source<Image>> category_mask;
      if (output_category_mask_) {
        category_mask = tensor_to_images[Output<Image>(kCategoryMaskTag)];
      }
      return ImageSegmenterOutputs{/*segmented_masks=*/std::nullopt,
                                   /*confidence_masks=*/confidence_masks,
                                   /*category_mask=*/category_mask,
                                   /*image=*/image_and_tensors.image};
    }
  }

  bool output_confidence_masks_ = false;
  bool output_category_mask_ = false;
};

REGISTER_MEDIAPIPE_GRAPH(
    ::mediapipe::tasks::vision::image_segmenter::ImageSegmenterGraph);

}  // namespace image_segmenter
}  // namespace vision
}  // namespace tasks
}  // namespace mediapipe