Internal change

PiperOrigin-RevId: 516882513
This commit is contained in:
MediaPipe Team 2023-03-15 11:31:28 -07:00 committed by Copybara-Service
parent 18d88c531a
commit a323825134
5 changed files with 250 additions and 84 deletions

View File

@ -15,6 +15,7 @@ limitations under the License.
#include <algorithm>
#include <cstdint>
#include <limits>
#include <memory>
#include <ostream>
#include <string>
@ -79,6 +80,133 @@ void Sigmoid(absl::Span<const float> values,
[](float value) { return 1. / (1 + std::exp(-value)); });
}
std::vector<Image> ProcessForCategoryMaskCpu(const Shape& input_shape,
const Shape& output_shape,
const SegmenterOptions& options,
const float* tensors_buffer) {
cv::Mat resized_tensors_mat;
cv::Mat tensors_mat_view(
input_shape.height, input_shape.width, CV_32FC(input_shape.channels),
reinterpret_cast<void*>(const_cast<float*>(tensors_buffer)));
if (output_shape.height == input_shape.height &&
output_shape.width == input_shape.width) {
resized_tensors_mat = tensors_mat_view;
} else {
// Resize input tensors to output size.
// TOOD(b/273633027) Use an efficient way to find values for category mask
// instead of resizing the whole tensor .
cv::resize(tensors_mat_view, resized_tensors_mat,
{output_shape.width, output_shape.height}, 0, 0,
cv::INTER_LINEAR);
}
// Category mask Image.
ImageFrameSharedPtr image_frame_ptr = std::make_shared<ImageFrame>(
ImageFormat::GRAY8, output_shape.width, output_shape.height, 1);
Image category_mask(image_frame_ptr);
// Fill in the maximum category in the category mask image.
cv::Mat category_mask_mat_view =
mediapipe::formats::MatView(image_frame_ptr.get());
const int input_channels = input_shape.channels;
category_mask_mat_view.forEach<uint8_t>(
[&resized_tensors_mat, &input_channels, &options](uint8_t& pixel,
const int position[]) {
float* tensors_buffer =
resized_tensors_mat.ptr<float>(position[0], position[1]);
absl::Span<float> confidence_scores(tensors_buffer, input_channels);
// Only process the activation function if it is SIGMOID. If NONE,
// we do nothing for activation, If SOFTMAX, it is required
// to have input_channels > 1, and for input_channels > 1, we don't need
// activation to find the maximum value.
if (options.activation() == SegmenterOptions::SIGMOID) {
Sigmoid(confidence_scores, confidence_scores);
}
if (input_channels == 1) {
// if the input tensor is a single mask, it is assumed to be a binary
// foreground segmentation mask. For such a mask, we make foreground
// category 1, and background category 0.
pixel = static_cast<uint8_t>(*tensors_buffer > 0.5f);
} else {
const int maximum_category_idx =
std::max_element(confidence_scores.begin(),
confidence_scores.end()) -
confidence_scores.begin();
pixel = maximum_category_idx;
}
});
return {category_mask};
}
std::vector<Image> ProcessForConfidenceMaskCpu(const Shape& input_shape,
const Shape& output_shape,
const SegmenterOptions& options,
const float* tensors_buffer) {
std::function<void(absl::Span<const float> values,
absl::Span<float> activated_values)>
activation_fn;
switch (options.activation()) {
case SegmenterOptions::SIGMOID:
activation_fn = &Sigmoid;
break;
case SegmenterOptions::SOFTMAX:
activation_fn = &StableSoftmax;
break;
case SegmenterOptions::NONE:
// Just copying for NONE activation.
activation_fn = [](absl::Span<const float> values,
absl::Span<float> activated_values) {
std::copy(values.begin(), values.end(), activated_values.begin());
};
break;
}
// TODO Use libyuv for resizing instead.
std::vector<Image> confidence_masks;
std::vector<cv::Mat> confidence_mask_mats;
confidence_masks.reserve(input_shape.channels);
confidence_mask_mats.reserve(input_shape.channels);
for (int i = 0; i < input_shape.channels; ++i) {
confidence_masks.push_back(Image(std::make_shared<ImageFrame>(
ImageFormat::VEC32F1, input_shape.width, input_shape.height, 1)));
confidence_mask_mats.push_back(mediapipe::formats::MatView(
confidence_masks.back().GetImageFrameSharedPtr().get()));
}
// Applies activation function.
const int tensor_size = input_shape.height * input_shape.width;
std::vector<float> activated_values(input_shape.channels);
absl::Span<float> activated_values_span(activated_values);
for (int i = 0; i < tensor_size; ++i) {
activation_fn(absl::MakeConstSpan(&tensors_buffer[i * input_shape.channels],
input_shape.channels),
activated_values_span);
for (int j = 0; j < input_shape.channels; ++j) {
confidence_mask_mats[j].at<float>(
i / input_shape.width, i % input_shape.width) = activated_values[j];
}
}
if (output_shape.height == input_shape.height &&
output_shape.width == input_shape.width) {
return confidence_masks;
}
std::vector<Image> resized_confidence_masks;
resized_confidence_masks.reserve(confidence_mask_mats.size());
// Resizes segmented masks to required output size.
for (int i = 0; i < confidence_mask_mats.size(); i++) {
// Pre-allocates ImageFrame memory to avoid copying from cv::Mat
// afterward.
ImageFrameSharedPtr image_frame_ptr = std::make_shared<ImageFrame>(
ImageFormat::VEC32F1, output_shape.width, output_shape.height, 1);
cv::Mat resized_mask_mat_view =
mediapipe::formats::MatView(image_frame_ptr.get());
cv::resize(confidence_mask_mats[i], resized_mask_mat_view,
resized_mask_mat_view.size(), 0, 0, cv::INTER_LINEAR);
resized_confidence_masks.push_back(Image(image_frame_ptr));
}
return resized_confidence_masks;
}
} // namespace
// Converts Tensors from a vector of Tensor to Segmentation.
@ -222,81 +350,16 @@ absl::Status TensorsToSegmentationCalculator::Process(
// Converts the raw float tensor in `tensors_buffer` into segmentation mask
// Images on CPU. The buffer is indexed as height * width pixels with
// input_shape.channels consecutive floats per pixel.
//
// When segmenter_options().output_type() is CATEGORY_MASK the work is
// delegated to ProcessForCategoryMaskCpu; otherwise it is delegated to
// ProcessForConfidenceMaskCpu.
//
// NOTE(review): this hunk (-222,81 +350,16) is rendered without +/- markers,
// so lines of the previous inline implementation appear interleaved with the
// new dispatching code below. Presumably only the two `return Process...`
// branches form the post-change body -- confirm against the real file.
std::vector<Image> TensorsToSegmentationCalculator::GetSegmentationResultCpu(
const Shape& input_shape, const Shape& output_shape,
const float* tensors_buffer) {
std::function<void(absl::Span<const float> values,
absl::Span<float> activated_values)>
activation_fn;
switch (options_.segmenter_options().activation()) {
case SegmenterOptions::SIGMOID:
activation_fn = &Sigmoid;
break;
case SegmenterOptions::SOFTMAX:
activation_fn = &StableSoftmax;
break;
case SegmenterOptions::NONE:
// Just copying for NONE activation.
activation_fn = [](absl::Span<const float> values,
absl::Span<float> activated_values) {
std::copy(values.begin(), values.end(), activated_values.begin());
};
break;
}
const bool is_category_mask = options_.segmenter_options().output_type() ==
SegmenterOptions::CATEGORY_MASK;
const int cv_mat_type = is_category_mask ? CV_8UC1 : CV_32FC1;
const int output_masks_num = output_shape.channels;
// TODO Use libyuv for resizing instead.
std::vector<cv::Mat> segmented_mask_mats;
segmented_mask_mats.reserve(output_masks_num);
for (int i = 0; i < output_masks_num; ++i) {
segmented_mask_mats.push_back(
cv::Mat(input_shape.height, input_shape.width, cv_mat_type));
}
// Applies activation function.
const int tensor_size = input_shape.height * input_shape.width;
if (is_category_mask) {
for (int i = 0; i < tensor_size; ++i) {
absl::Span<const float> confidence_scores(
&tensors_buffer[i * input_shape.channels], input_shape.channels);
const int maximum_category_idx =
std::max_element(confidence_scores.begin(), confidence_scores.end()) -
confidence_scores.begin();
segmented_mask_mats[0].at<uint8_t>(
i / input_shape.width, i % input_shape.width) = maximum_category_idx;
}
if (options_.segmenter_options().output_type() ==
SegmenterOptions::CATEGORY_MASK) {
return ProcessForCategoryMaskCpu(input_shape, output_shape,
options_.segmenter_options(),
tensors_buffer);
} else {
std::vector<float> activated_values(input_shape.channels);
absl::Span<float> activated_values_span(activated_values);
for (int i = 0; i < tensor_size; ++i) {
activation_fn(
absl::MakeConstSpan(&tensors_buffer[i * input_shape.channels],
input_shape.channels),
activated_values_span);
for (int j = 0; j < input_shape.channels; ++j) {
segmented_mask_mats[j].at<float>(
i / input_shape.width, i % input_shape.width) = activated_values[j];
}
}
return ProcessForConfidenceMaskCpu(input_shape, output_shape,
options_.segmenter_options(),
tensors_buffer);
}
std::vector<Image> segmented_masks;
segmented_masks.reserve(output_masks_num);
// Resizes segmented masks to required output size.
for (int i = 0; i < segmented_mask_mats.size(); i++) {
// Pre-allocates ImageFrame memory to avoid copying from cv::Mat afterward.
ImageFrameSharedPtr image_frame_ptr = std::make_shared<ImageFrame>(
is_category_mask ? ImageFormat::GRAY8 : ImageFormat::VEC32F1,
output_shape.width, output_shape.height, 1);
cv::Mat resized_mask_mat_view =
mediapipe::formats::MatView(image_frame_ptr.get());
cv::resize(segmented_mask_mats[i], resized_mask_mat_view,
resized_mask_mat_view.size(), 0, 0,
cv_mat_type == CV_8UC1 ? cv::INTER_NEAREST : cv::INTER_LINEAR);
segmented_masks.push_back(Image(image_frame_ptr));
}
return segmented_masks;
}
MEDIAPIPE_REGISTER_NODE(::mediapipe::tasks::TensorsToSegmentationCalculator);

View File

@ -401,7 +401,7 @@ class ImageSegmenterGraph : public core::ModelTaskGraph {
} else {
ASSIGN_OR_RETURN(const tflite::Tensor* output_tensor,
GetOutputTensor(model_resources));
const int segmentation_streams_num = *output_tensor->shape()->rbegin();
int segmentation_streams_num = *output_tensor->shape()->rbegin();
for (int i = 0; i < segmentation_streams_num; ++i) {
segmented_masks.push_back(Source<Image>(
tensor_to_images[Output<Image>::Multiple(kSegmentationTag)][i]));

View File

@ -62,6 +62,11 @@ constexpr char kSelfie128x128WithMetadata[] = "selfie_segm_128_128_3.tflite";
constexpr char kSelfie144x256WithMetadata[] = "selfie_segm_144_256_3.tflite";
constexpr char kSelfieSegmentation[] = "selfie_segmentation.tflite";
constexpr char kSelfieSegmentationLandscape[] =
"selfie_segmentation_landscape.tflite";
constexpr char kHairSegmentationWithMetadata[] = "hair_segmentation.tflite";
constexpr float kGoldenMaskSimilarity = 0.98;
@ -90,13 +95,8 @@ cv::Mat PostProcessResultMask(const cv::Mat& mask) {
}
Image GetSRGBImage(const std::string& image_path) {
// TODO: fix test so RGB really is used and not BGR/BGRA.
// mediapipe/app/aimatter/segmentation/segmenter_test_common.cc
// golden masks are generated with BGR image. To align with the unittest of
// aimatter segmenter, here reads image as BGR as well (opencv reads image as
// BGR). Once the correctness of mediapipe tasks segmenter is verified, change
// the golden masks to be generated by RGB image.
cv::Mat image_mat = cv::imread(image_path);
cv::cvtColor(image_mat, image_mat, cv::COLOR_BGR2RGB);
mediapipe::ImageFrame image_frame(
mediapipe::ImageFormat::SRGB, image_mat.cols, image_mat.rows,
image_mat.step, image_mat.data, [image_mat](uint8_t[]) {});
@ -435,6 +435,85 @@ TEST_F(ImageModeTest, SucceedsSelfie144x256Segmentations) {
SimilarToFloatMask(expected_mask_float, kGoldenMaskSimilarity));
}
// Runs the selfie segmentation model on the portrait image with
// CONFIDENCE_MASK output and no activation, and checks that the single
// returned float mask is similar to the golden confidence mask.
TEST_F(ImageModeTest, SucceedsPortraitSelfieSegmentationConfidenceMask) {
  Image image =
      GetSRGBImage(JoinPath("./", kTestDataDirectory, "portrait.jpg"));
  auto options = std::make_unique<ImageSegmenterOptions>();
  options->base_options.model_asset_path =
      JoinPath("./", kTestDataDirectory, kSelfieSegmentation);
  options->output_type = ImageSegmenterOptions::OutputType::CONFIDENCE_MASK;
  options->activation = ImageSegmenterOptions::Activation::NONE;

  MP_ASSERT_OK_AND_ASSIGN(std::unique_ptr<ImageSegmenter> segmenter,
                          ImageSegmenter::Create(std::move(options)));
  MP_ASSERT_OK_AND_ASSIGN(auto confidence_masks, segmenter->Segment(image));
  // ASSERT rather than EXPECT: the first mask is indexed below, which would be
  // undefined behavior if the segmenter returned no masks.
  ASSERT_EQ(confidence_masks.size(), 1);
  MP_ASSERT_OK(segmenter->Close());

  // The golden mask is stored as an 8-bit grayscale JPEG; rescale it to
  // [0, 1] floats so it is comparable with the float confidence mask.
  cv::Mat expected_mask = cv::imread(
      JoinPath("./", kTestDataDirectory,
               "portrait_selfie_segmentation_expected_confidence_mask.jpg"),
      cv::IMREAD_GRAYSCALE);
  cv::Mat expected_mask_float;
  expected_mask.convertTo(expected_mask_float, CV_32FC1, 1 / 255.f);

  cv::Mat selfie_mask = mediapipe::formats::MatView(
      confidence_masks[0].GetImageFrameSharedPtr().get());
  EXPECT_THAT(selfie_mask,
              SimilarToFloatMask(expected_mask_float, kGoldenMaskSimilarity));
}
// Runs the selfie segmentation model on the portrait image with CATEGORY_MASK
// output and no activation, and checks that the single returned 8-bit mask is
// similar to the golden category mask.
TEST_F(ImageModeTest, SucceedsPortraitSelfieSegmentationCategoryMask) {
  Image image =
      GetSRGBImage(JoinPath("./", kTestDataDirectory, "portrait.jpg"));
  auto options = std::make_unique<ImageSegmenterOptions>();
  options->base_options.model_asset_path =
      JoinPath("./", kTestDataDirectory, kSelfieSegmentation);
  options->output_type = ImageSegmenterOptions::OutputType::CATEGORY_MASK;
  options->activation = ImageSegmenterOptions::Activation::NONE;

  MP_ASSERT_OK_AND_ASSIGN(std::unique_ptr<ImageSegmenter> segmenter,
                          ImageSegmenter::Create(std::move(options)));
  MP_ASSERT_OK_AND_ASSIGN(auto category_mask, segmenter->Segment(image));
  // ASSERT rather than EXPECT: the first mask is indexed below, which would be
  // undefined behavior if the segmenter returned no masks.
  ASSERT_EQ(category_mask.size(), 1);
  MP_ASSERT_OK(segmenter->Close());

  cv::Mat selfie_mask = mediapipe::formats::MatView(
      category_mask[0].GetImageFrameSharedPtr().get());
  cv::Mat expected_mask = cv::imread(
      JoinPath("./", kTestDataDirectory,
               "portrait_selfie_segmentation_expected_category_mask.jpg"),
      cv::IMREAD_GRAYSCALE);
  EXPECT_THAT(selfie_mask,
              SimilarToUint8Mask(expected_mask, kGoldenMaskSimilarity, 255));
}
// Runs the landscape selfie segmentation model on the portrait image with
// CATEGORY_MASK output and no activation, and checks that the single returned
// 8-bit mask is similar to the golden category mask for that model.
TEST_F(ImageModeTest, SucceedsPortraitSelfieSegmentationLandscapeCategoryMask) {
  Image image =
      GetSRGBImage(JoinPath("./", kTestDataDirectory, "portrait.jpg"));
  auto options = std::make_unique<ImageSegmenterOptions>();
  options->base_options.model_asset_path =
      JoinPath("./", kTestDataDirectory, kSelfieSegmentationLandscape);
  options->output_type = ImageSegmenterOptions::OutputType::CATEGORY_MASK;
  options->activation = ImageSegmenterOptions::Activation::NONE;

  MP_ASSERT_OK_AND_ASSIGN(std::unique_ptr<ImageSegmenter> segmenter,
                          ImageSegmenter::Create(std::move(options)));
  MP_ASSERT_OK_AND_ASSIGN(auto category_mask, segmenter->Segment(image));
  // ASSERT rather than EXPECT: the first mask is indexed below, which would be
  // undefined behavior if the segmenter returned no masks.
  ASSERT_EQ(category_mask.size(), 1);
  MP_ASSERT_OK(segmenter->Close());

  cv::Mat selfie_mask = mediapipe::formats::MatView(
      category_mask[0].GetImageFrameSharedPtr().get());
  cv::Mat expected_mask = cv::imread(
      JoinPath(
          "./", kTestDataDirectory,
          "portrait_selfie_segmentation_landscape_expected_category_mask.jpg"),
      cv::IMREAD_GRAYSCALE);
  EXPECT_THAT(selfie_mask,
              SimilarToUint8Mask(expected_mask, kGoldenMaskSimilarity, 255));
}
TEST_F(ImageModeTest, SucceedsHairSegmentation) {
Image image =
GetSRGBAImage(JoinPath("./", kTestDataDirectory, "portrait.jpg"));

View File

@ -70,6 +70,9 @@ mediapipe_files(srcs = [
"portrait.jpg",
"portrait_hair_expected_mask.jpg",
"portrait_rotated.jpg",
"portrait_selfie_segmentation_expected_category_mask.jpg",
"portrait_selfie_segmentation_expected_confidence_mask.jpg",
"portrait_selfie_segmentation_landscape_expected_category_mask.jpg",
"pose.jpg",
"pose_detection.tflite",
"right_hands.jpg",
@ -129,6 +132,9 @@ filegroup(
"portrait.jpg",
"portrait_hair_expected_mask.jpg",
"portrait_rotated.jpg",
"portrait_selfie_segmentation_expected_category_mask.jpg",
"portrait_selfie_segmentation_expected_confidence_mask.jpg",
"portrait_selfie_segmentation_landscape_expected_category_mask.jpg",
"pose.jpg",
"right_hands.jpg",
"right_hands_rotated.jpg",

View File

@ -886,6 +886,24 @@ def external_files():
urls = ["https://storage.googleapis.com/mediapipe-assets/portrait_rotated.jpg?generation=1677194680138164"],
)
http_file(
name = "com_google_mediapipe_portrait_selfie_segmentation_expected_category_mask_jpg",
sha256 = "d8f20fa746e14067f668dd293f21bbc50ec81196d186386a6ded1278c3ec8f46",
urls = ["https://storage.googleapis.com/mediapipe-assets/portrait_selfie_segmentation_expected_category_mask.jpg?generation=1678606935088873"],
)
http_file(
name = "com_google_mediapipe_portrait_selfie_segmentation_expected_confidence_mask_jpg",
sha256 = "25b723e90608edaf6ed92f382da703dc904a59c87525b6d271e60d9eed7a90e9",
urls = ["https://storage.googleapis.com/mediapipe-assets/portrait_selfie_segmentation_expected_confidence_mask.jpg?generation=1678606937358235"],
)
http_file(
name = "com_google_mediapipe_portrait_selfie_segmentation_landscape_expected_category_mask_jpg",
sha256 = "f5c3fa3d93f8e7289b69b8a89c2519276dfa5014dcc50ed6e86e8cd4d4ae7f27",
urls = ["https://storage.googleapis.com/mediapipe-assets/portrait_selfie_segmentation_landscape_expected_category_mask.jpg?generation=1678606939469429"],
)
http_file(
name = "com_google_mediapipe_pose_detection_tflite",
sha256 = "9ba9dd3d42efaaba86b4ff0122b06f29c4122e756b329d89dca1e297fd8f866c",
@ -1014,8 +1032,8 @@ def external_files():
http_file(
name = "com_google_mediapipe_selfie_segm_128_128_3_expected_mask_jpg",
sha256 = "a295f3ab394a5e0caff2db5041337da58341ec331f1413ef91f56e0d650b4a1e",
urls = ["https://storage.googleapis.com/mediapipe-assets/selfie_segm_128_128_3_expected_mask.jpg?generation=1661875916766416"],
sha256 = "1a2a068287d8bcd4184492485b3dbb95a09b763f4653fd729d14a836147eb383",
urls = ["https://storage.googleapis.com/mediapipe-assets/selfie_segm_128_128_3_expected_mask.jpg?generation=1678606942616777"],
)
http_file(
@ -1026,8 +1044,8 @@ def external_files():
http_file(
name = "com_google_mediapipe_selfie_segm_144_256_3_expected_mask_jpg",
sha256 = "cfc699db9670585c04414d0d1a07b289a027ba99d6903d2219f897d34e2c9952",
urls = ["https://storage.googleapis.com/mediapipe-assets/selfie_segm_144_256_3_expected_mask.jpg?generation=1661875922646736"],
sha256 = "2de433b6e8adabec2aaf80135232db900903ead4f2811c0c9378a6792b2a68b5",
urls = ["https://storage.googleapis.com/mediapipe-assets/selfie_segm_144_256_3_expected_mask.jpg?generation=1678606945085676"],
)
http_file(