face style pipeline

mslight 2022-06-28 12:45:47 +04:00
parent 33ed0f7c23
commit fcfe31a67b
9 changed files with 1104 additions and 655 deletions

View File

@ -0,0 +1,50 @@
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library")
licenses(["notice"])
package(default_visibility = ["//visibility:public"])
cc_library(
name = "fast_utils_calculator",
srcs = ["fast_utils_calculator.cc"],
visibility = ["//visibility:public"],
deps = [
"//mediapipe/framework:calculator_options_cc_proto",
"//mediapipe/framework/formats:image_format_cc_proto",
"//mediapipe/util:color_cc_proto",
"@com_google_absl//absl/strings",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/formats:image_frame",
"//mediapipe/framework/formats:image_frame_opencv",
"//mediapipe/framework/formats:video_stream_header",
"//mediapipe/framework/port:logging",
"//mediapipe/framework/port:opencv_core",
"//mediapipe/framework/port:opencv_imgproc",
"//mediapipe/framework/port:opencv_highgui",
"//mediapipe/framework/port:status",
"//mediapipe/framework/port:vector",
"//mediapipe/util:annotation_renderer",
"//mediapipe/util:render_data_cc_proto",
],
alwayslink = 1,
)

View File

@ -0,0 +1,399 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <math.h>
#include <algorithm>
#include <cmath>
#include <map>
#include <memory>
#include <string>
#include <tuple>
#include <unordered_map>
#include <utility>
#include <vector>
//#include <android/log.h>
#include "absl/strings/str_cat.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_options.pb.h"
#include "mediapipe/framework/formats/image_format.pb.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/image_frame_opencv.h"
#include "mediapipe/framework/formats/video_stream_header.h"
#include "mediapipe/framework/port/logging.h"
#include "mediapipe/framework/port/opencv_core_inc.h"
#include "mediapipe/framework/port/opencv_imgproc_inc.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/util/annotation_renderer.h"
#include "mediapipe/util/render_data.pb.h"
#include "mediapipe/framework/port/logging.h"
#include "mediapipe/framework/port/vector.h"
#include "mediapipe/util/color.pb.h"
namespace mediapipe
{
namespace
{
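// Reference face landmarks (eye centers, nose tip, mouth corners) of the
// 1024x1024 FFHQ alignment template, normalized to the [0, 1] range.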
static const std::vector<cv::Point2f> FFHQ_NORM_LM = {
{638.68525475f / 1024.f, 486.24604922f / 1024.f},
{389.31496114f / 1024.f, 485.8921848f / 1024.f},
{513.67979275f / 1024.f, 620.8915371f / 1024.f},
{405.50932642f / 1024.f, 756.52797927f / 1024.f},
{622.55630397f / 1024.f, 756.15509499f / 1024.f}};
constexpr char kImageFrameTag[] = "IMAGE";
constexpr char kVectorTag[] = "VECTOR";
std::tuple<int, int> _normalized_to_pixel_coordinates(float normalized_x,
float normalized_y, int image_width, int image_height)
{
// Converts normalized value pair to pixel coordinates
int x_px = std::min<int>(floor(normalized_x * image_width), image_width - 1);
int y_px = std::min<int>(floor(normalized_y * image_height), image_height - 1);
return {x_px, y_px};
};
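// Landmark index pairs tracing the face oval contour in the MediaPipe Face
// Mesh topology.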
static const std::vector<std::pair<int, int>> FACEMESH_FACE_OVAL =
{{10, 338}, {338, 297}, {297, 332}, {332, 284}, {284, 251}, {251, 389}, {389, 356}, {356, 454}, {454, 323}, {323, 361}, {361, 288}, {288, 397}, {397, 365}, {365, 379}, {379, 378}, {378, 400}, {400, 377}, {377, 152}, {152, 148}, {148, 176}, {176, 149}, {149, 150}, {150, 136}, {136, 172}, {172, 58}, {58, 132}, {132, 93}, {93, 234}, {234, 127}, {127, 162}, {162, 21}, {21, 54}, {54, 103}, {103, 67}, {67, 109}, {109, 10}};
enum
{
ATTRIB_VERTEX,
ATTRIB_TEXTURE_POSITION,
NUM_ATTRIBUTES
};
// Round up n to next multiple of m.
size_t RoundUp(size_t n, size_t m) { return ((n + m - 1) / m) * m; } // NOLINT
inline bool HasImageTag(mediapipe::CalculatorContext *cc) { return false; }
using Point = RenderAnnotation::Point;
bool NormalizedtoPixelCoordinates(double normalized_x, double normalized_y,
int image_width, int image_height, int *x_px,
int *y_px)
{
CHECK(x_px != nullptr);
CHECK(y_px != nullptr);
CHECK_GT(image_width, 0);
CHECK_GT(image_height, 0);
if (normalized_x < 0 || normalized_x > 1.0 || normalized_y < 0 ||
normalized_y > 1.0)
{
VLOG(1) << "Normalized coordinates must be between 0.0 and 1.0";
}
*x_px = static_cast<int32>(round(normalized_x * image_width));
*y_px = static_cast<int32>(round(normalized_y * image_height));
return true;
}
} // namespace
class FastUtilsCalculator : public CalculatorBase
{
public:
FastUtilsCalculator() = default;
~FastUtilsCalculator() override = default;
static absl::Status GetContract(CalculatorContract *cc);
// From Calculator.
absl::Status Open(CalculatorContext *cc) override;
absl::Status Process(CalculatorContext *cc) override;
absl::Status Close(CalculatorContext *cc) override;
private:
absl::Status CreateRenderTargetCpu(CalculatorContext *cc,
std::unique_ptr<cv::Mat> &image_mat,
ImageFormat::Format *target_format);
absl::Status RenderToCpu(
CalculatorContext *cc, const ImageFormat::Format &target_format,
uchar *data_image, std::unique_ptr<cv::Mat> &image_mat);
absl::Status Call(CalculatorContext *cc,
std::unique_ptr<cv::Mat> &image_mat,
ImageFormat::Format *target_format,
const RenderData &render_data,
std::unordered_map<std::string, cv::Mat> &all_masks);
// Indicates if image frame is available as input.
bool image_frame_available_ = false;
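// Face Mesh landmark indices grouped by facial region; Call() averages each
// group into a single key point.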
std::unordered_map<std::string, const std::vector<int>> index_dict = {
{"leftEye", {384, 385, 386, 387, 388, 390, 263, 362, 398, 466, 373, 374, 249, 380, 381, 382}},
{"rightEye", {160, 33, 161, 163, 133, 7, 173, 144, 145, 246, 153, 154, 155, 157, 158, 159}},
{"nose", {4}},
{"lips", {0, 13, 14, 17, 84}},
{"leftLips", {61, 146}},
{"rightLips", {291, 375}},
};
int width_ = 0;
int height_ = 0;
int width_canvas_ = 0; // Size of overlay drawing texture canvas.
int height_canvas_ = 0;
int max_num_faces = 1;
bool refine_landmarks = true;
double min_detection_confidence = 0.5;
double min_tracking_confidence = 0.5;
};
REGISTER_CALCULATOR(FastUtilsCalculator);
absl::Status FastUtilsCalculator::GetContract(CalculatorContract *cc)
{
CHECK_GE(cc->Inputs().NumEntries(), 1);
if (cc->Inputs().HasTag(kImageFrameTag))
{
cc->Inputs().Tag(kImageFrameTag).Set<ImageFrame>();
CHECK(cc->Outputs().HasTag(kImageFrameTag));
}
if (cc->Outputs().HasTag(kImageFrameTag))
{
cc->Outputs().Tag(kImageFrameTag).Set<ImageFrame>();
}
return absl::OkStatus();
}
absl::Status FastUtilsCalculator::Open(CalculatorContext *cc)
{
cc->SetOffset(TimestampDiff(0));
if (cc->Inputs().HasTag(kImageFrameTag) || HasImageTag(cc))
{
image_frame_available_ = true;
}
else
{
}
// Set the output header based on the input header (if present).
const char *tag = kImageFrameTag;
if (image_frame_available_ && !cc->Inputs().Tag(tag).Header().IsEmpty())
{
const auto &input_header =
cc->Inputs().Tag(tag).Header().Get<VideoHeader>();
auto *output_video_header = new VideoHeader(input_header);
cc->Outputs().Tag(tag).SetHeader(Adopt(output_video_header));
}
return absl::OkStatus();
}
absl::Status FastUtilsCalculator::Process(CalculatorContext *cc)
{
if (cc->Inputs().HasTag(kImageFrameTag) &&
cc->Inputs().Tag(kImageFrameTag).IsEmpty())
{
return absl::OkStatus();
}
// Initialize render target, drawn with OpenCV.
std::unique_ptr<cv::Mat> image_mat;
ImageFormat::Format target_format;
std::unordered_map<std::string, cv::Mat> all_masks;
if (cc->Outputs().HasTag(kImageFrameTag))
{
MP_RETURN_IF_ERROR(CreateRenderTargetCpu(cc, image_mat, &target_format));
}
// Render streams onto render target.
for (CollectionItemId id = cc->Inputs().BeginId(); id < cc->Inputs().EndId();
++id)
{
auto tag_and_index = cc->Inputs().TagAndIndexFromId(id);
std::string tag = tag_and_index.first;
if (!tag.empty() && tag != kVectorTag)
{
continue;
}
if (cc->Inputs().Get(id).IsEmpty())
{
continue;
}
if (tag.empty())
{
// Empty tag defaults to accepting a single object of RenderData type.
const RenderData &render_data = cc->Inputs().Get(id).Get<RenderData>();
MP_RETURN_IF_ERROR(Call(cc, image_mat, &target_format, render_data, all_masks));
}
else
{
RET_CHECK_EQ(kVectorTag, tag);
const std::vector<RenderData> &render_data_vec =
cc->Inputs().Get(id).Get<std::vector<RenderData>>();
for (const RenderData &render_data : render_data_vec)
{
MP_RETURN_IF_ERROR(Call(cc, image_mat, &target_format, render_data, all_masks));
}
}
}
// Copy the rendered image to output.
uchar *image_mat_ptr = image_mat->data;
MP_RETURN_IF_ERROR(RenderToCpu(cc, target_format, image_mat_ptr, image_mat));
return absl::OkStatus();
}
absl::Status FastUtilsCalculator::Close(CalculatorContext *cc)
{
return absl::OkStatus();
}
absl::Status FastUtilsCalculator::RenderToCpu(
CalculatorContext *cc, const ImageFormat::Format &target_format,
uchar *data_image, std::unique_ptr<cv::Mat> &image_mat)
{
cv::Mat mat_image_ = *image_mat.get();
auto output_frame = absl::make_unique<ImageFrame>(
target_format, mat_image_.cols, mat_image_.rows);
output_frame->CopyPixelData(target_format, mat_image_.cols, mat_image_.rows, data_image,
ImageFrame::kDefaultAlignmentBoundary);
if (cc->Outputs().HasTag(kImageFrameTag))
{
cc->Outputs()
.Tag(kImageFrameTag)
.Add(output_frame.release(), cc->InputTimestamp());
}
return absl::OkStatus();
}
absl::Status FastUtilsCalculator::CreateRenderTargetCpu(
CalculatorContext *cc, std::unique_ptr<cv::Mat> &image_mat,
ImageFormat::Format *target_format)
{
if (image_frame_available_)
{
const auto &input_frame =
cc->Inputs().Tag(kImageFrameTag).Get<ImageFrame>();
int target_mat_type;
switch (input_frame.Format())
{
case ImageFormat::SRGBA:
*target_format = ImageFormat::SRGBA;
target_mat_type = CV_8UC4;
break;
case ImageFormat::SRGB:
*target_format = ImageFormat::SRGB;
target_mat_type = CV_8UC3;
break;
case ImageFormat::GRAY8:
*target_format = ImageFormat::SRGB;
target_mat_type = CV_8UC3;
break;
default:
return absl::UnknownError("Unexpected image frame format.");
break;
}
image_mat = absl::make_unique<cv::Mat>(
input_frame.Height(), input_frame.Width(), target_mat_type);
auto input_mat = formats::MatView(&input_frame);
if (input_frame.Format() == ImageFormat::GRAY8)
{
cv::Mat rgb_mat;
cv::cvtColor(input_mat, rgb_mat, CV_GRAY2RGB);
rgb_mat.copyTo(*image_mat);
}
else
{
input_mat.copyTo(*image_mat);
}
}
else
{
image_mat = absl::make_unique<cv::Mat>(
150, 150, CV_8UC4,
cv::Scalar(255, 255,
255));
*target_format = ImageFormat::SRGBA;
}
return absl::OkStatus();
}
absl::Status FastUtilsCalculator::Call(CalculatorContext *cc,
std::unique_ptr<cv::Mat> &image_mat,
ImageFormat::Format *target_format,
const RenderData &render_data,
std::unordered_map<std::string, cv::Mat> &all_masks)
{
cv::Mat mat_image_ = *image_mat.get();
int image_width_ = image_mat->cols;
int image_height_ = image_mat->rows;
cv::Mat mask;
std::vector<cv::Point> kps, landmarks;
std::vector<std::vector<cv::Point>> lms_out;
int c = 0;
for (const auto &[key, value] : index_dict)
{
for (auto order : value)
{
c = 0;
for (auto &annotation : render_data.render_annotations())
{
if (annotation.data_case() == RenderAnnotation::kPoint)
{
if (order == c)
{
const auto &point = annotation.point();
int x = -1;
int y = -1;
CHECK(NormalizedtoPixelCoordinates(point.x(), point.y(), image_width_,
image_height_, &x, &y));
kps.push_back(cv::Point(x, y));
}
c += 1;
}
}
}
double sumx = 0, sumy = 0, meanx, meany;
for (auto p : kps)
{
sumx += p.x;
sumy += p.y;
}
meanx = sumx / kps.size();
meany = sumy / kps.size();
landmarks.push_back(cv::Point(cvRound(meanx), cvRound(meany)));
kps.clear();
}
lms_out.push_back(landmarks);
return absl::OkStatus();
}
} // namespace mediapipe
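A minimal sketch of how this calculator might be wired into a graph, assuming GetContract() is extended to declare the RenderData inputs; the stream names and the upstream landmarks-to-render-data node are placeholders, not part of this commit:

node {
  calculator: "FastUtilsCalculator"
  # Image to derive the render target and masks from.
  input_stream: "IMAGE:throttled_input_video"
  # std::vector<RenderData> carrying the face landmark points.
  input_stream: "VECTOR:face_render_data"
  output_stream: "IMAGE:output_video"
}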

View File

@ -13,6 +13,8 @@
// limitations under the License.
#include <vector>
#include <chrono>
#include <iostream>
#include "absl/strings/str_format.h"
#include "absl/types/span.h"
@ -54,16 +56,27 @@
#include "mediapipe/gpu/MPPMetalUtil.h"
#endif // MEDIAPIPE_METAL_ENABLED
namespace {
namespace
{
constexpr int kWorkgroupSize = 8; // Block size for GPU shader.
enum { ATTRIB_VERTEX, ATTRIB_TEXTURE_POSITION, NUM_ATTRIBUTES };
enum
{
ATTRIB_VERTEX,
ATTRIB_TEXTURE_POSITION,
NUM_ATTRIBUTES
};
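// Wall-clock timestamps: begin is set in Open() and end in Close() to log how
// long the calculator ran.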
std::chrono::steady_clock::time_point begin;
std::chrono::steady_clock::time_point end;
// Commonly used to compute the number of blocks to launch in a kernel.
int NumGroups(const int size, const int group_size) { // NOLINT
int NumGroups(const int size, const int group_size)
{ // NOLINT
return (size + group_size - 1) / group_size;
}
bool CanUseGpu() {
bool CanUseGpu()
{
#if !MEDIAPIPE_DISABLE_GPU || MEDIAPIPE_METAL_ENABLED
// TODO: Configure GPU usage policy in individual calculators.
constexpr bool kAllowGpuProcessing = true;
@ -78,20 +91,27 @@ constexpr char kOutputSizeTag[] = "OUTPUT_SIZE";
constexpr char kMaskTag[] = "MASK";
absl::StatusOr<std::tuple<int, int, int>> GetHwcFromDims(
const std::vector<int>& dims) {
if (dims.size() == 3) {
const std::vector<int> &dims)
{
if (dims.size() == 3)
{
return std::make_tuple(dims[0], dims[1], dims[2]);
} else if (dims.size() == 4) {
}
else if (dims.size() == 4)
{
// BHWC format check B == 1
RET_CHECK_EQ(1, dims[0]) << "Expected batch to be 1 for BHWC heatmap";
return std::make_tuple(dims[1], dims[2], dims[3]);
} else {
}
else
{
RET_CHECK(false) << "Invalid shape for segmentation tensor " << dims.size();
}
}
} // namespace
namespace mediapipe {
namespace mediapipe
{
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
using ::tflite::gpu::gl::GlProgram;
@ -140,7 +160,8 @@ using ::tflite::gpu::gl::GlShader;
//
// TODO Refactor and add support for other backends/platforms.
//
class TensorsToSegmentationCalculator : public CalculatorBase {
class TensorsToSegmentationCalculator : public CalculatorBase
{
public:
static absl::Status GetContract(CalculatorContract *cc);
@ -155,7 +176,8 @@ class TensorsToSegmentationCalculator : public CalculatorBase {
absl::Status ProcessCpu(CalculatorContext *cc);
void GlRender();
bool DoesGpuTextureStartAtBottom() {
bool DoesGpuTextureStartAtBottom()
{
return options_.gpu_origin() != mediapipe::GpuOrigin_Mode_TOP_LEFT;
}
@ -182,20 +204,23 @@ REGISTER_CALCULATOR(TensorsToSegmentationCalculator);
// static
absl::Status TensorsToSegmentationCalculator::GetContract(
CalculatorContract* cc) {
CalculatorContract *cc)
{
RET_CHECK(!cc->Inputs().GetTags().empty());
RET_CHECK(!cc->Outputs().GetTags().empty());
// Inputs.
cc->Inputs().Tag(kTensorsTag).Set<std::vector<Tensor>>();
if (cc->Inputs().HasTag(kOutputSizeTag)) {
if (cc->Inputs().HasTag(kOutputSizeTag))
{
cc->Inputs().Tag(kOutputSizeTag).Set<std::pair<int, int>>();
}
// Outputs.
cc->Outputs().Tag(kMaskTag).Set<Image>();
if (CanUseGpu()) {
if (CanUseGpu())
{
#if !MEDIAPIPE_DISABLE_GPU
MP_RETURN_IF_ERROR(mediapipe::GlCalculatorHelper::UpdateContract(cc));
#if MEDIAPIPE_METAL_ENABLED
@ -207,11 +232,13 @@ absl::Status TensorsToSegmentationCalculator::GetContract(
return absl::OkStatus();
}
absl::Status TensorsToSegmentationCalculator::Open(CalculatorContext* cc) {
absl::Status TensorsToSegmentationCalculator::Open(CalculatorContext *cc)
{
cc->SetOffset(TimestampDiff(0));
bool use_gpu = false;
if (CanUseGpu()) {
begin = std::chrono::steady_clock::now();
if (CanUseGpu())
{
#if !MEDIAPIPE_DISABLE_GPU
use_gpu = true;
MP_RETURN_IF_ERROR(gpu_helper_.Open(cc));
@ -224,7 +251,8 @@ absl::Status TensorsToSegmentationCalculator::Open(CalculatorContext* cc) {
MP_RETURN_IF_ERROR(LoadOptions(cc));
if (use_gpu) {
if (use_gpu)
{
#if !MEDIAPIPE_DISABLE_GPU
MP_RETURN_IF_ERROR(InitGpu(cc));
#else
@ -235,8 +263,10 @@ absl::Status TensorsToSegmentationCalculator::Open(CalculatorContext* cc) {
return absl::OkStatus();
}
absl::Status TensorsToSegmentationCalculator::Process(CalculatorContext* cc) {
if (cc->Inputs().Tag(kTensorsTag).IsEmpty()) {
absl::Status TensorsToSegmentationCalculator::Process(CalculatorContext *cc)
{
if (cc->Inputs().Tag(kTensorsTag).IsEmpty())
{
return absl::OkStatus();
}
@ -244,10 +274,13 @@ absl::Status TensorsToSegmentationCalculator::Process(CalculatorContext* cc) {
cc->Inputs().Tag(kTensorsTag).Get<std::vector<Tensor>>();
bool use_gpu = false;
if (CanUseGpu()) {
if (CanUseGpu())
{
// Use GPU processing only if at least one input tensor is already on GPU.
for (const auto& tensor : input_tensors) {
if (tensor.ready_on_gpu()) {
for (const auto &tensor : input_tensors)
{
if (tensor.ready_on_gpu())
{
use_gpu = true;
break;
}
@ -255,7 +288,7 @@ absl::Status TensorsToSegmentationCalculator::Process(CalculatorContext* cc) {
}
// Validate tensor channels and activation type.
{
/*{
RET_CHECK(!input_tensors.empty());
ASSIGN_OR_RETURN(auto hwc, GetHwcFromDims(input_tensors[0].shape().dims));
int tensor_channels = std::get<2>(hwc);
@ -272,32 +305,44 @@ absl::Status TensorsToSegmentationCalculator::Process(CalculatorContext* cc) {
break;
}
}
if (use_gpu) {
*/
/* if (use_gpu)
{
#if !MEDIAPIPE_DISABLE_GPU
MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext([this, cc]() -> absl::Status {
MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext([this, cc]() -> absl::Status
{
MP_RETURN_IF_ERROR(ProcessGpu(cc));
return absl::OkStatus();
}));
return absl::OkStatus(); }));
#else
RET_CHECK_FAIL() << "GPU processing disabled.";
#endif // !MEDIAPIPE_DISABLE_GPU
} else {
MP_RETURN_IF_ERROR(ProcessCpu(cc));
}
else
{ */
MP_RETURN_IF_ERROR(ProcessCpu(cc));
//}
return absl::OkStatus();
}
absl::Status TensorsToSegmentationCalculator::Close(CalculatorContext* cc) {
absl::Status TensorsToSegmentationCalculator::Close(CalculatorContext *cc)
{
end = std::chrono::steady_clock::now();
std::cout << "Time difference = " << std::chrono::duration_cast<std::chrono::microseconds>(end - begin).count() << "[µs]" << std::endl;
std::cout << "Time difference = " << std::chrono::duration_cast<std::chrono::nanoseconds>(end - begin).count() << "[ns]" << std::endl;
#if !MEDIAPIPE_DISABLE_GPU
gpu_helper_.RunInGlContext([this] {
if (upsample_program_) glDeleteProgram(upsample_program_);
gpu_helper_.RunInGlContext([this]
{
if (upsample_program_)
glDeleteProgram(upsample_program_);
upsample_program_ = 0;
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
mask_program_31_.reset();
#else
if (mask_program_20_) glDeleteProgram(mask_program_20_);
if (mask_program_20_)
glDeleteProgram(mask_program_20_);
mask_program_20_ = 0;
#endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
#if MEDIAPIPE_METAL_ENABLED
@ -310,14 +355,16 @@ absl::Status TensorsToSegmentationCalculator::Close(CalculatorContext* cc) {
}
absl::Status TensorsToSegmentationCalculator::ProcessCpu(
CalculatorContext* cc) {
CalculatorContext *cc)
{
// Get input streams, and dimensions.
const auto &input_tensors =
cc->Inputs().Tag(kTensorsTag).Get<std::vector<Tensor>>();
ASSIGN_OR_RETURN(auto hwc, GetHwcFromDims(input_tensors[0].shape().dims));
auto [tensor_height, tensor_width, tensor_channels] = hwc;
int output_width = tensor_width, output_height = tensor_height;
if (cc->Inputs().HasTag(kOutputSizeTag)) {
if (cc->Inputs().HasTag(kOutputSizeTag))
{
const auto &size =
cc->Inputs().Tag(kOutputSizeTag).Get<std::pair<int, int>>();
output_width = size.first;
@ -335,29 +382,24 @@ absl::Status TensorsToSegmentationCalculator::ProcessCpu(
CV_MAKETYPE(CV_32F, tensor_channels),
const_cast<float *>(raw_input_data));
// Process mask tensor and apply activation function.
if (tensor_channels == 2) {
MP_RETURN_IF_ERROR(ApplyActivation<cv::Vec2f>(tensor_mat, &small_mask_mat));
} else if (tensor_channels == 1) {
RET_CHECK(mediapipe::TensorsToSegmentationCalculatorOptions::SOFTMAX !=
options_.activation()); // Requires 2 channels.
if (mediapipe::TensorsToSegmentationCalculatorOptions::NONE ==
options_.activation()) // Pass-through optimization.
tensor_mat.copyTo(small_mask_mat);
else
MP_RETURN_IF_ERROR(ApplyActivation<float>(tensor_mat, &small_mask_mat));
} else {
RET_CHECK_FAIL() << "Unsupported number of tensor channels "
<< tensor_channels;
}
// std::cout << tensor_mat.channels() << std::endl;
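// Map each channel from [-1, 1] to [0, 255], then convert to 8-bit for the
// output frame.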
std::vector<cv::Mat> channels(4);
cv::split(tensor_mat, channels);
for (auto &ch : channels)
ch = (ch + 1) * 127.5;
cv::merge(channels, tensor_mat);
cv::convertScaleAbs(tensor_mat, tensor_mat);
// std::cout << "R (numpy) = " << std::endl << cv::format(tensor_mat, cv::Formatter::FMT_NUMPY ) << std::endl << std::endl;
// Send out image as CPU packet.
std::shared_ptr<ImageFrame> mask_frame = std::make_shared<ImageFrame>(
ImageFormat::VEC32F1, output_width, output_height);
ImageFormat::SRGB, output_width, output_height);
std::unique_ptr<Image> output_mask = absl::make_unique<Image>(mask_frame);
auto output_mat = formats::MatView(output_mask.get());
// Upsample small mask into output.
cv::resize(small_mask_mat, *output_mat,
cv::resize(tensor_mat, *output_mat,
cv::Size(output_width, output_height));
cc->Outputs().Tag(kMaskTag).Add(output_mask.release(), cc->InputTimestamp());
@ -366,25 +408,31 @@ absl::Status TensorsToSegmentationCalculator::ProcessCpu(
template <class T>
absl::Status TensorsToSegmentationCalculator::ApplyActivation(
cv::Mat& tensor_mat, cv::Mat* small_mask_mat) {
cv::Mat &tensor_mat, cv::Mat *small_mask_mat)
{
// Configure activation function.
const int output_layer_index = options_.output_layer_index();
typedef mediapipe::TensorsToSegmentationCalculatorOptions Options;
const auto activation_fn = [&](const cv::Vec2f& mask_value) {
const auto activation_fn = [&](const cv::Vec2f &mask_value)
{
float new_mask_value = 0;
// TODO consider moving switch out of the loop,
// and also avoid float/Vec2f casting.
switch (options_.activation()) {
case Options::NONE: {
switch (options_.activation())
{
case Options::NONE:
{
new_mask_value = mask_value[0];
break;
}
case Options::SIGMOID: {
case Options::SIGMOID:
{
const float pixel0 = mask_value[0];
new_mask_value = 1.0 / (std::exp(-pixel0) + 1.0);
break;
}
case Options::SOFTMAX: {
case Options::SOFTMAX:
{
const float pixel0 = mask_value[0];
const float pixel1 = mask_value[1];
const float max_pixel = std::max(pixel0, pixel1);
@ -401,8 +449,10 @@ absl::Status TensorsToSegmentationCalculator::ApplyActivation(
};
// Process mask tensor.
for (int i = 0; i < tensor_mat.rows; ++i) {
for (int j = 0; j < tensor_mat.cols; ++j) {
for (int i = 0; i < tensor_mat.rows; ++i)
{
for (int j = 0; j < tensor_mat.cols; ++j)
{
const T &input_pix = tensor_mat.at<T>(i, j);
const float mask_value = activation_fn(input_pix);
small_mask_mat->at<float>(i, j) = mask_value;
@ -417,142 +467,60 @@ absl::Status TensorsToSegmentationCalculator::ApplyActivation(
// 2. process segmentation tensor into small mask
// 3. upsample small mask into output mask to be same size as input image
absl::Status TensorsToSegmentationCalculator::ProcessGpu(
CalculatorContext* cc) {
CalculatorContext *cc)
{
#if !MEDIAPIPE_DISABLE_GPU
// Get input streams, and dimensions.
const auto &input_tensors =
cc->Inputs().Tag(kTensorsTag).Get<std::vector<Tensor>>();
ASSIGN_OR_RETURN(auto hwc, GetHwcFromDims(input_tensors[0].shape().dims));
auto [tensor_height, tensor_width, tensor_channels] = hwc;
int output_width = tensor_width, output_height = tensor_height;
if (cc->Inputs().HasTag(kOutputSizeTag)) {
if (cc->Inputs().HasTag(kOutputSizeTag))
{
const auto &size =
cc->Inputs().Tag(kOutputSizeTag).Get<std::pair<int, int>>();
output_width = size.first;
output_height = size.second;
}
// Create initial working mask texture.
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
tflite::gpu::gl::GlTexture small_mask_texture;
#else
mediapipe::GlTexture small_mask_texture;
#endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
// Wrap input tensor.
auto raw_input_tensor = &input_tensors[0];
auto raw_input_view = raw_input_tensor->GetCpuReadView();
const float *raw_input_data = raw_input_view.buffer<float>();
cv::Mat tensor_mat(cv::Size(tensor_width, tensor_height),
CV_MAKETYPE(CV_32F, tensor_channels),
const_cast<float *>(raw_input_data));
// Run shader, process mask tensor.
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
{
MP_RETURN_IF_ERROR(CreateReadWriteRgbaImageTexture(
tflite::gpu::DataType::UINT8, // GL_RGBA8
{tensor_width, tensor_height}, &small_mask_texture));
// std::cout << tensor_mat.channels() << std::endl;
std::vector<cv::Mat> channels(4);
cv::split(tensor_mat, channels);
for (auto &ch : channels)
ch = (ch + 1) * 127.5;
const int output_index = 0;
glBindImageTexture(output_index, small_mask_texture.id(), 0, GL_FALSE, 0,
GL_WRITE_ONLY, GL_RGBA8);
cv::merge(channels, tensor_mat);
auto read_view = input_tensors[0].GetOpenGlBufferReadView();
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, read_view.name());
const tflite::gpu::uint3 workgroups = {
NumGroups(tensor_width, kWorkgroupSize),
NumGroups(tensor_height, kWorkgroupSize), 1};
glUseProgram(mask_program_31_->id());
glUniform2i(glGetUniformLocation(mask_program_31_->id(), "out_size"),
tensor_width, tensor_height);
MP_RETURN_IF_ERROR(mask_program_31_->Dispatch(workgroups));
}
#elif MEDIAPIPE_METAL_ENABLED
{
id<MTLCommandBuffer> command_buffer = [metal_helper_ commandBuffer];
command_buffer.label = @"SegmentationKernel";
id<MTLComputeCommandEncoder> command_encoder =
[command_buffer computeCommandEncoder];
[command_encoder setComputePipelineState:mask_program_];
auto read_view = input_tensors[0].GetMtlBufferReadView(command_buffer);
[command_encoder setBuffer:read_view.buffer() offset:0 atIndex:0];
mediapipe::GpuBuffer small_mask_buffer = [metal_helper_
mediapipeGpuBufferWithWidth:tensor_width
height:tensor_height
format:mediapipe::GpuBufferFormat::kBGRA32];
id<MTLTexture> small_mask_texture_metal =
[metal_helper_ metalTextureWithGpuBuffer:small_mask_buffer];
[command_encoder setTexture:small_mask_texture_metal atIndex:1];
unsigned int out_size[] = {static_cast<unsigned int>(tensor_width),
static_cast<unsigned int>(tensor_height)};
[command_encoder setBytes:&out_size length:sizeof(out_size) atIndex:2];
MTLSize threads_per_group = MTLSizeMake(kWorkgroupSize, kWorkgroupSize, 1);
MTLSize threadgroups =
MTLSizeMake(NumGroups(tensor_width, kWorkgroupSize),
NumGroups(tensor_height, kWorkgroupSize), 1);
[command_encoder dispatchThreadgroups:threadgroups
threadsPerThreadgroup:threads_per_group];
[command_encoder endEncoding];
[command_buffer commit];
small_mask_texture = gpu_helper_.CreateSourceTexture(small_mask_buffer);
}
#else
{
small_mask_texture = gpu_helper_.CreateDestinationTexture(
tensor_width, tensor_height,
mediapipe::GpuBufferFormat::kBGRA32); // actually GL_RGBA8
// Go through CPU if not already texture 2D (no direct conversion yet).
// Tensor::GetOpenGlTexture2dReadView() doesn't automatically convert types.
if (!input_tensors[0].ready_as_opengl_texture_2d()) {
(void)input_tensors[0].GetCpuReadView();
}
auto read_view = input_tensors[0].GetOpenGlTexture2dReadView();
gpu_helper_.BindFramebuffer(small_mask_texture);
glActiveTexture(GL_TEXTURE1);
glBindTexture(GL_TEXTURE_2D, read_view.name());
glUseProgram(mask_program_20_);
GlRender();
glBindTexture(GL_TEXTURE_2D, 0);
glFlush();
}
#endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
cv::convertScaleAbs(tensor_mat, tensor_mat);
// std::cout << "R (numpy) = " << std::endl << cv::format(tensor_mat, cv::Formatter::FMT_NUMPY ) << std::endl << std::endl;
// Send out image as CPU packet.
std::shared_ptr<ImageFrame> mask_frame = std::make_shared<ImageFrame>(
ImageFormat::SRGB, output_width, output_height);
std::unique_ptr<Image> output_mask = absl::make_unique<Image>(mask_frame);
auto output_mat = formats::MatView(output_mask.get());
// Upsample small mask into output.
mediapipe::GlTexture output_texture = gpu_helper_.CreateDestinationTexture(
output_width, output_height,
mediapipe::GpuBufferFormat::kBGRA32); // actually GL_RGBA8
cv::resize(tensor_mat, *output_mat,
cv::Size(output_width, output_height));
cc->Outputs().Tag(kMaskTag).Add(output_mask.release(), cc->InputTimestamp());
// Run shader, upsample result.
{
gpu_helper_.BindFramebuffer(output_texture);
glActiveTexture(GL_TEXTURE1);
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
glBindTexture(GL_TEXTURE_2D, small_mask_texture.id());
#else
glBindTexture(GL_TEXTURE_2D, small_mask_texture.name());
#endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
glUseProgram(upsample_program_);
GlRender();
glBindTexture(GL_TEXTURE_2D, 0);
glFlush();
}
// Send out image as GPU packet.
auto output_image = output_texture.GetFrame<Image>();
cc->Outputs().Tag(kMaskTag).Add(output_image.release(), cc->InputTimestamp());
// Cleanup
output_texture.Release();
#endif // !MEDIAPIPE_DISABLE_GPU
return absl::OkStatus();
}
void TensorsToSegmentationCalculator::GlRender() {
void TensorsToSegmentationCalculator::GlRender()
{
#if !MEDIAPIPE_DISABLE_GPU
static const GLfloat square_vertices[] = {
-1.0f, -1.0f, // bottom left
@ -602,16 +570,19 @@ void TensorsToSegmentationCalculator::GlRender() {
}
absl::Status TensorsToSegmentationCalculator::LoadOptions(
CalculatorContext* cc) {
CalculatorContext *cc)
{
// Get calculator options specified in the graph.
options_ = cc->Options<::mediapipe::TensorsToSegmentationCalculatorOptions>();
return absl::OkStatus();
}
absl::Status TensorsToSegmentationCalculator::InitGpu(CalculatorContext* cc) {
absl::Status TensorsToSegmentationCalculator::InitGpu(CalculatorContext *cc)
{
#if !MEDIAPIPE_DISABLE_GPU
MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext([this]() -> absl::Status {
MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext([this]() -> absl::Status
{
// A shader to process a segmentation tensor into an output mask.
// Currently uses 4 channels for output, and sets R+A channels as mask value.
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
@ -872,8 +843,7 @@ void main() {
glUseProgram(upsample_program_);
glUniform1i(glGetUniformLocation(upsample_program_, "video_frame"), 1);
return absl::OkStatus();
}));
return absl::OkStatus(); }));
#endif // !MEDIAPIPE_DISABLE_GPU
return absl::OkStatus();

View File

@ -0,0 +1,60 @@
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
licenses(["notice"])
package(default_visibility = ["//visibility:private"])
cc_binary(
name = "libmediapipe_jni.so",
linkshared = 1,
linkstatic = 1,
deps = [
"//mediapipe/graphs/image_style:mobile_calculators",
"//mediapipe/java/com/google/mediapipe/framework/jni:mediapipe_framework_jni",
],
)
cc_library(
name = "mediapipe_jni_lib",
srcs = [":libmediapipe_jni.so"],
alwayslink = 1,
)
android_binary(
name = "imagestylegpu",
srcs = glob(["*.java"]),
assets = [
"//mediapipe/graphs/image_style:mobile_gpu.binarypb",
"//mediapipe/models:model_float32.tflite",
],
assets_dir = "",
manifest = "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:AndroidManifest.xml",
manifest_values = {
"applicationId": "com.google.mediapipe.apps.imagestylegpu",
"appName": "Image Style",
"mainActivity": "com.google.mediapipe.apps.basic.MainActivity",
"cameraFacingFront": "True",
"binaryGraphName": "mobile_gpu.binarypb",
"inputVideoStreamName": "input_video",
"outputVideoStreamName": "output_video",
"flipFramesVertically": "True",
"converterNumBuffers": "2",
},
multidex = "native",
deps = [
":mediapipe_jni_lib",
"//mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:basic_lib",
],
)

View File

@ -24,22 +24,14 @@ package(default_visibility = ["//visibility:public"])
cc_library(
name = "mobile_calculators",
deps = [
"//mediapipe/calculators/tensorflow:tensor_to_image_frame_calculator",
"//mediapipe/calculators/tensorflow:vector_float_to_tensor_calculator",
"//mediapipe/calculators/tensor:tensors_to_floats_calculator",
"//mediapipe/calculators/tensor:tensors_to_segmentation_calculator",
"//mediapipe/calculators/util:from_image_calculator",
"//mediapipe/calculators/core:flow_limiter_calculator",
"//mediapipe/calculators/tensor:image_to_tensor_calculator",
"//mediapipe/calculators/tensor:inference_calculator",
"//mediapipe/calculators/core:flow_limiter_calculator",
"//mediapipe/calculators/image:image_transformation_calculator",
"//mediapipe/calculators/tflite:tflite_converter_calculator",
"//mediapipe/calculators/tflite:tflite_custom_op_resolver_calculator",
"//mediapipe/calculators/tflite:tflite_inference_calculator",
"//mediapipe/gpu:gpu_buffer_to_image_frame_calculator",
"//mediapipe/gpu:image_frame_to_gpu_buffer_calculator",
"//mediapipe/calculators/tflite:tflite_tensors_to_segmentation_calculator",
"//mediapipe/calculators/tensor:tensors_to_segmentation_calculator",
"//mediapipe/calculators/util:to_image_calculator",
"//mediapipe/calculators/util:from_image_calculator",
"//mediapipe/calculators/image:image_properties_calculator",
"//mediapipe/calculators/tflite:tflite_custom_op_resolver_calculator",
],
)
@ -47,18 +39,17 @@ cc_library(
name = "desktop_calculators",
deps = [
"//mediapipe/calculators/core:flow_limiter_calculator",
"//mediapipe/calculators/image:image_transformation_calculator",
"//mediapipe/calculators/tflite:tflite_converter_calculator",
"//mediapipe/calculators/tflite:tflite_inference_calculator",
"//mediapipe/calculators/tflite:tflite_tensors_to_gpuimage_calculator",
"//mediapipe/calculators/tflite:tflite_custom_op_resolver_calculator",
"//mediapipe/calculators/tflite:tflite_tensors_to_segmentation_calculator",
"//mediapipe/calculators/tensor:image_to_tensor_calculator",
"//mediapipe/calculators/tensor:inference_calculator",
"//mediapipe/calculators/tensor:tensors_to_segmentation_calculator",
"//mediapipe/calculators/util:to_image_calculator",
"//mediapipe/calculators/util:from_image_calculator",
],
)
mediapipe_binary_graph(
name = "mobile_gpu_binary_graph",
graph = "image_style.pbtxt",
graph = "image_style_gpu.pbtxt",
output_name = "mobile_gpu.binarypb",
deps = [":mobile_calculators"],
)

View File

@ -42,8 +42,8 @@ node {
options {
[mediapipe.TfLiteConverterCalculatorOptions.ext] {
output_tensor_float_range {
min: 0
max: 255
min: -1
max: 1
}
}
}
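# output_tensor_float_range rescales input pixels to a zero-centered [-1, 1]
# range (the input range assumed by the style-transfer model).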

View File

@ -1,19 +1,30 @@
# MediaPipe graph that performs object detection on desktop with TensorFlow Lite
# on CPU.
# Used in the example in
# mediapipe/examples/desktop/object_detection:object_detection_tflite.
# MediaPipe graph that performs image style transfer with TensorFlow Lite on CPU.
# max_queue_size limits the number of packets enqueued on any input stream
# by throttling inputs to the graph. This makes the graph process only one
# frame at a time.
max_queue_size: 1
# Input image. (ImageFrame)
input_stream: "input_video"
# Decodes an input video file into images and a video header.
# Output image with rendered results. (ImageFrame)
output_stream: "output_video"
# Throttles the images flowing downstream for flow control. It passes through
# the very first incoming image unaltered, and waits for downstream nodes
# (calculators and subgraphs) in the graph to finish their tasks before it
# passes through another image. All images that come in while waiting are
# dropped, limiting the number of in-flight images in most part of the graph to
# 1. This prevents the downstream nodes from queuing up incoming images and data
# excessively, which leads to increased latency and memory usage, unwanted in
# real-time mobile applications. It also eliminates unnecessary computation,
# e.g., the output produced by a node may get dropped downstream if the
# subsequent nodes are still busy processing previous inputs.
node {
calculator: "OpenCvVideoDecoderCalculator"
input_side_packet: "INPUT_FILE_PATH:input_video_path"
output_stream: "VIDEO:input_video"
output_stream: "VIDEO_PRESTREAM:input_video_header"
calculator: "FlowLimiterCalculator"
input_stream: "input_video"
input_stream: "FINISHED:output_video"
input_stream_info: {
tag_index: "FINISHED"
back_edge: true
}
output_stream: "throttled_input_video"
}
# Transforms the input image on CPU to a 256x256 image. To scale the image, by
@ -23,12 +34,12 @@ node {
# detection model used in this graph is agnostic to that deformation.
node: {
calculator: "ImageTransformationCalculator"
input_stream: "IMAGE:input_video"
input_stream: "IMAGE:throttled_input_video"
output_stream: "IMAGE:transformed_input_video"
node_options: {
[type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
output_width: 512
output_height: 512
output_width: 256
output_height: 256
}
}
}
@ -39,58 +50,45 @@ node: {
node {
calculator: "TfLiteConverterCalculator"
input_stream: "IMAGE:transformed_input_video"
output_stream: "TENSORS:image_tensor"
node_options: {
[type.googleapis.com/mediapipe.TfLiteConverterCalculatorOptions] {
zero_center: true
output_stream: "TENSORS:input_tensors"
options {
[mediapipe.TfLiteConverterCalculatorOptions.ext] {
zero_center: false
max_num_channels: 3
output_tensor_float_range {
min: 0.0
max: 255.0
}
}
}
}
# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a
# tensor representing the stylized image.
node {
calculator: "TfLiteInferenceCalculator"
input_stream: "TENSORS:image_tensor"
output_stream: "TENSORS:stylized_tensor"
input_stream: "TENSORS:input_tensors"
output_stream: "TENSORS:output_tensors"
node_options: {
[type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] {
model_path: "mediapipe/models/metaf-512-mobile3.tflite"
model_path: "mediapipe/models/model_float32.tflite"
}
}
}
node {
calculator: "TfliteTensorsToGpuImageCalculator"
input_stream: "TENSORS:stylized_tensor"
output_stream: "IMAGE:image"
}
#node {
# calculator: "TfLiteTensorsToSegmentationCalculator"
# input_stream: "TENSORS:stylized_tensor"
# output_stream: "MASK:mask_image"
# node_options: {
# [type.googleapis.com/mediapipe.TfLiteTensorsToSegmentationCalculatorOptions] {
# tensor_width: 512
# tensor_height: 512
# tensor_channels: 3
# }
# }
#}
# Encodes the annotated images into a video file, adopting properties specified
# in the input video header, e.g., video framerate.
node {
calculator: "OpenCvVideoEncoderCalculator"
input_stream: "VIDEO:image"
input_stream: "VIDEO_PRESTREAM:input_video_header"
input_side_packet: "OUTPUT_FILE_PATH:output_video_path"
calculator: "TfLiteTensorsToSegmentationCalculator"
input_stream: "TENSORS:output_tensors"
output_stream: "MASK:output_video"
node_options: {
[type.googleapis.com/mediapipe.OpenCvVideoEncoderCalculatorOptions]: {
codec: "avc1"
video_format: "mp4"
[type.googleapis.com/mediapipe.TfLiteTensorsToSegmentationCalculatorOptions] {
tensor_width: 256
tensor_height: 256
tensor_channels: 3
}
}
}

View File

@ -6,16 +6,7 @@ input_stream: "input_video"
# Output image with rendered results. (ImageFrame)
output_stream: "output_video"
# Throttles the images flowing downstream for flow control. It passes through
# the very first incoming image unaltered, and waits for downstream nodes
# (calculators and subgraphs) in the graph to finish their tasks before it
# passes through another image. All images that come in while waiting are
# dropped, limiting the number of in-flight images in most part of the graph to
# 1. This prevents the downstream nodes from queuing up incoming images and data
# excessively, which leads to increased latency and memory usage, unwanted in
# real-time mobile applications. It also eliminates unnecessarily computation,
# e.g., the output produced by a node may get dropped downstream if the
# subsequent nodes are still busy processing previous inputs.
node {
calculator: "FlowLimiterCalculator"
input_stream: "input_video"
@ -27,67 +18,59 @@ node {
output_stream: "throttled_input_video"
}
# Transforms the input image on CPU to a 320x320 image. To scale the image, by
# default it uses the STRETCH scale mode that maps the entire input image to the
# entire transformed image. As a result, image aspect ratio may be changed and
# objects in the image may be deformed (stretched or squeezed), but the object
# detection model used in this graph is agnostic to that deformation.
node: {
calculator: "ImageTransformationCalculator"
input_stream: "IMAGE:throttled_input_video"
output_stream: "IMAGE:transformed_input_video"
node_options: {
[type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
output_width: 256
output_height: 256
}
}
calculator: "ToImageCalculator"
input_stream: "IMAGE_CPU:throttled_input_video"
output_stream: "IMAGE:image_input_video"
}
# Converts the input image on CPU into an image tensor with pixel values
# normalized to [-1.f, 1.f].
node {
calculator: "TfLiteConverterCalculator"
input_stream: "IMAGE:transformed_input_video"
output_stream: "TENSORS:input_tensors"
options {
[mediapipe.TfLiteConverterCalculatorOptions.ext] {
calculator: "ImageToTensorCalculator"
input_stream: "IMAGE:image_input_video"
output_stream: "TENSORS:input_tensor"
options: {
[mediapipe.ImageToTensorCalculatorOptions.ext] {
output_tensor_width: 256
output_tensor_height: 256
keep_aspect_ratio: true
output_tensor_float_range {
min: 0
max: 255
min: -1.0
max: 1.0
}
max_num_channels: 3
border_mode: BORDER_ZERO
}
}
}
# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a
# tensor representing the stylized image.
node {
calculator: "TfLiteInferenceCalculator"
input_stream: "TENSORS:input_tensors"
output_stream: "TENSORS:output_tensors"
node_options: {
[type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] {
calculator: "InferenceCalculator"
input_stream: "TENSORS:input_tensor"
output_stream: "TENSORS:output_tensor"
options: {
[mediapipe.InferenceCalculatorOptions.ext] {
model_path: "mediapipe/models/model_float32.tflite"
delegate { xnnpack {} }
}
}
}
node {
calculator: "TfLiteTensorsToSegmentationCalculator"
input_stream: "TENSORS:output_tensors"
output_stream: "MASK:output_video"
node_options: {
[type.googleapis.com/mediapipe.TfLiteTensorsToSegmentationCalculatorOptions] {
tensor_width: 256
tensor_height: 256
tensor_channels: 3
calculator: "TensorsToSegmentationCalculator"
input_stream: "TENSORS:output_tensor"
output_stream: "MASK:output"
options: {
[mediapipe.TensorsToSegmentationCalculatorOptions.ext] {
activation: NONE
}
}
}
node {
calculator: "FromImageCalculator"
input_stream: "IMAGE:output"
output_stream: "IMAGE_CPU:output_video"
}

View File

@ -18,30 +18,18 @@ node {
output_stream: "throttled_input_video"
}
node: {
calculator: "ImageTransformationCalculator"
input_stream: "IMAGE_GPU:throttled_input_video"
output_stream: "IMAGE_GPU:transformed_input_video"
node_options: {
[type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
output_width: 512
output_height: 512
}
}
}
node: {
calculator: "ImageToTensorCalculator"
input_stream: "IMAGE_GPU:transformed_input_video"
input_stream: "IMAGE_GPU:throttled_input_video"
output_stream: "TENSORS:input_tensors"
options {
[mediapipe.ImageToTensorCalculatorOptions.ext] {
output_tensor_width: 512
output_tensor_height: 512
keep_aspect_ratio: true
output_tensor_width: 256
output_tensor_height: 256
keep_aspect_ratio: false
output_tensor_float_range {
min: 0.0
max: 255.0
min: -1.0
max: 1.0
}
gpu_origin: TOP_LEFT
border_mode: BORDER_REPLICATE
@ -49,32 +37,42 @@ node: {
}
}
node {
calculator: "InferenceCalculator"
input_stream: "TENSORS_GPU:input_tensors"
output_stream: "TENSORS_GPU:output_tensors"
input_stream: "TENSORS:input_tensors"
output_stream: "TENSORS:output_tensors"
options: {
[mediapipe.InferenceCalculatorOptions.ext] {
model_path: "mediapipe/models/metaf-512-mobile3.tflite"
delegate { gpu {} }
model_path: "mediapipe/models/model_float32.tflite"
delegate { xnnpack {} }
}
}
}
# Retrieves the size of the input image.
node {
calculator: "ImagePropertiesCalculator"
input_stream: "IMAGE_GPU:input_video"
output_stream: "SIZE:input_size"
}
# Processes the output tensors into a segmentation mask that has the same size
# as the input image into the graph.
node {
calculator: "TensorsToSegmentationCalculator"
input_stream: "TENSORS:output_tensors"
input_stream: "OUTPUT_SIZE:input_size"
output_stream: "MASK:mask_image"
options: {
[mediapipe.TensorsToSegmentationCalculatorOptions.ext] {
activation: NONE
gpu_origin: TOP_LEFT
}
}
}
node: {
calculator: "FromImageCalculator"
input_stream: "IMAGE:mask_image"