Project import generated by Copybara.

GitOrigin-RevId: d0039a576e2db9c0fcefffd26a527df74cbe145b
MediaPipe Team 2020-04-21 22:12:12 -04:00 committed by chuoling
parent 024f7bf0f1
commit 7bad8fce62
45 changed files with 1566 additions and 227 deletions

View File

@ -19,6 +19,7 @@
* [Object Detection and Tracking](mediapipe/docs/object_tracking_mobile_gpu.md)
* [Objectron: 3D Object Detection and Tracking](mediapipe/docs/objectron_mobile_gpu.md)
* [AutoFlip: Intelligent Video Reframing](mediapipe/docs/autoflip.md)
* [KNIFT: Template Matching with Neural Image Features](mediapipe/docs/template_matching_mobile_cpu.md)
![face_detection](mediapipe/docs/images/mobile/face_detection_android_gpu_small.gif)
![face_mesh](mediapipe/docs/images/mobile/face_mesh_android_gpu_small.gif)
@ -29,6 +30,7 @@
![object_tracking](mediapipe/docs/images/mobile/object_tracking_android_gpu_small.gif)
![objectron_shoes](mediapipe/docs/images/mobile/objectron_shoe_android_gpu_small.gif)
![objectron_chair](mediapipe/docs/images/mobile/objectron_chair_android_gpu_small.gif)
![template_matching](mediapipe/docs/images/mobile/template_matching_android_cpu_small.gif)
## Installation
Follow these [instructions](mediapipe/docs/install.md).
@ -53,6 +55,7 @@ Search MediaPipe Github repository using [Google Open Source code search](https:
* [YouTube Channel](https://www.youtube.com/channel/UCObqmpuSMx-usADtL_qdMAw)
## Publications
* [MediaPipe KNIFT: Template-based Feature Matching](https://mediapipe.page.link/knift-blog)
* [Alfred Camera: Smart camera features using MediaPipe](https://developers.googleblog.com/2020/03/alfred-camera-smart-camera-features-using-mediapipe.html)
* [MediaPipe Objectron: Real-time 3D Object Detection on Mobile Devices](https://mediapipe.page.link/objectron-aiblog)
* [AutoFlip: An Open Source Framework for Intelligent Video Reframing](https://mediapipe.page.link/autoflip)

View File

@ -19,6 +19,11 @@
#include "mediapipe/gpu/gpu_buffer.h"
#endif // !MEDIAPIPE_DISABLE_GPU
namespace {
constexpr char kImageFrameTag[] = "IMAGE";
constexpr char kGpuBufferTag[] = "IMAGE_GPU";
} // namespace
namespace mediapipe {
// Extracts image properties from the input image and outputs the properties.
@ -40,13 +45,14 @@ namespace mediapipe {
class ImagePropertiesCalculator : public CalculatorBase {
public:
static ::mediapipe::Status GetContract(CalculatorContract* cc) {
RET_CHECK(cc->Inputs().HasTag("IMAGE") ^ cc->Inputs().HasTag("IMAGE_GPU"));
if (cc->Inputs().HasTag("IMAGE")) {
cc->Inputs().Tag("IMAGE").Set<ImageFrame>();
RET_CHECK(cc->Inputs().HasTag(kImageFrameTag) ^
cc->Inputs().HasTag(kGpuBufferTag));
if (cc->Inputs().HasTag(kImageFrameTag)) {
cc->Inputs().Tag(kImageFrameTag).Set<ImageFrame>();
}
#if !defined(MEDIAPIPE_DISABLE_GPU)
if (cc->Inputs().HasTag("IMAGE_GPU")) {
cc->Inputs().Tag("IMAGE_GPU").Set<::mediapipe::GpuBuffer>();
if (cc->Inputs().HasTag(kGpuBufferTag)) {
cc->Inputs().Tag(kGpuBufferTag).Set<::mediapipe::GpuBuffer>();
}
#endif // !MEDIAPIPE_DISABLE_GPU
@ -66,16 +72,17 @@ class ImagePropertiesCalculator : public CalculatorBase {
int width;
int height;
if (cc->Inputs().HasTag("IMAGE") && !cc->Inputs().Tag("IMAGE").IsEmpty()) {
const auto& image = cc->Inputs().Tag("IMAGE").Get<ImageFrame>();
if (cc->Inputs().HasTag(kImageFrameTag) &&
!cc->Inputs().Tag(kImageFrameTag).IsEmpty()) {
const auto& image = cc->Inputs().Tag(kImageFrameTag).Get<ImageFrame>();
width = image.Width();
height = image.Height();
}
#if !defined(MEDIAPIPE_DISABLE_GPU)
if (cc->Inputs().HasTag("IMAGE_GPU") &&
!cc->Inputs().Tag("IMAGE_GPU").IsEmpty()) {
if (cc->Inputs().HasTag(kGpuBufferTag) &&
!cc->Inputs().Tag(kGpuBufferTag).IsEmpty()) {
const auto& image =
cc->Inputs().Tag("IMAGE_GPU").Get<mediapipe::GpuBuffer>();
cc->Inputs().Tag(kGpuBufferTag).Get<mediapipe::GpuBuffer>();
width = image.width();
height = image.height();
}
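For orientation, a minimal CPU usage of this calculator in a graph config might look like the sketch below; the SIZE output tag and the stream names are assumptions for illustration and are not part of this diff.

```
# Hypothetical sketch, not part of this commit. The SIZE output tag and the
# stream names are assumed; only the IMAGE input tag appears in the diff above.
node {
  calculator: "ImagePropertiesCalculator"
  input_stream: "IMAGE:input_video"
  output_stream: "SIZE:image_size"
}
```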

View File

@ -47,6 +47,9 @@ namespace mediapipe {
#endif // !MEDIAPIPE_DISABLE_GPU
namespace {
constexpr char kImageFrameTag[] = "IMAGE";
constexpr char kGpuBufferTag[] = "IMAGE_GPU";
int RotationModeToDegrees(mediapipe::RotationMode_Mode rotation) {
switch (rotation) {
case mediapipe::RotationMode_Mode_UNKNOWN:
@ -95,7 +98,7 @@ mediapipe::ScaleMode_Mode ParseScaleMode(
// Scales, rotates, and flips images horizontally or vertically.
//
// Input:
// One of the following two tags:
// One of the following tags:
// IMAGE: ImageFrame representing the input image.
// IMAGE_GPU: GpuBuffer representing the input image.
//
@ -113,7 +116,7 @@ mediapipe::ScaleMode_Mode ParseScaleMode(
// corresponding field in the calculator options.
//
// Output:
// One of the following two tags:
// One of the following tags:
// IMAGE - ImageFrame representing the output image.
// IMAGE_GPU - GpuBuffer representing the output image.
//
@ -152,7 +155,8 @@ mediapipe::ScaleMode_Mode ParseScaleMode(
// Note: To enable horizontal or vertical flipping, specify them in the
// calculator options. Flipping is applied after rotation.
//
// Note: Only scale mode STRETCH is currently supported on CPU.
// Note: Input defines output, so only matching types are supported:
// IMAGE -> IMAGE or IMAGE_GPU -> IMAGE_GPU
//
class ImageTransformationCalculator : public CalculatorBase {
public:
@ -186,7 +190,7 @@ class ImageTransformationCalculator : public CalculatorBase {
bool use_gpu_ = false;
#if !defined(MEDIAPIPE_DISABLE_GPU)
GlCalculatorHelper helper_;
GlCalculatorHelper gpu_helper_;
std::unique_ptr<QuadRenderer> rgb_renderer_;
std::unique_ptr<QuadRenderer> yuv_renderer_;
std::unique_ptr<QuadRenderer> ext_rgb_renderer_;
@ -197,21 +201,22 @@ REGISTER_CALCULATOR(ImageTransformationCalculator);
// static
::mediapipe::Status ImageTransformationCalculator::GetContract(
CalculatorContract* cc) {
RET_CHECK(cc->Inputs().HasTag("IMAGE") ^ cc->Inputs().HasTag("IMAGE_GPU"));
RET_CHECK(cc->Outputs().HasTag("IMAGE") ^ cc->Outputs().HasTag("IMAGE_GPU"));
// Only one input can be set, and the output type must match.
RET_CHECK(cc->Inputs().HasTag(kImageFrameTag) ^
cc->Inputs().HasTag(kGpuBufferTag));
bool use_gpu = false;
if (cc->Inputs().HasTag("IMAGE")) {
RET_CHECK(cc->Outputs().HasTag("IMAGE"));
cc->Inputs().Tag("IMAGE").Set<ImageFrame>();
cc->Outputs().Tag("IMAGE").Set<ImageFrame>();
if (cc->Inputs().HasTag(kImageFrameTag)) {
RET_CHECK(cc->Outputs().HasTag(kImageFrameTag));
cc->Inputs().Tag(kImageFrameTag).Set<ImageFrame>();
cc->Outputs().Tag(kImageFrameTag).Set<ImageFrame>();
}
#if !defined(MEDIAPIPE_DISABLE_GPU)
if (cc->Inputs().HasTag("IMAGE_GPU")) {
RET_CHECK(cc->Outputs().HasTag("IMAGE_GPU"));
cc->Inputs().Tag("IMAGE_GPU").Set<GpuBuffer>();
cc->Outputs().Tag("IMAGE_GPU").Set<GpuBuffer>();
if (cc->Inputs().HasTag(kGpuBufferTag)) {
RET_CHECK(cc->Outputs().HasTag(kGpuBufferTag));
cc->Inputs().Tag(kGpuBufferTag).Set<GpuBuffer>();
cc->Outputs().Tag(kGpuBufferTag).Set<GpuBuffer>();
use_gpu |= true;
}
#endif // !MEDIAPIPE_DISABLE_GPU
@ -259,7 +264,7 @@ REGISTER_CALCULATOR(ImageTransformationCalculator);
options_ = cc->Options<ImageTransformationCalculatorOptions>();
if (cc->Inputs().HasTag("IMAGE_GPU")) {
if (cc->Inputs().HasTag(kGpuBufferTag)) {
use_gpu_ = true;
}
@ -300,7 +305,7 @@ REGISTER_CALCULATOR(ImageTransformationCalculator);
if (use_gpu_) {
#if !defined(MEDIAPIPE_DISABLE_GPU)
// Let the helper access the GL context information.
MP_RETURN_IF_ERROR(helper_.Open(cc));
MP_RETURN_IF_ERROR(gpu_helper_.Open(cc));
#else
RET_CHECK_FAIL() << "GPU processing not enabled.";
#endif // !MEDIAPIPE_DISABLE_GPU
@ -328,18 +333,14 @@ REGISTER_CALCULATOR(ImageTransformationCalculator);
if (use_gpu_) {
#if !defined(MEDIAPIPE_DISABLE_GPU)
if (cc->Inputs().Tag("IMAGE_GPU").IsEmpty()) {
// Image is missing, hence no way to produce output image. (Timestamp
// bound will be updated automatically.)
if (cc->Inputs().Tag(kGpuBufferTag).IsEmpty()) {
return ::mediapipe::OkStatus();
}
return helper_.RunInGlContext(
return gpu_helper_.RunInGlContext(
[this, cc]() -> ::mediapipe::Status { return RenderGpu(cc); });
#endif // !MEDIAPIPE_DISABLE_GPU
} else {
if (cc->Inputs().Tag("IMAGE").IsEmpty()) {
// Image is missing, hence no way to produce output image. (Timestamp
// bound will be updated automatically.)
if (cc->Inputs().Tag(kImageFrameTag).IsEmpty()) {
return ::mediapipe::OkStatus();
}
return RenderCpu(cc);
@ -354,7 +355,7 @@ REGISTER_CALCULATOR(ImageTransformationCalculator);
QuadRenderer* rgb_renderer = rgb_renderer_.release();
QuadRenderer* yuv_renderer = yuv_renderer_.release();
QuadRenderer* ext_rgb_renderer = ext_rgb_renderer_.release();
helper_.RunInGlContext([rgb_renderer, yuv_renderer, ext_rgb_renderer] {
gpu_helper_.RunInGlContext([rgb_renderer, yuv_renderer, ext_rgb_renderer] {
if (rgb_renderer) {
rgb_renderer->GlTeardown();
delete rgb_renderer;
@ -376,17 +377,21 @@ REGISTER_CALCULATOR(ImageTransformationCalculator);
::mediapipe::Status ImageTransformationCalculator::RenderCpu(
CalculatorContext* cc) {
const auto& input_img = cc->Inputs().Tag("IMAGE").Get<ImageFrame>();
cv::Mat input_mat = formats::MatView(&input_img);
cv::Mat scaled_mat;
cv::Mat input_mat;
mediapipe::ImageFormat::Format format;
const int input_width = input_img.Width();
const int input_height = input_img.Height();
const auto& input = cc->Inputs().Tag(kImageFrameTag).Get<ImageFrame>();
input_mat = formats::MatView(&input);
format = input.Format();
const int input_width = input_mat.cols;
const int input_height = input_mat.rows;
if (!output_height_ || !output_width_) {
output_height_ = input_height;
output_width_ = input_width;
}
cv::Mat scaled_mat;
if (scale_mode_ == mediapipe::ScaleMode_Mode_STRETCH) {
cv::resize(input_mat, scaled_mat, cv::Size(output_width_, output_height_));
} else {
@ -443,10 +448,12 @@ REGISTER_CALCULATOR(ImageTransformationCalculator);
}
std::unique_ptr<ImageFrame> output_frame(
new ImageFrame(input_img.Format(), output_width, output_height));
new ImageFrame(format, output_width, output_height));
cv::Mat output_mat = formats::MatView(output_frame.get());
flipped_mat.copyTo(output_mat);
cc->Outputs().Tag("IMAGE").Add(output_frame.release(), cc->InputTimestamp());
cc->Outputs()
.Tag(kImageFrameTag)
.Add(output_frame.release(), cc->InputTimestamp());
return ::mediapipe::OkStatus();
}
@ -454,7 +461,7 @@ REGISTER_CALCULATOR(ImageTransformationCalculator);
::mediapipe::Status ImageTransformationCalculator::RenderGpu(
CalculatorContext* cc) {
#if !defined(MEDIAPIPE_DISABLE_GPU)
const auto& input = cc->Inputs().Tag("IMAGE_GPU").Get<GpuBuffer>();
const auto& input = cc->Inputs().Tag(kGpuBufferTag).Get<GpuBuffer>();
const int input_width = input.width();
const int input_height = input.height();
@ -485,11 +492,11 @@ REGISTER_CALCULATOR(ImageTransformationCalculator);
{"video_frame_y", "video_frame_uv"}));
}
renderer = yuv_renderer_.get();
src1 = helper_.CreateSourceTexture(input, 0);
src1 = gpu_helper_.CreateSourceTexture(input, 0);
} else // NOLINT(readability/braces)
#endif // iOS
{
src1 = helper_.CreateSourceTexture(input);
src1 = gpu_helper_.CreateSourceTexture(input);
#if defined(TEXTURE_EXTERNAL_OES)
if (src1.target() == GL_TEXTURE_EXTERNAL_OES) {
if (!ext_rgb_renderer_) {
@ -515,10 +522,10 @@ REGISTER_CALCULATOR(ImageTransformationCalculator);
mediapipe::FrameRotation rotation =
mediapipe::FrameRotationFromDegrees(RotationModeToDegrees(rotation_));
auto dst = helper_.CreateDestinationTexture(output_width, output_height,
auto dst = gpu_helper_.CreateDestinationTexture(output_width, output_height,
input.format());
helper_.BindFramebuffer(dst); // GL_TEXTURE0
gpu_helper_.BindFramebuffer(dst); // GL_TEXTURE0
glActiveTexture(GL_TEXTURE1);
glBindTexture(src1.target(), src1.name());
@ -533,8 +540,8 @@ REGISTER_CALCULATOR(ImageTransformationCalculator);
// Execute GL commands, before getting result.
glFlush();
auto output = dst.GetFrame<GpuBuffer>();
cc->Outputs().Tag("IMAGE_GPU").Add(output.release(), cc->InputTimestamp());
auto output = dst.template GetFrame<GpuBuffer>();
cc->Outputs().Tag(kGpuBufferTag).Add(output.release(), cc->InputTimestamp());
#endif // !MEDIAPIPE_DISABLE_GPU
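As the updated comment notes, the input tag determines the output tag (IMAGE pairs with IMAGE, IMAGE_GPU with IMAGE_GPU). A hedged sketch of a CPU-only node follows, assuming the usual options extension syntax and the output_width/output_height fields; none of this is part of the diff itself.

```
# Hypothetical sketch, not part of this commit. The options extension form and
# field names are assumed; an IMAGE input must pair with an IMAGE output.
node {
  calculator: "ImageTransformationCalculator"
  input_stream: "IMAGE:input_video"
  output_stream: "IMAGE:scaled_video"
  options: {
    [mediapipe.ImageTransformationCalculatorOptions.ext] {
      output_width: 256
      output_height: 256
    }
  }
}
```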

View File

@ -32,6 +32,11 @@
namespace {
enum { ATTRIB_VERTEX, ATTRIB_TEXTURE_POSITION, NUM_ATTRIBUTES };
constexpr char kImageFrameTag[] = "IMAGE";
constexpr char kMaskCpuTag[] = "MASK";
constexpr char kGpuBufferTag[] = "IMAGE_GPU";
constexpr char kMaskGpuTag[] = "MASK_GPU";
} // namespace
namespace mediapipe {
@ -112,39 +117,41 @@ REGISTER_CALCULATOR(RecolorCalculator);
bool use_gpu = false;
#if !defined(MEDIAPIPE_DISABLE_GPU)
if (cc->Inputs().HasTag("IMAGE_GPU")) {
cc->Inputs().Tag("IMAGE_GPU").Set<mediapipe::GpuBuffer>();
if (cc->Inputs().HasTag(kGpuBufferTag)) {
cc->Inputs().Tag(kGpuBufferTag).Set<mediapipe::GpuBuffer>();
use_gpu |= true;
}
#endif // !MEDIAPIPE_DISABLE_GPU
if (cc->Inputs().HasTag("IMAGE")) {
cc->Inputs().Tag("IMAGE").Set<ImageFrame>();
if (cc->Inputs().HasTag(kImageFrameTag)) {
cc->Inputs().Tag(kImageFrameTag).Set<ImageFrame>();
}
#if !defined(MEDIAPIPE_DISABLE_GPU)
if (cc->Inputs().HasTag("MASK_GPU")) {
cc->Inputs().Tag("MASK_GPU").Set<mediapipe::GpuBuffer>();
if (cc->Inputs().HasTag(kMaskGpuTag)) {
cc->Inputs().Tag(kMaskGpuTag).Set<mediapipe::GpuBuffer>();
use_gpu |= true;
}
#endif // !MEDIAPIPE_DISABLE_GPU
if (cc->Inputs().HasTag("MASK")) {
cc->Inputs().Tag("MASK").Set<ImageFrame>();
if (cc->Inputs().HasTag(kMaskCpuTag)) {
cc->Inputs().Tag(kMaskCpuTag).Set<ImageFrame>();
}
#if !defined(MEDIAPIPE_DISABLE_GPU)
if (cc->Outputs().HasTag("IMAGE_GPU")) {
cc->Outputs().Tag("IMAGE_GPU").Set<mediapipe::GpuBuffer>();
if (cc->Outputs().HasTag(kGpuBufferTag)) {
cc->Outputs().Tag(kGpuBufferTag).Set<mediapipe::GpuBuffer>();
use_gpu |= true;
}
#endif // !MEDIAPIPE_DISABLE_GPU
if (cc->Outputs().HasTag("IMAGE")) {
cc->Outputs().Tag("IMAGE").Set<ImageFrame>();
if (cc->Outputs().HasTag(kImageFrameTag)) {
cc->Outputs().Tag(kImageFrameTag).Set<ImageFrame>();
}
// Confirm only one of the input streams is present.
RET_CHECK(cc->Inputs().HasTag("IMAGE") ^ cc->Inputs().HasTag("IMAGE_GPU"));
RET_CHECK(cc->Inputs().HasTag(kImageFrameTag) ^
cc->Inputs().HasTag(kGpuBufferTag));
// Confirm only one of the output streams is present.
RET_CHECK(cc->Outputs().HasTag("IMAGE") ^ cc->Outputs().HasTag("IMAGE_GPU"));
RET_CHECK(cc->Outputs().HasTag(kImageFrameTag) ^
cc->Outputs().HasTag(kGpuBufferTag));
if (use_gpu) {
#if !defined(MEDIAPIPE_DISABLE_GPU)
@ -158,7 +165,7 @@ REGISTER_CALCULATOR(RecolorCalculator);
::mediapipe::Status RecolorCalculator::Open(CalculatorContext* cc) {
cc->SetOffset(TimestampDiff(0));
if (cc->Inputs().HasTag("IMAGE_GPU")) {
if (cc->Inputs().HasTag(kGpuBufferTag)) {
use_gpu_ = true;
#if !defined(MEDIAPIPE_DISABLE_GPU)
MP_RETURN_IF_ERROR(gpu_helper_.Open(cc));
@ -201,12 +208,12 @@ REGISTER_CALCULATOR(RecolorCalculator);
}
::mediapipe::Status RecolorCalculator::RenderCpu(CalculatorContext* cc) {
if (cc->Inputs().Tag("MASK").IsEmpty()) {
if (cc->Inputs().Tag(kMaskCpuTag).IsEmpty()) {
return ::mediapipe::OkStatus();
}
// Get inputs and setup output.
const auto& input_img = cc->Inputs().Tag("IMAGE").Get<ImageFrame>();
const auto& mask_img = cc->Inputs().Tag("MASK").Get<ImageFrame>();
const auto& input_img = cc->Inputs().Tag(kImageFrameTag).Get<ImageFrame>();
const auto& mask_img = cc->Inputs().Tag(kMaskCpuTag).Get<ImageFrame>();
cv::Mat input_mat = formats::MatView(&input_img);
cv::Mat mask_mat = formats::MatView(&mask_img);
@ -254,19 +261,21 @@ REGISTER_CALCULATOR(RecolorCalculator);
}
}
cc->Outputs().Tag("IMAGE").Add(output_img.release(), cc->InputTimestamp());
cc->Outputs()
.Tag(kImageFrameTag)
.Add(output_img.release(), cc->InputTimestamp());
return ::mediapipe::OkStatus();
}
::mediapipe::Status RecolorCalculator::RenderGpu(CalculatorContext* cc) {
if (cc->Inputs().Tag("MASK_GPU").IsEmpty()) {
if (cc->Inputs().Tag(kMaskGpuTag).IsEmpty()) {
return ::mediapipe::OkStatus();
}
#if !defined(MEDIAPIPE_DISABLE_GPU)
// Get inputs and setup output.
const Packet& input_packet = cc->Inputs().Tag("IMAGE_GPU").Value();
const Packet& mask_packet = cc->Inputs().Tag("MASK_GPU").Value();
const Packet& input_packet = cc->Inputs().Tag(kGpuBufferTag).Value();
const Packet& mask_packet = cc->Inputs().Tag(kMaskGpuTag).Value();
const auto& input_buffer = input_packet.Get<mediapipe::GpuBuffer>();
const auto& mask_buffer = mask_packet.Get<mediapipe::GpuBuffer>();
@ -296,7 +305,7 @@ REGISTER_CALCULATOR(RecolorCalculator);
// Send result image in GPU packet.
auto output = dst_tex.GetFrame<mediapipe::GpuBuffer>();
cc->Outputs().Tag("IMAGE_GPU").Add(output.release(), cc->InputTimestamp());
cc->Outputs().Tag(kGpuBufferTag).Add(output.release(), cc->InputTimestamp());
// Cleanup
img_tex.Release();

View File

@ -243,6 +243,7 @@ cc_library(
"@org_tensorflow//tensorflow/lite/delegates/gpu:metal_delegate_internal",
],
"//conditions:default": [
"//mediapipe/util/tflite:tflite_gpu_runner",
"//mediapipe/gpu:gl_calculator_helper",
"//mediapipe/gpu:gpu_buffer",
"@org_tensorflow//tensorflow/lite/delegates/gpu/common:shape",

View File

@ -63,6 +63,10 @@ typedef Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>
typedef Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor>
ColMajorMatrixXf;
constexpr char kImageFrameTag[] = "IMAGE";
constexpr char kGpuBufferTag[] = "IMAGE_GPU";
constexpr char kTensorsTag[] = "TENSORS";
constexpr char kTensorsGpuTag[] = "TENSORS_GPU";
} // namespace
namespace mediapipe {
@ -124,6 +128,9 @@ struct GPUData {
// GPU tensors are currently only supported on mobile platforms.
// This calculator uses FixedSizeInputStreamHandler by default.
//
// Note: Input defines output, so only these type sets are supported:
// IMAGE -> TENSORS | IMAGE_GPU -> TENSORS_GPU | MATRIX -> TENSORS
//
class TfLiteConverterCalculator : public CalculatorBase {
public:
static ::mediapipe::Status GetContract(CalculatorContract* cc);
@ -138,9 +145,9 @@ class TfLiteConverterCalculator : public CalculatorBase {
template <class T>
::mediapipe::Status NormalizeImage(const ImageFrame& image_frame,
bool zero_center, bool flip_vertically,
float* tensor_buffer);
float* tensor_ptr);
::mediapipe::Status CopyMatrixToTensor(const Matrix& matrix,
float* tensor_buffer);
float* tensor_ptr);
::mediapipe::Status ProcessCPU(CalculatorContext* cc);
::mediapipe::Status ProcessGPU(CalculatorContext* cc);
@ -166,33 +173,35 @@ REGISTER_CALCULATOR(TfLiteConverterCalculator);
::mediapipe::Status TfLiteConverterCalculator::GetContract(
CalculatorContract* cc) {
const bool has_image_tag = cc->Inputs().HasTag("IMAGE");
const bool has_image_gpu_tag = cc->Inputs().HasTag("IMAGE_GPU");
const bool has_matrix_tag = cc->Inputs().HasTag("MATRIX");
// Confirm only one of the input streams is present.
RET_CHECK(has_image_tag ^ has_image_gpu_tag ^ has_matrix_tag &&
!(has_image_tag && has_image_gpu_tag && has_matrix_tag));
RET_CHECK(cc->Inputs().HasTag(kImageFrameTag) ^
cc->Inputs().HasTag(kGpuBufferTag) ^ cc->Inputs().HasTag("MATRIX"));
// Confirm only one of the output streams is present.
RET_CHECK(cc->Outputs().HasTag("TENSORS") ^
cc->Outputs().HasTag("TENSORS_GPU"));
RET_CHECK(cc->Outputs().HasTag(kTensorsTag) ^
cc->Outputs().HasTag(kTensorsGpuTag));
bool use_gpu = false;
if (cc->Inputs().HasTag("IMAGE")) cc->Inputs().Tag("IMAGE").Set<ImageFrame>();
if (cc->Inputs().HasTag("MATRIX")) cc->Inputs().Tag("MATRIX").Set<Matrix>();
if (cc->Inputs().HasTag(kImageFrameTag)) {
cc->Inputs().Tag(kImageFrameTag).Set<ImageFrame>();
}
if (cc->Inputs().HasTag("MATRIX")) {
cc->Inputs().Tag("MATRIX").Set<Matrix>();
}
#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__)
if (cc->Inputs().HasTag("IMAGE_GPU")) {
cc->Inputs().Tag("IMAGE_GPU").Set<mediapipe::GpuBuffer>();
if (cc->Inputs().HasTag(kGpuBufferTag)) {
cc->Inputs().Tag(kGpuBufferTag).Set<mediapipe::GpuBuffer>();
use_gpu |= true;
}
#endif // !MEDIAPIPE_DISABLE_GPU
if (cc->Outputs().HasTag("TENSORS"))
cc->Outputs().Tag("TENSORS").Set<std::vector<TfLiteTensor>>();
if (cc->Outputs().HasTag(kTensorsTag)) {
cc->Outputs().Tag(kTensorsTag).Set<std::vector<TfLiteTensor>>();
}
#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__)
if (cc->Outputs().HasTag("TENSORS_GPU")) {
cc->Outputs().Tag("TENSORS_GPU").Set<std::vector<GpuTensor>>();
if (cc->Outputs().HasTag(kTensorsGpuTag)) {
cc->Outputs().Tag(kTensorsGpuTag).Set<std::vector<GpuTensor>>();
use_gpu |= true;
}
#endif // !MEDIAPIPE_DISABLE_GPU
@ -216,8 +225,8 @@ REGISTER_CALCULATOR(TfLiteConverterCalculator);
MP_RETURN_IF_ERROR(LoadOptions(cc));
if (cc->Inputs().HasTag("IMAGE_GPU") ||
cc->Outputs().HasTag("IMAGE_OUT_GPU")) {
if (cc->Inputs().HasTag(kGpuBufferTag) ||
cc->Outputs().HasTag(kGpuBufferTag)) {
#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__)
use_gpu_ = true;
#else
@ -227,8 +236,8 @@ REGISTER_CALCULATOR(TfLiteConverterCalculator);
if (use_gpu_) {
// Cannot mix CPU/GPU streams.
RET_CHECK(cc->Inputs().HasTag("IMAGE_GPU") &&
cc->Outputs().HasTag("TENSORS_GPU"));
RET_CHECK(cc->Inputs().HasTag(kGpuBufferTag) &&
cc->Outputs().HasTag(kTensorsGpuTag));
// Cannot use quantization.
use_quantized_tensors_ = false;
#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE)
@ -248,7 +257,6 @@ REGISTER_CALCULATOR(TfLiteConverterCalculator);
::mediapipe::Status TfLiteConverterCalculator::Process(CalculatorContext* cc) {
if (use_gpu_) {
// GpuBuffer to tflite::gpu::GlBuffer conversion.
if (!initialized_) {
MP_RETURN_IF_ERROR(InitGpu(cc));
initialized_ = true;
@ -259,7 +267,6 @@ REGISTER_CALCULATOR(TfLiteConverterCalculator);
// Convert to CPU tensors or Matrix type.
MP_RETURN_IF_ERROR(ProcessCPU(cc));
}
return ::mediapipe::OkStatus();
}
@ -275,24 +282,26 @@ REGISTER_CALCULATOR(TfLiteConverterCalculator);
::mediapipe::Status TfLiteConverterCalculator::ProcessCPU(
CalculatorContext* cc) {
if (cc->Inputs().HasTag("IMAGE")) {
if (cc->Inputs().HasTag(kImageFrameTag)) {
// CPU ImageFrame to TfLiteTensor conversion.
const auto& image_frame = cc->Inputs().Tag("IMAGE").Get<ImageFrame>();
const auto& image_frame =
cc->Inputs().Tag(kImageFrameTag).Get<ImageFrame>();
const int height = image_frame.Height();
const int width = image_frame.Width();
const int channels = image_frame.NumberOfChannels();
const int channels_preserved = std::min(channels, max_num_channels_);
const mediapipe::ImageFormat::Format format = image_frame.Format();
if (!initialized_) {
if (!(image_frame.Format() == mediapipe::ImageFormat::SRGBA ||
image_frame.Format() == mediapipe::ImageFormat::SRGB ||
image_frame.Format() == mediapipe::ImageFormat::GRAY8 ||
image_frame.Format() == mediapipe::ImageFormat::VEC32F1))
if (!(format == mediapipe::ImageFormat::SRGBA ||
format == mediapipe::ImageFormat::SRGB ||
format == mediapipe::ImageFormat::GRAY8 ||
format == mediapipe::ImageFormat::VEC32F1))
RET_CHECK_FAIL() << "Unsupported CPU input format.";
TfLiteQuantization quant;
if (use_quantized_tensors_) {
RET_CHECK(image_frame.Format() != mediapipe::ImageFormat::VEC32F1)
RET_CHECK(format != mediapipe::ImageFormat::VEC32F1)
<< "Only 8-bit input images are supported for quantization.";
quant.type = kTfLiteAffineQuantization;
quant.params = nullptr;
@ -349,8 +358,9 @@ REGISTER_CALCULATOR(TfLiteConverterCalculator);
auto output_tensors = absl::make_unique<std::vector<TfLiteTensor>>();
output_tensors->emplace_back(*tensor);
cc->Outputs().Tag("TENSORS").Add(output_tensors.release(),
cc->InputTimestamp());
cc->Outputs()
.Tag(kTensorsTag)
.Add(output_tensors.release(), cc->InputTimestamp());
} else if (cc->Inputs().HasTag("MATRIX")) {
// CPU Matrix to TfLiteTensor conversion.
@ -371,15 +381,16 @@ REGISTER_CALCULATOR(TfLiteConverterCalculator);
interpreter_->ResizeInputTensor(tensor_idx, {height, width, channels});
interpreter_->AllocateTensors();
float* tensor_buffer = tensor->data.f;
RET_CHECK(tensor_buffer);
float* tensor_ptr = tensor->data.f;
RET_CHECK(tensor_ptr);
MP_RETURN_IF_ERROR(CopyMatrixToTensor(matrix, tensor_buffer));
MP_RETURN_IF_ERROR(CopyMatrixToTensor(matrix, tensor_ptr));
auto output_tensors = absl::make_unique<std::vector<TfLiteTensor>>();
output_tensors->emplace_back(*tensor);
cc->Outputs().Tag("TENSORS").Add(output_tensors.release(),
cc->InputTimestamp());
cc->Outputs()
.Tag(kTensorsTag)
.Add(output_tensors.release(), cc->InputTimestamp());
}
return ::mediapipe::OkStatus();
@ -389,7 +400,8 @@ REGISTER_CALCULATOR(TfLiteConverterCalculator);
CalculatorContext* cc) {
#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE)
// GpuBuffer to tflite::gpu::GlBuffer conversion.
const auto& input = cc->Inputs().Tag("IMAGE_GPU").Get<mediapipe::GpuBuffer>();
const auto& input =
cc->Inputs().Tag(kGpuBufferTag).Get<mediapipe::GpuBuffer>();
MP_RETURN_IF_ERROR(
gpu_helper_.RunInGlContext([this, &input]() -> ::mediapipe::Status {
// Convert GL texture into TfLite GlBuffer (SSBO).
@ -421,11 +433,12 @@ REGISTER_CALCULATOR(TfLiteConverterCalculator);
return ::mediapipe::OkStatus();
}));
cc->Outputs()
.Tag("TENSORS_GPU")
.Tag(kTensorsGpuTag)
.Add(output_tensors.release(), cc->InputTimestamp());
#elif defined(MEDIAPIPE_IOS)
// GpuBuffer to id<MTLBuffer> conversion.
const auto& input = cc->Inputs().Tag("IMAGE_GPU").Get<mediapipe::GpuBuffer>();
const auto& input =
cc->Inputs().Tag(kGpuBufferTag).Get<mediapipe::GpuBuffer>();
id<MTLCommandBuffer> command_buffer = [gpu_helper_ commandBuffer];
id<MTLTexture> src_texture = [gpu_helper_ metalTextureWithGpuBuffer:input];
@ -457,7 +470,7 @@ REGISTER_CALCULATOR(TfLiteConverterCalculator);
commandBuffer:command_buffer];
cc->Outputs()
.Tag("TENSORS_GPU")
.Tag(kTensorsGpuTag)
.Add(output_tensors.release(), cc->InputTimestamp());
#else
RET_CHECK_FAIL() << "GPU processing is not enabled.";
@ -469,7 +482,8 @@ REGISTER_CALCULATOR(TfLiteConverterCalculator);
::mediapipe::Status TfLiteConverterCalculator::InitGpu(CalculatorContext* cc) {
#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__)
// Get input image sizes.
const auto& input = cc->Inputs().Tag("IMAGE_GPU").Get<mediapipe::GpuBuffer>();
const auto& input =
cc->Inputs().Tag(kGpuBufferTag).Get<mediapipe::GpuBuffer>();
mediapipe::ImageFormat::Format format =
mediapipe::ImageFormatForGpuBufferFormat(input.format());
gpu_data_out_ = absl::make_unique<GPUData>();
@ -612,7 +626,7 @@ REGISTER_CALCULATOR(TfLiteConverterCalculator);
CHECK_LE(max_num_channels_, 4);
CHECK_NE(max_num_channels_, 2);
#if defined(MEDIAPIPE_IOS)
if (cc->Inputs().HasTag("IMAGE_GPU"))
if (cc->Inputs().HasTag(kGpuBufferTag))
// Currently on iOS, tflite gpu input tensor must be 4 channels,
// so input image must be 4 channels also (checked in InitGpu).
max_num_channels_ = 4;
@ -627,7 +641,7 @@ REGISTER_CALCULATOR(TfLiteConverterCalculator);
template <class T>
::mediapipe::Status TfLiteConverterCalculator::NormalizeImage(
const ImageFrame& image_frame, bool zero_center, bool flip_vertically,
float* tensor_buffer) {
float* tensor_ptr) {
const int height = image_frame.Height();
const int width = image_frame.Width();
const int channels = image_frame.NumberOfChannels();
@ -651,7 +665,7 @@ template <class T>
(flip_vertically ? height - 1 - i : i) * image_frame.WidthStep());
for (int j = 0; j < width; ++j) {
for (int c = 0; c < channels_preserved; ++c) {
*tensor_buffer++ = *image_ptr++ / div - sub;
*tensor_ptr++ = *image_ptr++ / div - sub;
}
image_ptr += channels_ignored;
}
@ -661,14 +675,14 @@ template <class T>
}
::mediapipe::Status TfLiteConverterCalculator::CopyMatrixToTensor(
const Matrix& matrix, float* tensor_buffer) {
const Matrix& matrix, float* tensor_ptr) {
if (row_major_matrix_) {
auto matrix_map = Eigen::Map<RowMajorMatrixXf>(tensor_buffer, matrix.rows(),
matrix.cols());
auto matrix_map =
Eigen::Map<RowMajorMatrixXf>(tensor_ptr, matrix.rows(), matrix.cols());
matrix_map = matrix;
} else {
auto matrix_map = Eigen::Map<ColMajorMatrixXf>(tensor_buffer, matrix.rows(),
matrix.cols());
auto matrix_map =
Eigen::Map<ColMajorMatrixXf>(tensor_ptr, matrix.rows(), matrix.cols());
matrix_map = matrix;
}
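Matching the type pairings noted in the updated comment (IMAGE -> TENSORS, IMAGE_GPU -> TENSORS_GPU, MATRIX -> TENSORS), a CPU-only node might be configured as sketched below; the stream names are assumptions for illustration.

```
# Hypothetical sketch, not part of this commit: a CPU ImageFrame converted to
# CPU TfLite tensors, following the IMAGE -> TENSORS pairing described above.
node {
  calculator: "TfLiteConverterCalculator"
  input_stream: "IMAGE:transformed_input_video"
  output_stream: "TENSORS:image_tensor"
}
```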

View File

@ -36,6 +36,7 @@
#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE)
#include "mediapipe/gpu/gl_calculator_helper.h"
#include "mediapipe/gpu/gpu_buffer.h"
#include "mediapipe/util/tflite/tflite_gpu_runner.h"
#include "tensorflow/lite/delegates/gpu/common/shape.h"
#include "tensorflow/lite/delegates/gpu/gl/gl_buffer.h"
#include "tensorflow/lite/delegates/gpu/gl/gl_program.h"
@ -75,6 +76,9 @@ typedef id<MTLBuffer> GpuTensor;
// Round up n to next multiple of m.
size_t RoundUp(size_t n, size_t m) { return ((n + m - 1) / m) * m; } // NOLINT
constexpr char kTensorsTag[] = "TENSORS";
constexpr char kTensorsGpuTag[] = "TENSORS_GPU";
} // namespace
#if defined(MEDIAPIPE_EDGE_TPU)
@ -219,6 +223,7 @@ class TfLiteInferenceCalculator : public CalculatorBase {
::mediapipe::Status LoadModel(CalculatorContext* cc);
::mediapipe::StatusOr<Packet> GetModelAsPacket(const CalculatorContext& cc);
::mediapipe::Status LoadDelegate(CalculatorContext* cc);
::mediapipe::Status InitTFLiteGPURunner();
Packet model_packet_;
std::unique_ptr<tflite::Interpreter> interpreter_;
@ -228,6 +233,7 @@ class TfLiteInferenceCalculator : public CalculatorBase {
mediapipe::GlCalculatorHelper gpu_helper_;
std::vector<std::unique_ptr<GPUData>> gpu_data_in_;
std::vector<std::unique_ptr<GPUData>> gpu_data_out_;
std::unique_ptr<tflite::gpu::TFLiteGPURunner> tflite_gpu_runner_;
#elif defined(MEDIAPIPE_IOS)
MPPMetalHelper* gpu_helper_ = nullptr;
std::vector<std::unique_ptr<GPUData>> gpu_data_in_;
@ -245,6 +251,8 @@ class TfLiteInferenceCalculator : public CalculatorBase {
bool gpu_input_ = false;
bool gpu_output_ = false;
bool use_quantized_tensors_ = false;
bool use_advanced_gpu_api_ = false;
};
REGISTER_CALCULATOR(TfLiteInferenceCalculator);
@ -252,10 +260,10 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator);
::mediapipe::Status TfLiteInferenceCalculator::GetContract(
CalculatorContract* cc) {
RET_CHECK(cc->Inputs().HasTag("TENSORS") ^
cc->Inputs().HasTag("TENSORS_GPU"));
RET_CHECK(cc->Outputs().HasTag("TENSORS") ^
cc->Outputs().HasTag("TENSORS_GPU"));
RET_CHECK(cc->Inputs().HasTag(kTensorsTag) ^
cc->Inputs().HasTag(kTensorsGpuTag));
RET_CHECK(cc->Outputs().HasTag(kTensorsTag) ^
cc->Outputs().HasTag(kTensorsGpuTag));
const auto& options =
cc->Options<::mediapipe::TfLiteInferenceCalculatorOptions>();
@ -266,26 +274,26 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator);
bool use_gpu =
options.has_delegate() ? options.delegate().has_gpu() : options.use_gpu();
if (cc->Inputs().HasTag("TENSORS"))
cc->Inputs().Tag("TENSORS").Set<std::vector<TfLiteTensor>>();
if (cc->Inputs().HasTag(kTensorsTag))
cc->Inputs().Tag(kTensorsTag).Set<std::vector<TfLiteTensor>>();
#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__)
if (cc->Inputs().HasTag("TENSORS_GPU")) {
if (cc->Inputs().HasTag(kTensorsGpuTag)) {
RET_CHECK(!options.has_delegate() || options.delegate().has_gpu())
<< "GPU input is compatible with GPU delegate only.";
cc->Inputs().Tag("TENSORS_GPU").Set<std::vector<GpuTensor>>();
cc->Inputs().Tag(kTensorsGpuTag).Set<std::vector<GpuTensor>>();
use_gpu |= true;
}
#endif // !MEDIAPIPE_DISABLE_GPU
if (cc->Outputs().HasTag("TENSORS"))
cc->Outputs().Tag("TENSORS").Set<std::vector<TfLiteTensor>>();
if (cc->Outputs().HasTag(kTensorsTag))
cc->Outputs().Tag(kTensorsTag).Set<std::vector<TfLiteTensor>>();
#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__)
if (cc->Outputs().HasTag("TENSORS_GPU")) {
if (cc->Outputs().HasTag(kTensorsGpuTag)) {
RET_CHECK(!options.has_delegate() || options.delegate().has_gpu())
<< "GPU output is compatible with GPU delegate only.";
cc->Outputs().Tag("TENSORS_GPU").Set<std::vector<GpuTensor>>();
cc->Outputs().Tag(kTensorsGpuTag).Set<std::vector<GpuTensor>>();
use_gpu |= true;
}
#endif // !MEDIAPIPE_DISABLE_GPU
@ -320,27 +328,31 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator);
cc->Options<::mediapipe::TfLiteInferenceCalculatorOptions>();
gpu_inference_ = options.use_gpu();
if (cc->Inputs().HasTag("TENSORS_GPU")) {
if (cc->Inputs().HasTag(kTensorsGpuTag)) {
#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__)
gpu_input_ = true;
gpu_inference_ = true; // Inference must be on GPU also.
#else
RET_CHECK(!cc->Inputs().HasTag("TENSORS_GPU"))
RET_CHECK(!cc->Inputs().HasTag(kTensorsGpuTag))
<< "GPU processing not enabled.";
#endif // !MEDIAPIPE_DISABLE_GPU
}
if (cc->Outputs().HasTag("TENSORS_GPU")) {
if (cc->Outputs().HasTag(kTensorsGpuTag)) {
#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__)
gpu_output_ = true;
RET_CHECK(cc->Inputs().HasTag("TENSORS_GPU"))
RET_CHECK(cc->Inputs().HasTag(kTensorsGpuTag))
<< "GPU output must also have GPU Input.";
#else
RET_CHECK(!cc->Inputs().HasTag("TENSORS_GPU"))
RET_CHECK(!cc->Inputs().HasTag(kTensorsGpuTag))
<< "GPU processing not enabled.";
#endif // !MEDIAPIPE_DISABLE_GPU
}
const auto& calculator_opts =
cc->Options<mediapipe::TfLiteInferenceCalculatorOptions>();
use_advanced_gpu_api_ = false;
MP_RETURN_IF_ERROR(LoadModel(cc));
if (gpu_inference_) {
@ -352,8 +364,12 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator);
#endif
#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE)
MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext(
[this, &cc]() -> ::mediapipe::Status { return LoadDelegate(cc); }));
MP_RETURN_IF_ERROR(
gpu_helper_.RunInGlContext([this, &cc]() -> ::mediapipe::Status {
return use_advanced_gpu_api_ ? InitTFLiteGPURunner()
: LoadDelegate(cc);
}));
if (use_advanced_gpu_api_) return ::mediapipe::OkStatus();
#else
MP_RETURN_IF_ERROR(LoadDelegate(cc));
#endif
@ -365,13 +381,51 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator);
return ::mediapipe::OkStatus();
}
::mediapipe::Status TfLiteInferenceCalculator::InitTFLiteGPURunner() {
#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE)
// Create and bind OpenGL buffers for outputs.
// These buffers are created once and later their ids are just passed to the
// calculator outputs.
gpu_data_out_.resize(tflite_gpu_runner_->outputs_size());
for (int i = 0; i < tflite_gpu_runner_->outputs_size(); ++i) {
gpu_data_out_[i] = absl::make_unique<GPUData>();
ASSIGN_OR_RETURN(gpu_data_out_[i]->elements,
tflite_gpu_runner_->GetOutputElements(i));
// Create and bind output buffer.
RET_CHECK_CALL(::tflite::gpu::gl::CreateReadWriteShaderStorageBuffer<float>(
gpu_data_out_[i]->elements, &gpu_data_out_[i]->buffer));
}
RET_CHECK_CALL(tflite_gpu_runner_->Build());
#endif
return ::mediapipe::OkStatus();
}
::mediapipe::Status TfLiteInferenceCalculator::Process(CalculatorContext* cc) {
// 1. Receive pre-processed tensor inputs.
if (gpu_input_) {
// Read GPU input into SSBO.
if (use_advanced_gpu_api_) {
#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE)
const auto& input_tensors =
cc->Inputs().Tag("TENSORS_GPU").Get<std::vector<GpuTensor>>();
RET_CHECK(input_tensors.empty());
MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext(
[this, &input_tensors]() -> ::mediapipe::Status {
for (int i = 0; i < input_tensors.size(); ++i) {
MP_RETURN_IF_ERROR(tflite_gpu_runner_->BindSSBOToInputTensor(
input_tensors[i].id(), i));
}
for (int i = 0; i < gpu_data_out_.size(); ++i) {
MP_RETURN_IF_ERROR(tflite_gpu_runner_->BindSSBOToOutputTensor(
gpu_data_out_[i]->buffer.id(), i));
}
return ::mediapipe::OkStatus();
}));
#endif
} else if (gpu_input_) {
// Read GPU input into SSBO.
#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE)
const auto& input_tensors =
cc->Inputs().Tag(kTensorsGpuTag).Get<std::vector<GpuTensor>>();
RET_CHECK_GT(input_tensors.size(), 0);
MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext(
[this, &input_tensors]() -> ::mediapipe::Status {
@ -386,7 +440,7 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator);
}));
#elif defined(MEDIAPIPE_IOS)
const auto& input_tensors =
cc->Inputs().Tag("TENSORS_GPU").Get<std::vector<GpuTensor>>();
cc->Inputs().Tag(kTensorsGpuTag).Get<std::vector<GpuTensor>>();
RET_CHECK_GT(input_tensors.size(), 0);
// Explicit copy input with conversion float 32 bits to 16 bits.
gpu_data_in_.resize(input_tensors.size());
@ -413,7 +467,7 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator);
} else {
// Read CPU input into tensors.
const auto& input_tensors =
cc->Inputs().Tag("TENSORS").Get<std::vector<TfLiteTensor>>();
cc->Inputs().Tag(kTensorsTag).Get<std::vector<TfLiteTensor>>();
RET_CHECK_GT(input_tensors.size(), 0);
for (int i = 0; i < input_tensors.size(); ++i) {
const TfLiteTensor* input_tensor = &input_tensors[i];
@ -437,7 +491,11 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator);
#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE)
MP_RETURN_IF_ERROR(
gpu_helper_.RunInGlContext([this]() -> ::mediapipe::Status {
if (use_advanced_gpu_api_) {
RET_CHECK(tflite_gpu_runner_->Invoke().ok());
} else {
RET_CHECK_EQ(interpreter_->Invoke(), kTfLiteOk);
}
return ::mediapipe::OkStatus();
}));
#elif defined(MEDIAPIPE_IOS)
@ -448,7 +506,18 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator);
}
// 3. Output processed tensors.
if (gpu_output_) {
if (use_advanced_gpu_api_) {
#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE)
auto output_tensors = absl::make_unique<std::vector<GpuTensor>>();
output_tensors->resize(gpu_data_out_.size());
for (int i = 0; i < gpu_data_out_.size(); ++i) {
output_tensors->at(i) = gpu_data_out_[0]->buffer.MakeRef();
}
cc->Outputs()
.Tag("TENSORS_GPU")
.Add(output_tensors.release(), cc->InputTimestamp());
#endif
} else if (gpu_output_) {
#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE)
// Output result tensors (GPU).
auto output_tensors = absl::make_unique<std::vector<GpuTensor>>();
@ -464,7 +533,7 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator);
return ::mediapipe::OkStatus();
}));
cc->Outputs()
.Tag("TENSORS_GPU")
.Tag(kTensorsGpuTag)
.Add(output_tensors.release(), cc->InputTimestamp());
#elif defined(MEDIAPIPE_IOS)
// Output result tensors (GPU).
@ -488,7 +557,7 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator);
[convert_command endEncoding];
[command_buffer commit];
cc->Outputs()
.Tag("TENSORS_GPU")
.Tag(kTensorsGpuTag)
.Add(output_tensors.release(), cc->InputTimestamp());
#else
RET_CHECK_FAIL() << "GPU processing not enabled.";
@ -501,8 +570,9 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator);
TfLiteTensor* tensor = interpreter_->tensor(tensor_indexes[i]);
output_tensors->emplace_back(*tensor);
}
cc->Outputs().Tag("TENSORS").Add(output_tensors.release(),
cc->InputTimestamp());
cc->Outputs()
.Tag(kTensorsTag)
.Add(output_tensors.release(), cc->InputTimestamp());
}
return ::mediapipe::OkStatus();
@ -557,6 +627,20 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator);
.Tag("CUSTOM_OP_RESOLVER")
.Get<tflite::ops::builtin::BuiltinOpResolver>();
}
#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE)
if (use_advanced_gpu_api_) {
tflite::gpu::InferenceOptions options;
options.priority1 = tflite::gpu::InferencePriority::MIN_LATENCY;
options.priority2 = tflite::gpu::InferencePriority::AUTO;
options.priority3 = tflite::gpu::InferencePriority::AUTO;
options.usage = tflite::gpu::InferenceUsage::SUSTAINED_SPEED;
tflite_gpu_runner_ =
std::make_unique<tflite::gpu::TFLiteGPURunner>(options);
return tflite_gpu_runner_->InitializeWithModel(model);
}
#endif
#if defined(MEDIAPIPE_EDGE_TPU)
interpreter_ =
BuildEdgeTpuInterpreter(model, &op_resolver, edgetpu_context_.get());
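For context, the new TFLiteGPURunner path is gated on use_advanced_gpu_api_, which this diff still hard-codes to false in Open(); the matching experimental option lands in the proto change below. A hedged sketch of how a graph might eventually set it, assuming the options extension form, the delegate field layout, and the model_path field of TfLiteInferenceCalculatorOptions:

```
# Hypothetical sketch, not part of this commit. The options extension form, the
# delegate field layout, and the model path are assumed for illustration.
node {
  calculator: "TfLiteInferenceCalculator"
  input_stream: "TENSORS_GPU:image_tensor"
  output_stream: "TENSORS_GPU:detection_tensors"
  options: {
    [mediapipe.TfLiteInferenceCalculatorOptions.ext] {
      model_path: "mediapipe/models/model.tflite"
      delegate { gpu { use_advanced_gpu_api: true } }
    }
  }
}
```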

View File

@ -42,7 +42,11 @@ message TfLiteInferenceCalculatorOptions {
message TfLite {}
// Delegate to run GPU inference depending on the device.
// (Can use OpenGl, OpenCl, Metal depending on the device.)
message Gpu {}
message Gpu {
// Experimental, Android/Linux only. Use TFLite GPU delegate API2 for
// the NN inference.
optional bool use_advanced_gpu_api = 1 [default = false];
}
// Android only.
message Nnapi {}
message Xnnpack {

View File

@ -47,10 +47,11 @@
#endif // iOS
namespace {
constexpr int kNumInputTensorsWithAnchors = 3;
constexpr int kNumCoordsPerBox = 4;
constexpr char kTensorsTag[] = "TENSORS";
constexpr char kTensorsGpuTag[] = "TENSORS_GPU";
} // namespace
namespace mediapipe {
@ -200,13 +201,13 @@ REGISTER_CALCULATOR(TfLiteTensorsToDetectionsCalculator);
bool use_gpu = false;
if (cc->Inputs().HasTag("TENSORS")) {
cc->Inputs().Tag("TENSORS").Set<std::vector<TfLiteTensor>>();
if (cc->Inputs().HasTag(kTensorsTag)) {
cc->Inputs().Tag(kTensorsTag).Set<std::vector<TfLiteTensor>>();
}
#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__)
if (cc->Inputs().HasTag("TENSORS_GPU")) {
cc->Inputs().Tag("TENSORS_GPU").Set<std::vector<GpuTensor>>();
if (cc->Inputs().HasTag(kTensorsGpuTag)) {
cc->Inputs().Tag(kTensorsGpuTag).Set<std::vector<GpuTensor>>();
use_gpu |= true;
}
#endif // !MEDIAPIPE_DISABLE_GPU
@ -236,7 +237,7 @@ REGISTER_CALCULATOR(TfLiteTensorsToDetectionsCalculator);
CalculatorContext* cc) {
cc->SetOffset(TimestampDiff(0));
if (cc->Inputs().HasTag("TENSORS_GPU")) {
if (cc->Inputs().HasTag(kTensorsGpuTag)) {
gpu_input_ = true;
#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE)
MP_RETURN_IF_ERROR(gpu_helper_.Open(cc));
@ -258,8 +259,8 @@ REGISTER_CALCULATOR(TfLiteTensorsToDetectionsCalculator);
::mediapipe::Status TfLiteTensorsToDetectionsCalculator::Process(
CalculatorContext* cc) {
if ((!gpu_input_ && cc->Inputs().Tag("TENSORS").IsEmpty()) ||
(gpu_input_ && cc->Inputs().Tag("TENSORS_GPU").IsEmpty())) {
if ((!gpu_input_ && cc->Inputs().Tag(kTensorsTag).IsEmpty()) ||
(gpu_input_ && cc->Inputs().Tag(kTensorsGpuTag).IsEmpty())) {
return ::mediapipe::OkStatus();
}
@ -284,7 +285,7 @@ REGISTER_CALCULATOR(TfLiteTensorsToDetectionsCalculator);
::mediapipe::Status TfLiteTensorsToDetectionsCalculator::ProcessCPU(
CalculatorContext* cc, std::vector<Detection>* output_detections) {
const auto& input_tensors =
cc->Inputs().Tag("TENSORS").Get<std::vector<TfLiteTensor>>();
cc->Inputs().Tag(kTensorsTag).Get<std::vector<TfLiteTensor>>();
if (input_tensors.size() == 2 ||
input_tensors.size() == kNumInputTensorsWithAnchors) {
@ -402,7 +403,7 @@ REGISTER_CALCULATOR(TfLiteTensorsToDetectionsCalculator);
CalculatorContext* cc, std::vector<Detection>* output_detections) {
#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE)
const auto& input_tensors =
cc->Inputs().Tag("TENSORS_GPU").Get<std::vector<GpuTensor>>();
cc->Inputs().Tag(kTensorsGpuTag).Get<std::vector<GpuTensor>>();
RET_CHECK_GE(input_tensors.size(), 2);
MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext([this, &input_tensors, &cc,
@ -466,7 +467,7 @@ REGISTER_CALCULATOR(TfLiteTensorsToDetectionsCalculator);
#elif defined(MEDIAPIPE_IOS)
const auto& input_tensors =
cc->Inputs().Tag("TENSORS_GPU").Get<std::vector<GpuTensor>>();
cc->Inputs().Tag(kTensorsGpuTag).Get<std::vector<GpuTensor>>();
RET_CHECK_GE(input_tensors.size(), 2);
// Copy inputs.

View File

@ -49,6 +49,16 @@ int NumGroups(const int size, const int group_size) { // NOLINT
float Clamp(float val, float min, float max) {
return std::min(std::max(val, min), max);
}
constexpr char kTensorsTag[] = "TENSORS";
constexpr char kTensorsGpuTag[] = "TENSORS_GPU";
constexpr char kSizeImageTag[] = "REFERENCE_IMAGE";
constexpr char kSizeImageGpuTag[] = "REFERENCE_IMAGE_GPU";
constexpr char kMaskTag[] = "MASK";
constexpr char kMaskGpuTag[] = "MASK_GPU";
constexpr char kPrevMaskTag[] = "PREV_MASK";
constexpr char kPrevMaskGpuTag[] = "PREV_MASK_GPU";
} // namespace
namespace mediapipe {
@ -148,39 +158,39 @@ REGISTER_CALCULATOR(TfLiteTensorsToSegmentationCalculator);
bool use_gpu = false;
// Inputs CPU.
if (cc->Inputs().HasTag("TENSORS")) {
cc->Inputs().Tag("TENSORS").Set<std::vector<TfLiteTensor>>();
if (cc->Inputs().HasTag(kTensorsTag)) {
cc->Inputs().Tag(kTensorsTag).Set<std::vector<TfLiteTensor>>();
}
if (cc->Inputs().HasTag("PREV_MASK")) {
cc->Inputs().Tag("PREV_MASK").Set<ImageFrame>();
if (cc->Inputs().HasTag(kPrevMaskTag)) {
cc->Inputs().Tag(kPrevMaskTag).Set<ImageFrame>();
}
if (cc->Inputs().HasTag("REFERENCE_IMAGE")) {
cc->Inputs().Tag("REFERENCE_IMAGE").Set<ImageFrame>();
if (cc->Inputs().HasTag(kSizeImageTag)) {
cc->Inputs().Tag(kSizeImageTag).Set<ImageFrame>();
}
// Inputs GPU.
#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE)
if (cc->Inputs().HasTag("TENSORS_GPU")) {
cc->Inputs().Tag("TENSORS_GPU").Set<std::vector<GlBuffer>>();
if (cc->Inputs().HasTag(kTensorsGpuTag)) {
cc->Inputs().Tag(kTensorsGpuTag).Set<std::vector<GlBuffer>>();
use_gpu |= true;
}
if (cc->Inputs().HasTag("PREV_MASK_GPU")) {
cc->Inputs().Tag("PREV_MASK_GPU").Set<mediapipe::GpuBuffer>();
if (cc->Inputs().HasTag(kPrevMaskGpuTag)) {
cc->Inputs().Tag(kPrevMaskGpuTag).Set<mediapipe::GpuBuffer>();
use_gpu |= true;
}
if (cc->Inputs().HasTag("REFERENCE_IMAGE_GPU")) {
cc->Inputs().Tag("REFERENCE_IMAGE_GPU").Set<mediapipe::GpuBuffer>();
if (cc->Inputs().HasTag(kSizeImageGpuTag)) {
cc->Inputs().Tag(kSizeImageGpuTag).Set<mediapipe::GpuBuffer>();
use_gpu |= true;
}
#endif // !MEDIAPIPE_DISABLE_GPU
// Outputs.
if (cc->Outputs().HasTag("MASK")) {
cc->Outputs().Tag("MASK").Set<ImageFrame>();
if (cc->Outputs().HasTag(kMaskTag)) {
cc->Outputs().Tag(kMaskTag).Set<ImageFrame>();
}
#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE)
if (cc->Outputs().HasTag("MASK_GPU")) {
cc->Outputs().Tag("MASK_GPU").Set<mediapipe::GpuBuffer>();
if (cc->Outputs().HasTag(kMaskGpuTag)) {
cc->Outputs().Tag(kMaskGpuTag).Set<mediapipe::GpuBuffer>();
use_gpu |= true;
}
#endif // !MEDIAPIPE_DISABLE_GPU
@ -197,7 +207,7 @@ REGISTER_CALCULATOR(TfLiteTensorsToSegmentationCalculator);
CalculatorContext* cc) {
cc->SetOffset(TimestampDiff(0));
if (cc->Inputs().HasTag("TENSORS_GPU")) {
if (cc->Inputs().HasTag(kTensorsGpuTag)) {
use_gpu_ = true;
#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE)
MP_RETURN_IF_ERROR(gpu_helper_.Open(cc));
@ -255,23 +265,22 @@ REGISTER_CALCULATOR(TfLiteTensorsToSegmentationCalculator);
::mediapipe::Status TfLiteTensorsToSegmentationCalculator::ProcessCpu(
CalculatorContext* cc) {
if (cc->Inputs().Tag("TENSORS").IsEmpty()) {
if (cc->Inputs().Tag(kTensorsTag).IsEmpty()) {
return ::mediapipe::OkStatus();
}
// Get input streams.
const auto& input_tensors =
cc->Inputs().Tag("TENSORS").Get<std::vector<TfLiteTensor>>();
const bool has_prev_mask = cc->Inputs().HasTag("PREV_MASK") &&
!cc->Inputs().Tag("PREV_MASK").IsEmpty();
cc->Inputs().Tag(kTensorsTag).Get<std::vector<TfLiteTensor>>();
const bool has_prev_mask = cc->Inputs().HasTag(kPrevMaskTag) &&
!cc->Inputs().Tag(kPrevMaskTag).IsEmpty();
const ImageFrame placeholder;
const auto& input_mask = has_prev_mask
? cc->Inputs().Tag("PREV_MASK").Get<ImageFrame>()
const auto& input_mask =
has_prev_mask ? cc->Inputs().Tag(kPrevMaskTag).Get<ImageFrame>()
: placeholder;
int output_width = tensor_width_, output_height = tensor_height_;
if (cc->Inputs().HasTag("REFERENCE_IMAGE")) {
const auto& input_image =
cc->Inputs().Tag("REFERENCE_IMAGE").Get<ImageFrame>();
if (cc->Inputs().HasTag(kSizeImageTag)) {
const auto& input_image = cc->Inputs().Tag(kSizeImageTag).Get<ImageFrame>();
output_width = input_image.Width();
output_height = input_image.Height();
}
@ -353,7 +362,7 @@ REGISTER_CALCULATOR(TfLiteTensorsToSegmentationCalculator);
ImageFormat::SRGBA, output_width, output_height);
cv::Mat output_mat = formats::MatView(output_mask.get());
large_mask_mat.copyTo(output_mat);
cc->Outputs().Tag("MASK").Add(output_mask.release(), cc->InputTimestamp());
cc->Outputs().Tag(kMaskTag).Add(output_mask.release(), cc->InputTimestamp());
return ::mediapipe::OkStatus();
}
@ -364,23 +373,23 @@ REGISTER_CALCULATOR(TfLiteTensorsToSegmentationCalculator);
// 3. upsample small mask into output mask to be same size as input image
::mediapipe::Status TfLiteTensorsToSegmentationCalculator::ProcessGpu(
CalculatorContext* cc) {
if (cc->Inputs().Tag("TENSORS_GPU").IsEmpty()) {
if (cc->Inputs().Tag(kTensorsGpuTag).IsEmpty()) {
return ::mediapipe::OkStatus();
}
#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE)
// Get input streams.
const auto& input_tensors =
cc->Inputs().Tag("TENSORS_GPU").Get<std::vector<GlBuffer>>();
const bool has_prev_mask = cc->Inputs().HasTag("PREV_MASK_GPU") &&
!cc->Inputs().Tag("PREV_MASK_GPU").IsEmpty();
cc->Inputs().Tag(kTensorsGpuTag).Get<std::vector<GlBuffer>>();
const bool has_prev_mask = cc->Inputs().HasTag(kPrevMaskGpuTag) &&
!cc->Inputs().Tag(kPrevMaskGpuTag).IsEmpty();
const auto& input_mask =
has_prev_mask
? cc->Inputs().Tag("PREV_MASK_GPU").Get<mediapipe::GpuBuffer>()
? cc->Inputs().Tag(kPrevMaskGpuTag).Get<mediapipe::GpuBuffer>()
: mediapipe::GpuBuffer();
int output_width = tensor_width_, output_height = tensor_height_;
if (cc->Inputs().HasTag("REFERENCE_IMAGE_GPU")) {
if (cc->Inputs().HasTag(kSizeImageGpuTag)) {
const auto& input_image =
cc->Inputs().Tag("REFERENCE_IMAGE_GPU").Get<mediapipe::GpuBuffer>();
cc->Inputs().Tag(kSizeImageGpuTag).Get<mediapipe::GpuBuffer>();
output_width = input_image.width();
output_height = input_image.height();
}
@ -441,7 +450,7 @@ REGISTER_CALCULATOR(TfLiteTensorsToSegmentationCalculator);
// Send out image as GPU packet.
auto output_image = output_texture.GetFrame<mediapipe::GpuBuffer>();
cc->Outputs()
.Tag("MASK_GPU")
.Tag(kMaskGpuTag)
.Add(output_image.release(), cc->InputTimestamp());
// Cleanup

View File

@ -121,6 +121,14 @@ and model details are described in the
* [Android](./hair_segmentation_mobile_gpu.md)
### Template Matching using KNIFT with CPU
[Template Matching using KNIFT on Mobile](./template_matching_mobile_cpu.md)
shows how to use MediaPipe with a TFLite model for template matching with KNIFT
on mobile CPU.
* [Android](./template_matching_mobile_cpu.md)
## Desktop
### Hello World for C++
@ -171,7 +179,6 @@ on desktop with webcam input.
* [Desktop GPU](./face_mesh_desktop.md)
* [Desktop CPU](./face_mesh_desktop.md)
### Hand Tracking on Desktop with Webcam
[Hand Tracking on Desktop with Webcam](./hand_tracking_desktop.md) shows how to
@ -198,7 +205,7 @@ GPU with live video from a webcam.
* [Desktop GPU](./hair_segmentation_desktop.md)
## Google Coral (machine learning acceleration with Google EdgeTPU)
## Google Coral (ML acceleration with Google EdgeTPU)
Below are code samples on how to run MediaPipe on Google Coral Dev Board.

Binary file not shown (added; 2.6 MiB).

Binary file not shown (added; 1.1 MiB).

Binary file not shown (added; 37 KiB).

Binary file not shown (added; 51 KiB).

View File

@ -0,0 +1,31 @@
# Template Matching using KNIFT on Desktop
This doc focuses on the
[example graph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/template_matching/template_matching_desktop.pbtxt)
that performs template matching with KNIFT (Keypoint Neural Invariant Feature
Transform) on desktop CPU.
If you are interested in more detail about KNIFT or running the example on
mobile, please see
[Template Matching using KNIFT on Mobile (CPU)](template_matching_mobile_cpu.md).
To build the desktop app, run:
```bash
$ bazel build -c opt --define MEDIAPIPE_DISABLE_GPU=1 \
mediapipe/examples/desktop/template_matching:template_matching_tflite
```
To run the desktop app, please specify a template index file
([example](https://github.com/google/mediapipe/tree/master/mediapipe/models/knift_index.pb)) and a
video to be matched. For how to build your own index file, please see
[here](template_matching_mobile_cpu.md#build-index-file).
```bash
$ GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/template_matching/template_matching_tflite \
--calculator_graph_config_file=mediapipe/graphs/template_matching/template_matching_desktop.pbtxt --input_side_packets="input_video_path=<input video path>,output_video_path=<output video path>"
```
## Graph
[Source pbtxt file](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/template_matching/template_matching_desktop.pbtxt)

View File

@ -0,0 +1,94 @@
# Template Matching using KNIFT on Mobile (CPU)
This doc focuses on the
[example graph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/template_matching/template_matching_mobile_cpu.pbtxt)
that performs template matching with KNIFT (Keypoint Neural Invariant Feature
Transform) on mobile CPU.
![template_matching_mobile_cpu.gif](images/mobile/template_matching_android_cpu.gif)
In the visualization above, the green dots represent detected keypoints on each
frame and the red box represents the targets matched by templates using KNIFT
features (see also [model card](https://mediapipe.page.link/knift-mc)). For more
information, please see
[Google Developers Blog](https://mediapipe.page.link/knift-blog).
## Build Index Files
In MediaPipe, we've already provided a file in
[knift_index.pb](https://github.com/google/mediapipe/tree/master/mediapipe/models/knift_index.pb),
pre-computed from the 3 template images (of USD bills) shown below. If you'd
like to use your own template images, please follow the steps below; otherwise,
you can jump directly to [Android](#android).
![template_matching_mobile_template.jpg](images/mobile/template_matching_mobile_template.jpg)
### Step 1:
Put all template images in a single directory.
### Step 2:
To build the index file for all templates in the directory, run:
```bash
$ bazel build -c opt --define MEDIAPIPE_DISABLE_GPU=1 \
mediapipe/examples/desktop/template_matching:template_matching_tflite
$ bazel-bin/mediapipe/examples/desktop/template_matching/template_matching_tflite \
--calculator_graph_config_file=mediapipe/graphs/template_matching/index_building.pbtxt \
--input_side_packets="file_directory=<template image directory>,file_suffix='png',output_index_filename=<output index filename>"
```
The output index file includes the extracted KNIFT features.
### Step 3:
Replace
[mediapipe/models/knift_index.pb](https://github.com/google/mediapipe/tree/master/mediapipe/models/knift_index.pb)
with the index file you generated, and update
[mediapipe/models/knift_labelmap.txt](https://github.com/google/mediapipe/tree/master/mediapipe/models/knift_labelmap.txt)
with your own template names.
## Android
[Source](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/templatematchingcpu)
A prebuilt arm64 APK can be
[downloaded here](https://drive.google.com/open?id=1tSWRfes9rAM4NrzmJBplguNQQvaeBZSa).
To build and install the app yourself, run:
Note: MediaPipe uses OpenCV 3 by default. However, because of
[issues](https://github.com/opencv/opencv/issues/11488) between NDK 17+ and
OpenCV 3 when using
[knnMatch](https://docs.opencv.org/3.4/db/d39/classcv_1_1DescriptorMatcher.html#a378f35c9b1a5dfa4022839a45cdf0e89),
please use the following commands to temporarily switch to OpenCV 4 for the
template matching example on Android, and switch back to OpenCV 3 afterwards.
```bash
# Switch to OpenCV 4
sed -i -e 's:3.4.3/opencv-3.4.3:4.0.1/opencv-4.0.1:g' WORKSPACE
sed -i -e 's:libopencv_java3:libopencv_java4:g' third_party/opencv_android.BUILD
# Build and install app
bazel build -c opt --config=android_arm64 mediapipe/examples/android/src/java/com/google/mediapipe/apps/templatematchingcpu:templatematchingcpu
adb install -r bazel-bin/mediapipe/examples/android/src/java/com/google/mediapipe/apps/templatematchingcpu/templatematchingcpu.apk
# Switch back to OpenCV 3
sed -i -e 's:4.0.1/opencv-4.0.1:3.4.3/opencv-3.4.3:g' WORKSPACE
sed -i -e 's:libopencv_java4:libopencv_java3:g' third_party/opencv_android.BUILD
```
## Use XNNPACK Delegate
The example uses the XNNPACK delegate by default. Users can change the
[option in TfLiteInferenceCalculator](https://github.com/google/mediapipe/tree/master/mediapipe/calculators/tflite/tflite_inference_calculator.proto)
to use the default TF Lite inference instead.
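A hedged sketch of what that switch might look like in the graph's inference node; the options extension form and delegate field layout are assumed from tflite_inference_calculator.proto, and the stream names are for illustration only.

```
# Hypothetical sketch: replace the XNNPACK delegate with default TF Lite CPU
# inference. The options extension form and delegate fields are assumed.
node {
  calculator: "TfLiteInferenceCalculator"
  input_stream: "TENSORS:image_tensor"
  output_stream: "TENSORS:detection_tensors"
  options: {
    [mediapipe.TfLiteInferenceCalculatorOptions.ext] {
      model_path: "mediapipe/models/knift_float.tflite"
      delegate { tflite {} }
    }
  }
}
```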
## Graph
### Main Graph
![template_matching_mobile_graph](images/mobile/template_matching_mobile_graph.png)
[Source pbtxt file](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/template_matching/template_matching_mobile_cpu.pbtxt)

View File

@ -0,0 +1,33 @@
<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
package="com.google.mediapipe.apps.templatematchingcpu">
<uses-sdk
android:minSdkVersion="21"
android:targetSdkVersion="27" />
<!-- For using the camera -->
<uses-permission android:name="android.permission.CAMERA" />
<uses-feature android:name="android.hardware.camera" />
<uses-feature android:name="android.hardware.camera.autofocus" />
<!-- For MediaPipe -->
<uses-feature android:glEsVersion="0x00020000" android:required="true" />
<application
android:allowBackup="true"
android:label="@string/app_name"
android:supportsRtl="true"
android:theme="@style/AppTheme">
<activity
android:name=".MainActivity"
android:exported="true"
android:screenOrientation="portrait">
<intent-filter>
<action android:name="android.intent.action.MAIN" />
<category android:name="android.intent.category.LAUNCHER" />
</intent-filter>
</activity>
</application>
</manifest>

View File

@ -0,0 +1,82 @@
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
licenses(["notice"]) # Apache 2.0
package(default_visibility = ["//visibility:private"])
cc_binary(
name = "libmediapipe_jni.so",
linkshared = 1,
linkstatic = 1,
deps = [
"//mediapipe/graphs/template_matching:mobile_calculators",
"//mediapipe/java/com/google/mediapipe/framework/jni:mediapipe_framework_jni",
],
)
cc_library(
name = "mediapipe_jni_lib",
srcs = [":libmediapipe_jni.so"],
alwayslink = 1,
)
# Maps the binary graph to an alias (e.g., the app name) for convenience so that the alias can be
# easily incorporated into the app via, for example,
# MainActivity.BINARY_GRAPH_NAME = "appname.binarypb".
genrule(
name = "binary_graph",
srcs = ["//mediapipe/graphs/template_matching:mobile_cpu_binary_graph"],
outs = ["templatematching.binarypb"],
cmd = "cp $< $@",
)
android_library(
name = "mediapipe_lib",
srcs = glob(["*.java"]),
assets = [
":binary_graph",
"//mediapipe/models:knift_index.pb",
"//mediapipe/models:knift_float.tflite",
"//mediapipe/models:knift_labelmap.txt",
],
assets_dir = "",
manifest = "AndroidManifest.xml",
resource_files = glob(["res/**"]),
deps = [
":mediapipe_jni_lib",
"//mediapipe/java/com/google/mediapipe/components:android_camerax_helper",
"//mediapipe/java/com/google/mediapipe/components:android_components",
"//mediapipe/java/com/google/mediapipe/framework:android_framework",
"//mediapipe/java/com/google/mediapipe/glutil",
"//third_party:androidx_appcompat",
"//third_party:androidx_constraint_layout",
"//third_party:androidx_legacy_support_v4",
"//third_party:androidx_recyclerview",
"//third_party:opencv",
"@maven//:androidx_concurrent_concurrent_futures",
"@maven//:androidx_lifecycle_lifecycle_common",
"@maven//:com_google_guava_guava",
],
)
android_binary(
name = "templatematchingcpu",
manifest = "AndroidManifest.xml",
manifest_values = {"applicationId": "com.google.mediapipe.apps.templatematchingcpu"},
multidex = "native",
deps = [
":mediapipe_lib",
],
)

@ -0,0 +1,170 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.mediapipe.apps.templatematchingcpu;
import android.graphics.SurfaceTexture;
import android.os.Bundle;
import androidx.appcompat.app.AppCompatActivity;
import android.util.Size;
import android.view.SurfaceHolder;
import android.view.SurfaceView;
import android.view.View;
import android.view.ViewGroup;
import com.google.mediapipe.components.CameraHelper;
import com.google.mediapipe.components.CameraXPreviewHelper;
import com.google.mediapipe.components.ExternalTextureConverter;
import com.google.mediapipe.components.FrameProcessor;
import com.google.mediapipe.components.PermissionHelper;
import com.google.mediapipe.framework.AndroidAssetUtil;
import com.google.mediapipe.glutil.EglManager;
/** Main activity of MediaPipe example apps. */
public class MainActivity extends AppCompatActivity {
private static final String TAG = "MainActivity";
private static final String BINARY_GRAPH_NAME = "templatematching.binarypb";
private static final String INPUT_VIDEO_STREAM_NAME = "input_video";
private static final String OUTPUT_VIDEO_STREAM_NAME = "output_video";
private static final CameraHelper.CameraFacing CAMERA_FACING = CameraHelper.CameraFacing.BACK;
// Flips the camera-preview frames vertically before sending them into FrameProcessor to be
// processed in a MediaPipe graph, and flips the processed frames back when they are displayed.
// This is needed because OpenGL represents images assuming the image origin is at the bottom-left
// corner, whereas MediaPipe in general assumes the image origin is at top-left.
private static final boolean FLIP_FRAMES_VERTICALLY = true;
static {
// Load all native libraries needed by the app.
System.loadLibrary("mediapipe_jni");
System.loadLibrary("opencv_java4");
}
// {@link SurfaceTexture} where the camera-preview frames can be accessed.
private SurfaceTexture previewFrameTexture;
// {@link SurfaceView} that displays the camera-preview frames processed by a MediaPipe graph.
private SurfaceView previewDisplayView;
// Creates and manages an {@link EGLContext}.
private EglManager eglManager;
// Sends camera-preview frames into a MediaPipe graph for processing, and displays the processed
// frames onto a {@link Surface}.
private FrameProcessor processor;
// Converts the GL_TEXTURE_EXTERNAL_OES texture from Android camera into a regular texture to be
// consumed by {@link FrameProcessor} and the underlying MediaPipe graph.
private ExternalTextureConverter converter;
// Handles camera access via the {@link CameraX} Jetpack support library.
private CameraXPreviewHelper cameraHelper;
@Override
protected void onCreate(Bundle savedInstanceState) {
super.onCreate(savedInstanceState);
setContentView(R.layout.activity_main);
previewDisplayView = new SurfaceView(this);
setupPreviewDisplayView();
// Initialize asset manager so that MediaPipe native libraries can access the app assets, e.g.,
// binary graphs.
AndroidAssetUtil.initializeNativeAssetManager(this);
eglManager = new EglManager(null);
processor =
new FrameProcessor(
this,
eglManager.getNativeContext(),
BINARY_GRAPH_NAME,
INPUT_VIDEO_STREAM_NAME,
OUTPUT_VIDEO_STREAM_NAME);
processor.getVideoSurfaceOutput().setFlipY(FLIP_FRAMES_VERTICALLY);
PermissionHelper.checkAndRequestCameraPermissions(this);
}
@Override
protected void onResume() {
super.onResume();
converter = new ExternalTextureConverter(eglManager.getContext());
converter.setFlipY(FLIP_FRAMES_VERTICALLY);
converter.setConsumer(processor);
if (PermissionHelper.cameraPermissionsGranted(this)) {
startCamera();
}
}
@Override
protected void onPause() {
super.onPause();
converter.close();
}
@Override
public void onRequestPermissionsResult(
int requestCode, String[] permissions, int[] grantResults) {
super.onRequestPermissionsResult(requestCode, permissions, grantResults);
PermissionHelper.onRequestPermissionsResult(requestCode, permissions, grantResults);
}
private void setupPreviewDisplayView() {
previewDisplayView.setVisibility(View.GONE);
ViewGroup viewGroup = findViewById(R.id.preview_display_layout);
viewGroup.addView(previewDisplayView);
previewDisplayView
.getHolder()
.addCallback(
new SurfaceHolder.Callback() {
@Override
public void surfaceCreated(SurfaceHolder holder) {
processor.getVideoSurfaceOutput().setSurface(holder.getSurface());
}
@Override
public void surfaceChanged(SurfaceHolder holder, int format, int width, int height) {
// (Re-)Compute the ideal size of the camera-preview display (the area that the
// camera-preview frames get rendered onto, potentially with scaling and rotation)
// based on the size of the SurfaceView that contains the display.
Size viewSize = new Size(width, height);
Size displaySize = cameraHelper.computeDisplaySizeFromViewSize(viewSize);
boolean isCameraRotated = cameraHelper.isCameraRotated();
// Connect the converter to the camera-preview frames as its input (via
// previewFrameTexture), and configure the output width and height as the computed
// display size.
converter.setSurfaceTextureAndAttachToGLContext(
previewFrameTexture,
isCameraRotated ? displaySize.getHeight() : displaySize.getWidth(),
isCameraRotated ? displaySize.getWidth() : displaySize.getHeight());
}
@Override
public void surfaceDestroyed(SurfaceHolder holder) {
processor.getVideoSurfaceOutput().setSurface(null);
}
});
}
private void startCamera() {
cameraHelper = new CameraXPreviewHelper();
cameraHelper.setOnCameraStartedListener(
surfaceTexture -> {
previewFrameTexture = surfaceTexture;
// Make the display view visible to start showing the preview. This triggers the
// SurfaceHolder.Callback added to (the holder of) previewDisplayView.
previewDisplayView.setVisibility(View.VISIBLE);
});
cameraHelper.startCamera(this, CAMERA_FACING, /*surfaceTexture=*/ null);
}
}

@ -0,0 +1,20 @@
<?xml version="1.0" encoding="utf-8"?>
<androidx.constraintlayout.widget.ConstraintLayout xmlns:android="http://schemas.android.com/apk/res/android"
xmlns:app="http://schemas.android.com/apk/res-auto"
xmlns:tools="http://schemas.android.com/tools"
android:layout_width="match_parent"
android:layout_height="match_parent">
<FrameLayout
android:id="@+id/preview_display_layout"
android:layout_width="fill_parent"
android:layout_height="fill_parent"
android:layout_weight="1">
<TextView
android:id="@+id/no_camera_access_view"
android:layout_height="fill_parent"
android:layout_width="fill_parent"
android:gravity="center"
android:text="@string/no_camera_access" />
</FrameLayout>
</androidx.constraintlayout.widget.ConstraintLayout>

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="utf-8"?>
<resources>
<color name="colorPrimary">#008577</color>
<color name="colorPrimaryDark">#00574B</color>
<color name="colorAccent">#D81B60</color>
</resources>

@ -0,0 +1,4 @@
<resources>
<string name="app_name" translatable="false">Template Matching CPU</string>
<string name="no_camera_access" translatable="false">Please grant camera permissions.</string>
</resources>

@ -0,0 +1,11 @@
<resources>
<!-- Base application theme. -->
<style name="AppTheme" parent="Theme.AppCompat.Light.DarkActionBar">
<!-- Customize your theme here. -->
<item name="colorPrimary">@color/colorPrimary</item>
<item name="colorPrimaryDark">@color/colorPrimaryDark</item>
<item name="colorAccent">@color/colorAccent</item>
</style>
</resources>

@ -0,0 +1,25 @@
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
licenses(["notice"]) # Apache 2.0
package(default_visibility = ["//mediapipe/examples:__subpackages__"])
cc_binary(
name = "template_matching_tflite",
deps = [
"//mediapipe/examples/desktop:simple_run_graph_main",
"//mediapipe/graphs/template_matching:desktop_calculators",
],
)

@ -320,7 +320,7 @@ CalculatorGraph::~CalculatorGraph() {
}
if (!::mediapipe::ContainsKey(executors_, "")) {
MP_RETURN_IF_ERROR(InitializeDefaultExecutor(*default_executor_options,
MP_RETURN_IF_ERROR(InitializeDefaultExecutor(default_executor_options,
use_application_thread));
}
@ -328,7 +328,7 @@ CalculatorGraph::~CalculatorGraph() {
}
::mediapipe::Status CalculatorGraph::InitializeDefaultExecutor(
const ThreadPoolExecutorOptions& default_executor_options,
const ThreadPoolExecutorOptions* default_executor_options,
bool use_application_thread) {
// If specified, run synchronously on the calling thread.
if (use_application_thread) {
@ -341,7 +341,9 @@ CalculatorGraph::~CalculatorGraph() {
}
// Check the number of threads specified in the proto.
int num_threads = default_executor_options.num_threads();
int num_threads = default_executor_options == nullptr
? 0
: default_executor_options->num_threads();
// If the default (0 or -1) was specified, pick a suitable number of threads
// depending on the number of processors in this system and the number of
@ -1215,12 +1217,14 @@ Packet CalculatorGraph::GetServicePacket(const GraphServiceBase& service) {
}
::mediapipe::Status CalculatorGraph::CreateDefaultThreadPool(
const ThreadPoolExecutorOptions& default_executor_options,
const ThreadPoolExecutorOptions* default_executor_options,
int num_threads) {
MediaPipeOptions extendable_options;
ThreadPoolExecutorOptions* options =
extendable_options.MutableExtension(ThreadPoolExecutorOptions::ext);
*options = default_executor_options;
if (default_executor_options != nullptr) {
options->CopyFrom(*default_executor_options);
}
options->set_num_threads(num_threads);
// clang-format off
ASSIGN_OR_RETURN(Executor* executor,

@ -461,13 +461,13 @@ class CalculatorGraph {
//
// Only called by InitializeExecutors().
::mediapipe::Status InitializeDefaultExecutor(
const ThreadPoolExecutorOptions& default_executor_options,
const ThreadPoolExecutorOptions* default_executor_options,
bool use_application_thread);
// Creates a thread pool as the default executor. The num_threads argument
// overrides the num_threads field in default_executor_options.
::mediapipe::Status CreateDefaultThreadPool(
const ThreadPoolExecutorOptions& default_executor_options,
const ThreadPoolExecutorOptions* default_executor_options,
int num_threads);
// Returns true if |name| is a reserved executor name.

@ -274,6 +274,11 @@ bool GlContext::HasGlExtension(absl::string_view extension) const {
}
return Run([this]() -> ::mediapipe::Status {
// Clear any GL errors at this point: as this is a fresh context
// there shouldn't be any, but if we adopted an existing context (e.g. in
// some Emscripten cases), there might be some existing tripped error.
ForceClearExistingGlErrors();
absl::string_view version_string(
reinterpret_cast<const char*>(glGetString(GL_VERSION)));
@ -769,10 +774,18 @@ bool GlContext::SyncTokenIsReady(const std::shared_ptr<GlSyncPoint>& token) {
return token->IsReady();
}
bool GlContext::CheckForGlErrors() {
void GlContext::ForceClearExistingGlErrors() {
LogUncheckedGlErrors(CheckForGlErrors(/*force=*/true));
}
bool GlContext::CheckForGlErrors() { return CheckForGlErrors(false); }
bool GlContext::CheckForGlErrors(bool force) {
#if UNSAFE_EMSCRIPTEN_SKIP_GL_ERROR_HANDLING
if (!force) {
LOG_FIRST_N(WARNING, 1) << "MediaPipe OpenGL error checking is disabled";
return false;
}
#endif
if (!HasContext()) return false;

@ -348,7 +348,20 @@ class GlContext : public std::enable_shared_from_this<GlContext> {
void DestroyContext();
bool HasContext() const;
// This function clears out any tripped gl Errors and just logs them. This
// is used by code that needs to check glGetError() to know if it succeeded,
// but can't rely on the existing state to be 'clean'.
void ForceClearExistingGlErrors();
// Returns true if there were any GL errors. Note that this may be a no-op
// for performance reasons in some contexts (specifically Emscripten opt).
bool CheckForGlErrors();
// Same as `CheckForGlErrors()` but with the option of forcing the check
// even if we would otherwise skip for performance reasons.
bool CheckForGlErrors(bool force);
void LogUncheckedGlErrors(bool had_gl_errors);
::mediapipe::Status GetGlExtensions();
::mediapipe::Status GetGlExtensionsCompat();

@ -36,9 +36,9 @@ namespace mediapipe {
// - GlRender(), which is called for each frame.
// - A destructor, to destroy the objects created in GlSetup.
// Note that when GlSetup and GlRender are called, the GL context has already
// been set, but in the destructor it has not. The destructor should have a
// local variable set to ContextAutoSetter() to make sure it is doing the
// destruction in the right GL context.
// been set, but in the destructor it has not. The destructor should use the
// RunInGlContext() helper to make sure it is doing the destruction in the right
// GL context.
//
// Additionally, you can define a GlBind() method, which will be called to
// enable shader programs, bind any additional textures you may need, etc.

@ -87,11 +87,10 @@ bool GlTextureBuffer::CreateInternal(const void* data) {
}
void GlTextureBuffer::Reuse() {
WaitForConsumersOnGpu();
// TODO: should we just do this inside WaitForConsumersOnGpu?
// if we do that, WaitForConsumersOnGpu can be called only once.
absl::MutexLock lock(&consumer_sync_mutex_);
consumer_multi_sync_->WaitOnGpu();
// Reset the sync points.
consumer_multi_sync_ = absl::make_unique<GlMultiSyncPoint>();
// Reset the token.
producer_sync_ = nullptr;
}
@ -102,11 +101,15 @@ void GlTextureBuffer::Updated(std::shared_ptr<GlSyncPoint> prod_token) {
}
void GlTextureBuffer::DidRead(std::shared_ptr<GlSyncPoint> cons_token) {
absl::MutexLock lock(&consumer_sync_mutex_);
consumer_multi_sync_->Add(std::move(cons_token));
}
GlTextureBuffer::~GlTextureBuffer() {
if (deletion_callback_) {
// Note: at this point there are no more consumers that could be added
// to the consumer_multi_sync_, so it no longer needs to be protected
// by our mutex when we hand it to the deletion callback.
deletion_callback_(std::move(consumer_multi_sync_));
}
}
@ -129,10 +132,17 @@ void GlTextureBuffer::WaitOnGpu() {
}
}
void GlTextureBuffer::WaitForConsumers() { consumer_multi_sync_->Wait(); }
void GlTextureBuffer::WaitForConsumers() {
absl::MutexLock lock(&consumer_sync_mutex_);
consumer_multi_sync_->Wait();
}
void GlTextureBuffer::WaitForConsumersOnGpu() {
absl::MutexLock lock(&consumer_sync_mutex_);
consumer_multi_sync_->WaitOnGpu();
// TODO: should we clear the consumer_multi_sync_ here?
// It would mean that WaitForConsumersOnGpu can be called only once, or more
// precisely, on only one GL context.
}
} // namespace mediapipe

@ -121,15 +121,16 @@ class GlTextureBuffer {
friend class GlCalculatorHelperImpl;
GLuint name_ = 0;
int width_ = 0;
int height_ = 0;
GpuBufferFormat format_ = GpuBufferFormat::kUnknown;
GLenum target_ = GL_TEXTURE_2D;
const int width_ = 0;
const int height_ = 0;
const GpuBufferFormat format_ = GpuBufferFormat::kUnknown;
const GLenum target_ = GL_TEXTURE_2D;
// Token tracking changes to this texture. Used by WaitUntilComplete.
std::shared_ptr<GlSyncPoint> producer_sync_;
absl::Mutex consumer_sync_mutex_;
// Tokens tracking the point when consumers finished using this texture.
std::unique_ptr<GlMultiSyncPoint> consumer_multi_sync_ =
absl::make_unique<GlMultiSyncPoint>();
std::unique_ptr<GlMultiSyncPoint> consumer_multi_sync_ ABSL_GUARDED_BY(
consumer_sync_mutex_) = absl::make_unique<GlMultiSyncPoint>();
DeletionCallback deletion_callback_;
};

@ -0,0 +1,67 @@
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
load(
"//mediapipe/framework/tool:mediapipe_graph.bzl",
"mediapipe_binary_graph",
)
licenses(["notice"]) # Apache 2.0
package(default_visibility = ["//visibility:public"])
cc_library(
name = "template_matching_deps",
deps = [
"//mediapipe/calculators/image:feature_detector_calculator",
"//mediapipe/calculators/image:image_properties_calculator",
"//mediapipe/calculators/image:image_transformation_calculator",
"//mediapipe/calculators/tflite:tflite_converter_calculator",
"//mediapipe/calculators/tflite:tflite_inference_calculator",
"//mediapipe/calculators/tflite:tflite_tensors_to_floats_calculator",
"//mediapipe/calculators/util:annotation_overlay_calculator",
"//mediapipe/calculators/util:landmarks_to_render_data_calculator",
"//mediapipe/calculators/util:timed_box_list_id_to_label_calculator",
"//mediapipe/calculators/util:timed_box_list_to_render_data_calculator",
"//mediapipe/calculators/video:box_detector_calculator",
],
)
cc_library(
name = "desktop_calculators",
deps = [
":template_matching_deps",
"//mediapipe/calculators/image:opencv_encoded_image_to_image_frame_calculator",
"//mediapipe/calculators/util:local_file_pattern_contents_calculator",
"//mediapipe/calculators/video:opencv_video_decoder_calculator",
"//mediapipe/calculators/video:opencv_video_encoder_calculator",
],
)
cc_library(
name = "mobile_calculators",
deps = [
":template_matching_deps",
"//mediapipe/calculators/core:flow_limiter_calculator",
"//mediapipe/calculators/image:image_transformation_calculator",
"//mediapipe/gpu:gpu_buffer_to_image_frame_calculator",
],
)
mediapipe_binary_graph(
name = "mobile_cpu_binary_graph",
graph = "template_matching_mobile_cpu.pbtxt",
output_name = "mobile_cpu.binarypb",
deps = [":mobile_calculators"],
)

@ -0,0 +1,79 @@
# MediaPipe graph that builds a feature descriptor index for a specific target.
# max_queue_size limits the number of packets enqueued on any input stream
# by throttling inputs to the graph. This makes the graph only process one
# frame at a time.
max_queue_size: 1
# Reads and decodes the template images from the input file directory.
node {
calculator: "LocalFilePatternContentsCalculator"
input_side_packet: "FILE_DIRECTORY:file_directory"
input_side_packet: "FILE_SUFFIX:file_suffix"
output_stream: "CONTENTS:encoded_image"
}
node {
calculator: "OpenCvEncodedImageToImageFrameCalculator"
input_stream: "encoded_image"
output_stream: "image_frame"
}
node {
calculator: "ImagePropertiesCalculator"
input_stream: "IMAGE:image_frame"
output_stream: "SIZE:input_video_size"
}
node {
calculator: "FeatureDetectorCalculator"
input_stream: "IMAGE:image_frame"
output_stream: "FEATURES:features"
output_stream: "LANDMARKS:landmarks"
output_stream: "PATCHES:patches"
node_options: {
[type.googleapis.com/mediapipe.FeatureDetectorCalculatorOptions] {
max_features: 400
}
}
}
# input tensors: 400*32*32*1 float (up to 400 32x32 patches)
# output tensors: 400*40 float; only the first keypoint.size()*40 values are
# KNIFT features, the rest is zero padding.
node {
calculator: "TfLiteInferenceCalculator"
input_stream: "TENSORS:patches"
output_stream: "TENSORS:knift_feature_tensors"
input_stream_handler {
input_stream_handler: "DefaultInputStreamHandler"
}
node_options: {
[type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] {
model_path: "mediapipe/models/knift_float_400.tflite"
}
}
}
node {
calculator: "TfLiteTensorsToFloatsCalculator"
input_stream: "TENSORS:knift_feature_tensors"
output_stream: "FLOATS:knift_feature_floats"
}
node {
calculator: "BoxDetectorCalculator"
input_side_packet: "OUTPUT_INDEX_FILENAME:output_index_filename"
input_stream: "FEATURES:features"
input_stream: "IMAGE_SIZE:input_video_size"
input_stream: "DESCRIPTORS:knift_feature_floats"
node_options: {
[type.googleapis.com/mediapipe.BoxDetectorCalculatorOptions] {
detector_options {
index_type: OPENCV_BF
detect_every_n_frame: 1
}
}
}
}

@ -0,0 +1,128 @@
# MediaPipe graph that performs template matching on desktop with TensorFlow
# Lite on CPU.
# Used in the example in
# mediapipe/examples/desktop/template_matching:template_matching_tflite
# max_queue_size limits the number of packets enqueued on any input stream
# by throttling inputs to the graph. This makes the graph only process one
# frame at a time.
max_queue_size: 1
# Decodes an input video file into images and a video header.
node {
calculator: "OpenCvVideoDecoderCalculator"
input_side_packet: "INPUT_FILE_PATH:input_video_path"
output_stream: "VIDEO:input_video"
output_stream: "VIDEO_PRESTREAM:input_video_header"
}
node {
calculator: "ImagePropertiesCalculator"
input_stream: "IMAGE:input_video"
output_stream: "SIZE:input_video_size"
}
node {
calculator: "FeatureDetectorCalculator"
input_stream: "IMAGE:input_video"
output_stream: "FEATURES:features"
output_stream: "LANDMARKS:landmarks"
output_stream: "PATCHES:patches"
}
# input tensors: 200*32*32*1 float
# output tensors: 200*40 float; only the first keypoint.size()*40 values are
# KNIFT features, the rest is zero padding.
node {
calculator: "TfLiteInferenceCalculator"
input_stream: "TENSORS:patches"
output_stream: "TENSORS:knift_feature_tensors"
node_options: {
[type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] {
model_path: "mediapipe/models/knift_float.tflite"
}
}
}
node {
calculator: "TfLiteTensorsToFloatsCalculator"
input_stream: "TENSORS:knift_feature_tensors"
output_stream: "FLOATS:knift_feature_floats"
}
node {
calculator: "BoxDetectorCalculator"
input_stream: "FEATURES:features"
input_stream: "IMAGE_SIZE:input_video_size"
input_stream: "DESCRIPTORS:knift_feature_floats"
output_stream: "BOXES:detections"
node_options: {
[type.googleapis.com/mediapipe.BoxDetectorCalculatorOptions] {
detector_options {
index_type: OPENCV_BF
detect_every_n_frame: 1
}
index_proto_filename: "mediapipe/models/knift_index.pb"
}
}
}
node {
calculator: "TimedBoxListIdToLabelCalculator"
input_stream: "detections"
output_stream: "labeled_detections"
node_options: {
[type.googleapis.com/mediapipe.TimedBoxListIdToLabelCalculatorOptions] {
label_map_path: "mediapipe/models/knift_labelmap.txt"
}
}
}
node {
calculator: "TimedBoxListToRenderDataCalculator"
input_stream: "BOX_LIST:labeled_detections"
output_stream: "RENDER_DATA:box_render_data"
node_options: {
[type.googleapis.com/mediapipe.TimedBoxListToRenderDataCalculatorOptions] {
box_color { r: 255 g: 0 b: 0 }
thickness: 5.0
}
}
}
node {
calculator: "LandmarksToRenderDataCalculator"
input_stream: "NORM_LANDMARKS:landmarks"
output_stream: "RENDER_DATA:landmarks_render_data"
node_options: {
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
landmark_color { r: 0 g: 255 b: 0 }
thickness: 2.0
}
}
}
# Draws annotations and overlays them on top of the input images.
node {
calculator: "AnnotationOverlayCalculator"
input_stream: "IMAGE:input_video"
input_stream: "box_render_data"
input_stream: "landmarks_render_data"
output_stream: "IMAGE:output_video"
}
# Encodes the annotated images into a video file, adopting properties specified
# in the input video header, e.g., video framerate.
node {
calculator: "OpenCvVideoEncoderCalculator"
input_stream: "VIDEO:output_video"
input_stream: "VIDEO_PRESTREAM:input_video_header"
input_side_packet: "OUTPUT_FILE_PATH:output_video_path"
node_options: {
[type.googleapis.com/mediapipe.OpenCvVideoEncoderCalculatorOptions]: {
codec: "avc1"
video_format: "mp4"
}
}
}

@ -0,0 +1,136 @@
# MediaPipe graph that performs template matching with TensorFlow Lite on CPU.
# Used in the examples in
# mediapipe/examples/android/src/java/com/google/mediapipe/apps/templatematchingcpu
# Images on GPU coming into and out of the graph.
input_stream: "input_video"
output_stream: "output_video"
# Throttles the images flowing downstream for flow control.
node {
calculator: "FlowLimiterCalculator"
input_stream: "input_video"
input_stream: "FINISHED:detections"
input_stream_info: {
tag_index: "FINISHED"
back_edge: true
}
output_stream: "throttled_input_video"
}
# Transfers the input image from GPU to CPU memory.
node: {
calculator: "GpuBufferToImageFrameCalculator"
input_stream: "throttled_input_video"
output_stream: "input_video_cpu"
}
# Transforms the input image on CPU to a 480x640 image.
node: {
calculator: "ImageTransformationCalculator"
input_stream: "IMAGE:input_video_cpu"
output_stream: "IMAGE:transformed_input_video_cpu"
node_options: {
[type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
output_width: 480
output_height: 640
}
}
}
node {
calculator: "ImagePropertiesCalculator"
input_stream: "IMAGE:transformed_input_video_cpu"
output_stream: "SIZE:input_video_size"
}
node {
calculator: "FeatureDetectorCalculator"
input_stream: "IMAGE:transformed_input_video_cpu"
output_stream: "FEATURES:features"
output_stream: "LANDMARKS:landmarks"
output_stream: "PATCHES:patches"
}
# input tensors: 200*32*32*1 float
# output tensors: 200*40 float; only the first keypoint.size()*40 values are
# KNIFT features, the rest is zero padding.
node {
calculator: "TfLiteInferenceCalculator"
input_stream: "TENSORS:patches"
output_stream: "TENSORS:knift_feature_tensors"
node_options: {
[type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] {
model_path: "mediapipe/models/knift_float.tflite"
delegate { xnnpack {} }
}
}
}
node {
calculator: "TfLiteTensorsToFloatsCalculator"
input_stream: "TENSORS:knift_feature_tensors"
output_stream: "FLOATS:knift_feature_floats"
}
node {
calculator: "BoxDetectorCalculator"
input_stream: "FEATURES:features"
input_stream: "IMAGE_SIZE:input_video_size"
input_stream: "DESCRIPTORS:knift_feature_floats"
output_stream: "BOXES:detections"
node_options: {
[type.googleapis.com/mediapipe.BoxDetectorCalculatorOptions] {
detector_options {
index_type: OPENCV_BF
detect_every_n_frame: 1
}
index_proto_filename: "mediapipe/models/knift_index.pb"
}
}
}
node {
calculator: "TimedBoxListIdToLabelCalculator"
input_stream: "detections"
output_stream: "labeled_detections"
node_options: {
[type.googleapis.com/mediapipe.TimedBoxListIdToLabelCalculatorOptions] {
label_map_path: "mediapipe/models/knift_labelmap.txt"
}
}
}
node {
calculator: "TimedBoxListToRenderDataCalculator"
input_stream: "BOX_LIST:labeled_detections"
output_stream: "RENDER_DATA:box_render_data"
node_options: {
[type.googleapis.com/mediapipe.TimedBoxListToRenderDataCalculatorOptions] {
box_color { r: 255 g: 0 b: 0 }
thickness: 5.0
}
}
}
node {
calculator: "LandmarksToRenderDataCalculator"
input_stream: "NORM_LANDMARKS:landmarks"
output_stream: "RENDER_DATA:landmarks_render_data"
node_options: {
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
landmark_color { r: 0 g: 255 b: 0 }
thickness: 2.0
}
}
}
# Draws annotations and overlays them on top of the input images.
node {
calculator: "AnnotationOverlayCalculator"
input_stream: "IMAGE_GPU:throttled_input_video"
input_stream: "box_render_data"
input_stream: "landmarks_render_data"
output_stream: "IMAGE_GPU:output_video"
}

@ -34,3 +34,10 @@ Here are the descriptions of the models used in the [example applications](../do
* [Model page](https://sites.google.com/corp/view/perception-cv4arvr/hair-segmentation)
* Paper: ["Real-time Hair segmentation and recoloring on Mobile GPUs"](https://arxiv.org/abs/1907.06740)
* [Model card](https://drive.google.com/file/d/1lPwJ8BD_-3UUor4LayQ0xpa_RIC_hoRh/view)
### KNIFT (Keypoint Neural Invariant Feature Transform)
* Up to 200 keypoints: [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/models/knift_float.tflite)
* Up to 400 keypoints: [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/models/knift_float_400.tflite)
* [Google Developers Blog post](https://mediapipe.page.link/knift)
* [Model card](https://mediapipe.page.link/knift-mc)

Binary file not shown.

Binary file not shown.

Binary file not shown.

@ -0,0 +1,3 @@
1USD
20USD
5USD

@ -75,3 +75,31 @@ cc_test(
"//conditions:default": [],
}),
)
cc_library(
name = "tflite_gpu_runner",
srcs = select({
"//mediapipe:ios": [],
"//mediapipe:macos": [],
"//conditions:default": ["tflite_gpu_runner.cc"],
}),
hdrs = select({
"//mediapipe:ios": [],
"//mediapipe:macos": [],
"//conditions:default": ["tflite_gpu_runner.h"],
}),
deps = select({
"//mediapipe:ios": [],
"//mediapipe:macos": [],
"//conditions:default": [
"@com_google_absl//absl/strings",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"//mediapipe/framework/port:statusor",
"@org_tensorflow//tensorflow/lite:framework",
"@org_tensorflow//tensorflow/lite/delegates/gpu:api",
"@org_tensorflow//tensorflow/lite/delegates/gpu/common:model",
"@org_tensorflow//tensorflow/lite/delegates/gpu/gl:api2",
],
}),
)

@ -0,0 +1,138 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/util/tflite/tflite_gpu_runner.h"
#include <cstdint>
#include <memory>
#include <utility>
#include "absl/strings/substitute.h"
#include "mediapipe/framework/port/canonical_errors.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/framework/port/status_macros.h"
#include "mediapipe/framework/port/statusor.h"
#include "tensorflow/lite/delegates/gpu/api.h"
#include "tensorflow/lite/delegates/gpu/common/model.h"
#include "tensorflow/lite/delegates/gpu/gl/api2.h"
#include "tensorflow/lite/model.h"
namespace tflite {
namespace gpu {
namespace {
ObjectDef GetSSBOObjectDef(int channels) {
ObjectDef gpu_object_def;
gpu_object_def.data_type = DataType::FLOAT32;
gpu_object_def.data_layout = DataLayout::BHWC;
if (channels == 4) {
gpu_object_def.data_layout = DataLayout::DHWC4;
}
gpu_object_def.object_type = ObjectType::OPENGL_SSBO;
gpu_object_def.user_provided = true;
return gpu_object_def;
}
} // namespace
mediapipe::Status TFLiteGPURunner::InitializeWithModel(
const tflite::FlatBufferModel& flatbuffer) {
for (const auto& input : graph_->inputs()) {
input_shapes_.push_back(input->tensor.shape);
}
for (const auto& output : graph_->outputs()) {
output_shapes_.push_back(output->tensor.shape);
}
return absl::OkStatus();
}
mediapipe::StatusOr<int64_t> TFLiteGPURunner::GetInputElements(int id) {
if (id >= input_shapes_.size()) {
return ::mediapipe::InternalError("Wrong input tensor id.");
} else {
return input_shapes_[id].DimensionsProduct();
}
}
mediapipe::StatusOr<int64_t> TFLiteGPURunner::GetOutputElements(int id) {
if (id >= output_shapes_.size()) {
return ::mediapipe::InternalError("Wrong output tensor id.");
} else {
return output_shapes_[id].DimensionsProduct();
}
}
mediapipe::Status TFLiteGPURunner::Build() {
// 1. Prepare inference builder.
std::unique_ptr<InferenceBuilder> builder;
MP_RETURN_IF_ERROR(InitializeOpenGL(&builder));
// 2. Describe output/input objects for created builder.
for (int flow_index = 0; flow_index < input_shapes_.size(); ++flow_index) {
if (input_ssbo_ids_.find(flow_index) == input_ssbo_ids_.end()) {
return absl::AlreadyExistsError(absl::Substitute(
"Couldn't find a OpenGL ssbo for input $0.", flow_index));
}
MP_RETURN_IF_ERROR(builder->SetInputObjectDef(
flow_index, GetSSBOObjectDef(input_shapes_[flow_index].c)));
}
for (int flow_index = 0; flow_index < output_shapes_.size(); ++flow_index) {
if (output_ssbo_ids_.find(flow_index) == output_ssbo_ids_.end()) {
return absl::AlreadyExistsError(absl::Substitute(
"Couldn't find a OpenGL ssbo for output $0.", flow_index));
}
MP_RETURN_IF_ERROR(builder->SetOutputObjectDef(
flow_index, GetSSBOObjectDef(output_shapes_[flow_index].c)));
}
// 3. Build inference runner with the created builder.
return builder->Build(&runner_);
}
mediapipe::Status TFLiteGPURunner::BindSSBOToInputTensor(GLuint ssbo_id,
int input_id) {
OpenGlBuffer buffer;
buffer.id = ssbo_id;
return runner_->SetInputObject(input_id, std::move(buffer));
}
mediapipe::Status TFLiteGPURunner::BindSSBOToOutputTensor(GLuint ssbo_id,
int output_id) {
OpenGlBuffer buffer;
buffer.id = ssbo_id;
return runner_->SetOutputObject(output_id, std::move(buffer));
}
mediapipe::Status TFLiteGPURunner::Invoke() { return runner_->Run(); }
mediapipe::Status TFLiteGPURunner::InitializeOpenGL(
std::unique_ptr<InferenceBuilder>* builder) {
gl::InferenceEnvironmentOptions env_options;
gl::InferenceEnvironmentProperties properties;
gl::InferenceOptions gl_options;
gl_options.priority1 = options_.priority1;
gl_options.priority2 = options_.priority2;
gl_options.priority3 = options_.priority3;
gl_options.usage = options_.usage;
MP_RETURN_IF_ERROR(
NewInferenceEnvironment(env_options, &gl_environment_, &properties));
MP_RETURN_IF_ERROR(gl_environment_->NewInferenceBuilder(std::move(*graph_),
gl_options, builder));
graph_.release();
return absl::OkStatus();
}
} // namespace gpu
} // namespace tflite

@ -0,0 +1,90 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MEDIAPIPE_CALCULATORS_TFLITE_TFLITE_GPU_RUNNER_H_
#define MEDIAPIPE_CALCULATORS_TFLITE_TFLITE_GPU_RUNNER_H_
#include <cstdint>
#include <memory>
#include <vector>
#include "mediapipe/framework/port/status.h"
#include "mediapipe/framework/port/statusor.h"
#include "tensorflow/lite/delegates/gpu/api.h"
#include "tensorflow/lite/delegates/gpu/common/model.h"
#include "tensorflow/lite/delegates/gpu/gl/api2.h"
#include "tensorflow/lite/model.h"
namespace tflite {
namespace gpu {
// Executes GPU based inference using the TFLite GPU delegate api2.
// Currently supports only GPU inputs/outputs.
//
// Typical order of execution:
// 1. Initialize with the flatbuffer model using InitializeWithModel().
// 2. Bind OpenGL SSBO objects as inputs and outputs using
// BindSSBOToInputTensor() and BindSSBOToOutputTensor().
// 3. Build the inference runner with Build() method.
// 4. Invoke() executes the inference, where inputs and outputs are those which
// were specified earlier. Invoke() may be called in a loop.
//
// Note: All of these need to happen inside MediaPipe's RunInGlContext to make
// sure that all steps from inference construction to execution are made using
// the same OpenGL context.
class TFLiteGPURunner {
public:
explicit TFLiteGPURunner(const InferenceOptions& options)
: options_(options) {}
mediapipe::Status InitializeWithModel(
const tflite::FlatBufferModel& flatbuffer);
mediapipe::Status BindSSBOToInputTensor(GLuint ssbo_id, int input_id);
mediapipe::Status BindSSBOToOutputTensor(GLuint ssbo_id, int output_id);
int inputs_size() const { return input_shapes_.size(); }
int outputs_size() const { return output_shapes_.size(); }
mediapipe::StatusOr<int64_t> GetInputElements(int id);
mediapipe::StatusOr<int64_t> GetOutputElements(int id);
mediapipe::Status Build();
mediapipe::Status Invoke();
private:
mediapipe::Status InitializeOpenGL(
std::unique_ptr<InferenceBuilder>* builder);
InferenceOptions options_;
std::unique_ptr<gl::InferenceEnvironment> gl_environment_;
// graph_ is maintained temporarily and becomes invalid after runner_ is ready
std::unique_ptr<GraphFloat32> graph_;
std::unique_ptr<InferenceRunner> runner_;
// Store registered OpenGL ssbo ids for the corresponding input/output tensor.
// key: io tensor position, value: OpenGL ssbo id.
std::unordered_map<int, GLuint> input_ssbo_ids_;
std::unordered_map<int, GLuint> output_ssbo_ids_;
// We keep information about input/output shapes, because they are needed
// after graph_ becomes "converted" into runner_.
std::vector<BHWC> input_shapes_;
std::vector<BHWC> output_shapes_;
};
} // namespace gpu
} // namespace tflite
#endif // MEDIAPIPE_CALCULATORS_TFLITE_TFLITE_GPU_RUNNER_H_

@ -68,10 +68,10 @@ message BoxDetectorOptions {
optional int32 min_num_correspondence = 6 [default = 5];
// Reprojection threshold for RANSAC to find inliers.
optional float ransac_reprojection_threshold = 7 [default = 0.02];
optional float ransac_reprojection_threshold = 7 [default = 0.005];
// Max distance to match 2 NIMBY features.
optional float max_match_distance = 8 [default = 0.8];
optional float max_match_distance = 8 [default = 0.9];
// Max perspective change factor.
optional float max_perspective_factor = 9 [default = 0.1];