Project import generated by Copybara.

GitOrigin-RevId: afeb9cf5a8c069c0a566d16e1622bbb086170e4d
MediaPipe Team 2020-05-21 12:46:31 -04:00 committed by chuoling
parent b6e680647c
commit b133b0f200
258 changed files with 4146 additions and 5147 deletions

View File

@ -1,20 +1,30 @@
# The bazelrc file for MediaPipe OSS.
# Tensorflow needs remote repo
common --experimental_repo_remote_exec
# Basic build settings
build --jobs 128
build --define='absl=1'
build --cxxopt='-std=c++14'
build --copt='-Wno-sign-compare'
build --copt='-Wno-unused-function'
build --copt='-Wno-uninitialized'
build --copt='-Wno-unused-result'
build --copt='-Wno-comment'
build --copt='-Wno-return-type'
build --copt='-Wno-unused-local-typedefs'
build --copt='-Wno-ignored-attributes'
build --enable_platform_specific_config
# Tensorflow needs remote repo
build --experimental_repo_remote_exec
# Linux
build:linux --cxxopt=-std=c++14
build:linux --host_cxxopt=-std=c++14
build:linux --copt=-w
# windows
build:windows --cxxopt=/std:c++14
build:windows --host_cxxopt=/std:c++14
build:windows --copt=/w
# For using M_* math constants on Windows with MSVC.
build:windows --copt=/D_USE_MATH_DEFINES
build:windows --host_copt=/D_USE_MATH_DEFINES
# macOS
build:macos --cxxopt=-std=c++14
build:macos --host_cxxopt=-std=c++14
build:macos --copt=-w
# Sets the default Apple platform to macOS.
build --apple_platform_type=macos

View File

@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
FROM ubuntu:latest
FROM ubuntu:18.04
MAINTAINER <mediapipe@google.com>
@ -25,11 +25,12 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \
ca-certificates \
curl \
ffmpeg \
git \
wget \
unzip \
python \
python-pip \
python3-dev \
python3-opencv \
python3-pip \
libopencv-core-dev \
libopencv-highgui-dev \
@ -43,9 +44,14 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
RUN pip install --upgrade setuptools
RUN pip install future
RUN pip3 install six
RUN pip3 install --upgrade setuptools
RUN pip3 install wheel
RUN pip3 install future
RUN pip3 install six==1.14.0
RUN pip3 install tensorflow==1.14.0
RUN pip3 install tf_slim
RUN ln -s /usr/bin/python3 /usr/bin/python
# Install bazel
ARG BAZEL_VERSION=2.0.0

View File

@ -76,7 +76,9 @@ Search MediaPipe Github repository using [Google Open Source code search](https:
* [Google Industry Workshop at ICIP 2019](http://2019.ieeeicip.org/?action=page4&id=14#Google) [Presentation](https://docs.google.com/presentation/d/e/2PACX-1vRIBBbO_LO9v2YmvbHHEt1cwyqH6EjDxiILjuT0foXy1E7g6uyh4CesB2DkkEwlRDO9_lWfuKMZx98T/pub?start=false&loop=false&delayms=3000&slide=id.g556cc1a659_0_5) on Sept 24 in Taipei, Taiwan
* [Open sourced at CVPR 2019](https://sites.google.com/corp/view/perception-cv4arvr/mediapipe) on June 17~20 in Long Beach, CA
## Community forum
## Community
* [Awesome MediaPipe: curation of code related to MediaPipe](https://mediapipe.org)
* [Slack community for MediaPipe users](https://mediapipe.slack.com)
* [Discuss](https://groups.google.com/forum/#!forum/mediapipe) - General community discussion around MediaPipe
## Alpha Disclaimer

WORKSPACE
View File

@ -54,17 +54,15 @@ http_archive(
# gflags needed by glog
http_archive(
name = "com_github_gflags_gflags",
sha256 = "6e16c8bc91b1310a44f3965e616383dbda48f83e8c1eaa2370a215057b00cabe",
strip_prefix = "gflags-77592648e3f3be87d6c7123eb81cbad75f9aef5a",
urls = [
"https://mirror.bazel.build/github.com/gflags/gflags/archive/77592648e3f3be87d6c7123eb81cbad75f9aef5a.tar.gz",
"https://github.com/gflags/gflags/archive/77592648e3f3be87d6c7123eb81cbad75f9aef5a.tar.gz",
],
strip_prefix = "gflags-2.2.2",
sha256 = "19713a36c9f32b33df59d1c79b4958434cb005b5b47dc5400a7a4b078111d9b5",
url = "https://github.com/gflags/gflags/archive/v2.2.2.zip",
)
# glog
# glog v0.3.5
# TODO: Migrate MediaPipe to use com_github_glog_glog on all platforms.
http_archive(
name = "com_github_glog_glog",
name = "com_github_glog_glog_v_0_3_5",
url = "https://github.com/google/glog/archive/v0.3.5.zip",
sha256 = "267103f8a1e9578978aa1dc256001e6529ef593e5aea38193d31c2872ee025e8",
strip_prefix = "glog-0.3.5",
@ -77,6 +75,16 @@ http_archive(
],
)
# 2020-02-16
http_archive(
name = "com_github_glog_glog",
strip_prefix = "glog-3ba8976592274bc1f907c402ce22558011d6fc5e",
sha256 = "feca3c7e29a693cab7887409756d89d342d4a992d54d7c5599bebeae8f7b50be",
urls = [
"https://github.com/google/glog/archive/3ba8976592274bc1f907c402ce22558011d6fc5e.zip",
],
)
# easyexif
http_archive(
name = "easyexif",
@ -101,51 +109,30 @@ http_archive(
urls = ["https://github.com/protocolbuffers/protobuf/archive/v3.11.4.tar.gz"],
)
http_archive(
name = "com_google_protobuf",
sha256 = "a79d19dcdf9139fa4b81206e318e33d245c4c9da1ffed21c87288ed4380426f9",
strip_prefix = "protobuf-3.11.4",
urls = ["https://github.com/protocolbuffers/protobuf/archive/v3.11.4.tar.gz"],
patches = [
"@//third_party:com_google_protobuf_fixes.diff"
],
patch_args = [
"-p1",
],
)
http_archive(
name = "com_google_audio_tools",
strip_prefix = "multichannel-audio-tools-master",
urls = ["https://github.com/google/multichannel-audio-tools/archive/master.zip"],
)
# Needed by TensorFlow
http_archive(
name = "io_bazel_rules_closure",
sha256 = "e0a111000aeed2051f29fcc7a3f83be3ad8c6c93c186e64beb1ad313f0c7f9f9",
strip_prefix = "rules_closure-cf1e44edb908e9616030cc83d085989b8e6cd6df",
urls = [
"http://mirror.tensorflow.org/github.com/bazelbuild/rules_closure/archive/cf1e44edb908e9616030cc83d085989b8e6cd6df.tar.gz",
"https://github.com/bazelbuild/rules_closure/archive/cf1e44edb908e9616030cc83d085989b8e6cd6df.tar.gz", # 2019-04-04
],
)
# 2020-04-01
_TENSORFLOW_GIT_COMMIT = "805e47cea96c7e8c6fccf494d40a2392dc99fdd8"
_TENSORFLOW_SHA256= "9ee3ae604c2e1345ac60345becee6d659364721513f9cb8652eb2e7138320ca5"
http_archive(
name = "org_tensorflow",
urls = [
"https://mirror.bazel.build/github.com/tensorflow/tensorflow/archive/%s.tar.gz" % _TENSORFLOW_GIT_COMMIT,
"https://github.com/tensorflow/tensorflow/archive/%s.tar.gz" % _TENSORFLOW_GIT_COMMIT,
],
patches = [
"@//third_party:org_tensorflow_compatibility_fixes.diff",
"@//third_party:org_tensorflow_protobuf_updates.diff",
],
patch_args = [
"-p1",
],
strip_prefix = "tensorflow-%s" % _TENSORFLOW_GIT_COMMIT,
sha256 = _TENSORFLOW_SHA256,
)
load("@org_tensorflow//tensorflow:workspace.bzl", "tf_workspace")
tf_workspace(tf_repo_name = "org_tensorflow")
http_archive(
name = "ceres_solver",
url = "https://github.com/ceres-solver/ceres-solver/archive/1.14.0.zip",
patches = [
"@//third_party:ceres_solver_9bf9588988236279e1262f75d7f4d85711dfa172.diff"
"@//third_party:ceres_solver_compatibility_fixes.diff"
],
patch_args = [
"-p1",
@ -178,6 +165,12 @@ new_local_repository(
path = "/usr",
)
new_local_repository(
name = "windows_opencv",
build_file = "@//third_party:opencv_windows.BUILD",
path = "C:\\opencv\\build",
)
http_archive(
name = "android_opencv",
build_file = "@//third_party:opencv_android.BUILD",
@ -236,6 +229,15 @@ load(
swift_rules_dependencies()
http_archive(
name = "build_bazel_apple_support",
sha256 = "122ebf7fe7d1c8e938af6aeaee0efe788a3a2449ece5a8d6a428cb18d6f88033",
urls = [
"https://storage.googleapis.com/mirror.tensorflow.org/github.com/bazelbuild/apple_support/releases/download/0.7.1/apple_support.0.7.1.tar.gz",
"https://github.com/bazelbuild/apple_support/releases/download/0.7.1/apple_support.0.7.1.tar.gz",
],
)
load(
"@build_bazel_apple_support//lib:repositories.bzl",
"apple_support_dependencies",
@ -299,3 +301,37 @@ maven_install(
fetch_sources = True,
version_conflict_policy = "pinned",
)
# Needed by TensorFlow
http_archive(
name = "io_bazel_rules_closure",
sha256 = "e0a111000aeed2051f29fcc7a3f83be3ad8c6c93c186e64beb1ad313f0c7f9f9",
strip_prefix = "rules_closure-cf1e44edb908e9616030cc83d085989b8e6cd6df",
urls = [
"http://mirror.tensorflow.org/github.com/bazelbuild/rules_closure/archive/cf1e44edb908e9616030cc83d085989b8e6cd6df.tar.gz",
"https://github.com/bazelbuild/rules_closure/archive/cf1e44edb908e9616030cc83d085989b8e6cd6df.tar.gz", # 2019-04-04
],
)
# TensorFlow repo should always go after the other external dependencies.
# 2020-05-11
_TENSORFLOW_GIT_COMMIT = "7c09d15f9fcc14343343c247ebf5b8e0afe3e4aa"
_TENSORFLOW_SHA256= "673d00cbd2676ae43df1993e0d28c10b5ffbe96d9e2ab29f88a77b43c0211299"
http_archive(
name = "org_tensorflow",
urls = [
"https://mirror.bazel.build/github.com/tensorflow/tensorflow/archive/%s.tar.gz" % _TENSORFLOW_GIT_COMMIT,
"https://github.com/tensorflow/tensorflow/archive/%s.tar.gz" % _TENSORFLOW_GIT_COMMIT,
],
patches = [
"@//third_party:org_tensorflow_compatibility_fixes.diff",
],
patch_args = [
"-p1",
],
strip_prefix = "tensorflow-%s" % _TENSORFLOW_GIT_COMMIT,
sha256 = _TENSORFLOW_SHA256,
)
load("@org_tensorflow//tensorflow:workspace.bzl", "tf_workspace")
tf_workspace(tf_repo_name = "org_tensorflow")

View File

@ -134,6 +134,11 @@ config_setting(
]
]
config_setting(
name = "windows",
values = {"cpu": "x64_windows"},
)
exports_files(
["provisioning_profile.mobileprovision"],
visibility = ["//visibility:public"],

View File

@ -500,6 +500,7 @@ cc_library(
"//mediapipe/framework/port:integral_types",
"//mediapipe/framework/port:logging",
"//mediapipe/framework/port:status",
"//mediapipe/framework/tool:options_util",
],
alwayslink = 1,
)

View File

@ -24,11 +24,13 @@
#include "mediapipe/framework/port/integral_types.h"
#include "mediapipe/framework/port/logging.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/framework/tool/options_util.h"
namespace mediapipe {
namespace {
const double kTimebaseUs = 1000000; // Microseconds.
const char* const kOptionsTag = "OPTIONS";
const char* const kPeriodTag = "PERIOD";
} // namespace
@ -63,9 +65,15 @@ const char* const kPeriodTag = "PERIOD";
// Thinning period can be provided in the calculator options or via a
// side packet with the tag "PERIOD".
//
// Calculator options may optionally be provided via the "OPTIONS" input
// side packet; they will be merged with this calculator's node options, i.e.,
// singular fields of the side packet will overwrite the options defined in the
// node, and repeated fields will be concatenated.
//
// Example config:
// node {
// calculator: "PacketThinnerCalculator"
// input_side_packet: "OPTIONS:calculator_options"
// input_stream: "signal"
// output_stream: "output"
// options {
@ -83,6 +91,9 @@ class PacketThinnerCalculator : public CalculatorBase {
~PacketThinnerCalculator() override {}
static ::mediapipe::Status GetContract(CalculatorContract* cc) {
if (cc->InputSidePackets().HasTag(kOptionsTag)) {
cc->InputSidePackets().Tag(kOptionsTag).Set<CalculatorOptions>();
}
cc->Inputs().Index(0).SetAny();
cc->Outputs().Index(0).SetSameAs(&cc->Inputs().Index(0));
if (cc->InputSidePackets().HasTag(kPeriodTag)) {
@ -143,7 +154,9 @@ TimestampDiff abs(TimestampDiff t) { return t < 0 ? -t : t; }
} // namespace
::mediapipe::Status PacketThinnerCalculator::Open(CalculatorContext* cc) {
auto& options = cc->Options<PacketThinnerCalculatorOptions>();
PacketThinnerCalculatorOptions options = mediapipe::tool::RetrieveOptions(
cc->Options<PacketThinnerCalculatorOptions>(), cc->InputSidePackets(),
kOptionsTag);
thinner_type_ = options.thinner_type();
// This check enables us to assume only two thinner types exist in Process()
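The options-merging behavior described in the comment above can be driven at run time by feeding a CalculatorOptions packet into the "OPTIONS" input side packet. Below is a minimal sketch; the proto header path and the `period` option field are assumptions for illustration rather than details taken from this diff.

```cpp
#include "mediapipe/calculators/core/packet_thinner_calculator.pb.h"  // assumed proto header path
#include "mediapipe/framework/calculator_framework.h"

// Builds an "OPTIONS" side packet whose singular fields override the node
// options of PacketThinnerCalculator; repeated fields would be concatenated.
::mediapipe::Status StartRunWithThinnerOverride(::mediapipe::CalculatorGraph* graph) {
  mediapipe::CalculatorOptions side_options;
  side_options.MutableExtension(mediapipe::PacketThinnerCalculatorOptions::ext)
      ->set_period(10000);  // Assumed option field: thinning period in microseconds.
  // "calculator_options" must match the graph-level side packet bound to
  // "OPTIONS:calculator_options" in the node config above.
  return graph->StartRun(
      {{"calculator_options",
        mediapipe::MakePacket<mediapipe::CalculatorOptions>(side_options)}});
}
```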

View File

@ -93,8 +93,7 @@ class PreviousLoopbackCalculator : public CalculatorBase {
// MAIN packet, hence not caring about corresponding loop packet.
loop_timestamp = Timestamp::Unset();
}
main_packet_specs_.push_back({.timestamp = main_packet.Timestamp(),
.loop_timestamp = loop_timestamp});
main_packet_specs_.push_back({main_packet.Timestamp(), loop_timestamp});
prev_main_ts_ = main_packet.Timestamp();
}

View File

@ -38,9 +38,11 @@ void SetColorChannel(int channel, uint8 value, cv::Mat* mat) {
constexpr char kRgbaInTag[] = "RGBA_IN";
constexpr char kRgbInTag[] = "RGB_IN";
constexpr char kBgraInTag[] = "BGRA_IN";
constexpr char kGrayInTag[] = "GRAY_IN";
constexpr char kRgbaOutTag[] = "RGBA_OUT";
constexpr char kRgbOutTag[] = "RGB_OUT";
constexpr char kBgraOutTag[] = "BGRA_OUT";
constexpr char kGrayOutTag[] = "GRAY_OUT";
} // namespace
@ -53,6 +55,8 @@ constexpr char kGrayOutTag[] = "GRAY_OUT";
// GRAY -> RGB
// RGB -> GRAY
// RGB -> RGBA
// RGBA -> BGRA
// BGRA -> RGBA
//
// This calculator only supports a single input stream and output stream at a
// time. If more than one input stream or output stream is present, the
@ -63,11 +67,13 @@ constexpr char kGrayOutTag[] = "GRAY_OUT";
// Input streams:
// RGBA_IN: The input video stream (ImageFrame, SRGBA).
// RGB_IN: The input video stream (ImageFrame, SRGB).
// BGRA_IN: The input video stream (ImageFrame, SBGRA).
// GRAY_IN: The input video stream (ImageFrame, GRAY8).
//
// Output streams:
// RGBA_OUT: The output video stream (ImageFrame, SRGBA).
// RGB_OUT: The output video stream (ImageFrame, SRGB).
// BGRA_OUT: The output video stream (ImageFrame, SBGRA).
// GRAY_OUT: The output video stream (ImageFrame, GRAY8).
class ColorConvertCalculator : public CalculatorBase {
public:
@ -113,6 +119,10 @@ REGISTER_CALCULATOR(ColorConvertCalculator);
cc->Inputs().Tag(kRgbInTag).Set<ImageFrame>();
}
if (cc->Inputs().HasTag(kBgraInTag)) {
cc->Inputs().Tag(kBgraInTag).Set<ImageFrame>();
}
if (cc->Outputs().HasTag(kRgbOutTag)) {
cc->Outputs().Tag(kRgbOutTag).Set<ImageFrame>();
}
@ -125,6 +135,10 @@ REGISTER_CALCULATOR(ColorConvertCalculator);
cc->Outputs().Tag(kRgbaOutTag).Set<ImageFrame>();
}
if (cc->Outputs().HasTag(kBgraOutTag)) {
cc->Outputs().Tag(kBgraOutTag).Set<ImageFrame>();
}
return ::mediapipe::OkStatus();
}
@ -171,6 +185,16 @@ REGISTER_CALCULATOR(ColorConvertCalculator);
return ConvertAndOutput(kRgbInTag, kRgbaOutTag, ImageFormat::SRGBA,
cv::COLOR_RGB2RGBA, cc);
}
// BGRA -> RGBA
if (cc->Inputs().HasTag(kBgraInTag) && cc->Outputs().HasTag(kRgbaOutTag)) {
return ConvertAndOutput(kBgraInTag, kRgbaOutTag, ImageFormat::SRGBA,
cv::COLOR_BGRA2RGBA, cc);
}
// RGBA -> BGRA
if (cc->Inputs().HasTag(kRgbaInTag) && cc->Outputs().HasTag(kBgraOutTag)) {
return ConvertAndOutput(kRgbaInTag, kBgraOutTag, ImageFormat::SBGRA,
cv::COLOR_RGBA2BGRA, cc);
}
return ::mediapipe::InvalidArgumentErrorBuilder(MEDIAPIPE_LOC)
<< "Unsupported image format conversion.";

View File

@ -514,13 +514,7 @@ RectSpec ImageCroppingCalculator::GetCropSpecs(const CalculatorContext* cc,
}
}
return {
.width = crop_width,
.height = crop_height,
.center_x = x_center,
.center_y = y_center,
.rotation = rotation,
};
return {crop_width, crop_height, x_center, y_center, rotation};
}
::mediapipe::Status ImageCroppingCalculator::GetBorderModeForOpenCV(

View File

@ -392,19 +392,26 @@ REGISTER_CALCULATOR(ImageTransformationCalculator);
}
cv::Mat scaled_mat;
int output_width = output_width_;
int output_height = output_height_;
if (scale_mode_ == mediapipe::ScaleMode_Mode_STRETCH) {
cv::resize(input_mat, scaled_mat, cv::Size(output_width_, output_height_));
int scale_flag =
input_mat.cols > output_width_ && input_mat.rows > output_height_
? cv::INTER_AREA
: cv::INTER_LINEAR;
cv::resize(input_mat, scaled_mat, cv::Size(output_width_, output_height_),
0, 0, scale_flag);
} else {
const float scale =
std::min(static_cast<float>(output_width_) / input_width,
static_cast<float>(output_height_) / input_height);
const int target_width = std::round(input_width * scale);
const int target_height = std::round(input_height * scale);
int scale_flag = scale < 1.0f ? cv::INTER_AREA : cv::INTER_LINEAR;
if (scale_mode_ == mediapipe::ScaleMode_Mode_FIT) {
cv::Mat intermediate_mat;
cv::resize(input_mat, intermediate_mat,
cv::Size(target_width, target_height));
cv::Size(target_width, target_height), 0, 0, scale_flag);
const int top = (output_height_ - target_height) / 2;
const int bottom = output_height_ - target_height - top;
const int left = (output_width_ - target_width) / 2;
@ -413,16 +420,13 @@ REGISTER_CALCULATOR(ImageTransformationCalculator);
options_.constant_padding() ? cv::BORDER_CONSTANT
: cv::BORDER_REPLICATE);
} else {
cv::resize(input_mat, scaled_mat, cv::Size(target_width, target_height));
output_width_ = target_width;
output_height_ = target_height;
cv::resize(input_mat, scaled_mat, cv::Size(target_width, target_height),
0, 0, scale_flag);
output_width = target_width;
output_height = target_height;
}
}
int output_width;
int output_height;
ComputeOutputDimensions(input_width, input_height, &output_width,
&output_height);
if (cc->Outputs().HasTag("LETTERBOX_PADDING")) {
auto padding = absl::make_unique<std::array<float, 4>>();
ComputeOutputLetterboxPadding(input_width, input_height, output_width,

View File

@ -321,7 +321,7 @@ cc_library(
"@org_tensorflow//tensorflow/core:framework",
],
"//mediapipe:android": [
"@org_tensorflow//tensorflow/core:android_lib_lite",
"@org_tensorflow//tensorflow/core:portable_tensorflow_lib_lite",
],
}),
alwayslink = 1,
@ -343,7 +343,7 @@ cc_library(
"@org_tensorflow//tensorflow/core:framework",
],
"//mediapipe:android": [
"@org_tensorflow//tensorflow/core:android_lib_lite",
"@org_tensorflow//tensorflow/core:portable_tensorflow_lib_lite",
],
}),
alwayslink = 1,
@ -449,10 +449,10 @@ cc_library(
"@org_tensorflow//tensorflow/core:framework",
],
"//mediapipe:android": [
"@org_tensorflow//tensorflow/core:android_tensorflow_lib_lite_nortti_lite_protos",
"@org_tensorflow//tensorflow/core:portable_tensorflow_lib_lite",
],
"//mediapipe:ios": [
"@org_tensorflow//tensorflow/core:ios_tensorflow_lib",
"@org_tensorflow//tensorflow/core:portable_tensorflow_lib",
],
}),
alwayslink = 1,
@ -470,10 +470,10 @@ cc_library(
"@org_tensorflow//tensorflow/core:core",
],
"//mediapipe:android": [
"@org_tensorflow//tensorflow/core:android_tensorflow_lib_lite_nortti_lite_protos",
"@org_tensorflow//tensorflow/core:portable_tensorflow_lib_lite",
],
"//mediapipe:ios": [
"@org_tensorflow//tensorflow/core:ios_tensorflow_lib",
"@org_tensorflow//tensorflow/core:portable_tensorflow_lib",
],
}),
)
@ -496,11 +496,11 @@ cc_library(
"@org_tensorflow//tensorflow/core:core",
],
"//mediapipe:android": [
"@org_tensorflow//tensorflow/core:android_tensorflow_lib_lite_nortti_lite_protos",
"@org_tensorflow//tensorflow/core:portable_tensorflow_lib_lite",
"//mediapipe/android/file/base",
],
"//mediapipe:ios": [
"@org_tensorflow//tensorflow/core:ios_tensorflow_lib",
"@org_tensorflow//tensorflow/core:portable_tensorflow_lib",
"//mediapipe/android/file/base",
],
}),
@ -525,11 +525,11 @@ cc_library(
"@org_tensorflow//tensorflow/core:core",
],
"//mediapipe:android": [
"@org_tensorflow//tensorflow/core:android_tensorflow_lib_lite_nortti_lite_protos",
"@org_tensorflow//tensorflow/core:portable_tensorflow_lib_lite",
"//mediapipe/android/file/base",
],
"//mediapipe:ios": [
"@org_tensorflow//tensorflow/core:ios_tensorflow_lib",
"@org_tensorflow//tensorflow/core:portable_tensorflow_lib",
"//mediapipe/android/file/base",
],
}),
@ -637,7 +637,7 @@ cc_library(
"@org_tensorflow//tensorflow/core:framework",
],
"//mediapipe:android": [
"@org_tensorflow//tensorflow/core:android_lib_lite",
"@org_tensorflow//tensorflow/core:portable_tensorflow_lib_lite",
],
}),
alwayslink = 1,
@ -673,7 +673,7 @@ cc_library(
"@org_tensorflow//tensorflow/core:framework",
],
"//mediapipe:android": [
"@org_tensorflow//tensorflow/core:android_lib_lite",
"@org_tensorflow//tensorflow/core:portable_tensorflow_lib_lite",
],
}),
alwayslink = 1,
@ -1109,11 +1109,11 @@ cc_test(
"@org_tensorflow//tensorflow/core:direct_session",
],
"//mediapipe:android": [
"@org_tensorflow//tensorflow/core:android_tensorflow_lib_with_ops_lite_proto_no_rtti_lib",
"@org_tensorflow//tensorflow/core:android_tensorflow_test_lib",
"@org_tensorflow//tensorflow/core:portable_tensorflow_lib",
"@org_tensorflow//tensorflow/core:portable_tensorflow_test_lib",
],
"//mediapipe:ios": [
"@org_tensorflow//tensorflow/core:ios_tensorflow_test_lib",
"@org_tensorflow//tensorflow/core:portable_tensorflow_test_lib",
],
}),
)

View File

@ -198,6 +198,7 @@ cc_test(
cc_library(
name = "util",
hdrs = ["util.h"],
visibility = ["//visibility:public"],
alwayslink = 1,
)
@ -525,16 +526,16 @@ cc_test(
":tflite_converter_calculator_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework:calculator_runner",
"//mediapipe/framework/deps:file_path",
"//mediapipe/framework/formats:image_format_cc_proto",
"//mediapipe/framework/formats:image_frame",
"//mediapipe/framework/formats:image_frame_opencv",
"//mediapipe/framework/formats:matrix",
"//mediapipe/framework/port:gtest_main",
"//mediapipe/framework/port:integral_types",
"//mediapipe/framework/port:parse_text_proto",
"//mediapipe/framework/port:status",
"//mediapipe/framework/tool:validate_type",
"@com_google_absl//absl/memory",
"@org_tensorflow//tensorflow/lite:framework",
"@org_tensorflow//tensorflow/lite/kernels:builtin_ops",
],
)

View File

@ -26,8 +26,12 @@ namespace {
float CalculateScale(float min_scale, float max_scale, int stride_index,
int num_strides) {
return min_scale +
(max_scale - min_scale) * 1.0 * stride_index / (num_strides - 1.0f);
if (num_strides == 1) {
return (min_scale + max_scale) * 0.5f;
} else {
return min_scale +
(max_scale - min_scale) * 1.0 * stride_index / (num_strides - 1.0f);
}
}
} // namespace
@ -114,7 +118,7 @@ REGISTER_CALCULATOR(SsdAnchorsCalculator);
}
int layer_id = 0;
while (layer_id < options.strides_size()) {
while (layer_id < options.num_layers()) {
std::vector<float> anchor_height;
std::vector<float> anchor_width;
std::vector<float> aspect_ratios;
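For reference, the anchor scale computed by the revised CalculateScale above is, writing n for num_strides and i for stride_index:

```latex
s_i =
\begin{cases}
  \dfrac{s_{\min} + s_{\max}}{2}, & n = 1, \\[4pt]
  s_{\min} + \left(s_{\max} - s_{\min}\right) \dfrac{i}{n - 1}, & n > 1.
\end{cases}
```

That is, the new branch returns the midpoint of the scale range when only one stride is configured, instead of evaluating the n > 1 expression with a zero denominator.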

View File

@ -67,10 +67,12 @@ constexpr char kImageFrameTag[] = "IMAGE";
constexpr char kGpuBufferTag[] = "IMAGE_GPU";
constexpr char kTensorsTag[] = "TENSORS";
constexpr char kTensorsGpuTag[] = "TENSORS_GPU";
constexpr char kMatrixTag[] = "MATRIX";
} // namespace
namespace mediapipe {
namespace {
#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE)
using ::tflite::gpu::gl::CreateReadWriteShaderStorageBuffer;
using ::tflite::gpu::gl::GlProgram;
@ -89,6 +91,8 @@ struct GPUData {
};
#endif
} // namespace
// Calculator for normalizing and converting an ImageFrame or Matrix
// into a TfLiteTensor (float 32) or a GpuBuffer to a tflite::gpu::GlBuffer
// or MTLBuffer.
@ -164,6 +168,9 @@ class TfLiteConverterCalculator : public CalculatorBase {
bool initialized_ = false;
bool use_gpu_ = false;
bool zero_center_ = true; // normalize range to [-1,1] | otherwise [0,1]
bool use_custom_normalization_ = false;
float custom_div_ = -1.0f;
float custom_sub_ = -1.0f;
bool flip_vertically_ = false;
bool row_major_matrix_ = false;
bool use_quantized_tensors_ = false;
@ -175,7 +182,8 @@ REGISTER_CALCULATOR(TfLiteConverterCalculator);
CalculatorContract* cc) {
// Confirm only one of the input streams is present.
RET_CHECK(cc->Inputs().HasTag(kImageFrameTag) ^
cc->Inputs().HasTag(kGpuBufferTag) ^ cc->Inputs().HasTag("MATRIX"));
cc->Inputs().HasTag(kGpuBufferTag) ^
cc->Inputs().HasTag(kMatrixTag));
// Confirm only one of the output streams is present.
RET_CHECK(cc->Outputs().HasTag(kTensorsTag) ^
@ -186,8 +194,8 @@ REGISTER_CALCULATOR(TfLiteConverterCalculator);
if (cc->Inputs().HasTag(kImageFrameTag)) {
cc->Inputs().Tag(kImageFrameTag).Set<ImageFrame>();
}
if (cc->Inputs().HasTag("MATRIX")) {
cc->Inputs().Tag("MATRIX").Set<Matrix>();
if (cc->Inputs().HasTag(kMatrixTag)) {
cc->Inputs().Tag(kMatrixTag).Set<Matrix>();
}
#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__)
if (cc->Inputs().HasTag(kGpuBufferTag)) {
@ -257,6 +265,9 @@ REGISTER_CALCULATOR(TfLiteConverterCalculator);
::mediapipe::Status TfLiteConverterCalculator::Process(CalculatorContext* cc) {
if (use_gpu_) {
if (cc->Inputs().Tag(kGpuBufferTag).IsEmpty()) {
return ::mediapipe::OkStatus();
}
if (!initialized_) {
MP_RETURN_IF_ERROR(InitGpu(cc));
initialized_ = true;
@ -283,6 +294,9 @@ REGISTER_CALCULATOR(TfLiteConverterCalculator);
::mediapipe::Status TfLiteConverterCalculator::ProcessCPU(
CalculatorContext* cc) {
if (cc->Inputs().HasTag(kImageFrameTag)) {
if (cc->Inputs().Tag(kImageFrameTag).IsEmpty()) {
return ::mediapipe::OkStatus();
}
// CPU ImageFrame to TfLiteTensor conversion.
const auto& image_frame =
@ -361,10 +375,12 @@ REGISTER_CALCULATOR(TfLiteConverterCalculator);
cc->Outputs()
.Tag(kTensorsTag)
.Add(output_tensors.release(), cc->InputTimestamp());
} else if (cc->Inputs().HasTag("MATRIX")) {
} else if (cc->Inputs().HasTag(kMatrixTag)) {
if (cc->Inputs().Tag(kMatrixTag).IsEmpty()) {
return ::mediapipe::OkStatus();
}
// CPU Matrix to TfLiteTensor conversion.
const auto& matrix = cc->Inputs().Tag("MATRIX").Get<Matrix>();
const auto& matrix = cc->Inputs().Tag(kMatrixTag).Get<Matrix>();
const int height = matrix.rows();
const int width = matrix.cols();
const int channels = 1;
@ -614,6 +630,11 @@ REGISTER_CALCULATOR(TfLiteConverterCalculator);
// Get data normalization mode.
zero_center_ = options.zero_center();
// Custom div and sub values.
use_custom_normalization_ = options.use_custom_normalization();
custom_div_ = options.custom_div();
custom_sub_ = options.custom_sub();
// Get y-flip mode.
flip_vertically_ = options.flip_vertically();
@ -649,7 +670,13 @@ template <class T>
const int channels_ignored = channels - channels_preserved;
float div, sub;
if (zero_center) {
if (use_custom_normalization_) {
RET_CHECK_GT(custom_div_, 0.0f);
RET_CHECK_GE(custom_sub_, 0.0f);
div = custom_div_;
sub = custom_sub_;
} else if (zero_center) {
// [-1,1]
div = 127.5f;
sub = 1.0f;

View File

@ -28,6 +28,16 @@ message TfLiteConverterCalculatorOptions {
// Ignored if using quantization.
optional bool zero_center = 1 [default = true];
// Custom settings to override the internal scaling factors `div` and `sub`.
// Both values must be set: `custom_div` must be positive and `custom_sub`
// non-negative. They only take effect on CPU AND when |use_custom_normalization|
// is set to true. When these custom values take effect, the |zero_center|
// setting above will be overridden, and
// the normalized_value will be calculated as:
// normalized_value = input / custom_div - custom_sub.
optional bool use_custom_normalization = 6 [default = false];
optional float custom_div = 7 [default = -1.0];
optional float custom_sub = 8 [default = -1.0];
// Whether the input image should be flipped vertically (along the
// y-direction). This is useful, for example, when the input image is defined
// with a coordinate system where the origin is at the bottom-left corner

View File

@ -19,6 +19,9 @@
#include "mediapipe/calculators/tflite/tflite_converter_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_runner.h"
#include "mediapipe/framework/formats/image_format.pb.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/image_frame_opencv.h"
#include "mediapipe/framework/formats/matrix.h"
#include "mediapipe/framework/port/gtest.h"
#include "mediapipe/framework/port/integral_types.h"
@ -28,7 +31,6 @@
#include "tensorflow/lite/interpreter.h"
namespace mediapipe {
namespace {
constexpr char kTransposeOptionsString[] =
@ -196,4 +198,55 @@ TEST_F(TfLiteConverterCalculatorTest, RandomMatrixRowMajor) {
}
}
TEST_F(TfLiteConverterCalculatorTest, CustomDivAndSub) {
CalculatorGraph graph;
// Run the calculator and verify that one output is generated.
CalculatorGraphConfig graph_config =
::mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(R"(
input_stream: "input_image"
node {
calculator: "TfLiteConverterCalculator"
input_stream: "IMAGE:input_image"
output_stream: "TENSORS:tensor"
options {
[mediapipe.TfLiteConverterCalculatorOptions.ext] {
row_major_matrix: true
use_custom_normalization: true
custom_div: 2.0
custom_sub: 33.0
}
}
}
)");
std::vector<Packet> output_packets;
tool::AddVectorSink("tensor", &graph_config, &output_packets);
// Run the graph.
MP_ASSERT_OK(graph.Initialize(graph_config));
MP_ASSERT_OK(graph.StartRun({}));
auto input_image = absl::make_unique<ImageFrame>(ImageFormat::GRAY8, 1, 1);
cv::Mat mat = ::mediapipe::formats::MatView(input_image.get());
mat.at<uint8>(0, 0) = 200;
MP_ASSERT_OK(graph.AddPacketToInputStream(
"input_image", Adopt(input_image.release()).At(Timestamp(0))));
// Wait until the calculator done processing.
MP_ASSERT_OK(graph.WaitUntilIdle());
EXPECT_EQ(1, output_packets.size());
// Get and process results.
const std::vector<TfLiteTensor>& tensor_vec =
output_packets[0].Get<std::vector<TfLiteTensor>>();
EXPECT_EQ(1, tensor_vec.size());
const TfLiteTensor* tensor = &tensor_vec[0];
EXPECT_EQ(kTfLiteFloat32, tensor->type);
EXPECT_FLOAT_EQ(67.0f, *tensor->data.f);
// Fully close graph at end, otherwise calculator+tensors are destroyed
// after calling WaitUntilDone().
MP_ASSERT_OK(graph.CloseInputStream("input_image"));
MP_ASSERT_OK(graph.WaitUntilDone());
}
} // namespace mediapipe
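The expected tensor value in the test above follows directly from the custom normalization formula documented in TfLiteConverterCalculatorOptions (normalized_value = input / custom_div - custom_sub) applied to the single GRAY8 pixel of 200:

```latex
\frac{200}{2.0} - 33.0 = 100.0 - 33.0 = 67.0
```

which matches the EXPECT_FLOAT_EQ(67.0f, *tensor->data.f) assertion.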

View File

@ -57,7 +57,10 @@
#include "tensorflow/lite/delegates/gpu/metal_delegate.h"
#include "tensorflow/lite/delegates/gpu/metal_delegate_internal.h"
#endif // iOS
#if !defined(MEDIAPIPE_EDGE_TPU)
#include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h"
#endif // !EDGETPU
#if defined(MEDIAPIPE_ANDROID)
#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
#endif // ANDROID
@ -116,11 +119,13 @@ using ::tflite::gpu::gl::GlBuffer;
#endif
#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__)
namespace {
struct GPUData {
int elements = 1;
GpuTensor buffer;
::tflite::gpu::BHWC shape;
};
} // namespace
#endif
// Returns number of threads to configure XNNPACK delegate with.
@ -405,8 +410,11 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator);
// 1. Receive pre-processed tensor inputs.
if (use_advanced_gpu_api_) {
#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE)
if (cc->Inputs().Tag(kTensorsGpuTag).IsEmpty()) {
return ::mediapipe::OkStatus();
}
const auto& input_tensors =
cc->Inputs().Tag("TENSORS_GPU").Get<std::vector<GpuTensor>>();
cc->Inputs().Tag(kTensorsGpuTag).Get<std::vector<GpuTensor>>();
RET_CHECK(!input_tensors.empty());
MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext(
[this, &input_tensors]() -> ::mediapipe::Status {
@ -424,6 +432,9 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator);
} else if (gpu_input_) {
// Read GPU input into SSBO.
#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE)
if (cc->Inputs().Tag(kTensorsGpuTag).IsEmpty()) {
return ::mediapipe::OkStatus();
}
const auto& input_tensors =
cc->Inputs().Tag(kTensorsGpuTag).Get<std::vector<GpuTensor>>();
RET_CHECK_GT(input_tensors.size(), 0);
@ -439,6 +450,9 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator);
return ::mediapipe::OkStatus();
}));
#elif defined(MEDIAPIPE_IOS)
if (cc->Inputs().Tag(kTensorsGpuTag).IsEmpty()) {
return ::mediapipe::OkStatus();
}
const auto& input_tensors =
cc->Inputs().Tag(kTensorsGpuTag).Get<std::vector<GpuTensor>>();
RET_CHECK_GT(input_tensors.size(), 0);
@ -465,6 +479,9 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator);
RET_CHECK_FAIL() << "GPU processing not enabled.";
#endif
} else {
if (cc->Inputs().Tag(kTensorsTag).IsEmpty()) {
return ::mediapipe::OkStatus();
}
// Read CPU input into tensors.
const auto& input_tensors =
cc->Inputs().Tag(kTensorsTag).Get<std::vector<TfLiteTensor>>();
@ -511,10 +528,10 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator);
auto output_tensors = absl::make_unique<std::vector<GpuTensor>>();
output_tensors->resize(gpu_data_out_.size());
for (int i = 0; i < gpu_data_out_.size(); ++i) {
output_tensors->at(i) = gpu_data_out_[0]->buffer.MakeRef();
output_tensors->at(i) = gpu_data_out_[i]->buffer.MakeRef();
}
cc->Outputs()
.Tag("TENSORS_GPU")
.Tag(kTensorsGpuTag)
.Add(output_tensors.release(), cc->InputTimestamp());
#endif
} else if (gpu_output_) {
@ -637,7 +654,7 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator);
options.usage = tflite::gpu::InferenceUsage::SUSTAINED_SPEED;
tflite_gpu_runner_ =
std::make_unique<tflite::gpu::TFLiteGPURunner>(options);
return tflite_gpu_runner_->InitializeWithModel(model);
return tflite_gpu_runner_->InitializeWithModel(model, op_resolver);
}
#endif
@ -730,6 +747,7 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator);
calculator_opts.delegate().has_xnnpack();
#endif // __EMSCRIPTEN__
#if !defined(MEDIAPIPE_EDGE_TPU)
if (xnnpack_requested) {
TfLiteXNNPackDelegateOptions xnnpack_opts{};
xnnpack_opts.num_threads = GetXnnpackNumThreads(calculator_opts);
@ -738,6 +756,7 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator);
RET_CHECK_EQ(interpreter_->ModifyGraphWithDelegate(delegate_.get()),
kTfLiteOk);
}
#endif // !EDGETPU
// Return, no need for GPU delegate below.
return ::mediapipe::OkStatus();

View File

@ -77,7 +77,10 @@ using ::tflite::gpu::gl::GlShader;
// Performs optional upscale to REFERENCE_IMAGE dimensions if provided,
// otherwise the mask is the same size as input tensor.
//
// Produces result as an RGBA image, with the mask in both R & A channels.
// Produces result as an RGBA image, with the mask in both R & A channels. The
// value of each pixel is the probability of the specified class after softmax,
// scaled to 255 on CPU. The class can be specified through the
// |output_layer_index| option.
//
// Inputs:
// One of the following TENSORS tags:

View File

@ -276,6 +276,41 @@ cc_test(
],
)
cc_library(
name = "clock_timestamp_calculator",
srcs = ["clock_timestamp_calculator.cc"],
visibility = [
"//visibility:public",
],
deps = [
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework:timestamp",
"//mediapipe/framework/deps:clock",
"//mediapipe/framework/port:logging",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"@com_google_absl//absl/time",
],
alwayslink = 1,
)
cc_library(
name = "clock_latency_calculator",
srcs = ["clock_latency_calculator.cc"],
visibility = [
"//visibility:public",
],
deps = [
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework:timestamp",
"//mediapipe/framework/port:logging",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"@com_google_absl//absl/time",
],
alwayslink = 1,
)
cc_library(
name = "annotation_overlay_calculator",
srcs = ["annotation_overlay_calculator.cc"],

View File

@ -0,0 +1,116 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/time/time.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/logging.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
namespace mediapipe {
namespace {
// Tag name for reference signal.
constexpr char kReferenceTag[] = "REFERENCE";
} // namespace
// A calculator that diffs multiple input absl::Time streams against a
// reference Time stream, and outputs the resulting absl::Durations. Useful
// in combination with ClockTimestampCalculator to determine the
// latency between two different points in a graph.
//
// Inputs: At least one non-reference Time stream is required.
// 0- Time stream 0
// 1- Time stream 1
// ...
// N- Time stream N
// REFERENCE (required): The Time stream against which all others are
// compared. Should be the stream from which our other streams were
// computed, in order to provide meaningful latency results.
//
// Outputs:
// 0- Duration from REFERENCE to input stream 0
// 1- Duration from REFERENCE to input stream 1
// ...
// N- Duration from REFERENCE to input stream N
//
// Example config:
// node {
// calculator: "ClockLatencyCalculator"
// input_stream: "packet_clocktime_stream_0"
// input_stream: "packet_clocktime_stream_1"
// input_stream: "packet_clocktime_stream_2"
// input_stream: "REFERENCE_SIGNAL: packet_clocktime_stream_reference"
// output_stream: "packet_latency_stream_0"
// output_stream: "packet_latency_stream_1"
// output_stream: "packet_latency_stream_2"
// }
//
class ClockLatencyCalculator : public CalculatorBase {
public:
ClockLatencyCalculator() {}
static ::mediapipe::Status GetContract(CalculatorContract* cc);
::mediapipe::Status Open(CalculatorContext* cc) override;
::mediapipe::Status Process(CalculatorContext* cc) override;
private:
int64 num_packet_streams_ = -1;
};
REGISTER_CALCULATOR(ClockLatencyCalculator);
::mediapipe::Status ClockLatencyCalculator::GetContract(
CalculatorContract* cc) {
RET_CHECK_GT(cc->Inputs().NumEntries(), 1);
int64 num_packet_streams = cc->Inputs().NumEntries() - 1;
RET_CHECK_EQ(cc->Outputs().NumEntries(), num_packet_streams);
for (int64 i = 0; i < num_packet_streams; ++i) {
cc->Inputs().Index(i).Set<absl::Time>();
cc->Outputs().Index(i).Set<absl::Duration>();
}
cc->Inputs().Tag(kReferenceTag).Set<absl::Time>();
return ::mediapipe::OkStatus();
}
::mediapipe::Status ClockLatencyCalculator::Open(CalculatorContext* cc) {
// Direct passthrough, as far as timestamp and bounds are concerned.
cc->SetOffset(TimestampDiff(0));
num_packet_streams_ = cc->Inputs().NumEntries() - 1;
return ::mediapipe::OkStatus();
}
::mediapipe::Status ClockLatencyCalculator::Process(CalculatorContext* cc) {
// Get reference time.
RET_CHECK(!cc->Inputs().Tag(kReferenceTag).IsEmpty());
const absl::Time& reference_time =
cc->Inputs().Tag(kReferenceTag).Get<absl::Time>();
// Push Duration packets for every input stream we have.
for (int64 i = 0; i < num_packet_streams_; ++i) {
if (!cc->Inputs().Index(i).IsEmpty()) {
const absl::Time& input_stream_time =
cc->Inputs().Index(i).Get<absl::Time>();
cc->Outputs().Index(i).AddPacket(
MakePacket<absl::Duration>(input_stream_time - reference_time)
.At(cc->InputTimestamp()));
}
}
return ::mediapipe::OkStatus();
}
} // namespace mediapipe

View File

@ -0,0 +1,108 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/time/time.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/deps/clock.h"
#include "mediapipe/framework/deps/monotonic_clock.h"
#include "mediapipe/framework/port/logging.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
namespace mediapipe {
namespace {
// Tag name for clock side packet.
constexpr char kClockTag[] = "CLOCK";
} // namespace
// A calculator that outputs the current clock time at which it receives input
// packets. Use a separate instance of this calculator for each input stream
// you wish to output a clock time for.
//
// InputSidePacket (Optional):
// CLOCK: A clock to use for querying the current time.
//
// Inputs:
// A single packet stream for which we wish to get the current clock time.
// Outputs:
// A single stream of absl::Time packets, representing the clock time at which
// we received the input stream's packets.
// Example config:
// node {
// calculator: "ClockTimestampCalculator"
// input_side_packet: "CLOCK:monotonic_clock"
// input_stream: "packet_stream"
// output_stream: "packet_clocktime_stream"
// }
//
class ClockTimestampCalculator : public CalculatorBase {
public:
ClockTimestampCalculator() {}
static ::mediapipe::Status GetContract(CalculatorContract* cc);
::mediapipe::Status Open(CalculatorContext* cc) override;
::mediapipe::Status Process(CalculatorContext* cc) override;
private:
// Clock object.
std::shared_ptr<::mediapipe::Clock> clock_;
};
REGISTER_CALCULATOR(ClockTimestampCalculator);
::mediapipe::Status ClockTimestampCalculator::GetContract(
CalculatorContract* cc) {
RET_CHECK_EQ(cc->Inputs().NumEntries(), 1);
RET_CHECK_EQ(cc->Outputs().NumEntries(), 1);
cc->Inputs().Index(0).SetAny();
cc->Outputs().Index(0).Set<absl::Time>();
// Optional Clock input side packet.
if (cc->InputSidePackets().HasTag(kClockTag)) {
cc->InputSidePackets()
.Tag(kClockTag)
.Set<std::shared_ptr<::mediapipe::Clock>>();
}
return ::mediapipe::OkStatus();
}
::mediapipe::Status ClockTimestampCalculator::Open(CalculatorContext* cc) {
// Direct passthrough, as far as timestamp and bounds are concerned.
cc->SetOffset(TimestampDiff(0));
// Initialize the clock.
if (cc->InputSidePackets().HasTag(kClockTag)) {
clock_ = cc->InputSidePackets()
.Tag("CLOCK")
.Get<std::shared_ptr<::mediapipe::Clock>>();
} else {
clock_.reset(
::mediapipe::MonotonicClock::CreateSynchronizedMonotonicClock());
}
return ::mediapipe::OkStatus();
}
::mediapipe::Status ClockTimestampCalculator::Process(CalculatorContext* cc) {
// Push the Time packet to output.
auto timestamp_packet = MakePacket<absl::Time>(clock_->TimeNow());
cc->Outputs().Index(0).AddPacket(timestamp_packet.At(cc->InputTimestamp()));
return ::mediapipe::OkStatus();
}
} // namespace mediapipe
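The two new calculators above are intended to be used together: ClockTimestampCalculator stamps streams with wall-clock times, and ClockLatencyCalculator diffs them against a reference. Below is a sketch of a graph config wiring them around an arbitrary processing node; the PassThroughCalculator stand-in and all stream names are assumptions, and the reference input uses the "REFERENCE" tag declared by kReferenceTag.

```cpp
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/parse_text_proto.h"

// Measures how long the middle node takes by comparing clock times captured
// before and after it.
mediapipe::CalculatorGraphConfig MakeLatencyProbeConfig() {
  return mediapipe::ParseTextProtoOrDie<mediapipe::CalculatorGraphConfig>(R"(
    input_stream: "input"
    # Clock time at graph entry; this becomes the reference signal.
    node {
      calculator: "ClockTimestampCalculator"
      input_stream: "input"
      output_stream: "input_time"
    }
    # The processing whose latency we want to measure (stand-in calculator).
    node {
      calculator: "PassThroughCalculator"
      input_stream: "input"
      output_stream: "output"
    }
    # Clock time after processing.
    node {
      calculator: "ClockTimestampCalculator"
      input_stream: "output"
      output_stream: "output_time"
    }
    # absl::Duration between the two clock times, per packet.
    node {
      calculator: "ClockLatencyCalculator"
      input_stream: "output_time"
      input_stream: "REFERENCE:input_time"
      output_stream: "output_latency"
    }
  )");
}
```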

View File

@ -27,6 +27,7 @@ namespace mediapipe {
namespace {
constexpr char kDetectionTag[] = "DETECTION";
constexpr char kDetectionsTag[] = "DETECTIONS";
constexpr char kDetectionListTag[] = "DETECTION_LIST";
constexpr char kRenderDataTag[] = "RENDER_DATA";
@ -62,6 +63,7 @@ constexpr float kNumScoreDecimalDigitsMultipler = 100;
// Example config:
// node {
// calculator: "DetectionsToRenderDataCalculator"
// input_stream: "DETECTION:detection"
// input_stream: "DETECTIONS:detections"
// input_stream: "DETECTION_LIST:detection_list"
// output_stream: "RENDER_DATA:render_data"
@ -123,9 +125,13 @@ REGISTER_CALCULATOR(DetectionsToRenderDataCalculator);
::mediapipe::Status DetectionsToRenderDataCalculator::GetContract(
CalculatorContract* cc) {
RET_CHECK(cc->Inputs().HasTag(kDetectionListTag) ||
cc->Inputs().HasTag(kDetectionsTag))
cc->Inputs().HasTag(kDetectionsTag) ||
cc->Inputs().HasTag(kDetectionTag))
<< "None of the input streams are provided.";
if (cc->Inputs().HasTag(kDetectionTag)) {
cc->Inputs().Tag(kDetectionTag).Set<Detection>();
}
if (cc->Inputs().HasTag(kDetectionListTag)) {
cc->Inputs().Tag(kDetectionListTag).Set<DetectionList>();
}
@ -155,8 +161,10 @@ REGISTER_CALCULATOR(DetectionsToRenderDataCalculator);
const bool has_detection_from_vector =
cc->Inputs().HasTag(kDetectionsTag) &&
!cc->Inputs().Tag(kDetectionsTag).Get<std::vector<Detection>>().empty();
const bool has_single_detection = cc->Inputs().HasTag(kDetectionTag) &&
!cc->Inputs().Tag(kDetectionTag).IsEmpty();
if (!options.produce_empty_packet() && !has_detection_from_list &&
!has_detection_from_vector) {
!has_detection_from_vector && !has_single_detection) {
return ::mediapipe::OkStatus();
}
@ -176,6 +184,10 @@ REGISTER_CALCULATOR(DetectionsToRenderDataCalculator);
AddDetectionToRenderData(detection, options, render_data.get());
}
}
if (has_single_detection) {
AddDetectionToRenderData(cc->Inputs().Tag(kDetectionTag).Get<Detection>(),
options, render_data.get());
}
cc->Outputs()
.Tag(kRenderDataTag)
.Add(render_data.release(), cc->InputTimestamp());

View File

@ -76,7 +76,7 @@ Detection ConvertLandmarksToDetection(const NormalizedLandmarkList& landmarks) {
// node {
// calculator: "LandmarksToDetectionCalculator"
// input_stream: "NORM_LANDMARKS:landmarks"
// output_stream: "DETECTIONS:detections"
// output_stream: "DETECTION:detections"
// }
class LandmarksToDetectionCalculator : public CalculatorBase {
public:

View File

@ -303,12 +303,12 @@ class NonMaxSuppressionCalculator : public CalculatorBase {
IndexedScores candidates;
output_detections->clear();
while (!remained_indexed_scores.empty()) {
const int original_indexed_scores_size = remained_indexed_scores.size();
const auto& detection = detections[remained_indexed_scores[0].first];
if (options_.min_score_threshold() > 0 &&
detection.score(0) < options_.min_score_threshold()) {
break;
}
remained.clear();
candidates.clear();
const Location location(detection.location_data());
@ -365,8 +365,15 @@ class NonMaxSuppressionCalculator : public CalculatorBase {
keypoint->set_y(keypoints[i * 2 + 1] / total_score);
}
}
remained_indexed_scores = std::move(remained);
output_detections->push_back(weighted_detection);
// Breaks the loop if the size of indexed scores doesn't change after an
// iteration.
if (original_indexed_scores_size == remained.size()) {
break;
} else {
remained_indexed_scores = std::move(remained);
}
}
}

View File

@ -2,12 +2,12 @@
***Experimental Only***
The MediaPipe Android archive library is a convenient way to use MediaPipe with
Android Studio and Gradle. MediaPipe doesn't publish a general AAR that can be
used by all projects. Instead, developers need to add a mediapipe_aar() target
to generate a custom AAR file for their own projects. This is necessary in order
to include specific resources such as MediaPipe calculators needed for each
project.
The MediaPipe Android Archive (AAR) library is a convenient way to use MediaPipe
with Android Studio and Gradle. MediaPipe doesn't publish a general AAR that can
be used by all projects. Instead, developers need to add a mediapipe_aar()
target to generate a custom AAR file for their own projects. This is necessary
in order to include specific resources such as MediaPipe calculators needed for
each project.
### Steps to build a MediaPipe AAR

View File

@ -0,0 +1,327 @@
# Building MediaPipe Examples
* [Android](#android)
* [iOS](#ios)
* [Desktop](#desktop)
## Android
### Prerequisite
* Java Runtime.
* Android SDK release 28.0.3 and above.
* Android NDK r18b and above.
MediaPipe recommends setting up Android SDK and NDK via Android Studio (and see
below for Android Studio setup). However, if you prefer using MediaPipe without
Android Studio, please run
[`setup_android_sdk_and_ndk.sh`](https://github.com/google/mediapipe/tree/master/setup_android_sdk_and_ndk.sh)
to download and set up the Android SDK and NDK before building any Android example
apps.
If Android SDK and NDK are already installed (e.g., by Android Studio), set
$ANDROID_HOME and $ANDROID_NDK_HOME to point to the installed SDK and NDK.
```bash
export ANDROID_HOME=<path to the Android SDK>
export ANDROID_NDK_HOME=<path to the Android NDK>
```
In order to use MediaPipe on earlier Android versions, MediaPipe needs to switch
to a lower Android API level. You can achieve this by specifying `api_level =
<api level integer>` in android_ndk_repository() and/or android_sdk_repository()
in the [`WORKSPACE`](https://github.com/google/mediapipe/tree/master/WORKSPACE) file.
Please verify all the necessary packages are installed.
* Android SDK Platform API Level 28 or 29
* Android SDK Build-Tools 28 or 29
* Android SDK Platform-Tools 28 or 29
* Android SDK Tools 26.1.1
* Android NDK 17c or above
### Option 1: Build with Bazel in Command Line
1. To build an Android example app, for instance, for MediaPipe Hand, run:
Note: To reduce the binary size, consider appending `--linkopt="-s"` to the
command below to strip symbols.
```bash
bazel build -c opt --config=android_arm64 mediapipe/examples/android/src/java/com/google/mediapipe/apps/handtrackinggpu
```
1. Install it on a device with:
```bash
adb install bazel-bin/mediapipe/examples/android/src/java/com/google/mediapipe/apps/handtrackinggpu/handtrackinggpu.apk
```
### Option 2: Build with Bazel in Android Studio
The MediaPipe project can be imported into Android Studio using the Bazel
plugins. This allows the MediaPipe examples to be built and modified in Android
Studio.
To incorporate MediaPipe into an existing Android Studio project, see these
[instructions](./android_archive_library.md) that use Android Archive (AAR) and
Gradle.
The steps below use Android Studio 3.5 to build and install a MediaPipe example
app:
1. Install and launch Android Studio 3.5.
2. Select `Configure` | `SDK Manager` | `SDK Platforms`.
* Verify that Android SDK Platform API Level 28 or 29 is installed.
* Take note of the Android SDK Location, e.g.,
`/usr/local/home/Android/Sdk`.
3. Select `Configure` | `SDK Manager` | `SDK Tools`.
* Verify that Android SDK Build-Tools 28 or 29 is installed.
* Verify that Android SDK Platform-Tools 28 or 29 is installed.
* Verify that Android SDK Tools 26.1.1 is installed.
* Verify that Android NDK 17c or above is installed.
* Take note of the Android NDK Location, e.g.,
`/usr/local/home/Android/Sdk/ndk-bundle` or
`/usr/local/home/Android/Sdk/ndk/20.0.5594570`.
4. Set environment variables `$ANDROID_HOME` and `$ANDROID_NDK_HOME` to point
to the installed SDK and NDK.
```bash
export ANDROID_HOME=/usr/local/home/Android/Sdk
# If the NDK libraries are installed by a previous version of Android Studio, do
export ANDROID_NDK_HOME=/usr/local/home/Android/Sdk/ndk-bundle
# If the NDK libraries are installed by Android Studio 3.5, do
export ANDROID_NDK_HOME=/usr/local/home/Android/Sdk/ndk/<version number>
```
5. Select `Configure` | `Plugins` and install `Bazel`.
6. On Linux, select `File` | `Settings` | `Bazel settings`. On macOS, select
`Android Studio` | `Preferences` | `Bazel settings`. Then, modify `Bazel
binary location` to be the same as the output of `$ which bazel`.
7. Select `Import Bazel Project`.
* Select `Workspace`: `/path/to/mediapipe` and select `Next`.
* Select `Generate from BUILD file`: `/path/to/mediapipe/BUILD` and select
`Next`.
* Modify `Project View` to be the following and select `Finish`.
```
directories:
# read project settings, e.g., .bazelrc
.
-mediapipe/objc
-mediapipe/examples/ios
targets:
//mediapipe/examples/android/...:all
//mediapipe/java/...:all
android_sdk_platform: android-29
sync_flags:
--host_crosstool_top=@bazel_tools//tools/cpp:toolchain
```
8. Select `Bazel` | `Sync` | `Sync project with Build files`.
Note: Even after doing step 4, if you still see the error: `"no such package
'@androidsdk//': Either the path attribute of android_sdk_repository or the
ANDROID_HOME environment variable must be set."`, please modify the
[`WORKSPACE`](https://github.com/google/mediapipe/tree/master/WORKSPACE) file to point to your
SDK and NDK library locations, as below:
```
android_sdk_repository(
name = "androidsdk",
path = "/path/to/android/sdk"
)
android_ndk_repository(
name = "androidndk",
path = "/path/to/android/ndk"
)
```
9. Connect an Android device to the workstation.
10. Select `Run...` | `Edit Configurations...`.
* Select `Templates` | `Bazel Command`.
* Enter Target Expression:
`//mediapipe/examples/android/src/java/com/google/mediapipe/apps/facedetectioncpu`
* Enter Bazel command: `mobile-install`.
* Enter Bazel flags: `-c opt --config=android_arm64`.
* Press the `[+]` button to add the new configuration.
* Select `Run` to run the example app on the connected Android device.
## iOS
### Prerequisite
1. Install [Xcode](https://developer.apple.com/xcode/) and the Command Line
Tools.
Follow Apple's instructions to obtain the required development certificates
and provisioning profiles for your iOS device. Install the Command Line
Tools by
```bash
xcode-select --install
```
2. Install [Bazel](https://bazel.build/).
We recommend using [Homebrew](https://brew.sh/) to get the latest version.
3. Set Python 3.7 as the default Python version and install the Python "six"
library.
To make MediaPipe work with TensorFlow, please set Python 3.7 as the default
Python version and install the Python "six" library.
```bash
pip3 install --user six
```
4. Clone the MediaPipe repository.
```bash
git clone https://github.com/google/mediapipe.git
```
5. Symlink or copy your provisioning profile to
`mediapipe/mediapipe/provisioning_profile.mobileprovision`.
```bash
cd mediapipe
ln -s ~/Downloads/MyProvisioningProfile.mobileprovision mediapipe/provisioning_profile.mobileprovision
```
Tip: You can use this command to see the provisioning profiles you have
previously downloaded using Xcode: `open
~/Library/MobileDevice/"Provisioning Profiles"`. If there are none, generate
and download a profile on
[Apple's developer site](https://developer.apple.com/account/resources/).
### Option 1: Build with Bazel in Command Line
1. Modify the `bundle_id` field of the app's `ios_application` target to use
your own identifier. For instance, for
[MediaPipe Hand](./hand_tracking_mobile_gpu.md), the `bundle_id` is in the
`HandTrackingGpuApp` target in the
[BUILD](https://github.com/google/mediapipe/tree/master/mediapipe/examples/ios/handtrackinggpu/BUILD)
file.
2. Again using [MediaPipe Hand](./hand_tracking_mobile_gpu.md) for example,
run:
```bash
bazel build -c opt --config=ios_arm64 mediapipe/examples/ios/handtrackinggpu:HandTrackingGpuApp
```
You may see a permission request from `codesign` in order to sign the app.
3. In Xcode, open the `Devices and Simulators` window (command-shift-2).
4. Make sure your device is connected. You will see a list of installed apps.
Press the "+" button under the list, and select the `.ipa` file built by
Bazel.
5. You can now run the app on your device.
### Option 2: Build in Xcode
Note: This workflow requires a separate tool in addition to Bazel. If it fails
to work for some reason, please resort to the command-line build instructions in
the previous section.
1. We will use a tool called [Tulsi](https://tulsi.bazel.build/) for generating
Xcode projects from Bazel build configurations.
```bash
# cd out of the mediapipe directory, then:
git clone https://github.com/bazelbuild/tulsi.git
cd tulsi
# remove Xcode version from Tulsi's .bazelrc (see http://github.com/bazelbuild/tulsi#building-and-installing):
sed -i .orig '/xcode_version/d' .bazelrc
# build and run Tulsi:
sh build_and_run.sh
```
This will install `Tulsi.app` inside the `Applications` directory in your
home directory.
2. Open `mediapipe/Mediapipe.tulsiproj` using the Tulsi app.
Important: If Tulsi displays an error saying "Bazel could not be found",
press the "Bazel..." button in the Packages tab and select the `bazel`
executable in your homebrew `/bin/` directory.
3. Select the MediaPipe config in the Configs tab, then press the Generate
button below. You will be asked for a location to save the Xcode project.
Once the project is generated, it will be opened in Xcode.
4. You can now select any of the MediaPipe demos in the target menu, and build
and run them as normal.
Note: When you ask Xcode to run an app, by default it will use the Debug
configuration. Some of our demos are computationally heavy; you may want to
use the Release configuration for better performance.
Tip: To switch build configuration in Xcode, click on the target menu,
choose "Edit Scheme...", select the Run action, and switch the Build
Configuration from Debug to Release. Note that this is set independently for
each target.
## Desktop
### Option 1: Running on CPU
1. To build, for example, [MediaPipe Hand](./hand_tracking_mobile_gpu.md), run:
```bash
bazel build -c opt --define MEDIAPIPE_DISABLE_GPU=1 mediapipe/examples/desktop/hand_tracking:hand_tracking_cpu
```
This will open up your webcam as long as it is connected and on. Any error
is likely due to your webcam not being accessible.
2. To run the application:
```bash
GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/hand_tracking/hand_tracking_cpu \
--calculator_graph_config_file=mediapipe/graphs/hand_tracking/hand_tracking_desktop_live.pbtxt
```
### Option 2: Running on GPU
Note: This currently works only on Linux; please first follow
[OpenGL ES Setup on Linux Desktop](./gpu.md#opengl-es-setup-on-linux-desktop).
1. To build, for example, [MediaPipe Hand](./hand_tracking_mobile_gpu.md), run:
```bash
bazel build -c opt --copt -DMESA_EGL_NO_X11_HEADERS --copt -DEGL_NO_X11 \
mediapipe/examples/desktop/hand_tracking:hand_tracking_gpu
```
This will open up your webcam as long as it is connected and on. Any error
is likely due to your webcam not being accessible, or GPU drivers not being
set up properly.
2. To run the application:
```bash
GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/hand_tracking/hand_tracking_gpu \
--calculator_graph_config_file=mediapipe/graphs/hand_tracking/hand_tracking_mobile.pbtxt
```

Binary file not shown.

View File

@ -21,7 +21,7 @@ adb install bazel-bin/mediapipe/examples/android/src/java/com/google/mediapipe/a
[Source](https://github.com/google/mediapipe/tree/master/mediapipe/examples/ios/facedetectioncpu).
See the general [instructions](./mediapipe_ios_setup.md) for building iOS
See the general [instructions](./building_examples.md#ios) for building iOS
examples and generating an Xcode project. This will be the FaceDetectionCpuApp
target.

View File

@ -21,7 +21,7 @@ adb install bazel-bin/mediapipe/examples/android/src/java/com/google/mediapipe/a
[Source](https://github.com/google/mediapipe/tree/master/mediapipe/examples/ios/facedetectiongpu).
See the general [instructions](./mediapipe_ios_setup.md) for building iOS
See the general [instructions](./building_examples.md#ios) for building iOS
examples and generating an Xcode project. This will be the FaceDetectionGpuApp
target.

View File

@ -40,7 +40,7 @@ adb install bazel-bin/mediapipe/examples/android/src/java/com/google/mediapipe/a
[Source](https://github.com/google/mediapipe/tree/master/mediapipe/examples/ios/facemeshgpu).
See the general [instructions](./mediapipe_ios_setup.md) for building iOS
See the general [instructions](./building_examples.md#ios) for building iOS
examples and generating an Xcode project. This will be the FaceMeshGpuApp
target.

View File

@ -41,7 +41,7 @@ adb install bazel-bin/mediapipe/examples/android/src/java/com/google/mediapipe/a
[Source](https://github.com/google/mediapipe/tree/master/mediapipe/examples/ios/handdetectiongpu).
See the general [instructions](./mediapipe_ios_setup.md) for building iOS
See the general [instructions](./building_examples.md#ios) for building iOS
examples and generating an Xcode project. This will be the HandDetectionGpuApp
target.

View File

@ -129,6 +129,7 @@ node {
output_stream: "LANDMARKS:hand_landmarks"
output_stream: "NORM_RECT:hand_rect_from_landmarks"
output_stream: "PRESENCE:hand_presence"
output_stream: "HANDEDNESS:handedness"
}
# Caches a hand rectangle fed back from HandLandmarkSubgraph, and upon the
@ -171,6 +172,7 @@ node {
input_stream: "LANDMARKS:hand_landmarks"
input_stream: "NORM_RECT:hand_rect"
input_stream: "DETECTIONS:palm_detections"
input_stream: "HANDEDNESS:handedness"
output_stream: "IMAGE:output_video"
}

View File

@ -1,725 +1,154 @@
# Hand Tracking (GPU)
# MediaPipe Hand
This doc focuses on the
[example graph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/hand_tracking/hand_tracking_mobile.pbtxt)
that performs hand tracking with TensorFlow Lite on GPU. It is related to the
[hand detection example](./hand_detection_mobile_gpu.md), and we recommend that
users review the hand detection example first.
## Overview
For overall context on hand detection and hand tracking, please read this
[Google AI Blog post](https://mediapipe.page.link/handgoogleaiblog).
The ability to perceive the shape and motion of hands can be a vital component
in improving the user experience across a variety of technological domains and
platforms. For example, it can form the basis for sign language understanding
and hand gesture control, and can also enable the overlay of digital content and
information on top of the physical world in augmented reality. While coming
naturally to people, robust real-time hand perception is a decidedly challenging
computer vision task, as hands often occlude themselves or each other (e.g.
finger/palm occlusions and hand shakes) and lack high contrast patterns.
![hand_tracking_android_gpu.gif](images/mobile/hand_tracking_android_gpu.gif)
In the visualization above, the red dots represent the localized hand landmarks,
and the green lines are simply connections between selected landmark pairs for
visualization of the hand skeleton. The red box represents a hand rectangle that
covers the entire hand, derived either from hand detection (see
[hand detection example](./hand_detection_mobile_gpu.md)) or from the previous
round of hand landmark localization using an ML model (see also
[model card](https://mediapipe.page.link/handmc)). Hand landmark localization is
performed only within the hand rectangle for computational efficiency and
accuracy, and hand detection is only invoked when landmark localization could
not identify hand presence in the previous iteration.
The example can also run in a mode that localizes hand landmarks in 3D (i.e.,
estimating an extra z coordinate):
MediaPipe Hand is a high-fidelity hand and finger tracking solution. It employs
machine learning (ML) to infer 21 3D landmarks of a hand from just a single
frame. Whereas current state-of-the-art approaches rely primarily on powerful
desktop environments for inference, our method achieves real-time performance on
a mobile phone, and even scales to multiple hands. We hope that providing this
hand perception functionality to the wider research and development community
will result in an emergence of creative use cases, stimulating new applications
and new research avenues.
![hand_tracking_3d_android_gpu.gif](images/mobile/hand_tracking_3d_android_gpu.gif)
In the visualization above, the localized hand landmarks are represented by dots
in different shades, with the brighter ones denoting landmarks closer to the
camera.
## Android
[Source](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/handtrackinggpu)
An arm64 APK can be
[downloaded here](https://drive.google.com/open?id=1uCjS0y0O0dTDItsMh8x2cf4-l3uHW1vE),
and a version running the 3D mode can be
[downloaded here](https://drive.google.com/open?id=1tGgzOGkcZglJO2i7e8NKSxJgVtJYS3ka).
To build the app yourself, run:
```bash
bazel build -c opt --config=android_arm64 mediapipe/examples/android/src/java/com/google/mediapipe/apps/handtrackinggpu
```
To build for the 3D mode, run:
```bash
bazel build -c opt --config=android_arm64 --define 3D=true mediapipe/examples/android/src/java/com/google/mediapipe/apps/handtrackinggpu
```
Once the app is built, install it on an Android device with:
```bash
adb install bazel-bin/mediapipe/examples/android/src/java/com/google/mediapipe/apps/handtrackinggpu/handtrackinggpu.apk
```
## iOS
[Source](https://github.com/google/mediapipe/tree/master/mediapipe/examples/ios/handtrackinggpu).
See the general [instructions](./mediapipe_ios_setup.md) for building iOS
examples and generating an Xcode project. This will be the HandTrackingGpuApp
target.
To build on the command line:
```bash
bazel build -c opt --config=ios_arm64 mediapipe/examples/ios/handtrackinggpu:HandTrackingGpuApp
```
To build for the 3D mode, run:
```bash
bazel build -c opt --config=ios_arm64 --define 3D=true mediapipe/examples/ios/handtrackinggpu:HandTrackingGpuApp
```
## Graph
The hand tracking [main graph](#main-graph) internally utilizes a
[hand detection subgraph](#hand-detection-subgraph), a
[hand landmark subgraph](#hand-landmark-subgraph) and a
[renderer subgraph](#renderer-subgraph).
The subgraphs show up in the main graph visualization as nodes colored in
purple, and the subgraph itself can also be visualized just like a regular
graph. For more information on how to visualize a graph that includes subgraphs,
see the Visualizing Subgraphs section in the
[visualizer documentation](./visualizer.md).
### Main Graph
![hand_tracking_mobile_graph](images/mobile/hand_tracking_mobile.png)
[Source pbtxt file](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/hand_tracking/hand_tracking_mobile.pbtxt)
```bash
# MediaPipe graph that performs hand tracking with TensorFlow Lite on GPU.
# Used in the examples in
# mediapipe/examples/android/src/java/com/mediapipe/apps/handtrackinggpu and
# mediapipe/examples/ios/handtrackinggpu.
# Images coming into and out of the graph.
input_stream: "input_video"
output_stream: "output_video"
# Throttles the images flowing downstream for flow control. It passes through
# the very first incoming image unaltered, and waits for downstream nodes
# (calculators and subgraphs) in the graph to finish their tasks before it
# passes through another image. All images that come in while waiting are
# dropped, limiting the number of in-flight images in most parts of the graph to
# 1. This prevents the downstream nodes from queuing up incoming images and data
# excessively, which leads to increased latency and memory usage, unwanted in
# real-time mobile applications. It also eliminates unnecessary computation,
# e.g., the output produced by a node may get dropped downstream if the
# subsequent nodes are still busy processing previous inputs.
node {
calculator: "FlowLimiterCalculator"
input_stream: "input_video"
input_stream: "FINISHED:hand_rect"
input_stream_info: {
tag_index: "FINISHED"
back_edge: true
}
output_stream: "throttled_input_video"
}
# Caches a hand-presence decision fed back from HandLandmarkSubgraph, and upon
# the arrival of the next input image sends out the cached decision with the
# timestamp replaced by that of the input image, essentially generating a packet
# that carries the previous hand-presence decision. Note that upon the arrival
# of the very first input image, an empty packet is sent out to jump start the
# feedback loop.
node {
calculator: "PreviousLoopbackCalculator"
input_stream: "MAIN:throttled_input_video"
input_stream: "LOOP:hand_presence"
input_stream_info: {
tag_index: "LOOP"
back_edge: true
}
output_stream: "PREV_LOOP:prev_hand_presence"
}
# Drops the incoming image if HandLandmarkSubgraph was able to identify hand
# presence in the previous image. Otherwise, passes the incoming image through
# to trigger a new round of hand detection in HandDetectionSubgraph.
node {
calculator: "GateCalculator"
input_stream: "throttled_input_video"
input_stream: "DISALLOW:prev_hand_presence"
output_stream: "hand_detection_input_video"
node_options: {
[type.googleapis.com/mediapipe.GateCalculatorOptions] {
empty_packets_as_allow: true
}
}
}
# Subgraph that detects hands (see hand_detection_gpu.pbtxt).
node {
calculator: "HandDetectionSubgraph"
input_stream: "hand_detection_input_video"
output_stream: "DETECTIONS:palm_detections"
output_stream: "NORM_RECT:hand_rect_from_palm_detections"
}
# Subgraph that localizes hand landmarks (see hand_landmark_gpu.pbtxt).
node {
calculator: "HandLandmarkSubgraph"
input_stream: "IMAGE:throttled_input_video"
input_stream: "NORM_RECT:hand_rect"
output_stream: "LANDMARKS:hand_landmarks"
output_stream: "NORM_RECT:hand_rect_from_landmarks"
output_stream: "PRESENCE:hand_presence"
}
# Caches a hand rectangle fed back from HandLandmarkSubgraph, and upon the
# arrival of the next input image sends out the cached rectangle with the
# timestamp replaced by that of the input image, essentially generating a packet
# that carries the previous hand rectangle. Note that upon the arrival of the
# very first input image, an empty packet is sent out to jump start the
# feedback loop.
node {
calculator: "PreviousLoopbackCalculator"
input_stream: "MAIN:throttled_input_video"
input_stream: "LOOP:hand_rect_from_landmarks"
input_stream_info: {
tag_index: "LOOP"
back_edge: true
}
output_stream: "PREV_LOOP:prev_hand_rect_from_landmarks"
}
# Merges a stream of hand rectangles generated by HandDetectionSubgraph and that
# generated by HandLandmarkSubgraph into a single output stream by selecting
between one of the two streams. The former is selected if the incoming packet
# is not empty, i.e., hand detection is performed on the current image by
# HandDetectionSubgraph (because HandLandmarkSubgraph could not identify hand
# presence in the previous image). Otherwise, the latter is selected, which is
never empty because HandLandmarkSubgraph processes all images (that went
through FlowLimiterCalculator).
node {
calculator: "MergeCalculator"
input_stream: "hand_rect_from_palm_detections"
input_stream: "prev_hand_rect_from_landmarks"
output_stream: "hand_rect"
}
# Subgraph that renders annotations and overlays them on top of the input
# images (see renderer_gpu.pbtxt).
node {
calculator: "RendererSubgraph"
input_stream: "IMAGE:throttled_input_video"
input_stream: "LANDMARKS:hand_landmarks"
input_stream: "NORM_RECT:hand_rect"
input_stream: "DETECTIONS:palm_detections"
output_stream: "IMAGE:output_video"
}
```
### Hand Detection Subgraph
![hand_detection_gpu_subgraph](images/mobile/hand_detection_gpu_subgraph.png)
[Source pbtxt file](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/hand_tracking/subgraphs/hand_detection_gpu.pbtxt)
```bash
# MediaPipe hand detection subgraph.
type: "HandDetectionSubgraph"
input_stream: "input_video"
output_stream: "DETECTIONS:palm_detections"
output_stream: "NORM_RECT:hand_rect_from_palm_detections"
# Transforms the input image on GPU to a 256x256 image. To scale the input
# image, the scale_mode option is set to FIT to preserve the aspect ratio,
# resulting in potential letterboxing in the transformed image.
node: {
calculator: "ImageTransformationCalculator"
input_stream: "IMAGE_GPU:input_video"
output_stream: "IMAGE_GPU:transformed_input_video"
output_stream: "LETTERBOX_PADDING:letterbox_padding"
node_options: {
[type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
output_width: 256
output_height: 256
scale_mode: FIT
}
}
}
# Generates a single side packet containing a TensorFlow Lite op resolver that
# supports custom ops needed by the model used in this graph.
node {
calculator: "TfLiteCustomOpResolverCalculator"
output_side_packet: "opresolver"
node_options: {
[type.googleapis.com/mediapipe.TfLiteCustomOpResolverCalculatorOptions] {
use_gpu: true
}
}
}
# Converts the transformed input image on GPU into an image tensor stored as a
# TfLiteTensor.
node {
calculator: "TfLiteConverterCalculator"
input_stream: "IMAGE_GPU:transformed_input_video"
output_stream: "TENSORS_GPU:image_tensor"
}
# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a
# vector of tensors representing, for instance, detection boxes/keypoints and
# scores.
node {
calculator: "TfLiteInferenceCalculator"
input_stream: "TENSORS_GPU:image_tensor"
output_stream: "TENSORS:detection_tensors"
input_side_packet: "CUSTOM_OP_RESOLVER:opresolver"
node_options: {
[type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] {
model_path: "palm_detection.tflite"
use_gpu: true
}
}
}
# Generates a single side packet containing a vector of SSD anchors based on
# the specification in the options.
node {
calculator: "SsdAnchorsCalculator"
output_side_packet: "anchors"
node_options: {
[type.googleapis.com/mediapipe.SsdAnchorsCalculatorOptions] {
num_layers: 5
min_scale: 0.1171875
max_scale: 0.75
input_size_height: 256
input_size_width: 256
anchor_offset_x: 0.5
anchor_offset_y: 0.5
strides: 8
strides: 16
strides: 32
strides: 32
strides: 32
aspect_ratios: 1.0
fixed_anchor_size: true
}
}
}
# Decodes the detection tensors generated by the TensorFlow Lite model, based on
# the SSD anchors and the specification in the options, into a vector of
# detections. Each detection describes a detected object.
node {
calculator: "TfLiteTensorsToDetectionsCalculator"
input_stream: "TENSORS:detection_tensors"
input_side_packet: "ANCHORS:anchors"
output_stream: "DETECTIONS:detections"
node_options: {
[type.googleapis.com/mediapipe.TfLiteTensorsToDetectionsCalculatorOptions] {
num_classes: 1
num_boxes: 2944
num_coords: 18
box_coord_offset: 0
keypoint_coord_offset: 4
num_keypoints: 7
num_values_per_keypoint: 2
sigmoid_score: true
score_clipping_thresh: 100.0
reverse_output_order: true
x_scale: 256.0
y_scale: 256.0
h_scale: 256.0
w_scale: 256.0
min_score_thresh: 0.7
}
}
}
# Performs non-max suppression to remove excessive detections.
node {
calculator: "NonMaxSuppressionCalculator"
input_stream: "detections"
output_stream: "filtered_detections"
node_options: {
[type.googleapis.com/mediapipe.NonMaxSuppressionCalculatorOptions] {
min_suppression_threshold: 0.3
overlap_type: INTERSECTION_OVER_UNION
algorithm: WEIGHTED
return_empty_detections: true
}
}
}
# Maps detection label IDs to the corresponding label text ("Palm"). The label
# map is provided in the label_map_path option.
node {
calculator: "DetectionLabelIdToTextCalculator"
input_stream: "filtered_detections"
output_stream: "labeled_detections"
node_options: {
[type.googleapis.com/mediapipe.DetectionLabelIdToTextCalculatorOptions] {
label_map_path: "palm_detection_labelmap.txt"
}
}
}
# Adjusts detection locations (already normalized to [0.f, 1.f]) on the
# letterboxed image (after image transformation with the FIT scale mode) to the
# corresponding locations on the same image with the letterbox removed (the
# input image to the graph before image transformation).
node {
calculator: "DetectionLetterboxRemovalCalculator"
input_stream: "DETECTIONS:labeled_detections"
input_stream: "LETTERBOX_PADDING:letterbox_padding"
output_stream: "DETECTIONS:palm_detections"
}
# Extracts image size from the input images.
node {
calculator: "ImagePropertiesCalculator"
input_stream: "IMAGE_GPU:input_video"
output_stream: "SIZE:image_size"
}
# Converts results of palm detection into a rectangle (normalized by image size)
# that encloses the palm and is rotated such that the line connecting center of
# the wrist and MCP of the middle finger is aligned with the Y-axis of the
# rectangle.
node {
calculator: "DetectionsToRectsCalculator"
input_stream: "DETECTIONS:palm_detections"
input_stream: "IMAGE_SIZE:image_size"
output_stream: "NORM_RECT:palm_rect"
node_options: {
[type.googleapis.com/mediapipe.DetectionsToRectsCalculatorOptions] {
rotation_vector_start_keypoint_index: 0 # Center of wrist.
rotation_vector_end_keypoint_index: 2 # MCP of middle finger.
rotation_vector_target_angle_degrees: 90
output_zero_rect_for_empty_detections: true
}
}
}
# Expands and shifts the rectangle that contains the palm so that it's likely
# to cover the entire hand.
node {
calculator: "RectTransformationCalculator"
input_stream: "NORM_RECT:palm_rect"
input_stream: "IMAGE_SIZE:image_size"
output_stream: "hand_rect_from_palm_detections"
node_options: {
[type.googleapis.com/mediapipe.RectTransformationCalculatorOptions] {
scale_x: 2.6
scale_y: 2.6
shift_y: -0.5
square_long: true
}
}
}
```
### Hand Landmark Subgraph
![hand_landmark_gpu_subgraph.pbtxt](images/mobile/hand_landmark_gpu_subgraph.png)
[Source pbtxt file](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/hand_tracking/subgraphs/hand_landmark_gpu.pbtxt)
```bash
# MediaPipe hand landmark localization subgraph.
type: "HandLandmarkSubgraph"
input_stream: "IMAGE:input_video"
input_stream: "NORM_RECT:hand_rect"
output_stream: "LANDMARKS:hand_landmarks"
output_stream: "NORM_RECT:hand_rect_for_next_frame"
output_stream: "PRESENCE:hand_presence"
# Crops the rectangle that contains a hand from the input image.
node {
calculator: "ImageCroppingCalculator"
input_stream: "IMAGE_GPU:input_video"
input_stream: "NORM_RECT:hand_rect"
output_stream: "IMAGE_GPU:hand_image"
}
# Transforms the input image on GPU to a 256x256 image. To scale the input
# image, the scale_mode option is set to FIT to preserve the aspect ratio,
# resulting in potential letterboxing in the transformed image.
node: {
calculator: "ImageTransformationCalculator"
input_stream: "IMAGE_GPU:hand_image"
output_stream: "IMAGE_GPU:transformed_hand_image"
output_stream: "LETTERBOX_PADDING:letterbox_padding"
node_options: {
[type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
output_width: 256
output_height: 256
scale_mode: FIT
}
}
}
# Converts the transformed input image on GPU into an image tensor stored as a
# TfLiteTensor.
node {
calculator: "TfLiteConverterCalculator"
input_stream: "IMAGE_GPU:transformed_hand_image"
output_stream: "TENSORS_GPU:image_tensor"
}
# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a
# vector of tensors representing, for instance, detection boxes/keypoints and
# scores.
node {
calculator: "TfLiteInferenceCalculator"
input_stream: "TENSORS_GPU:image_tensor"
output_stream: "TENSORS:output_tensors"
node_options: {
[type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] {
model_path: "hand_landmark.tflite"
use_gpu: true
}
}
}
# Splits a vector of tensors into multiple vectors.
node {
calculator: "SplitTfLiteTensorVectorCalculator"
input_stream: "output_tensors"
output_stream: "landmark_tensors"
output_stream: "hand_flag_tensor"
node_options: {
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
ranges: { begin: 0 end: 1 }
ranges: { begin: 1 end: 2 }
}
}
}
# Converts the hand-flag tensor into a float that represents the confidence
# score of hand presence.
node {
calculator: "TfLiteTensorsToFloatsCalculator"
input_stream: "TENSORS:hand_flag_tensor"
output_stream: "FLOAT:hand_presence_score"
}
# Applies a threshold to the confidence score to determine whether a hand is
# present.
node {
calculator: "ThresholdingCalculator"
input_stream: "FLOAT:hand_presence_score"
output_stream: "FLAG:hand_presence"
node_options: {
[type.googleapis.com/mediapipe.ThresholdingCalculatorOptions] {
threshold: 0.1
}
}
}
# Decodes the landmark tensors into a vector of landmarks, where the landmark
# coordinates are normalized by the size of the input image to the model.
node {
calculator: "TfLiteTensorsToLandmarksCalculator"
input_stream: "TENSORS:landmark_tensors"
output_stream: "NORM_LANDMARKS:landmarks"
node_options: {
[type.googleapis.com/mediapipe.TfLiteTensorsToLandmarksCalculatorOptions] {
num_landmarks: 21
input_image_width: 256
input_image_height: 256
}
}
}
# Adjusts landmarks (already normalized to [0.f, 1.f]) on the letterboxed hand
# image (after image transformation with the FIT scale mode) to the
# corresponding locations on the same image with the letterbox removed (hand
# image before image transformation).
node {
calculator: "LandmarkLetterboxRemovalCalculator"
input_stream: "LANDMARKS:landmarks"
input_stream: "LETTERBOX_PADDING:letterbox_padding"
output_stream: "LANDMARKS:scaled_landmarks"
}
# Projects the landmarks from the cropped hand image to the corresponding
# locations on the full image before cropping (input to the graph).
node {
calculator: "LandmarkProjectionCalculator"
input_stream: "NORM_LANDMARKS:scaled_landmarks"
input_stream: "NORM_RECT:hand_rect"
output_stream: "NORM_LANDMARKS:hand_landmarks"
}
# Extracts image size from the input images.
node {
calculator: "ImagePropertiesCalculator"
input_stream: "IMAGE_GPU:input_video"
output_stream: "SIZE:image_size"
}
# Converts hand landmarks to a detection that tightly encloses all landmarks.
node {
calculator: "LandmarksToDetectionCalculator"
input_stream: "NORM_LANDMARKS:hand_landmarks"
output_stream: "DETECTION:hand_detection"
}
# Converts the hand detection into a rectangle (normalized by image size)
# that encloses the hand and is rotated such that the line connecting center of
# the wrist and MCP of the middle finger is aligned with the Y-axis of the
# rectangle.
node {
calculator: "DetectionsToRectsCalculator"
input_stream: "DETECTION:hand_detection"
input_stream: "IMAGE_SIZE:image_size"
output_stream: "NORM_RECT:hand_rect_from_landmarks"
node_options: {
[type.googleapis.com/mediapipe.DetectionsToRectsCalculatorOptions] {
rotation_vector_start_keypoint_index: 0 # Center of wrist.
rotation_vector_end_keypoint_index: 9 # MCP of middle finger.
rotation_vector_target_angle_degrees: 90
}
}
}
# Expands the hand rectangle so that in the next video frame it's likely to
# still contain the hand even with some motion.
node {
calculator: "RectTransformationCalculator"
input_stream: "NORM_RECT:hand_rect_from_landmarks"
input_stream: "IMAGE_SIZE:image_size"
output_stream: "hand_rect_for_next_frame"
node_options: {
[type.googleapis.com/mediapipe.RectTransformationCalculatorOptions] {
scale_x: 1.6
scale_y: 1.6
square_long: true
}
}
}
```
### Renderer Subgraph
![hand_renderer_gpu_subgraph.pbtxt](images/mobile/hand_renderer_gpu_subgraph.png)
[Source pbtxt file](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/hand_tracking/subgraphs/renderer_gpu.pbtxt)
```bash
# MediaPipe hand tracking rendering subgraph.
type: "RendererSubgraph"
input_stream: "IMAGE:input_image"
input_stream: "DETECTIONS:detections"
input_stream: "LANDMARKS:landmarks"
input_stream: "NORM_RECT:rect"
output_stream: "IMAGE:output_image"
# Converts detections to drawing primitives for annotation overlay.
node {
calculator: "DetectionsToRenderDataCalculator"
input_stream: "DETECTIONS:detections"
output_stream: "RENDER_DATA:detection_render_data"
node_options: {
[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
thickness: 4.0
color { r: 0 g: 255 b: 0 }
}
}
}
# Converts landmarks to drawing primitives for annotation overlay.
node {
calculator: "LandmarksToRenderDataCalculator"
input_stream: "NORM_LANDMARKS:landmarks"
output_stream: "RENDER_DATA:landmark_render_data"
node_options: {
[type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
landmark_connections: 0
landmark_connections: 1
landmark_connections: 1
landmark_connections: 2
landmark_connections: 2
landmark_connections: 3
landmark_connections: 3
landmark_connections: 4
landmark_connections: 0
landmark_connections: 5
landmark_connections: 5
landmark_connections: 6
landmark_connections: 6
landmark_connections: 7
landmark_connections: 7
landmark_connections: 8
landmark_connections: 5
landmark_connections: 9
landmark_connections: 9
landmark_connections: 10
landmark_connections: 10
landmark_connections: 11
landmark_connections: 11
landmark_connections: 12
landmark_connections: 9
landmark_connections: 13
landmark_connections: 13
landmark_connections: 14
landmark_connections: 14
landmark_connections: 15
landmark_connections: 15
landmark_connections: 16
landmark_connections: 13
landmark_connections: 17
landmark_connections: 0
landmark_connections: 17
landmark_connections: 17
landmark_connections: 18
landmark_connections: 18
landmark_connections: 19
landmark_connections: 19
landmark_connections: 20
landmark_color { r: 255 g: 0 b: 0 }
connection_color { r: 0 g: 255 b: 0 }
thickness: 4.0
}
}
}
# Converts normalized rects to drawing primitives for annotation overlay.
node {
calculator: "RectToRenderDataCalculator"
input_stream: "NORM_RECT:rect"
output_stream: "RENDER_DATA:rect_render_data"
node_options: {
[type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] {
filled: false
color { r: 255 g: 0 b: 0 }
thickness: 4.0
}
}
}
# Draws annotations and overlays them on top of the input images.
node {
calculator: "AnnotationOverlayCalculator"
input_stream: "IMAGE_GPU:input_image"
input_stream: "detection_render_data"
input_stream: "landmark_render_data"
input_stream: "rect_render_data"
output_stream: "IMAGE_GPU:output_image"
}
```
*Fig 1. Tracked 3D hand landmarks are represented by dots in different shades,
with the brighter ones denoting landmarks closer to the camera.*
## ML Pipeline
MediaPipe Hand utilizes an ML pipeline consisting of multiple models working
together: a palm detection model that operates on the full image and returns an
oriented hand bounding box, and a hand landmark model that operates on the
cropped image region defined by the palm detector and returns high-fidelity 3D
hand keypoints. This architecture is similar to that employed by our recently
released [MediaPipe Face Mesh](./face_mesh_mobile_gpu.md) solution.
Providing the accurately cropped hand image to the hand landmark model
drastically reduces the need for data augmentation (e.g. rotations, translation
and scale) and instead allows the network to dedicate most of its capacity
towards coordinate prediction accuracy. In addition, in our pipeline the crops
can also be generated based on the hand landmarks identified in the previous
frame, and only when the landmark model could no longer identify hand presence
is palm detection invoked to relocalize the hand.
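
The per-frame control flow described above can be summarized in a short sketch.
This is an illustrative Python outline rather than MediaPipe API code:
`detect_palm` and `predict_landmarks` are hypothetical stand-ins for the two
models, and the 0.1 presence threshold simply mirrors the
`ThresholdingCalculator` option shown in the hand landmark subgraph.

```python
# Illustrative sketch of the detection-then-tracking loop, not MediaPipe code.
# detect_palm(frame) -> hand crop rectangle or None.
# predict_landmarks(frame, rect) -> (landmarks, presence score, next rect).

def track_hand(frames, detect_palm, predict_landmarks, presence_threshold=0.1):
    """Yields per-frame landmarks, running palm detection only when needed."""
    hand_rect = None  # crop region carried over from the previous frame
    for frame in frames:
        if hand_rect is None:
            # No hand in the previous frame (or first frame): run the palm
            # detector on the full image to (re)localize the hand.
            hand_rect = detect_palm(frame)
            if hand_rect is None:
                yield None  # no hand found in this frame
                continue
        # The landmark model runs only inside the crop and reports a
        # confidence score for hand presence.
        landmarks, presence, next_rect = predict_landmarks(frame, hand_rect)
        if presence < presence_threshold:
            hand_rect = None  # hand lost: re-run palm detection next frame
            yield None
        else:
            hand_rect = next_rect  # reuse the landmark-derived crop next frame
            yield landmarks
```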
The pipeline is implemented as a MediaPipe
[graph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/hand_tracking/hand_tracking_mobile.pbtxt),
which internally utilizes a
[palm/hand detection subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/hand_tracking/subgraphs/hand_detection_gpu.pbtxt),
a
[hand landmark subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/hand_tracking/subgraphs/hand_landmark_gpu.pbtxt)
and a
[renderer subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/hand_tracking/subgraphs/renderer_gpu.pbtxt).
For more information on how to visualize a graph and its associated subgraphs,
please see the [visualizer documentation](./visualizer.md).
## Models
### Palm Detection Model
To detect initial hand locations, we designed a
[single-shot detector](https://arxiv.org/abs/1512.02325) model optimized for
mobile real-time uses in a manner similar to the face detection model in
[MediaPipe Face Mesh](./face_mesh_mobile_gpu.md). Detecting hands is a decidedly
complex task: our model has to work across a variety of hand sizes with a large
scale span (~20x) relative to the image frame and be able to detect occluded and
self-occluded hands. Whereas faces have high contrast patterns, e.g., in the eye
and mouth region, the lack of such features in hands makes it comparatively
difficult to detect them reliably from their visual features alone. Instead,
providing additional context, like arm, body, or person features, aids accurate
hand localization.
Our method addresses the above challenges using different strategies. First, we
train a palm detector instead of a hand detector, since estimating bounding
boxes of rigid objects like palms and fists is significantly simpler than
detecting hands with articulated fingers. In addition, as palms are smaller
objects, the non-maximum suppression algorithm works well even for two-hand
self-occlusion cases, like handshakes. Moreover, palms can be modelled using
square bounding boxes (anchors in ML terminology) ignoring other aspect ratios,
and therefore reducing the number of anchors by a factor of 3-5. Second, an
encoder-decoder feature extractor is used for bigger scene context awareness
even for small objects (similar to the RetinaNet approach). Lastly, we minimize
the focal loss during training to support a large amount of anchors resulting
from the high scale variance.
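
For reference, here is a minimal NumPy sketch of the binary focal loss
mentioned above (the loss introduced with RetinaNet). The `alpha` and `gamma`
values are illustrative defaults, not the values used to train the palm
detector.

```python
import numpy as np

def binary_focal_loss(logits, labels, alpha=0.25, gamma=2.0):
    """Sketch of focal loss for binary palm/no-palm anchor classification.

    logits: raw score per anchor; labels: 1 for palm anchors, 0 otherwise.
    alpha and gamma are illustrative defaults, not the trained model's values.
    """
    logits = np.asarray(logits, dtype=np.float64)
    labels = np.asarray(labels)
    p = 1.0 / (1.0 + np.exp(-logits))           # sigmoid score per anchor
    p_t = np.where(labels == 1, p, 1.0 - p)     # probability of the true class
    alpha_t = np.where(labels == 1, alpha, 1.0 - alpha)
    # (1 - p_t)^gamma down-weights the many easy negatives so that the large
    # number of background anchors does not dominate the loss.
    loss = -alpha_t * (1.0 - p_t) ** gamma * np.log(np.clip(p_t, 1e-7, 1.0))
    return loss.mean()
```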
With the above techniques, we achieve an average precision of 95.7% in palm
detection. Using a regular cross entropy loss and no decoder gives a baseline of
just 86.22%.
### Hand Landmark Model
After the palm detection over the whole image, our subsequent hand landmark
model performs precise keypoint localization of 21 3D hand-knuckle coordinates
inside the detected hand regions via regression, that is, direct coordinate
prediction.
The model learns a consistent internal hand pose representation and is robust
even to partially visible hands and self-occlusions.
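
Conceptually, the regression output can be viewed as a flat vector of 21 x 3
values. The sketch below shows one plausible way to decode such a tensor into
normalized landmarks, mirroring the `TfLiteTensorsToLandmarksCalculator`
configuration shown earlier (21 landmarks, 256x256 model input); the exact
tensor layout is an assumption for illustration, not the calculator's actual
implementation.

```python
import numpy as np

def decode_landmarks(raw, input_width=256, input_height=256, num_landmarks=21):
    """Sketch: turn a flat (63,) regression output into normalized landmarks.

    Assumes the tensor stores x, y, z per landmark in pixel units of the
    256x256 model input, matching the subgraph configuration shown earlier;
    the real calculator may differ in layout details.
    """
    pts = np.asarray(raw, dtype=np.float32).reshape(num_landmarks, 3).copy()
    pts[:, 0] /= input_width    # x normalized to [0, 1] of the hand crop
    pts[:, 1] /= input_height   # y normalized to [0, 1] of the hand crop
    pts[:, 2] /= input_width    # z (relative depth) uses the same scale as x
    return pts  # still crop-relative; project back to the full image afterwards
```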
To obtain ground truth data, we have manually annotated ~30K real-world images
with 21 3D coordinates, as shown below (the Z-value is taken from the image
depth map, if it exists for the corresponding coordinate). To better cover the
possible hand poses
and provide additional supervision on the nature of hand geometry, we also
render a high-quality synthetic hand model over various backgrounds and map it
to the corresponding 3D coordinates.
![hand_crops.png](images/mobile/hand_crops.png)
*Fig 2. Top: Aligned hand crops passed to the tracking network with ground truth
annotation. Bottom: Rendered synthetic hand images with ground truth
annotation.*
## Example Apps
Please see the [general instructions](./building_examples.md) for how to build
MediaPipe examples for different platforms.
#### Main Example
* Android:
[Source](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/handtrackinggpu),
[Prebuilt ARM64 APK](https://drive.google.com/open?id=1uCjS0y0O0dTDItsMh8x2cf4-l3uHW1vE)
* iOS:
[Source](https://github.com/google/mediapipe/tree/master/mediapipe/examples/ios/handtrackinggpu)
* Desktop:
[Source](https://github.com/google/mediapipe/tree/master/mediapipe/examples/desktop/hand_tracking)
#### With Multi-hand Support
* Android:
[Source](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/multihandtrackinggpu),
[Prebuilt ARM64 APK](https://drive.google.com/open?id=1Wk6V9EVaz1ks_MInPqqVGvvJD01SGXDc)
* iOS:
[Source](https://github.com/google/mediapipe/tree/master/mediapipe/examples/ios/multihandtrackinggpu)
* Desktop:
[Source](https://github.com/google/mediapipe/tree/master/mediapipe/examples/desktop/multi_hand_tracking)
#### Palm/Hand Detection Only (no landmarks)
* Android:
[Source](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/handdetectiongpu),
[Prebuilt ARM64 APK](https://drive.google.com/open?id=1qUlTtH7Ydg-wl_H6VVL8vueu2UCTu37E)
* iOS:
[Source](https://github.com/google/mediapipe/tree/master/mediapipe/examples/ios/handdetectiongpu)
## Resources
* [Google AI Blog: On-Device, Real-Time Hand Tracking with MediaPipe](https://ai.googleblog.com/2019/08/on-device-real-time-hand-tracking-with.html)
* [TensorFlow Blog: Face and hand tracking in the browser with MediaPipe and
TensorFlow.js](https://blog.tensorflow.org/2020/03/face-and-hand-tracking-in-browser-with-mediapipe-and-tensorflowjs.html)
* Palm detection model:
[TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/models/palm_detection.tflite),
[TF.js model](https://tfhub.dev/mediapipe/handdetector/1)
* Hand landmark model:
[TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/models/hand_landmark.tflite),
[TF.js model](https://tfhub.dev/mediapipe/handskeleton/1)
* [Model card](https://mediapipe.page.link/handmc)

View File

@ -32,7 +32,7 @@ We will be using the following graph, [`edge_detection_mobile_gpu.pbtxt`]:
```
# MediaPipe graph that performs GPU Sobel edge detection on a live video stream.
# Used in the examples
# mediapipe/examples/android/src/java/com/mediapipe/apps/edgedetectiongpu.
# mediapipe/examples/android/src/java/com/mediapipe/apps/basic.
# mediapipe/examples/ios/edgedetectiongpu.
# Images coming into and out of the graph.
@ -80,15 +80,15 @@ applications using `bazel`.
Create a new directory where you will create your Android application. For
example, the complete code of this tutorial can be found at
`mediapipe/examples/android/src/java/com/google/mediapipe/apps/edgedetectiongpu`.
We will refer to this path as `$APPLICATION_PATH` throughout the codelab.
`mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic`. We
will refer to this path as `$APPLICATION_PATH` throughout the codelab.
Note that in the path to the application:
* The application is named `edgedetectiongpu`.
* The application is named `helloworld`.
* The `$PACKAGE_PATH` of the application is
`com.google.mediapipe.apps.edgdetectiongpu`. This is used in code snippets in
this tutorial, so please remember to use your own `$PACKAGE_PATH` when you
`com.google.mediapipe.apps.basic`. This is used in code snippets in this
tutorial, so please remember to use your own `$PACKAGE_PATH` when you
copy/use the code snippets.
Add a file `activity_main.xml` to `$APPLICATION_PATH/res/layout`. This displays
@ -119,7 +119,7 @@ Add a simple `MainActivity.java` to `$APPLICATION_PATH` which loads the content
of the `activity_main.xml` layout as shown below:
```
package com.google.mediapipe.apps.edgedetectiongpu;
package com.google.mediapipe.apps.basic;
import android.os.Bundle;
import androidx.appcompat.app.AppCompatActivity;
@ -141,7 +141,7 @@ launches `MainActivity` on application start:
```
<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
package="com.google.mediapipe.apps.edgedetectiongpu">
package="com.google.mediapipe.apps.basic">
<uses-sdk
android:minSdkVersion="19"
@ -149,11 +149,11 @@ launches `MainActivity` on application start:
<application
android:allowBackup="true"
android:label="@string/app_name"
android:label="${appName}"
android:supportsRtl="true"
android:theme="@style/AppTheme">
<activity
android:name=".MainActivity"
android:name="${mainActivity}"
android:exported="true"
android:screenOrientation="portrait">
<intent-filter>
@ -166,17 +166,8 @@ launches `MainActivity` on application start:
</manifest>
```
To get `@string/app_name`, we need to add a file `strings.xml` to
`$APPLICATION_PATH/res/values/`:
```
<resources>
<string name="app_name" translatable="false">Edge Detection GPU</string>
</resources>
```
Also, in our application we are using a `Theme.AppCompat` theme in the app, so
we need appropriate theme references. Add `colors.xml` to
In our application we are using a `Theme.AppCompat` theme in the app, so we need
appropriate theme references. Add `colors.xml` to
`$APPLICATION_PATH/res/values/`:
```
@ -204,11 +195,13 @@ Add `styles.xml` to `$APPLICATION_PATH/res/values/`:
</resources>
```
To build the application, add a `BUILD` file to `$APPLICATION_PATH`:
To build the application, add a `BUILD` file to `$APPLICATION_PATH`, and
`${appName}` and `${mainActivity}` in the manifest will be replaced by strings
specified in `BUILD` as shown below.
```
android_library(
name = "mediapipe_lib",
name = "basic_lib",
srcs = glob(["*.java"]),
manifest = "AndroidManifest.xml",
resource_files = glob(["res/**"]),
@ -219,34 +212,36 @@ android_library(
)
android_binary(
name = "edgedetectiongpu",
aapt_version = "aapt2",
name = "helloworld",
manifest = "AndroidManifest.xml",
manifest_values = {"applicationId": "com.google.mediapipe.apps.edgedetectiongpu"},
manifest_values = {
"applicationId": "com.google.mediapipe.apps.basic",
"appName": "Hello World",
"mainActivity": ".MainActivity",
},
multidex = "native",
deps = [
":mediapipe_lib",
":basic_lib",
],
)
```
The `android_library` rule adds dependencies for `MainActivity`, resource files
and `AndroidManifest.xml`.
The `android_binary` rule uses the `mediapipe_lib` Android library generated to
The `android_binary` rule uses the `basic_lib` Android library generated to
build a binary APK for installation on your Android device.
To build the app, use the following command:
```
bazel build -c opt --config=android_arm64 $APPLICATION_PATH
bazel build -c opt --config=android_arm64 $APPLICATION_PATH:helloworld
```
Install the generated APK file using `adb install`. For example:
```
adb install bazel-bin/$APPLICATION_PATH/edgedetectiongpu.apk
adb install bazel-bin/$APPLICATION_PATH/helloworld.apk
```
Open the application on your device. It should display a screen with the text
@ -438,22 +433,58 @@ visible so that we can start seeing frames from the `previewFrameTexture`.
However, before starting the camera, we need to decide which camera we want to
use. [`CameraXPreviewHelper`] inherits from [`CameraHelper`] which provides two
options, `FRONT` and `BACK`. We will use `BACK` camera for this application to
perform edge detection on a live scene that we view from the camera.
options, `FRONT` and `BACK`. We can pass in the decision from the `BUILD` file
as metadata such that no code change is required to build another version of
the app using a different camera.
Add the following line to define `CAMERA_FACING` for our application,
Assuming we want to use `BACK` camera to perform edge detection on a live scene
that we view from the camera, add the metadata into `AndroidManifest.xml`:
```
private static final CameraHelper.CameraFacing CAMERA_FACING = CameraHelper.CameraFacing.BACK;
...
<meta-data android:name="cameraFacingFront" android:value="${cameraFacingFront}"/>
</application>
</manifest>
```
`CAMERA_FACING` is a static variable as we will use the same camera throughout
the application from start to finish.
and specify the selection in `BUILD` in the `helloworld` android binary rule
with a new entry in `manifest_values`:
```
manifest_values = {
"applicationId": "com.google.mediapipe.apps.basic",
"appName": "Hello World",
"mainActivity": ".MainActivity",
"cameraFacingFront": "False",
},
```
Now, in `MainActivity`, to retrieve the metadata specified in `manifest_values`,
add an [`ApplicationInfo`] object:
```
private ApplicationInfo applicationInfo;
```
In the `onCreate()` function, add:
```
try {
applicationInfo =
getPackageManager().getApplicationInfo(getPackageName(), PackageManager.GET_META_DATA);
} catch (NameNotFoundException e) {
Log.e(TAG, "Cannot find application info: " + e);
}
```
Now add the following line at the end of the `startCamera()` function:
```
cameraHelper.startCamera(this, CAMERA_FACING, /*surfaceTexture=*/ null);
CameraHelper.CameraFacing cameraFacing =
applicationInfo.metaData.getBoolean("cameraFacingFront", false)
? CameraHelper.CameraFacing.FRONT
: CameraHelper.CameraFacing.BACK;
cameraHelper.startCamera(this, cameraFacing, /*surfaceTexture=*/ null);
```
At this point, the application should build successfully. However, when you run
@ -595,24 +626,13 @@ build rule:
MediaPipe graphs are `.pbtxt` files, but to use them in the application, we need
to use the `mediapipe_binary_graph` build rule to generate a `.binarypb` file.
We can then use an application specific alias for the graph via the `genrule`
build rule. Add the following `genrule` to use an alias for the edge detection
graph:
```
genrule(
name = "binary_graph",
srcs = ["//mediapipe/graphs/edge_detection:mobile_gpu_binary_graph"],
outs = ["edgedetectiongpu.binarypb"],
cmd = "cp $< $@",
)
```
Then in the `mediapipe_lib` build rule, add assets:
In the `helloworld` android binary build rule, add the `mediapipe_binary_graph`
target specific to the graph as an asset:
```
assets = [
":binary_graph",
"//mediapipe/graphs/edge_detection:mobile_gpu_binary_graph",
],
assets_dir = "",
```
@ -620,6 +640,26 @@ assets_dir = "",
In the `assets` build rule, you can also add other assets such as TensorFlowLite
models used in your graph.
In addition, add additional `manifest_values` for properties specific to the
graph, to be later retrieved in `MainActivity`:
```
manifest_values = {
"applicationId": "com.google.mediapipe.apps.basic",
"appName": "Hello World",
"mainActivity": ".MainActivity",
"cameraFacingFront": "False",
"binaryGraphName": "mobile_gpu.binarypb",
"inputVideoStreamName": "input_video",
"outputVideoStreamName": "output_video",
},
```
Note that `binaryGraphName` indicates the filename of the binary graph,
determined by the `output_name` field in the `mediapipe_binary_graph` target.
`inputVideoStreamName` and `outputVideoStreamName` are the input and output
video stream name specified in the graph respectively.
Now, the `MainActivity` needs to load the MediaPipe framework. Also, the
framework uses OpenCV, so `MainActivity` should also load `OpenCV`. Use the
following code in `MainActivity` (inside the class, but not inside any function)
@ -648,15 +688,6 @@ Initialize the asset manager in `onCreate(Bundle)` before initializing
AndroidAssetUtil.initializeNativeAssetManager(this);
```
Declare a static variable with the graph name, the name of the input stream and
the name of the output stream:
```
private static final String BINARY_GRAPH_NAME = "edgedetectiongpu.binarypb";
private static final String INPUT_VIDEO_STREAM_NAME = "input_video";
private static final String OUTPUT_VIDEO_STREAM_NAME = "output_video";
```
Now, we need to set up a [`FrameProcessor`] object that sends camera frames
prepared by the `converter` to the MediaPipe graph and runs the graph, prepares
the output and then updates the `previewDisplayView` to display the output. Add
@ -673,9 +704,9 @@ processor =
new FrameProcessor(
this,
eglManager.getNativeContext(),
BINARY_GRAPH_NAME,
INPUT_VIDEO_STREAM_NAME,
OUTPUT_VIDEO_STREAM_NAME);
applicationInfo.metaData.getString("binaryGraphName"),
applicationInfo.metaData.getString("inputVideoStreamName"),
applicationInfo.metaData.getString("outputVideoStreamName"));
```
The `processor` needs to consume the converted frames from the `converter` for
@ -712,8 +743,9 @@ feed! Congrats!
![edge_detection_android_gpu_gif](images/mobile/edge_detection_android_gpu.gif)
If you ran into any issues, please see the full code of the tutorial
[here](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/edgedetectiongpu).
[here](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic).
[`ApplicationInfo`]:https://developer.android.com/reference/android/content/pm/ApplicationInfo
[`AndroidAssetUtil`]:https://github.com/google/mediapipe/tree/master/mediapipe/java/com/google/mediapipe/framework/AndroidAssetUtil.java
[Bazel]:https://bazel.build/
[`CameraHelper`]:https://github.com/google/mediapipe/tree/master/mediapipe/java/com/google/mediapipe/components/CameraHelper.java
@ -721,7 +753,6 @@ If you ran into any issues, please see the full code of the tutorial
[`CameraXPreviewHelper`]:https://github.com/google/mediapipe/tree/master/mediapipe/java/com/google/mediapipe/components/CameraXPreviewHelper.java
[developer options]:https://developer.android.com/studio/debug/dev-options
[`edge_detection_mobile_gpu.pbtxt`]:https://github.com/google/mediapipe/tree/master/mediapipe/graphs/object_detection/object_detection_mobile_gpu.pbtxt
[`EdgeDetectionGPU` example]:https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/edgedetectiongpu/
[`EglManager`]:https://github.com/google/mediapipe/tree/master/mediapipe/java/com/google/mediapipe/glutil/EglManager.java
[`ExternalTextureConverter`]:https://github.com/google/mediapipe/tree/master/mediapipe/java/com/google/mediapipe/components/ExternalTextureConverter.java
[`FrameLayout`]:https://developer.android.com/reference/android/widget/FrameLayout

View File

@ -183,7 +183,7 @@ bazel build -c opt --config=ios_arm64 mediapipe/examples/ios/edgedetectiongpu:Ed
Then, go back to XCode, open Window > Devices and Simulators, select your
device, and add the `.ipa` file generated by the command above to your device.
Here is the document on [setting up and compiling](./mediapipe_ios_setup.md) iOS
Here is the document on [setting up and compiling](./building_examples.md#ios) iOS
MediaPipe apps.
Open the application on your device. Since it is empty, it should display a
@ -348,7 +348,7 @@ responded. Add the following code to `viewWillAppear:animated`:
```
[_cameraSource requestCameraAccessWithCompletionHandler:^void(BOOL granted) {
if (granted) {
dispatch_queue(_videoQueue, ^{
dispatch_async(_videoQueue, ^{
[_cameraSource start];
});
}
@ -405,7 +405,7 @@ Declare a static constant with the name of the graph, the input stream and the
output stream:
```
static NSString* const kGraphName = @"android_gpu";
static NSString* const kGraphName = @"mobile_gpu";
static const char* kInputStream = "input_video";
static const char* kOutputStream = "output_video";
@ -483,7 +483,7 @@ in our app:
NSLog(@"Failed to start graph: %@", error);
}
dispatch_queue(_videoQueue, ^{
dispatch_async(_videoQueue, ^{
[_cameraSource start];
});
}


View File

@ -16,18 +16,15 @@ Choose your operating system:
- [Installing on Debian and Ubuntu](#installing-on-debian-and-ubuntu)
- [Installing on CentOS](#installing-on-centos)
- [Installing on macOS](#installing-on-macos)
- [Installing on Windows](#installing-on-windows)
- [Installing on Windows Subsystem for Linux (WSL)](#installing-on-windows-subsystem-for-linux-wsl)
- [Installing using Docker](#installing-using-docker)
To build and run Android apps:
To build and run Android example apps, see these
[instructions](./building_examples.md#android).
- [Setting up Android SDK and NDK](#setting-up-android-sdk-and-ndk)
- [Using MediaPipe with Gradle](#using-mediapipe-with-gradle)
- [Using MediaPipe with Bazel](#using-mediapipe-with-bazel)
To build and run iOS apps:
- Please see the separate [iOS setup](./mediapipe_ios_setup.md) documentation.
To build and run iOS example apps, see these
[instructions](./building_examples.md#ios).
### Installing on Debian and Ubuntu
@ -355,6 +352,105 @@ To build and run iOS apps:
# Hello World!
```
### Installing on Windows
**Disclaimer**: Running MediaPipe on Windows is experimental.
Note: building MediaPipe Android apps is still not possible on native
Windows. Please do this in WSL instead and see the WSL setup instructions in the
next section.
1. Install [MSYS2](https://www.msys2.org/) and edit the `%PATH%` environment
variable.
If MSYS2 is installed to `C:\msys64`, add `C:\msys64\usr\bin` to your
`%PATH%` environment variable.
2. Install necessary packages.
```
C:\> pacman -S git patch unzip
```
3. Install Python and allow the executable to edit the `%PATH%` environment
variable.
Download Python Windows executable from
https://www.python.org/downloads/windows/ and install.
4. Install Visual C++ Build Tools 2019 and WinSDK
Go to https://visualstudio.microsoft.com/visual-cpp-build-tools, download
build tools, and install Microsoft Visual C++ 2019 Redistributable and
Microsoft Build Tools 2019.
Download the WinSDK from
https://developer.microsoft.com/en-us/windows/downloads/windows-10-sdk/ and
install.
5. Install Bazel and add the location of the Bazel executable to the `%PATH%`
environment variable.
Follow the official
[Bazel documentation](https://docs.bazel.build/versions/master/install-windows.html)
to install Bazel 2.0 or higher.
6. Set Bazel variables.
```
# Find the exact paths and version numbers from your local version.
C:\> set BAZEL_VS=C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools
C:\> set BAZEL_VC=C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC
C:\> set BAZEL_VC_FULL_VERSION=14.25.28610
C:\> set BAZEL_WINSDK_FULL_VERSION=10.1.18362.1
```
7. Checkout MediaPipe repository.
```
C:\Users\Username\mediapipe_repo> git clone https://github.com/google/mediapipe.git
# Change directory into MediaPipe root directory
C:\Users\Username\mediapipe_repo> cd mediapipe
```
8. Install OpenCV.
Download the Windows executable from https://opencv.org/releases/ and
install. We currently use OpenCV 3.4.10. Remember to edit the [`WORKSPACE`]
file if OpenCV is not installed at `C:\opencv`.
```
new_local_repository(
name = "windows_opencv",
build_file = "@//third_party:opencv_windows.BUILD",
path = "C:\\<path to opencv>\\build",
)
```
9. Run the [Hello World desktop example](./hello_world_desktop.md).
```
C:\Users\Username\mediapipe_repo>bazel build -c opt --define MEDIAPIPE_DISABLE_GPU=1 mediapipe/examples/desktop/hello_world
C:\Users\Username\mediapipe_repo>set GLOG_logtostderr=1
C:\Users\Username\mediapipe_repo>bazel-bin\mediapipe\examples\desktop\hello_world\hello_world.exe
# should print:
# I20200514 20:43:12.277598 1200 hello_world.cc:56] Hello World!
# I20200514 20:43:12.278597 1200 hello_world.cc:56] Hello World!
# I20200514 20:43:12.279618 1200 hello_world.cc:56] Hello World!
# I20200514 20:43:12.279618 1200 hello_world.cc:56] Hello World!
# I20200514 20:43:12.279618 1200 hello_world.cc:56] Hello World!
# I20200514 20:43:12.279618 1200 hello_world.cc:56] Hello World!
# I20200514 20:43:12.279618 1200 hello_world.cc:56] Hello World!
# I20200514 20:43:12.279618 1200 hello_world.cc:56] Hello World!
# I20200514 20:43:12.279618 1200 hello_world.cc:56] Hello World!
# I20200514 20:43:12.280613 1200 hello_world.cc:56] Hello World!
```
### Installing on Windows Subsystem for Linux (WSL)
Note: The pre-built OpenCV packages don't support cameras in WSL. Unless you
@ -565,150 +661,8 @@ This will use a Docker image that will isolate mediapipe's installation from the
docker run -i -t mediapipe:latest
``` -->
### Setting up Android SDK and NDK
Requirements:
* Java Runtime.
* Android SDK release 28.0.3 and above.
* Android NDK r17c and above.
MediaPipe recommends setting up Android SDK and NDK via Android Studio; see the
[next section](#setting-up-android-studio-with-mediapipe) for Android Studio
setup. However, if you prefer using MediaPipe without Android Studio, please run
[`setup_android_sdk_and_ndk.sh`] to download and set up Android SDK and NDK
before building any Android example apps.
If Android SDK and NDK are already installed (e.g., by Android Studio), set
`$ANDROID_HOME` and `$ANDROID_NDK_HOME` to point to the installed SDK and NDK.
```bash
export ANDROID_HOME=<path to the Android SDK>
export ANDROID_NDK_HOME=<path to the Android NDK>
```
In order to use MediaPipe on earlier Android versions, MediaPipe needs to switch
to a lower Android API level. You can achieve this by specifying `api_level =
<api level integer>` in android_ndk_repository() and/or android_sdk_repository()
in the [`WORKSPACE`] file.
Please verify all the necessary packages are installed.
* Android SDK Platform API Level 28 or 29
* Android SDK Build-Tools 28 or 29
* Android SDK Platform-Tools 28 or 29
* Android SDK Tools 26.1.1
* Android NDK 17c or above
### Using MediaPipe with Gradle
MediaPipe can be used within an existing project, such as a Gradle project,
using the MediaPipe AAR target defined in mediapipe_aar.bzl. Please see the
separate [MediaPipe Android Archive Library](./android_archive_library.md)
documentation.
### Using MediaPipe with Bazel
The MediaPipe project can be imported to Android Studio using the Bazel plugins.
This allows the MediaPipe examples and demos to be built and modified in Android
Studio. To incorporate MediaPipe into an existing Android Studio project, see:
"Using MediaPipe with Gradle". The steps below use Android Studio 3.5 to build
and install a MediaPipe example app.
1. Install and launch Android Studio 3.5.
2. Select `Configure` | `SDK Manager` | `SDK Platforms`.
* Verify that Android SDK Platform API Level 28 or 29 is installed.
* Take note of the Android SDK Location, e.g.,
`/usr/local/home/Android/Sdk`.
3. Select `Configure` | `SDK Manager` | `SDK Tools`.
* Verify that Android SDK Build-Tools 28 or 29 is installed.
* Verify that Android SDK Platform-Tools 28 or 29 is installed.
* Verify that Android SDK Tools 26.1.1 is installed.
* Verify that Android NDK 17c or above is installed.
* Take note of the Android NDK Location, e.g.,
`/usr/local/home/Android/Sdk/ndk-bundle` or
`/usr/local/home/Android/Sdk/ndk/20.0.5594570`.
4. Set environment variables `$ANDROID_HOME` and `$ANDROID_NDK_HOME` to point
to the installed SDK and NDK.
```bash
export ANDROID_HOME=/usr/local/home/Android/Sdk
# If the NDK libraries are installed by a previous version of Android Studio, do
export ANDROID_NDK_HOME=/usr/local/home/Android/Sdk/ndk-bundle
# If the NDK libraries are installed by Android Studio 3.5, do
export ANDROID_NDK_HOME=/usr/local/home/Android/Sdk/ndk/<version number>
```
5. Select `Configure` | `Plugins` install `Bazel`.
6. On Linux, select `File` | `Settings` | `Bazel settings`. On macOS, select
`Android Studio` | `Preferences` | `Bazel settings`. Then, modify `Bazel
binary location` to be the same as the output of `$ which bazel`.
7. Select `Import Bazel Project`.
* Select `Workspace`: `/path/to/mediapipe` and select `Next`.
* Select `Generate from BUILD file`: `/path/to/mediapipe/BUILD` and select
`Next`.
* Modify `Project View` to be the following and select `Finish`.
```
directories:
# read project settings, e.g., .bazelrc
.
-mediapipe/objc
-mediapipe/examples/ios
targets:
//mediapipe/examples/android/...:all
//mediapipe/java/...:all
android_sdk_platform: android-29
sync_flags:
--host_crosstool_top=@bazel_tools//tools/cpp:toolchain
```
8. Select `Bazel` | `Sync` | `Sync project with Build files`.
Note: Even after doing step 4, if you still see the error: `"no such package
'@androidsdk//': Either the path attribute of android_sdk_repository or the
ANDROID_HOME environment variable must be set."`, please modify the
**WORKSPACE** file to point to your SDK and NDK library locations, as below:
```
android_sdk_repository(
name = "androidsdk",
path = "/path/to/android/sdk"
)
android_ndk_repository(
name = "androidndk",
path = "/path/to/android/ndk"
)
```
9. Connect an Android device to the workstation.
10. Select `Run...` | `Edit Configurations...`.
* Select `Templates` | `Bazel Command`.
* Enter Target Expression:
`//mediapipe/examples/android/src/java/com/google/mediapipe/apps/facedetectioncpu`
* Enter Bazel command: `mobile-install`.
* Enter Bazel flags: `-c opt --config=android_arm64`.
* Press the `[+]` button to add the new configuration.
* Select `Run` to run the example app on the connected Android device.
[`WORKSPACE`]: https://github.com/google/mediapipe/tree/master/WORKSPACE
[`opencv_linux.BUILD`]: https://github.com/google/mediapipe/tree/master/third_party/opencv_linux.BUILD
[`opencv_macos.BUILD`]: https://github.com/google/mediapipe/tree/master/third_party/opencv_macos.BUILD
[`ffmpeg_macos.BUILD`]:https://github.com/google/mediapipe/tree/master/third_party/ffmpeg_macos.BUILD
[`setup_opencv.sh`]: https://github.com/google/mediapipe/tree/master/setup_opencv.sh
[`setup_android_sdk_and_ndk.sh`]: https://github.com/google/mediapipe/tree/master/setup_android_sdk_and_ndk.sh

View File

@ -78,25 +78,8 @@ process new data sets, in the documentation of
PYTHONPATH="${PYTHONPATH};"+`pwd`
```
and then you can import the data set in Python.
```python
import tensorflow as tf
from mediapipe.examples.desktop.media_sequence.demo_dataset import DemoDataset
demo_data_path = '/tmp/demo_data/'
with tf.Graph().as_default():
d = DemoDataset(demo_data_path)
dataset = d.as_dataset('test')
# implement additional processing and batching here
dataset_output = dataset.make_one_shot_iterator().get_next()
images = dataset_output['images']
labels = dataset_output['labels']
with tf.Session() as sess:
images_, labels_ = sess.run([images, labels])
print('The shape of images_ is %s' % str(images_.shape))
print('The shape of labels_ is %s' % str(labels_.shape))
```
and then you can import the data set in Python using
[read_demo_dataset.py](mediapipe/examples/desktop/media_sequence/read_demo_dataset.py)
### Preparing a practical data set
As an example of processing a practical data set, a similar set of commands will

View File

@ -1,118 +0,0 @@
## Setting up MediaPipe for iOS
1. Install [Xcode](https://developer.apple.com/xcode/) and the Command Line
Tools.
Follow Apple's instructions to obtain the required development certificates
and provisioning profiles for your iOS device. Install the Command Line
Tools by
```bash
xcode-select --install
```
2. Install [Bazel 1.1.0](https://bazel.build/).
We recommend using [Homebrew](https://brew.sh/):
```bash
$ brew install https://raw.githubusercontent.com/bazelbuild/homebrew-tap/f8a0fa981bcb1784a0d0823e14867b844e94fb3d/Formula/bazel.rb
```
3. Set Python 3.7 as the default Python version and install the Python "six"
library.
To make Mediapipe work with TensorFlow, please set Python 3.7 as the default
Python version and install the Python "six" library.
```bash
pip3 install --user six
```
4. Clone the MediaPipe repository.
```bash
git clone https://github.com/google/mediapipe.git
```
5. Symlink or copy your provisioning profile to
`mediapipe/mediapipe/provisioning_profile.mobileprovision`.
```bash
cd mediapipe
ln -s ~/Downloads/MyProvisioningProfile.mobileprovision mediapipe/provisioning_profile.mobileprovision
```
Tip: You can use this command to see the provisioning profiles you have
previously downloaded using Xcode: `open ~/Library/MobileDevice/"Provisioning Profiles"`.
If there are none, generate and download a profile on [Apple's developer site](https://developer.apple.com/account/resources/).
## Creating an Xcode project
Note: This workflow requires a separate tool in addition to Bazel. If it fails
to work for any reason, you can always use the command-line build instructions
in the next section.
1. We will use a tool called [Tulsi](https://tulsi.bazel.build/) for generating Xcode projects from Bazel
build configurations.
IMPORTANT: At the time of this writing, Tulsi has a small [issue](https://github.com/bazelbuild/tulsi/issues/98)
that keeps it from building with Xcode 10.3. The instructions below apply a
fix from a [pull request](https://github.com/bazelbuild/tulsi/pull/99).
```bash
# cd out of the mediapipe directory, then:
git clone https://github.com/bazelbuild/tulsi.git
cd tulsi
# Apply the fix for Xcode 10.3 compatibility:
git fetch origin pull/99/head:xcodefix
git checkout xcodefix
# Now we can build Tulsi.
sh build_and_run.sh
```
This will install Tulsi.app inside the Applications directory inside your
home directory.
2. Open `mediapipe/Mediapipe.tulsiproj` using the Tulsi app.
Important: If Tulsi displays an error saying "Bazel could not be found",
press the "Bazel..." button in the Packages tab and select the `bazel`
executable in your homebrew `/bin/` directory.
3. Select the MediaPipe config in the Configs tab, then press the Generate
button below. You will be asked for a location to save the Xcode project.
Once the project is generated, it will be opened in Xcode.
4. You can now select any of the MediaPipe demos in the target menu, and build
and run them as normal.
Note: When you ask Xcode to run an app, by default it will use the Debug
configuration. Some of our demos are computationally heavy; you may want to use
the Release configuration for better performance.
Tip: To switch build configuration in Xcode, click on the target menu, choose
"Edit Scheme...", select the Run action, and switch the Build Configuration from
Debug to Release. Note that this is set independently for each target.
## Building an iOS app from the command line
1. Modify the `bundle_id` field of the app's ios_application rule to use your own identifier, e.g. for [Face Detection GPU App example](./face_detection_mobile_gpu.md), you need to modify the line 26 of the [BUILD file](https://github.com/google/mediapipe/blob/master/mediapipe/examples/ios/facedetectiongpu/BUILD).
2. Build one of the example apps for iOS. We will be using the
[Face Detection GPU App example](./face_detection_mobile_gpu.md)
```bash
cd mediapipe
bazel build --config=ios_arm64 mediapipe/examples/ios/facedetectiongpu:FaceDetectionGpuApp
```
You may see a permission request from `codesign` in order to sign the app.
3. In Xcode, open the `Devices and Simulators` window (command-shift-2).
4. Make sure your device is connected. You will see a list of installed apps.
Press the "+" button under the list, and select the `.ipa` file built by
Bazel.
5. You can now run the app on your device.

View File

@ -41,12 +41,6 @@ To build the app yourself, run:
bazel build -c opt --config=android_arm64 mediapipe/examples/android/src/java/com/google/mediapipe/apps/multihandtrackinggpu
```
To build for the 3D mode, run:
```bash
bazel build -c opt --config=android_arm64 --define 3D=true mediapipe/examples/android/src/java/com/google/mediapipe/apps/multihandtrackinggpu
```
Once the app is built, install it on Android device with:
```bash
@ -57,7 +51,7 @@ adb install bazel-bin/mediapipe/examples/android/src/java/com/google/mediapipe/a
[Source](https://github.com/google/mediapipe/tree/master/mediapipe/examples/ios/multihandtrackinggpu).
See the general [instructions](./mediapipe_ios_setup.md) for building iOS
See the general [instructions](./building_examples.md#ios) for building iOS
examples and generating an Xcode project. This will be the MultiHandTrackingGpuApp
target.
@ -67,12 +61,6 @@ To build on the command line:
bazel build -c opt --config=ios_arm64 mediapipe/examples/ios/multihandtrackinggpu:MultiHandTrackingGpuApp
```
To build for the 3D mode, run:
```bash
bazel build -c opt --config=ios_arm64 --define 3D=true mediapipe/examples/ios/multihandtrackinggpu:MultiHandTrackingGpuApp
```
## Graph
The multi-hand tracking [main graph](#main-graph) internally utilizes a

View File

@ -29,7 +29,7 @@ adb install bazel-bin/mediapipe/examples/android/src/java/com/google/mediapipe/a
[Source](https://github.com/google/mediapipe/tree/master/mediapipe/examples/ios/handdetectiongpu).
See the general [instructions](./mediapipe_ios_setup.md) for building iOS
See the general [instructions](./building_examples.md#ios) for building iOS
examples and generating an Xcode project. This will be the HandDetectionGpuApp
target.

View File

@ -21,7 +21,7 @@ adb install bazel-bin/mediapipe/examples/android/src/java/com/google/mediapipe/a
[Source](https://github.com/google/mediapipe/tree/master/mediapipe/examples/ios/objectdetectiongpu).
See the general [instructions](./mediapipe_ios_setup.md) for building iOS
See the general [instructions](./building_examples.md#ios) for building iOS
examples and generating an Xcode project. This will be the ObjectDetectionGpuApp
target.

View File

@ -0,0 +1,74 @@
# Profiler Configuration Settings
<!--*
# Document freshness: For more information, see go/fresh-source.
freshness: { owner: 'mhays' reviewed: '2020-05-08' }
*-->
[TOC]
The following settings are used when setting up [MediaPipe Tracing](tracer.md).
Many of them are advanced and not recommended for general usage. Consult
[MediaPipe Tracing](tracer.md) for a friendlier introduction.
histogram_interval_size_usec
: Specifies the size of the intervals (in microseconds) used to generate the
histogram of `Process()` runtimes. The last interval extends to +inf. If not
specified, the interval size is 1000000 usec = 1 sec.
num_histogram_intervals
: Specifies the number of intervals used to generate the histogram of the
`Process()` runtime. If not specified, one interval is used.
enable_profiler
: If true, the profiler starts profiling when graph is initialized.
enable_stream_latency
: If true, the profiler also profiles the stream latency and input-output
latency. No-op if enable_profiler is false.
use_packet_timestamp_for_added_packet
: If true, the profiler uses packet timestamp (as production time and source
production time) for packets added by calling
`CalculatorGraph::AddPacketToInputStream()`. If false, uses the profiler's
clock.
trace_log_capacity
: The maximum number of trace events buffered in memory. The default value
buffers up to 20000 events.
trace_event_types_disabled
: Trace event types that are not logged.
trace_log_path
: The output directory and base-name prefix for trace log files. Log files are
written to: StrCat(trace_log_path, index, "`.binarypb`")
trace_log_count
: The number of trace log files retained. The trace log files are named
"`trace_0.log`" through "`trace_k.log`". The default value specifies 2
output files retained.
trace_log_interval_usec
: The interval in microseconds between trace log output. The default value
specifies trace log output once every 0.5 sec.
trace_log_margin_usec
: The interval in microseconds between TimeNow and the highest times included
in trace log output. This margin allows time for events to be appended to
the TraceBuffer.
trace_log_duration_events
: False specifies an event for each calculator invocation. True specifies a
separate event for each start and finish time.
trace_log_interval_count
: The number of trace log intervals per file. The total log duration is:
`trace_log_interval_usec * trace_log_count * trace_log_interval_count`.
The default value specifies 10 intervals per file.
trace_log_disabled
: An option to turn ON/OFF writing trace files to disk. Saving trace files to
disk is enabled by default.
trace_enabled
: If true, tracer timing events are recorded and reported.
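As an illustration only, a `profiler_config` message combining several of the
settings above might look like the sketch below (all values are example choices,
not recommendations; unspecified settings keep their defaults):
```
profiler_config {
  enable_profiler: true
  enable_stream_latency: true
  trace_enabled: true
  trace_log_path: "/sdcard/mediapipe_trace_"  # directory plus base-name prefix (example value)
  trace_log_count: 2                          # number of trace log files retained
}
```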

View File

@ -36,7 +36,7 @@ $ bazel build -c opt --define MEDIAPIPE_DISABLE_GPU=1 \
mediapipe/examples/desktop/template_matching:template_matching_tflite
$ bazel-bin/mediapipe/examples/desktop/template_matching/template_matching_tflite \
--calculator_graph_config_file=mediapipe/graphs/template_matching/index_building.pbtxt \
--input_side_packets="file_directory=<template image directory>,file_suffix='png',output_index_filename=<output index filename>"
--input_side_packets="file_directory=<template image directory>,file_suffix=png,output_index_filename=<output index filename>"
```
The output index file includes the extracted KNIFT features.

View File

@ -0,0 +1,223 @@
# Tracing / Profiling MediaPipe Graphs
The MediaPipe framework includes a built-in tracer and profiler. Tracing can
be activated using a setting in the CalculatorGraphConfig. The tracer records
various timing events related to packet processing, including the start and
end time of each Calculator::Process call. The tracer writes trace log files
in binary protobuf format, and it is available on Linux, Android, and iOS.
## Enabling tracing
To enable profiling of a MediaPipe graph, the proto buffer representing the graph
must have a `profiler_config` message at its root. This message is defined inside
calculator.proto, and its public definition, with a complete list of settings,
can be found in our GitHub repository. Here is a simple setup that turns on a few
extra options:
```
profiler_config {
enable_profiler: true
trace_enabled: true
trace_log_count: 5
}
```
* `enable_profiler` is required to emit any logging at all.
* `trace_enabled` gives us packet level information needed for offline
profiling.
* `trace_log_count` is a convenience that allows us, by default, to chop up
our log into five separate files, which are filled in a round-robin
fashion (after the fifth file is recorded, the first file is used again).
The trace log files are named `trace_0.log` through `trace_k.log`.
See [Profiler Configuration](profiler_config.md) for other settings
available in the profiler config. Note that most of the other settings are
considered advanced, and in general should not be needed.
## Collecting the Logs
MediaPipe will emit data into a pre-specified directory:
* On the desktop, this will be the `/tmp` directory.
* On Android, this will be the `/sdcard` directory.
* On iOS, this can be reached through Xcode. Select "Window/Devices and
Simulators" and select the "Devices" tab.
![Windows Select Devices](images/visualizer/ios_window_devices.png)
You can open the Download Container. Logs will be located in `application
container/.xcappdata/AppData/Documents/`
![Windows Download Container](images/visualizer/ios_download_container.png)
Log files are written to `\<trace_log_path index\>.binarypb` where, by default,
`\<trace_log_path\>` is equal to `mediapipe_trace_` (the entire path and file
prefix can be overridden by setting `trace_log_path` within the
`profiler_config` message). The index will, by default, alternate between 0 and
1, unless you've overridden the `trace_log_count` as we did above.
By default, each file records five seconds of events. (Advanced: Specifically,
we record ten intervals of half a second each. This can be overridden by adding
`trace_log_interval_usec` and `trace_log_interval_count` to `profiler_config`).
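For example, a sketch that makes those defaults explicit, and that could be
adjusted to capture longer windows per file (values here are illustrative):
```
profiler_config {
  enable_profiler: true
  trace_enabled: true
  trace_log_interval_usec: 500000   # half-second intervals (the default)
  trace_log_interval_count: 10      # ten intervals, i.e. about five seconds per file
}
```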
### Tracing on Linux
1. Follow the instructions stated above in `Enabling tracing`.
2. Build and run your MediaPipe graph. The running graph writes trace events as
stated above in `Collecting the Logs`.
### Tracing on Android
* Ensure that the Android app has write permissions to external storage.
* Include the line below in your `AndroidManifest.xml` file.
```xml
<uses-permission android:name="android.permission.WRITE_EXTERNAL_STORAGE" />
```
* Grant the permission either upon first app launch, or by going into
`Settings -> Apps & notifications -> $YOUR_APP -> Permissions` and
enable `Storage`.
* Add the `profiler_config` protobuf message to the existing calculator-graph-config
protobuf, such as the existing `.pbtxt` file, following the instructions stated
above in `Enabling tracing`.
* Connect your Android device and run `adb devices`.
```bash
adb devices
# should print:
# List of devices attached
# 805KPWQ1876505 device
```
* Use `bazel build` to compile the Android app and use `adb install` to get it
installed on your Android device.
* Open the installed Android app. The running MediaPipe graph appends trace
events to trace log files at:
```bash
/sdcard/mediapipe_trace_0.binarypb
/sdcard/mediapipe_trace_1.binarypb
```
After every 5 sec, writing shifts to a successive trace log file, such that
the most recent 5 sec of events are preserved. You can check whether the
trace files have been written to the device using adb shell.
```bash
adb shell "ls -la /sdcard/"
```
On Android, MediaPipe selects the external storage directory `/sdcard` for
trace logs. This directory can be overridden using the setting
`trace_log_path`, like:
```
profiler_config {
trace_enabled: true
trace_log_path: "/sdcard/profiles"
}
```
* Download the trace files from the device.
```bash
# from your terminal
adb pull /sdcard/mediapipe_trace_0.binarypb
# if successful you should see something like
# /sdcard/mediapipe_trace_0.binarypb: 1 file pulled. 0.1 MB/s (6766 bytes in 0.045s)
```
## Analyzing the Logs
Trace logs can be analyzed from within the visualizer.
1. Navigate to
[viz.mediapipe.dev](https://viz.mediapipe.dev)
2. Click on the "Upload" button in the upper right.
![Click on Upload](images/visualizer/viz_click_upload.png)
3. Click on "Upload trace file".
![Click on Upload](images/visualizer/viz_click_upload_trace_file.png)
A sample trace file has been generated for you:
[sample_trace_binary.pb](data/visualizer/sample_trace.binarypb)
4. A file selection popup will appear. Select the `.binarypb` that holds your
trace information.
5. A chart view will appear. All of your calculators will appear along the left
with profiling information listed along the top.
![Click on Upload](images/visualizer/viz_chart_view.png)
Click on a header to alternately sort that column in ascending or descending
order. You can also scroll horizontally and vertically within the control to
see more columns and more calculators.
### Explanation of columns:
name
: The name of the calculator.
fps
: The number of frames that this calculator can generate each second, on
average. `1 / (input_latency_mean + time_mean)` (Units are `1 / second`).
frequency
: The rate that this calculator was asked to process packets per second.
(Computed by `# of calls total / (last_call_time - first_call_time)`.)
(Units are `1 / second`)
counter
: Number of times `Process()` was called on the calculator. It is the sum of
`dropped + completed`.
dropped
: Number of times the calculator was called but did not produce an output.
completed
: Number of times that this calculator was asked to process inputs after which
it generated outputs.
processing_rate
: `1E+6 / time_mean`. The number of times per second this calculator could run
process, on average. (Units are `1 / second`).
thread_count
: The number of threads that made use of each calculator.
time_mean
: Average time spent within a calculator (in microseconds).
time_stddev
: Standard deviation of time_mean (in microseconds).
time_total
: Total time spent within a calculator (in microseconds).
time_percent
: Percent of total time spent within a calculator.
input_latency_mean
: Average latency between the earliest input packet used by an iteration of the
calculator and when the calculator actually begins processing (in
microseconds).
input_latency_stddev
: Standard deviation of input_latency_mean (in microseconds).
input_latency_total
: Total accumulated input_latency (in microseconds).

View File

@ -1,7 +1,8 @@
## Visualizing MediaPipe Graphs
## Visualizing & Tracing MediaPipe Graphs
- [Working within the Editor](#working-within-the-editor)
- [Understanding the Graph](#understanding-the-graph)
- [Tracing the Graph](#tracing-the-graph)
- [Visualizing Subgraphs](#visualizing-subgraphs)
To help users understand the structure of their calculator graphs and to
@ -64,6 +65,19 @@ The visualizer graph shows the connections between calculator nodes.
![Special nodes](./images/special_nodes_code.png)
### Tracing the Graph
The MediaPipe visualizer can display either a calculator graph definition or a
calculator graph execution trace. In a MediaPipe graph, execution tracing can be
activated using a setting in the CalculatorGraphConfig,
`profiler_config.trace_enabled`. When activated, the tracer writes trace log
files on Linux, Android, or iOS.
For more details on activating execution tracing, see
[Tracing MediaPipe Graphs](./tracer.md)
### Visualizing Subgraphs
The MediaPipe visualizer can display multiple graphs in separate tabs. If a
@ -75,9 +89,9 @@ the subgraph's definition.
For instance, there are two graphs involved in the
[hand detection example](./hand_detection_mobile_gpu.md): the main graph
([source pbtxt file](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/hand_tracking/hand_detection_mobile.pbtxt))
([source pbtxt file](https://github.com/google/mediapipe/blob/master/mediapipe/graphs/hand_tracking/hand_detection_mobile.pbtxt))
and its associated subgraph
([source pbtxt file](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/hand_tracking/subgraphs/hand_detection_gpu.pbtxt)).
([source pbtxt file](https://github.com/google/mediapipe/blob/master/mediapipe/graphs/hand_tracking/subgraphs/hand_detection_gpu.pbtxt)).
To visualize them:
* In the MediaPipe visualizer, click on the upload graph button and select the

View File

@ -120,7 +120,7 @@ the inference for both local videos and the dataset
to local.
```bash
curl -o /tmp/mediapipe/yt8m_baseline_saved_model.tar.gz data.yt8m.org/models/baseline/saved_model.tar.gz
curl -o /tmp/mediapipe/yt8m_baseline_saved_model.tar.gz http://data.yt8m.org/models/baseline/saved_model.tar.gz
tar -xvf /tmp/mediapipe/yt8m_baseline_saved_model.tar.gz -C /tmp/mediapipe
```
@ -156,7 +156,7 @@ the inference for both local videos and the dataset
to local.
```bash
curl -o /tmp/mediapipe/yt8m_baseline_saved_model.tar.gz data.yt8m.org/models/baseline/saved_model.tar.gz
curl -o /tmp/mediapipe/yt8m_baseline_saved_model.tar.gz http://data.yt8m.org/models/baseline/saved_model.tar.gz
tar -xvf /tmp/mediapipe/yt8m_baseline_saved_model.tar.gz -C /tmp/mediapipe
```

View File

@ -1,29 +0,0 @@
MediaPipe Examples
==================
This directory contains MediaPipe Android example applications for different use cases. The applications use CameraX API to access the camera.
## Use Cases
| Use Case | Directory |
|---------------------------------------|:-----------------------------------:|
| Edge Detection on GPU | edgedetectiongpu |
| Face Detection on CPU | facedetectioncpu |
| Face Detection on GPU | facedetectiongpu |
| Object Detection on CPU | objectdetectioncpu |
| Object Detection on GPU | objectdetectiongpu |
| Hair Segmentation on GPU | hairsegmentationgpu |
| Hand Detection on GPU | handdetectiongpu |
| Hand Tracking on GPU | handtrackinggpu |
For instance, to build an example app for face detection on CPU, run:
```bash
bazel build -c opt --config=android_arm64 mediapipe/examples/android/src/java/com/google/mediapipe/apps/facedetectioncpu
```
To further install the app on an Android device, run:
```bash
adb install bazel-bin/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facedetectioncpu/facedetectioncpu.apk
```

View File

@ -1,6 +1,6 @@
<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
package="com.google.mediapipe.apps.facedetectiongpu">
package="com.google.mediapipe.apps.basic">
<uses-sdk
android:minSdkVersion="21"
@ -9,18 +9,16 @@
<!-- For using the camera -->
<uses-permission android:name="android.permission.CAMERA" />
<uses-feature android:name="android.hardware.camera" />
<uses-feature android:name="android.hardware.camera.autofocus" />
<!-- For MediaPipe -->
<uses-feature android:glEsVersion="0x00020000" android:required="true" />
<application
android:allowBackup="true"
android:label="@string/app_name"
android:icon="@mipmap/ic_launcher"
android:label="${appName}"
android:roundIcon="@mipmap/ic_launcher_round"
android:supportsRtl="true"
android:theme="@style/AppTheme">
<activity
android:name=".MainActivity"
android:name="${mainActivity}"
android:exported="true"
android:screenOrientation="portrait">
<intent-filter>
@ -28,6 +26,10 @@
<category android:name="android.intent.category.LAUNCHER" />
</intent-filter>
</activity>
</application>
<meta-data android:name="cameraFacingFront" android:value="${cameraFacingFront}"/>
<meta-data android:name="binaryGraphName" android:value="${binaryGraphName}"/>
<meta-data android:name="inputVideoStreamName" android:value="${inputVideoStreamName}"/>
<meta-data android:name="outputVideoStreamName" android:value="${outputVideoStreamName}"/>
</application>
</manifest>

View File

@ -14,45 +14,14 @@
licenses(["notice"]) # Apache 2.0
package(default_visibility = ["//visibility:private"])
cc_binary(
name = "libmediapipe_jni.so",
linkshared = 1,
linkstatic = 1,
deps = [
"//mediapipe/graphs/edge_detection:mobile_calculators",
"//mediapipe/java/com/google/mediapipe/framework/jni:mediapipe_framework_jni",
],
)
cc_library(
name = "mediapipe_jni_lib",
srcs = [":libmediapipe_jni.so"],
alwayslink = 1,
)
# Maps the binary graph to an alias (e.g., the app name) for convenience so that the alias can be
# easily incorporated into the app via, for example,
# MainActivity.BINARY_GRAPH_NAME = "appname.binarypb".
genrule(
name = "binary_graph",
srcs = ["//mediapipe/graphs/edge_detection:mobile_gpu_binary_graph"],
outs = ["edgedetectiongpu.binarypb"],
cmd = "cp $< $@",
)
# Basic library common across example apps.
android_library(
name = "mediapipe_lib",
name = "basic_lib",
srcs = glob(["*.java"]),
assets = [
":binary_graph",
],
assets_dir = "",
manifest = "AndroidManifest.xml",
resource_files = glob(["res/**"]),
visibility = ["//visibility:public"],
deps = [
":mediapipe_jni_lib",
"//mediapipe/java/com/google/mediapipe/components:android_camerax_helper",
"//mediapipe/java/com/google/mediapipe/components:android_components",
"//mediapipe/java/com/google/mediapipe/framework:android_framework",
@ -65,12 +34,49 @@ android_library(
],
)
android_binary(
name = "edgedetectiongpu",
manifest = "AndroidManifest.xml",
manifest_values = {"applicationId": "com.google.mediapipe.apps.edgedetectiongpu"},
multidex = "native",
# Manifest common across example apps.
exports_files(
srcs = ["AndroidManifest.xml"],
)
# Native dependencies to perform edge detection in the Hello World example.
cc_binary(
name = "libmediapipe_jni.so",
linkshared = 1,
linkstatic = 1,
deps = [
":mediapipe_lib",
"//mediapipe/graphs/edge_detection:mobile_calculators",
"//mediapipe/java/com/google/mediapipe/framework/jni:mediapipe_framework_jni",
],
)
# Converts the .so cc_binary into a cc_library, to be consumed in an android_binary.
cc_library(
name = "mediapipe_jni_lib",
srcs = [":libmediapipe_jni.so"],
alwayslink = 1,
)
# Hello World example app.
android_binary(
name = "helloworld",
assets = [
"//mediapipe/graphs/edge_detection:mobile_gpu.binarypb",
],
assets_dir = "",
manifest = "AndroidManifest.xml",
manifest_values = {
"applicationId": "com.google.mediapipe.apps.basic",
"appName": "Hello World",
"mainActivity": ".MainActivity",
"cameraFacingFront": "False",
"binaryGraphName": "mobile_gpu.binarypb",
"inputVideoStreamName": "input_video",
"outputVideoStreamName": "output_video",
},
multidex = "native",
deps = [
":basic_lib",
":mediapipe_jni_lib",
],
)

View File

@ -12,11 +12,15 @@
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.mediapipe.apps.hairsegmentationgpu;
package com.google.mediapipe.apps.basic;
import android.content.pm.ApplicationInfo;
import android.content.pm.PackageManager;
import android.content.pm.PackageManager.NameNotFoundException;
import android.graphics.SurfaceTexture;
import android.os.Bundle;
import androidx.appcompat.app.AppCompatActivity;
import android.util.Log;
import android.util.Size;
import android.view.SurfaceHolder;
import android.view.SurfaceView;
@ -30,15 +34,10 @@ import com.google.mediapipe.components.PermissionHelper;
import com.google.mediapipe.framework.AndroidAssetUtil;
import com.google.mediapipe.glutil.EglManager;
/** Main activity of MediaPipe example apps. */
/** Main activity of MediaPipe basic app. */
public class MainActivity extends AppCompatActivity {
private static final String TAG = "MainActivity";
private static final String BINARY_GRAPH_NAME = "hairsegmentationgpu.binarypb";
private static final String INPUT_VIDEO_STREAM_NAME = "input_video";
private static final String OUTPUT_VIDEO_STREAM_NAME = "output_video";
private static final CameraHelper.CameraFacing CAMERA_FACING = CameraHelper.CameraFacing.FRONT;
// Flips the camera-preview frames vertically before sending them into FrameProcessor to be
// processed in a MediaPipe graph, and flips the processed frames back when they are displayed.
// This is needed because OpenGL represents images assuming the image origin is at the bottom-left
@ -48,9 +47,20 @@ public class MainActivity extends AppCompatActivity {
static {
// Load all native libraries needed by the app.
System.loadLibrary("mediapipe_jni");
System.loadLibrary("opencv_java3");
try {
System.loadLibrary("opencv_java3");
} catch (java.lang.UnsatisfiedLinkError e) {
// Some example apps (e.g. template matching) require OpenCV 4.
System.loadLibrary("opencv_java4");
}
}
// Sends camera-preview frames into a MediaPipe graph for processing, and displays the processed
// frames onto a {@link Surface}.
protected FrameProcessor processor;
// Handles camera access via the {@link CameraX} Jetpack support library.
protected CameraXPreviewHelper cameraHelper;
// {@link SurfaceTexture} where the camera-preview frames can be accessed.
private SurfaceTexture previewFrameTexture;
// {@link SurfaceView} that displays the camera-preview frames processed by a MediaPipe graph.
@ -58,36 +68,39 @@ public class MainActivity extends AppCompatActivity {
// Creates and manages an {@link EGLContext}.
private EglManager eglManager;
// Sends camera-preview frames into a MediaPipe graph for processing, and displays the processed
// frames onto a {@link Surface}.
private FrameProcessor processor;
// Converts the GL_TEXTURE_EXTERNAL_OES texture from Android camera into a regular texture to be
// consumed by {@link FrameProcessor} and the underlying MediaPipe graph.
private ExternalTextureConverter converter;
// Handles camera access via the {@link CameraX} Jetpack support library.
private CameraXPreviewHelper cameraHelper;
// ApplicationInfo for retrieving metadata defined in the manifest.
private ApplicationInfo applicationInfo;
@Override
protected void onCreate(Bundle savedInstanceState) {
super.onCreate(savedInstanceState);
setContentView(R.layout.activity_main);
try {
applicationInfo =
getPackageManager().getApplicationInfo(getPackageName(), PackageManager.GET_META_DATA);
} catch (NameNotFoundException e) {
Log.e(TAG, "Cannot find application info: " + e);
}
previewDisplayView = new SurfaceView(this);
setupPreviewDisplayView();
// Initialize asset manager so that MediaPipe native libraries can access the app assets, e.g.,
// binary graphs.
AndroidAssetUtil.initializeNativeAssetManager(this);
eglManager = new EglManager(null);
processor =
new FrameProcessor(
this,
eglManager.getNativeContext(),
BINARY_GRAPH_NAME,
INPUT_VIDEO_STREAM_NAME,
OUTPUT_VIDEO_STREAM_NAME);
applicationInfo.metaData.getString("binaryGraphName"),
applicationInfo.metaData.getString("inputVideoStreamName"),
applicationInfo.metaData.getString("outputVideoStreamName"));
processor.getVideoSurfaceOutput().setFlipY(FLIP_FRAMES_VERTICALLY);
PermissionHelper.checkAndRequestCameraPermissions(this);
@ -117,6 +130,26 @@ public class MainActivity extends AppCompatActivity {
PermissionHelper.onRequestPermissionsResult(requestCode, permissions, grantResults);
}
protected void onCameraStarted(SurfaceTexture surfaceTexture) {
previewFrameTexture = surfaceTexture;
// Make the display view visible to start showing the preview. This triggers the
// SurfaceHolder.Callback added to (the holder of) previewDisplayView.
previewDisplayView.setVisibility(View.VISIBLE);
}
public void startCamera() {
cameraHelper = new CameraXPreviewHelper();
cameraHelper.setOnCameraStartedListener(
surfaceTexture -> {
onCameraStarted(surfaceTexture);
});
CameraHelper.CameraFacing cameraFacing =
applicationInfo.metaData.getBoolean("cameraFacingFront", false)
? CameraHelper.CameraFacing.FRONT
: CameraHelper.CameraFacing.BACK;
cameraHelper.startCamera(this, cameraFacing, /*surfaceTexture=*/ null);
}
private void setupPreviewDisplayView() {
previewDisplayView.setVisibility(View.GONE);
ViewGroup viewGroup = findViewById(R.id.preview_display_layout);
@ -155,16 +188,4 @@ public class MainActivity extends AppCompatActivity {
}
});
}
private void startCamera() {
cameraHelper = new CameraXPreviewHelper();
cameraHelper.setOnCameraStartedListener(
surfaceTexture -> {
previewFrameTexture = surfaceTexture;
// Make the display view visible to start showing the preview. This triggers the
// SurfaceHolder.Callback added to (the holder of) previewDisplayView.
previewDisplayView.setVisibility(View.VISIBLE);
});
cameraHelper.startCamera(this, CAMERA_FACING, /*surfaceTexture=*/ null);
}
}

View File

@ -0,0 +1,34 @@
<vector xmlns:android="http://schemas.android.com/apk/res/android"
xmlns:aapt="http://schemas.android.com/aapt"
android:width="108dp"
android:height="108dp"
android:viewportHeight="108"
android:viewportWidth="108">
<path
android:fillType="evenOdd"
android:pathData="M32,64C32,64 38.39,52.99 44.13,50.95C51.37,48.37 70.14,49.57 70.14,49.57L108.26,87.69L108,109.01L75.97,107.97L32,64Z"
android:strokeColor="#00000000"
android:strokeWidth="1">
<aapt:attr name="android:fillColor">
<gradient
android:endX="78.5885"
android:endY="90.9159"
android:startX="48.7653"
android:startY="61.0927"
android:type="linear">
<item
android:color="#44000000"
android:offset="0.0" />
<item
android:color="#00000000"
android:offset="1.0" />
</gradient>
</aapt:attr>
</path>
<path
android:fillColor="#FFFFFF"
android:fillType="nonZero"
android:pathData="M66.94,46.02L66.94,46.02C72.44,50.07 76,56.61 76,64L32,64C32,56.61 35.56,50.11 40.98,46.06L36.18,41.19C35.45,40.45 35.45,39.3 36.18,38.56C36.91,37.81 38.05,37.81 38.78,38.56L44.25,44.05C47.18,42.57 50.48,41.71 54,41.71C57.48,41.71 60.78,42.57 63.68,44.05L69.11,38.56C69.84,37.81 70.98,37.81 71.71,38.56C72.44,39.3 72.44,40.45 71.71,41.19L66.94,46.02ZM62.94,56.92C64.08,56.92 65,56.01 65,54.88C65,53.76 64.08,52.85 62.94,52.85C61.8,52.85 60.88,53.76 60.88,54.88C60.88,56.01 61.8,56.92 62.94,56.92ZM45.06,56.92C46.2,56.92 47.13,56.01 47.13,54.88C47.13,53.76 46.2,52.85 45.06,52.85C43.92,52.85 43,53.76 43,54.88C43,56.01 43.92,56.92 45.06,56.92Z"
android:strokeColor="#00000000"
android:strokeWidth="1" />
</vector>

View File

@ -0,0 +1,74 @@
<?xml version="1.0" encoding="utf-8"?>
<vector
android:height="108dp"
android:width="108dp"
android:viewportHeight="108"
android:viewportWidth="108"
xmlns:android="http://schemas.android.com/apk/res/android">
<path android:fillColor="#26A69A"
android:pathData="M0,0h108v108h-108z"/>
<path android:fillColor="#00000000" android:pathData="M9,0L9,108"
android:strokeColor="#33FFFFFF" android:strokeWidth="0.8"/>
<path android:fillColor="#00000000" android:pathData="M19,0L19,108"
android:strokeColor="#33FFFFFF" android:strokeWidth="0.8"/>
<path android:fillColor="#00000000" android:pathData="M29,0L29,108"
android:strokeColor="#33FFFFFF" android:strokeWidth="0.8"/>
<path android:fillColor="#00000000" android:pathData="M39,0L39,108"
android:strokeColor="#33FFFFFF" android:strokeWidth="0.8"/>
<path android:fillColor="#00000000" android:pathData="M49,0L49,108"
android:strokeColor="#33FFFFFF" android:strokeWidth="0.8"/>
<path android:fillColor="#00000000" android:pathData="M59,0L59,108"
android:strokeColor="#33FFFFFF" android:strokeWidth="0.8"/>
<path android:fillColor="#00000000" android:pathData="M69,0L69,108"
android:strokeColor="#33FFFFFF" android:strokeWidth="0.8"/>
<path android:fillColor="#00000000" android:pathData="M79,0L79,108"
android:strokeColor="#33FFFFFF" android:strokeWidth="0.8"/>
<path android:fillColor="#00000000" android:pathData="M89,0L89,108"
android:strokeColor="#33FFFFFF" android:strokeWidth="0.8"/>
<path android:fillColor="#00000000" android:pathData="M99,0L99,108"
android:strokeColor="#33FFFFFF" android:strokeWidth="0.8"/>
<path android:fillColor="#00000000" android:pathData="M0,9L108,9"
android:strokeColor="#33FFFFFF" android:strokeWidth="0.8"/>
<path android:fillColor="#00000000" android:pathData="M0,19L108,19"
android:strokeColor="#33FFFFFF" android:strokeWidth="0.8"/>
<path android:fillColor="#00000000" android:pathData="M0,29L108,29"
android:strokeColor="#33FFFFFF" android:strokeWidth="0.8"/>
<path android:fillColor="#00000000" android:pathData="M0,39L108,39"
android:strokeColor="#33FFFFFF" android:strokeWidth="0.8"/>
<path android:fillColor="#00000000" android:pathData="M0,49L108,49"
android:strokeColor="#33FFFFFF" android:strokeWidth="0.8"/>
<path android:fillColor="#00000000" android:pathData="M0,59L108,59"
android:strokeColor="#33FFFFFF" android:strokeWidth="0.8"/>
<path android:fillColor="#00000000" android:pathData="M0,69L108,69"
android:strokeColor="#33FFFFFF" android:strokeWidth="0.8"/>
<path android:fillColor="#00000000" android:pathData="M0,79L108,79"
android:strokeColor="#33FFFFFF" android:strokeWidth="0.8"/>
<path android:fillColor="#00000000" android:pathData="M0,89L108,89"
android:strokeColor="#33FFFFFF" android:strokeWidth="0.8"/>
<path android:fillColor="#00000000" android:pathData="M0,99L108,99"
android:strokeColor="#33FFFFFF" android:strokeWidth="0.8"/>
<path android:fillColor="#00000000" android:pathData="M19,29L89,29"
android:strokeColor="#33FFFFFF" android:strokeWidth="0.8"/>
<path android:fillColor="#00000000" android:pathData="M19,39L89,39"
android:strokeColor="#33FFFFFF" android:strokeWidth="0.8"/>
<path android:fillColor="#00000000" android:pathData="M19,49L89,49"
android:strokeColor="#33FFFFFF" android:strokeWidth="0.8"/>
<path android:fillColor="#00000000" android:pathData="M19,59L89,59"
android:strokeColor="#33FFFFFF" android:strokeWidth="0.8"/>
<path android:fillColor="#00000000" android:pathData="M19,69L89,69"
android:strokeColor="#33FFFFFF" android:strokeWidth="0.8"/>
<path android:fillColor="#00000000" android:pathData="M19,79L89,79"
android:strokeColor="#33FFFFFF" android:strokeWidth="0.8"/>
<path android:fillColor="#00000000" android:pathData="M29,19L29,89"
android:strokeColor="#33FFFFFF" android:strokeWidth="0.8"/>
<path android:fillColor="#00000000" android:pathData="M39,19L39,89"
android:strokeColor="#33FFFFFF" android:strokeWidth="0.8"/>
<path android:fillColor="#00000000" android:pathData="M49,19L49,89"
android:strokeColor="#33FFFFFF" android:strokeWidth="0.8"/>
<path android:fillColor="#00000000" android:pathData="M59,19L59,89"
android:strokeColor="#33FFFFFF" android:strokeWidth="0.8"/>
<path android:fillColor="#00000000" android:pathData="M69,19L69,89"
android:strokeColor="#33FFFFFF" android:strokeWidth="0.8"/>
<path android:fillColor="#00000000" android:pathData="M79,19L79,89"
android:strokeColor="#33FFFFFF" android:strokeWidth="0.8"/>
</vector>

View File

@ -0,0 +1,5 @@
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
<background android:drawable="@drawable/ic_launcher_background"/>
<foreground android:drawable="@mipmap/ic_launcher_foreground"/>
</adaptive-icon>

View File

@ -0,0 +1,5 @@
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
<background android:drawable="@drawable/ic_launcher_background"/>
<foreground android:drawable="@mipmap/ic_launcher_foreground"/>
</adaptive-icon>

View File

@ -1,4 +1,3 @@
<resources>
<string name="app_name" translatable="false">Face Mesh GPU</string>
<string name="no_camera_access" translatable="false">Please grant camera permissions.</string>
</resources>

View File

@ -1,29 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
package="com.google.mediapipe.apps.edgedetectiongpu">
<uses-sdk
android:minSdkVersion="21"
android:targetSdkVersion="27" />
<!-- For using the camera -->
<uses-permission android:name="android.permission.CAMERA" />
<uses-feature android:name="android.hardware.camera" />
<application
android:allowBackup="true"
android:label="@string/app_name"
android:supportsRtl="true"
android:theme="@style/AppTheme">
<activity
android:name=".MainActivity"
android:exported="true"
android:screenOrientation="portrait">
<intent-filter>
<action android:name="android.intent.action.MAIN" />
<category android:name="android.intent.category.LAUNCHER" />
</intent-filter>
</activity>
</application>
</manifest>

View File

@ -1,169 +0,0 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.mediapipe.apps.edgedetectiongpu;
import android.graphics.SurfaceTexture;
import android.os.Bundle;
import androidx.appcompat.app.AppCompatActivity;
import android.util.Size;
import android.view.SurfaceHolder;
import android.view.SurfaceView;
import android.view.View;
import android.view.ViewGroup;
import com.google.mediapipe.components.CameraHelper;
import com.google.mediapipe.components.CameraXPreviewHelper;
import com.google.mediapipe.components.ExternalTextureConverter;
import com.google.mediapipe.components.FrameProcessor;
import com.google.mediapipe.components.PermissionHelper;
import com.google.mediapipe.framework.AndroidAssetUtil;
import com.google.mediapipe.glutil.EglManager;
/** Bare-bones main activity. */
public class MainActivity extends AppCompatActivity {
private static final String BINARY_GRAPH_NAME = "edgedetectiongpu.binarypb";
private static final String INPUT_VIDEO_STREAM_NAME = "input_video";
private static final String OUTPUT_VIDEO_STREAM_NAME = "output_video";
private static final CameraHelper.CameraFacing CAMERA_FACING = CameraHelper.CameraFacing.BACK;
// Flips the camera-preview frames vertically before sending them into FrameProcessor to be
// processed in a MediaPipe graph, and flips the processed frames back when they are displayed.
// This is needed because OpenGL represents images assuming the image origin is at the bottom-left
// corner, whereas MediaPipe in general assumes the image origin is at top-left.
private static final boolean FLIP_FRAMES_VERTICALLY = true;
static {
// Load all native libraries needed by the app.
System.loadLibrary("mediapipe_jni");
System.loadLibrary("opencv_java3");
}
// {@link SurfaceTexture} where the camera-preview frames can be accessed.
private SurfaceTexture previewFrameTexture;
// Sends camera-preview frames into a MediaPipe graph for processing, and displays the processed
// frames onto a {@link Surface}.
private FrameProcessor processor;
// {@link SurfaceView} that displays the camera-preview frames processed by a MediaPipe graph.
private SurfaceView previewDisplayView;
// Creates and manages an {@link EGLContext}.
private EglManager eglManager;
// Converts the GL_TEXTURE_EXTERNAL_OES texture from Android camera into a regular texture to be
// consumed by {@link FrameProcessor} and the underlying MediaPipe graph.
private ExternalTextureConverter converter;
// Handles camera access via the {@link CameraX} Jetpack support library.
private CameraXPreviewHelper cameraHelper;
@Override
protected void onCreate(Bundle savedInstanceState) {
super.onCreate(savedInstanceState);
setContentView(R.layout.activity_main);
previewDisplayView = new SurfaceView(this);
setupPreviewDisplayView();
// Initialize asset manager so that MediaPipe native libraries can access the app assets, e.g.,
// binary graphs.
AndroidAssetUtil.initializeNativeAssetManager(this);
eglManager = new EglManager(null);
processor =
new FrameProcessor(
this,
eglManager.getNativeContext(),
BINARY_GRAPH_NAME,
INPUT_VIDEO_STREAM_NAME,
OUTPUT_VIDEO_STREAM_NAME);
processor.getVideoSurfaceOutput().setFlipY(FLIP_FRAMES_VERTICALLY);
PermissionHelper.checkAndRequestCameraPermissions(this);
}
@Override
protected void onResume() {
super.onResume();
converter = new ExternalTextureConverter(eglManager.getContext());
converter.setFlipY(FLIP_FRAMES_VERTICALLY);
converter.setConsumer(processor);
if (PermissionHelper.cameraPermissionsGranted(this)) {
startCamera();
}
}
@Override
protected void onPause() {
super.onPause();
converter.close();
}
@Override
public void onRequestPermissionsResult(
int requestCode, String[] permissions, int[] grantResults) {
super.onRequestPermissionsResult(requestCode, permissions, grantResults);
PermissionHelper.onRequestPermissionsResult(requestCode, permissions, grantResults);
}
public void startCamera() {
cameraHelper = new CameraXPreviewHelper();
cameraHelper.setOnCameraStartedListener(
surfaceTexture -> {
previewFrameTexture = surfaceTexture;
// Make the display view visible to start showing the preview. This triggers the
// SurfaceHolder.Callback added to (the holder of) previewDisplayView.
previewDisplayView.setVisibility(View.VISIBLE);
});
cameraHelper.startCamera(this, CAMERA_FACING, /*surfaceTexture=*/ null);
}
private void setupPreviewDisplayView() {
previewDisplayView.setVisibility(View.GONE);
ViewGroup viewGroup = findViewById(R.id.preview_display_layout);
viewGroup.addView(previewDisplayView);
previewDisplayView
.getHolder()
.addCallback(
new SurfaceHolder.Callback() {
@Override
public void surfaceCreated(SurfaceHolder holder) {
processor.getVideoSurfaceOutput().setSurface(holder.getSurface());
}
@Override
public void surfaceChanged(SurfaceHolder holder, int format, int width, int height) {
// (Re-)Compute the ideal size of the camera-preview display (the area that the
// camera-preview frames get rendered onto, potentially with scaling and rotation)
// based on the size of the SurfaceView that contains the display.
Size viewSize = new Size(width, height);
Size displaySize = cameraHelper.computeDisplaySizeFromViewSize(viewSize);
boolean isCameraRotated = cameraHelper.isCameraRotated();
// Connect the converter to the camera-preview frames as its input (via
// previewFrameTexture), and configure the output width and height as the computed
// display size.
converter.setSurfaceTextureAndAttachToGLContext(
previewFrameTexture,
isCameraRotated ? displaySize.getHeight() : displaySize.getWidth(),
isCameraRotated ? displaySize.getWidth() : displaySize.getHeight());
}
@Override
public void surfaceDestroyed(SurfaceHolder holder) {
processor.getVideoSurfaceOutput().setSurface(null);
}
});
}
}

View File

@ -1,4 +0,0 @@
<resources>
<string name="app_name" translatable="false">Edge Detection GPU</string>
<string name="no_camera_access" translatable="false">Please grant camera permissions.</string>
</resources>

View File

@ -1,33 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
package="com.google.mediapipe.apps.facedetectioncpu">
<uses-sdk
android:minSdkVersion="21"
android:targetSdkVersion="27" />
<!-- For using the camera -->
<uses-permission android:name="android.permission.CAMERA" />
<uses-feature android:name="android.hardware.camera" />
<uses-feature android:name="android.hardware.camera.autofocus" />
<!-- For MediaPipe -->
<uses-feature android:glEsVersion="0x00020000" android:required="true" />
<application
android:allowBackup="true"
android:label="@string/app_name"
android:supportsRtl="true"
android:theme="@style/AppTheme">
<activity
android:name=".MainActivity"
android:exported="true"
android:screenOrientation="portrait">
<intent-filter>
<action android:name="android.intent.action.MAIN" />
<category android:name="android.intent.category.LAUNCHER" />
</intent-filter>
</activity>
</application>
</manifest>

View File

@ -32,51 +32,28 @@ cc_library(
alwayslink = 1,
)
# Maps the binary graph to an alias (e.g., the app name) for convenience so that the alias can be
# easily incorporated into the app via, for example,
# MainActivity.BINARY_GRAPH_NAME = "appname.binarypb".
genrule(
name = "binary_graph",
srcs = ["//mediapipe/graphs/face_detection:mobile_cpu_binary_graph"],
outs = ["facedetectioncpu.binarypb"],
cmd = "cp $< $@",
)
android_library(
name = "mediapipe_lib",
android_binary(
name = "facedetectioncpu",
srcs = glob(["*.java"]),
assets = [
":binary_graph",
"//mediapipe/graphs/face_detection:mobile_cpu.binarypb",
"//mediapipe/models:face_detection_front.tflite",
"//mediapipe/models:face_detection_front_labelmap.txt",
],
assets_dir = "",
manifest = "AndroidManifest.xml",
resource_files = glob(["res/**"]),
deps = [
":mediapipe_jni_lib",
"//mediapipe/java/com/google/mediapipe/components:android_camerax_helper",
"//mediapipe/java/com/google/mediapipe/components:android_components",
"//mediapipe/java/com/google/mediapipe/framework:android_framework",
"//mediapipe/java/com/google/mediapipe/glutil",
"//third_party:androidx_appcompat",
"//third_party:androidx_constraint_layout",
"//third_party:androidx_legacy_support_v4",
"//third_party:androidx_recyclerview",
"//third_party:opencv",
"@maven//:androidx_concurrent_concurrent_futures",
"@maven//:androidx_lifecycle_lifecycle_common",
"@maven//:com_google_code_findbugs_jsr305",
"@maven//:com_google_guava_guava",
],
)
android_binary(
name = "facedetectioncpu",
manifest = "AndroidManifest.xml",
manifest_values = {"applicationId": "com.google.mediapipe.apps.facedetectioncpu"},
manifest = "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:AndroidManifest.xml",
manifest_values = {
"applicationId": "com.google.mediapipe.apps.facedetectioncpu",
"appName": "Face Detection (CPU)",
"mainActivity": "com.google.mediapipe.apps.basic.MainActivity",
"cameraFacingFront": "True",
"binaryGraphName": "mobile_cpu.binarypb",
"inputVideoStreamName": "input_video",
"outputVideoStreamName": "output_video",
},
multidex = "native",
deps = [
":mediapipe_lib",
":mediapipe_jni_lib",
"//mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:basic_lib",
],
)

View File

@ -1,170 +0,0 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.mediapipe.apps.facedetectioncpu;
import android.graphics.SurfaceTexture;
import android.os.Bundle;
import androidx.appcompat.app.AppCompatActivity;
import android.util.Size;
import android.view.SurfaceHolder;
import android.view.SurfaceView;
import android.view.View;
import android.view.ViewGroup;
import com.google.mediapipe.components.CameraHelper;
import com.google.mediapipe.components.CameraXPreviewHelper;
import com.google.mediapipe.components.ExternalTextureConverter;
import com.google.mediapipe.components.FrameProcessor;
import com.google.mediapipe.components.PermissionHelper;
import com.google.mediapipe.framework.AndroidAssetUtil;
import com.google.mediapipe.glutil.EglManager;
/** Main activity of MediaPipe example apps. */
public class MainActivity extends AppCompatActivity {
private static final String TAG = "MainActivity";
private static final String BINARY_GRAPH_NAME = "facedetectioncpu.binarypb";
private static final String INPUT_VIDEO_STREAM_NAME = "input_video";
private static final String OUTPUT_VIDEO_STREAM_NAME = "output_video";
private static final CameraHelper.CameraFacing CAMERA_FACING = CameraHelper.CameraFacing.FRONT;
// Flips the camera-preview frames vertically before sending them into FrameProcessor to be
// processed in a MediaPipe graph, and flips the processed frames back when they are displayed.
// This is needed because OpenGL represents images assuming the image origin is at the bottom-left
// corner, whereas MediaPipe in general assumes the image origin is at top-left.
private static final boolean FLIP_FRAMES_VERTICALLY = true;
static {
// Load all native libraries needed by the app.
System.loadLibrary("mediapipe_jni");
System.loadLibrary("opencv_java3");
}
// {@link SurfaceTexture} where the camera-preview frames can be accessed.
private SurfaceTexture previewFrameTexture;
// {@link SurfaceView} that displays the camera-preview frames processed by a MediaPipe graph.
private SurfaceView previewDisplayView;
// Creates and manages an {@link EGLContext}.
private EglManager eglManager;
// Sends camera-preview frames into a MediaPipe graph for processing, and displays the processed
// frames onto a {@link Surface}.
private FrameProcessor processor;
// Converts the GL_TEXTURE_EXTERNAL_OES texture from Android camera into a regular texture to be
// consumed by {@link FrameProcessor} and the underlying MediaPipe graph.
private ExternalTextureConverter converter;
// Handles camera access via the {@link CameraX} Jetpack support library.
private CameraXPreviewHelper cameraHelper;
@Override
protected void onCreate(Bundle savedInstanceState) {
super.onCreate(savedInstanceState);
setContentView(R.layout.activity_main);
previewDisplayView = new SurfaceView(this);
setupPreviewDisplayView();
// Initialize asset manager so that MediaPipe native libraries can access the app assets, e.g.,
// binary graphs.
AndroidAssetUtil.initializeNativeAssetManager(this);
eglManager = new EglManager(null);
processor =
new FrameProcessor(
this,
eglManager.getNativeContext(),
BINARY_GRAPH_NAME,
INPUT_VIDEO_STREAM_NAME,
OUTPUT_VIDEO_STREAM_NAME);
processor.getVideoSurfaceOutput().setFlipY(FLIP_FRAMES_VERTICALLY);
PermissionHelper.checkAndRequestCameraPermissions(this);
}
@Override
protected void onResume() {
super.onResume();
converter = new ExternalTextureConverter(eglManager.getContext());
converter.setFlipY(FLIP_FRAMES_VERTICALLY);
converter.setConsumer(processor);
if (PermissionHelper.cameraPermissionsGranted(this)) {
startCamera();
}
}
@Override
protected void onPause() {
super.onPause();
converter.close();
}
@Override
public void onRequestPermissionsResult(
int requestCode, String[] permissions, int[] grantResults) {
super.onRequestPermissionsResult(requestCode, permissions, grantResults);
PermissionHelper.onRequestPermissionsResult(requestCode, permissions, grantResults);
}
private void setupPreviewDisplayView() {
previewDisplayView.setVisibility(View.GONE);
ViewGroup viewGroup = findViewById(R.id.preview_display_layout);
viewGroup.addView(previewDisplayView);
previewDisplayView
.getHolder()
.addCallback(
new SurfaceHolder.Callback() {
@Override
public void surfaceCreated(SurfaceHolder holder) {
processor.getVideoSurfaceOutput().setSurface(holder.getSurface());
}
@Override
public void surfaceChanged(SurfaceHolder holder, int format, int width, int height) {
// (Re-)Compute the ideal size of the camera-preview display (the area that the
// camera-preview frames get rendered onto, potentially with scaling and rotation)
// based on the size of the SurfaceView that contains the display.
Size viewSize = new Size(width, height);
Size displaySize = cameraHelper.computeDisplaySizeFromViewSize(viewSize);
boolean isCameraRotated = cameraHelper.isCameraRotated();
// Connect the converter to the camera-preview frames as its input (via
// previewFrameTexture), and configure the output width and height as the computed
// display size.
converter.setSurfaceTextureAndAttachToGLContext(
previewFrameTexture,
isCameraRotated ? displaySize.getHeight() : displaySize.getWidth(),
isCameraRotated ? displaySize.getWidth() : displaySize.getHeight());
}
@Override
public void surfaceDestroyed(SurfaceHolder holder) {
processor.getVideoSurfaceOutput().setSurface(null);
}
});
}
private void startCamera() {
cameraHelper = new CameraXPreviewHelper();
cameraHelper.setOnCameraStartedListener(
surfaceTexture -> {
previewFrameTexture = surfaceTexture;
// Make the display view visible to start showing the preview. This triggers the
// SurfaceHolder.Callback added to (the holder of) previewDisplayView.
previewDisplayView.setVisibility(View.VISIBLE);
});
cameraHelper.startCamera(this, CAMERA_FACING, /*surfaceTexture=*/ null);
}
}

View File

@ -1,20 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<androidx.constraintlayout.widget.ConstraintLayout xmlns:android="http://schemas.android.com/apk/res/android"
xmlns:app="http://schemas.android.com/apk/res-auto"
xmlns:tools="http://schemas.android.com/tools"
android:layout_width="match_parent"
android:layout_height="match_parent">
<FrameLayout
android:id="@+id/preview_display_layout"
android:layout_width="fill_parent"
android:layout_height="fill_parent"
android:layout_weight="1">
<TextView
android:id="@+id/no_camera_access_view"
android:layout_height="fill_parent"
android:layout_width="fill_parent"
android:gravity="center"
android:text="@string/no_camera_access" />
</FrameLayout>
</androidx.constraintlayout.widget.ConstraintLayout>


@ -1,6 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<resources>
<color name="colorPrimary">#008577</color>
<color name="colorPrimaryDark">#00574B</color>
<color name="colorAccent">#D81B60</color>
</resources>


@ -1,4 +0,0 @@
<resources>
<string name="app_name" translatable="false">Face Detection CPU</string>
<string name="no_camera_access" translatable="false">Please grant camera permissions.</string>
</resources>


@ -1,11 +0,0 @@
<resources>
<!-- Base application theme. -->
<style name="AppTheme" parent="Theme.AppCompat.Light.DarkActionBar">
<!-- Customize your theme here. -->
<item name="colorPrimary">@color/colorPrimary</item>
<item name="colorPrimaryDark">@color/colorPrimaryDark</item>
<item name="colorAccent">@color/colorAccent</item>
</style>
</resources>


@ -32,51 +32,28 @@ cc_library(
alwayslink = 1,
)
# Maps the binary graph to an alias (e.g., the app name) for convenience so that the alias can be
# easily incorporated into the app via, for example,
# MainActivity.BINARY_GRAPH_NAME = "appname.binarypb".
genrule(
name = "binary_graph",
srcs = ["//mediapipe/graphs/face_detection:mobile_gpu_binary_graph"],
outs = ["facedetectiongpu.binarypb"],
cmd = "cp $< $@",
)
android_library(
name = "mediapipe_lib",
android_binary(
name = "facedetectiongpu",
srcs = glob(["*.java"]),
assets = [
":binary_graph",
"//mediapipe/graphs/face_detection:mobile_gpu.binarypb",
"//mediapipe/models:face_detection_front.tflite",
"//mediapipe/models:face_detection_front_labelmap.txt",
],
assets_dir = "",
manifest = "AndroidManifest.xml",
resource_files = glob(["res/**"]),
deps = [
":mediapipe_jni_lib",
"//mediapipe/java/com/google/mediapipe/components:android_camerax_helper",
"//mediapipe/java/com/google/mediapipe/components:android_components",
"//mediapipe/java/com/google/mediapipe/framework:android_framework",
"//mediapipe/java/com/google/mediapipe/glutil",
"//third_party:androidx_appcompat",
"//third_party:androidx_constraint_layout",
"//third_party:androidx_legacy_support_v4",
"//third_party:androidx_recyclerview",
"//third_party:opencv",
"@maven//:androidx_concurrent_concurrent_futures",
"@maven//:androidx_lifecycle_lifecycle_common",
"@maven//:com_google_code_findbugs_jsr305",
"@maven//:com_google_guava_guava",
],
)
android_binary(
name = "facedetectiongpu",
manifest = "AndroidManifest.xml",
manifest_values = {"applicationId": "com.google.mediapipe.apps.facedetectiongpu"},
manifest = "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:AndroidManifest.xml",
manifest_values = {
"applicationId": "com.google.mediapipe.apps.facedetectiongpu",
"appName": "Face Detection",
"mainActivity": "com.google.mediapipe.apps.basic.MainActivity",
"cameraFacingFront": "True",
"binaryGraphName": "mobile_gpu.binarypb",
"inputVideoStreamName": "input_video",
"outputVideoStreamName": "output_video",
},
multidex = "native",
deps = [
":mediapipe_lib",
":mediapipe_jni_lib",
"//mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:basic_lib",
],
)
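The rewritten android_binary above no longer bundles its own MainActivity; it points manifest at the shared basic example and passes the per-app settings (graph name, stream names, camera facing) through manifest_values. A plausible way for the shared activity to pick these values up at runtime is via <meta-data> entries in the merged manifest; the following lookup is a hedged sketch of that idea, not code copied from the shared app (the class name and the assumption that the placeholders end up in meta-data are ours; the key names come from the manifest_values above):

import android.content.Context;
import android.content.pm.ApplicationInfo;
import android.content.pm.PackageManager;

/** Sketch: resolve a manifest placeholder such as "binaryGraphName" from <meta-data>. */
final class ManifestMetaData {
  private ManifestMetaData() {}

  static String getString(Context context, String key) {
    try {
      ApplicationInfo info =
          context
              .getPackageManager()
              .getApplicationInfo(context.getPackageName(), PackageManager.GET_META_DATA);
      return info.metaData != null ? info.metaData.getString(key) : null;
    } catch (PackageManager.NameNotFoundException e) {
      // The app's own package is always resolvable, so this should not happen.
      throw new AssertionError(e);
    }
  }
}

Used from an activity, ManifestMetaData.getString(this, "binaryGraphName") would return "mobile_gpu.binarypb" for this target, and the same pattern covers inputVideoStreamName, outputVideoStreamName, and cameraFacingFront.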


@ -1,170 +0,0 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.mediapipe.apps.facedetectiongpu;
import android.graphics.SurfaceTexture;
import android.os.Bundle;
import androidx.appcompat.app.AppCompatActivity;
import android.util.Size;
import android.view.SurfaceHolder;
import android.view.SurfaceView;
import android.view.View;
import android.view.ViewGroup;
import com.google.mediapipe.components.CameraHelper;
import com.google.mediapipe.components.CameraXPreviewHelper;
import com.google.mediapipe.components.ExternalTextureConverter;
import com.google.mediapipe.components.FrameProcessor;
import com.google.mediapipe.components.PermissionHelper;
import com.google.mediapipe.framework.AndroidAssetUtil;
import com.google.mediapipe.glutil.EglManager;
/** Main activity of MediaPipe example apps. */
public class MainActivity extends AppCompatActivity {
private static final String TAG = "MainActivity";
private static final String BINARY_GRAPH_NAME = "facedetectiongpu.binarypb";
private static final String INPUT_VIDEO_STREAM_NAME = "input_video";
private static final String OUTPUT_VIDEO_STREAM_NAME = "output_video";
private static final CameraHelper.CameraFacing CAMERA_FACING = CameraHelper.CameraFacing.FRONT;
// Flips the camera-preview frames vertically before sending them into FrameProcessor to be
// processed in a MediaPipe graph, and flips the processed frames back when they are displayed.
// This is needed because OpenGL represents images assuming the image origin is at the bottom-left
// corner, whereas MediaPipe in general assumes the image origin is at top-left.
private static final boolean FLIP_FRAMES_VERTICALLY = true;
static {
// Load all native libraries needed by the app.
System.loadLibrary("mediapipe_jni");
System.loadLibrary("opencv_java3");
}
// {@link SurfaceTexture} where the camera-preview frames can be accessed.
private SurfaceTexture previewFrameTexture;
// {@link SurfaceView} that displays the camera-preview frames processed by a MediaPipe graph.
private SurfaceView previewDisplayView;
// Creates and manages an {@link EGLContext}.
private EglManager eglManager;
// Sends camera-preview frames into a MediaPipe graph for processing, and displays the processed
// frames onto a {@link Surface}.
private FrameProcessor processor;
// Converts the GL_TEXTURE_EXTERNAL_OES texture from Android camera into a regular texture to be
// consumed by {@link FrameProcessor} and the underlying MediaPipe graph.
private ExternalTextureConverter converter;
// Handles camera access via the {@link CameraX} Jetpack support library.
private CameraXPreviewHelper cameraHelper;
@Override
protected void onCreate(Bundle savedInstanceState) {
super.onCreate(savedInstanceState);
setContentView(R.layout.activity_main);
previewDisplayView = new SurfaceView(this);
setupPreviewDisplayView();
// Initialize asset manager so that MediaPipe native libraries can access the app assets, e.g.,
// binary graphs.
AndroidAssetUtil.initializeNativeAssetManager(this);
eglManager = new EglManager(null);
processor =
new FrameProcessor(
this,
eglManager.getNativeContext(),
BINARY_GRAPH_NAME,
INPUT_VIDEO_STREAM_NAME,
OUTPUT_VIDEO_STREAM_NAME);
processor.getVideoSurfaceOutput().setFlipY(FLIP_FRAMES_VERTICALLY);
PermissionHelper.checkAndRequestCameraPermissions(this);
}
@Override
protected void onResume() {
super.onResume();
converter = new ExternalTextureConverter(eglManager.getContext());
converter.setFlipY(FLIP_FRAMES_VERTICALLY);
converter.setConsumer(processor);
if (PermissionHelper.cameraPermissionsGranted(this)) {
startCamera();
}
}
@Override
protected void onPause() {
super.onPause();
converter.close();
}
@Override
public void onRequestPermissionsResult(
int requestCode, String[] permissions, int[] grantResults) {
super.onRequestPermissionsResult(requestCode, permissions, grantResults);
PermissionHelper.onRequestPermissionsResult(requestCode, permissions, grantResults);
}
private void setupPreviewDisplayView() {
previewDisplayView.setVisibility(View.GONE);
ViewGroup viewGroup = findViewById(R.id.preview_display_layout);
viewGroup.addView(previewDisplayView);
previewDisplayView
.getHolder()
.addCallback(
new SurfaceHolder.Callback() {
@Override
public void surfaceCreated(SurfaceHolder holder) {
processor.getVideoSurfaceOutput().setSurface(holder.getSurface());
}
@Override
public void surfaceChanged(SurfaceHolder holder, int format, int width, int height) {
// (Re-)Compute the ideal size of the camera-preview display (the area that the
// camera-preview frames get rendered onto, potentially with scaling and rotation)
// based on the size of the SurfaceView that contains the display.
Size viewSize = new Size(width, height);
Size displaySize = cameraHelper.computeDisplaySizeFromViewSize(viewSize);
boolean isCameraRotated = cameraHelper.isCameraRotated();
// Connect the converter to the camera-preview frames as its input (via
// previewFrameTexture), and configure the output width and height as the computed
// display size.
converter.setSurfaceTextureAndAttachToGLContext(
previewFrameTexture,
isCameraRotated ? displaySize.getHeight() : displaySize.getWidth(),
isCameraRotated ? displaySize.getWidth() : displaySize.getHeight());
}
@Override
public void surfaceDestroyed(SurfaceHolder holder) {
processor.getVideoSurfaceOutput().setSurface(null);
}
});
}
private void startCamera() {
cameraHelper = new CameraXPreviewHelper();
cameraHelper.setOnCameraStartedListener(
surfaceTexture -> {
previewFrameTexture = surfaceTexture;
// Make the display view visible to start showing the preview. This triggers the
// SurfaceHolder.Callback added to (the holder of) previewDisplayView.
previewDisplayView.setVisibility(View.VISIBLE);
});
cameraHelper.startCamera(this, CAMERA_FACING, /*surfaceTexture=*/ null);
}
}
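The FLIP_FRAMES_VERTICALLY comment above captures the one coordinate-system fact worth remembering: OpenGL puts the texture origin at the bottom-left, while MediaPipe assumes a top-left image origin, so the converter flips frames on the way into the graph and the surface output flips them back for display. As a toy illustration only (not code from the example app), the flip amounts to mirroring the normalized V coordinate:

/** Toy illustration of the vertical flip between OpenGL and image coordinates. */
final class FlipYDemo {
  /** Maps a bottom-left-origin V coordinate to a top-left-origin one (the mapping is its own inverse). */
  static float flipV(float v) {
    return 1.0f - v;
  }

  public static void main(String[] args) {
    // A point 10% up from the bottom in OpenGL texture coordinates sits 90%
    // of the way down from the top in MediaPipe's image convention.
    System.out.println(flipV(0.1f)); // prints 0.9
  }
}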


@ -1,20 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<androidx.constraintlayout.widget.ConstraintLayout xmlns:android="http://schemas.android.com/apk/res/android"
xmlns:app="http://schemas.android.com/apk/res-auto"
xmlns:tools="http://schemas.android.com/tools"
android:layout_width="match_parent"
android:layout_height="match_parent">
<FrameLayout
android:id="@+id/preview_display_layout"
android:layout_width="fill_parent"
android:layout_height="fill_parent"
android:layout_weight="1">
<TextView
android:id="@+id/no_camera_access_view"
android:layout_height="fill_parent"
android:layout_width="fill_parent"
android:gravity="center"
android:text="@string/no_camera_access" />
</FrameLayout>
</androidx.constraintlayout.widget.ConstraintLayout>


@ -1,6 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<resources>
<color name="colorPrimary">#008577</color>
<color name="colorPrimaryDark">#00574B</color>
<color name="colorAccent">#D81B60</color>
</resources>


@ -1,4 +0,0 @@
<resources>
<string name="app_name" translatable="false">Face Detection GPU</string>
<string name="no_camera_access" translatable="false">Please grant camera permissions.</string>
</resources>


@ -1,11 +0,0 @@
<resources>
<!-- Base application theme. -->
<style name="AppTheme" parent="Theme.AppCompat.Light.DarkActionBar">
<!-- Customize your theme here. -->
<item name="colorPrimary">@color/colorPrimary</item>
<item name="colorPrimaryDark">@color/colorPrimaryDark</item>
<item name="colorAccent">@color/colorAccent</item>
</style>
</resources>

Some files were not shown because too many files have changed in this diff.