diff --git a/.bazelrc b/.bazelrc index da90feaeb..5a586f3ca 100644 --- a/.bazelrc +++ b/.bazelrc @@ -1,20 +1,30 @@ # The bazelrc file for MediaPipe OSS. +# Tensorflow needs remote repo +common --experimental_repo_remote_exec + # Basic build settings build --jobs 128 build --define='absl=1' -build --cxxopt='-std=c++14' -build --copt='-Wno-sign-compare' -build --copt='-Wno-unused-function' -build --copt='-Wno-uninitialized' -build --copt='-Wno-unused-result' -build --copt='-Wno-comment' -build --copt='-Wno-return-type' -build --copt='-Wno-unused-local-typedefs' -build --copt='-Wno-ignored-attributes' +build --enable_platform_specific_config -# Tensorflow needs remote repo -build --experimental_repo_remote_exec +# Linux +build:linux --cxxopt=-std=c++14 +build:linux --host_cxxopt=-std=c++14 +build:linux --copt=-w + +# windows +build:windows --cxxopt=/std:c++14 +build:windows --host_cxxopt=/std:c++14 +build:windows --copt=/w +# For using M_* math constants on Windows with MSVC. +build:windows --copt=/D_USE_MATH_DEFINES +build:windows --host_copt=/D_USE_MATH_DEFINES + +# macOS +build:macos --cxxopt=-std=c++14 +build:macos --host_cxxopt=-std=c++14 +build:macos --copt=-w # Sets the default Apple platform to macOS. build --apple_platform_type=macos diff --git a/Dockerfile b/Dockerfile index 9bb2ad1e8..6267a5f00 100644 --- a/Dockerfile +++ b/Dockerfile @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -FROM ubuntu:latest +FROM ubuntu:18.04 MAINTAINER @@ -25,11 +25,12 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ build-essential \ ca-certificates \ curl \ + ffmpeg \ git \ wget \ unzip \ - python \ - python-pip \ + python3-dev \ + python3-opencv \ python3-pip \ libopencv-core-dev \ libopencv-highgui-dev \ @@ -43,9 +44,14 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ apt-get clean && \ rm -rf /var/lib/apt/lists/* -RUN pip install --upgrade setuptools -RUN pip install future -RUN pip3 install six +RUN pip3 install --upgrade setuptools +RUN pip3 install wheel +RUN pip3 install future +RUN pip3 install six==1.14.0 +RUN pip3 install tensorflow==1.14.0 +RUN pip3 install tf_slim + +RUN ln -s /usr/bin/python3 /usr/bin/python # Install bazel ARG BAZEL_VERSION=2.0.0 diff --git a/README.md b/README.md index 5cc0c2e97..737d7a0cc 100644 --- a/README.md +++ b/README.md @@ -76,7 +76,9 @@ Search MediaPipe Github repository using [Google Open Source code search](https: * [Google Industry Workshop at ICIP 2019](http://2019.ieeeicip.org/?action=page4&id=14#Google) [Presentation](https://docs.google.com/presentation/d/e/2PACX-1vRIBBbO_LO9v2YmvbHHEt1cwyqH6EjDxiILjuT0foXy1E7g6uyh4CesB2DkkEwlRDO9_lWfuKMZx98T/pub?start=false&loop=false&delayms=3000&slide=id.g556cc1a659_0_5) on Sept 24 in Taipei, Taiwan * [Open sourced at CVPR 2019](https://sites.google.com/corp/view/perception-cv4arvr/mediapipe) on June 17~20 in Long Beach, CA -## Community forum +## Community +* [Awesome MediaPipe: curation of code related to MediaPipe](https://mediapipe.org) +* [Slack community for MediaPipe users](https://mediapipe.slack.com) * [Discuss](https://groups.google.com/forum/#!forum/mediapipe) - General community discussion around MediaPipe ## Alpha Disclaimer diff --git a/WORKSPACE b/WORKSPACE index 1901344ba..8b148fd4a 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -54,17 +54,15 @@ http_archive( # gflags needed by glog http_archive( name = "com_github_gflags_gflags", - sha256 = 
"6e16c8bc91b1310a44f3965e616383dbda48f83e8c1eaa2370a215057b00cabe", - strip_prefix = "gflags-77592648e3f3be87d6c7123eb81cbad75f9aef5a", - urls = [ - "https://mirror.bazel.build/github.com/gflags/gflags/archive/77592648e3f3be87d6c7123eb81cbad75f9aef5a.tar.gz", - "https://github.com/gflags/gflags/archive/77592648e3f3be87d6c7123eb81cbad75f9aef5a.tar.gz", - ], + strip_prefix = "gflags-2.2.2", + sha256 = "19713a36c9f32b33df59d1c79b4958434cb005b5b47dc5400a7a4b078111d9b5", + url = "https://github.com/gflags/gflags/archive/v2.2.2.zip", ) -# glog +# glog v0.3.5 +# TODO: Migrate MediaPipe to use com_github_glog_glog on all platforms. http_archive( - name = "com_github_glog_glog", + name = "com_github_glog_glog_v_0_3_5", url = "https://github.com/google/glog/archive/v0.3.5.zip", sha256 = "267103f8a1e9578978aa1dc256001e6529ef593e5aea38193d31c2872ee025e8", strip_prefix = "glog-0.3.5", @@ -77,6 +75,16 @@ http_archive( ], ) +# 2020-02-16 +http_archive( + name = "com_github_glog_glog", + strip_prefix = "glog-3ba8976592274bc1f907c402ce22558011d6fc5e", + sha256 = "feca3c7e29a693cab7887409756d89d342d4a992d54d7c5599bebeae8f7b50be", + urls = [ + "https://github.com/google/glog/archive/3ba8976592274bc1f907c402ce22558011d6fc5e.zip", + ], +) + # easyexif http_archive( name = "easyexif", @@ -101,51 +109,30 @@ http_archive( urls = ["https://github.com/protocolbuffers/protobuf/archive/v3.11.4.tar.gz"], ) +http_archive( + name = "com_google_protobuf", + sha256 = "a79d19dcdf9139fa4b81206e318e33d245c4c9da1ffed21c87288ed4380426f9", + strip_prefix = "protobuf-3.11.4", + urls = ["https://github.com/protocolbuffers/protobuf/archive/v3.11.4.tar.gz"], + patches = [ + "@//third_party:com_google_protobuf_fixes.diff" + ], + patch_args = [ + "-p1", + ], +) + http_archive( name = "com_google_audio_tools", strip_prefix = "multichannel-audio-tools-master", urls = ["https://github.com/google/multichannel-audio-tools/archive/master.zip"], ) -# Needed by TensorFlow -http_archive( - name = "io_bazel_rules_closure", - sha256 = "e0a111000aeed2051f29fcc7a3f83be3ad8c6c93c186e64beb1ad313f0c7f9f9", - strip_prefix = "rules_closure-cf1e44edb908e9616030cc83d085989b8e6cd6df", - urls = [ - "http://mirror.tensorflow.org/github.com/bazelbuild/rules_closure/archive/cf1e44edb908e9616030cc83d085989b8e6cd6df.tar.gz", - "https://github.com/bazelbuild/rules_closure/archive/cf1e44edb908e9616030cc83d085989b8e6cd6df.tar.gz", # 2019-04-04 - ], -) - -# 2020-04-01 -_TENSORFLOW_GIT_COMMIT = "805e47cea96c7e8c6fccf494d40a2392dc99fdd8" -_TENSORFLOW_SHA256= "9ee3ae604c2e1345ac60345becee6d659364721513f9cb8652eb2e7138320ca5" -http_archive( - name = "org_tensorflow", - urls = [ - "https://mirror.bazel.build/github.com/tensorflow/tensorflow/archive/%s.tar.gz" % _TENSORFLOW_GIT_COMMIT, - "https://github.com/tensorflow/tensorflow/archive/%s.tar.gz" % _TENSORFLOW_GIT_COMMIT, - ], - patches = [ - "@//third_party:org_tensorflow_compatibility_fixes.diff", - "@//third_party:org_tensorflow_protobuf_updates.diff", - ], - patch_args = [ - "-p1", - ], - strip_prefix = "tensorflow-%s" % _TENSORFLOW_GIT_COMMIT, - sha256 = _TENSORFLOW_SHA256, -) - -load("@org_tensorflow//tensorflow:workspace.bzl", "tf_workspace") -tf_workspace(tf_repo_name = "org_tensorflow") - http_archive( name = "ceres_solver", url = "https://github.com/ceres-solver/ceres-solver/archive/1.14.0.zip", patches = [ - "@//third_party:ceres_solver_9bf9588988236279e1262f75d7f4d85711dfa172.diff" + "@//third_party:ceres_solver_compatibility_fixes.diff" ], patch_args = [ "-p1", @@ -178,6 +165,12 @@ new_local_repository( 
path = "/usr", ) +new_local_repository( + name = "windows_opencv", + build_file = "@//third_party:opencv_windows.BUILD", + path = "C:\\opencv\\build", +) + http_archive( name = "android_opencv", build_file = "@//third_party:opencv_android.BUILD", @@ -236,6 +229,15 @@ load( swift_rules_dependencies() +http_archive( + name = "build_bazel_apple_support", + sha256 = "122ebf7fe7d1c8e938af6aeaee0efe788a3a2449ece5a8d6a428cb18d6f88033", + urls = [ + "https://storage.googleapis.com/mirror.tensorflow.org/github.com/bazelbuild/apple_support/releases/download/0.7.1/apple_support.0.7.1.tar.gz", + "https://github.com/bazelbuild/apple_support/releases/download/0.7.1/apple_support.0.7.1.tar.gz", + ], +) + load( "@build_bazel_apple_support//lib:repositories.bzl", "apple_support_dependencies", @@ -299,3 +301,37 @@ maven_install( fetch_sources = True, version_conflict_policy = "pinned", ) + +# Needed by TensorFlow +http_archive( + name = "io_bazel_rules_closure", + sha256 = "e0a111000aeed2051f29fcc7a3f83be3ad8c6c93c186e64beb1ad313f0c7f9f9", + strip_prefix = "rules_closure-cf1e44edb908e9616030cc83d085989b8e6cd6df", + urls = [ + "http://mirror.tensorflow.org/github.com/bazelbuild/rules_closure/archive/cf1e44edb908e9616030cc83d085989b8e6cd6df.tar.gz", + "https://github.com/bazelbuild/rules_closure/archive/cf1e44edb908e9616030cc83d085989b8e6cd6df.tar.gz", # 2019-04-04 + ], +) + +#Tensorflow repo should always go after the other external dependencies. +# 2020-05-11 +_TENSORFLOW_GIT_COMMIT = "7c09d15f9fcc14343343c247ebf5b8e0afe3e4aa" +_TENSORFLOW_SHA256= "673d00cbd2676ae43df1993e0d28c10b5ffbe96d9e2ab29f88a77b43c0211299" +http_archive( + name = "org_tensorflow", + urls = [ + "https://mirror.bazel.build/github.com/tensorflow/tensorflow/archive/%s.tar.gz" % _TENSORFLOW_GIT_COMMIT, + "https://github.com/tensorflow/tensorflow/archive/%s.tar.gz" % _TENSORFLOW_GIT_COMMIT, + ], + patches = [ + "@//third_party:org_tensorflow_compatibility_fixes.diff", + ], + patch_args = [ + "-p1", + ], + strip_prefix = "tensorflow-%s" % _TENSORFLOW_GIT_COMMIT, + sha256 = _TENSORFLOW_SHA256, +) + +load("@org_tensorflow//tensorflow:workspace.bzl", "tf_workspace") +tf_workspace(tf_repo_name = "org_tensorflow") diff --git a/mediapipe/BUILD b/mediapipe/BUILD index 82bb3c83e..1171ea6f0 100644 --- a/mediapipe/BUILD +++ b/mediapipe/BUILD @@ -134,6 +134,11 @@ config_setting( ] ] +config_setting( + name = "windows", + values = {"cpu": "x64_windows"}, +) + exports_files( ["provisioning_profile.mobileprovision"], visibility = ["//visibility:public"], diff --git a/mediapipe/calculators/core/BUILD b/mediapipe/calculators/core/BUILD index 367194f5a..6196bed5b 100644 --- a/mediapipe/calculators/core/BUILD +++ b/mediapipe/calculators/core/BUILD @@ -500,6 +500,7 @@ cc_library( "//mediapipe/framework/port:integral_types", "//mediapipe/framework/port:logging", "//mediapipe/framework/port:status", + "//mediapipe/framework/tool:options_util", ], alwayslink = 1, ) diff --git a/mediapipe/calculators/core/packet_thinner_calculator.cc b/mediapipe/calculators/core/packet_thinner_calculator.cc index 134828444..417fafa31 100644 --- a/mediapipe/calculators/core/packet_thinner_calculator.cc +++ b/mediapipe/calculators/core/packet_thinner_calculator.cc @@ -24,11 +24,13 @@ #include "mediapipe/framework/port/integral_types.h" #include "mediapipe/framework/port/logging.h" #include "mediapipe/framework/port/status.h" +#include "mediapipe/framework/tool/options_util.h" namespace mediapipe { namespace { const double kTimebaseUs = 1000000; // Microseconds. 
+const char* const kOptionsTag = "OPTIONS"; const char* const kPeriodTag = "PERIOD"; } // namespace @@ -63,9 +65,15 @@ const char* const kPeriodTag = "PERIOD"; // Thinning period can be provided in the calculator options or via a // side packet with the tag "PERIOD". // +// Calculator options provided optionally with the "OPTIONS" input +// sidepacket tag will be merged with this calculator's node options, i.e., +// singular fields of the side packet will overwrite the options defined in the +// node, and repeated fields will concatenate. +// // Example config: // node { // calculator: "PacketThinnerCalculator" +// input_side_packet: "OPTIONS:calculator_options" // input_stream: "signal" // output_stream: "output" // options { @@ -83,6 +91,9 @@ class PacketThinnerCalculator : public CalculatorBase { ~PacketThinnerCalculator() override {} static ::mediapipe::Status GetContract(CalculatorContract* cc) { + if (cc->InputSidePackets().HasTag(kOptionsTag)) { + cc->InputSidePackets().Tag(kOptionsTag).Set(); + } cc->Inputs().Index(0).SetAny(); cc->Outputs().Index(0).SetSameAs(&cc->Inputs().Index(0)); if (cc->InputSidePackets().HasTag(kPeriodTag)) { @@ -143,7 +154,9 @@ TimestampDiff abs(TimestampDiff t) { return t < 0 ? -t : t; } } // namespace ::mediapipe::Status PacketThinnerCalculator::Open(CalculatorContext* cc) { - auto& options = cc->Options(); + PacketThinnerCalculatorOptions options = mediapipe::tool::RetrieveOptions( + cc->Options(), cc->InputSidePackets(), + kOptionsTag); thinner_type_ = options.thinner_type(); // This check enables us to assume only two thinner types exist in Process() diff --git a/mediapipe/calculators/core/previous_loopback_calculator.cc b/mediapipe/calculators/core/previous_loopback_calculator.cc index feefd6a56..9d14ec956 100644 --- a/mediapipe/calculators/core/previous_loopback_calculator.cc +++ b/mediapipe/calculators/core/previous_loopback_calculator.cc @@ -93,8 +93,7 @@ class PreviousLoopbackCalculator : public CalculatorBase { // MAIN packet, hence not caring about corresponding loop packet. loop_timestamp = Timestamp::Unset(); } - main_packet_specs_.push_back({.timestamp = main_packet.Timestamp(), - .loop_timestamp = loop_timestamp}); + main_packet_specs_.push_back({main_packet.Timestamp(), loop_timestamp}); prev_main_ts_ = main_packet.Timestamp(); } diff --git a/mediapipe/calculators/image/color_convert_calculator.cc b/mediapipe/calculators/image/color_convert_calculator.cc index aa1b51c51..f31586d9d 100644 --- a/mediapipe/calculators/image/color_convert_calculator.cc +++ b/mediapipe/calculators/image/color_convert_calculator.cc @@ -38,9 +38,11 @@ void SetColorChannel(int channel, uint8 value, cv::Mat* mat) { constexpr char kRgbaInTag[] = "RGBA_IN"; constexpr char kRgbInTag[] = "RGB_IN"; +constexpr char kBgraInTag[] = "BGRA_IN"; constexpr char kGrayInTag[] = "GRAY_IN"; constexpr char kRgbaOutTag[] = "RGBA_OUT"; constexpr char kRgbOutTag[] = "RGB_OUT"; +constexpr char kBgraOutTag[] = "BGRA_OUT"; constexpr char kGrayOutTag[] = "GRAY_OUT"; } // namespace @@ -53,6 +55,8 @@ constexpr char kGrayOutTag[] = "GRAY_OUT"; // GRAY -> RGB // RGB -> GRAY // RGB -> RGBA +// RGBA -> BGRA +// BGRA -> RGBA // // This calculator only supports a single input stream and output stream at a // time. If more than one input stream or output stream is present, the @@ -63,11 +67,13 @@ constexpr char kGrayOutTag[] = "GRAY_OUT"; // Input streams: // RGBA_IN: The input video stream (ImageFrame, SRGBA). // RGB_IN: The input video stream (ImageFrame, SRGB). 
+// BGRA_IN: The input video stream (ImageFrame, SBGRA). // GRAY_IN: The input video stream (ImageFrame, GRAY8). // // Output streams: // RGBA_OUT: The output video stream (ImageFrame, SRGBA). // RGB_OUT: The output video stream (ImageFrame, SRGB). +// BGRA_OUT: The output video stream (ImageFrame, SBGRA). // GRAY_OUT: The output video stream (ImageFrame, GRAY8). class ColorConvertCalculator : public CalculatorBase { public: @@ -113,6 +119,10 @@ REGISTER_CALCULATOR(ColorConvertCalculator); cc->Inputs().Tag(kRgbInTag).Set(); } + if (cc->Inputs().HasTag(kBgraInTag)) { + cc->Inputs().Tag(kBgraInTag).Set(); + } + if (cc->Outputs().HasTag(kRgbOutTag)) { cc->Outputs().Tag(kRgbOutTag).Set(); } @@ -125,6 +135,10 @@ REGISTER_CALCULATOR(ColorConvertCalculator); cc->Outputs().Tag(kRgbaOutTag).Set(); } + if (cc->Outputs().HasTag(kBgraOutTag)) { + cc->Outputs().Tag(kBgraOutTag).Set(); + } + return ::mediapipe::OkStatus(); } @@ -171,6 +185,16 @@ REGISTER_CALCULATOR(ColorConvertCalculator); return ConvertAndOutput(kRgbInTag, kRgbaOutTag, ImageFormat::SRGBA, cv::COLOR_RGB2RGBA, cc); } + // BGRA -> RGBA + if (cc->Inputs().HasTag(kBgraInTag) && cc->Outputs().HasTag(kRgbaOutTag)) { + return ConvertAndOutput(kBgraInTag, kRgbaOutTag, ImageFormat::SRGBA, + cv::COLOR_BGRA2RGBA, cc); + } + // RGBA -> BGRA + if (cc->Inputs().HasTag(kRgbaInTag) && cc->Outputs().HasTag(kBgraOutTag)) { + return ConvertAndOutput(kRgbaInTag, kBgraOutTag, ImageFormat::SBGRA, + cv::COLOR_RGBA2BGRA, cc); + } return ::mediapipe::InvalidArgumentErrorBuilder(MEDIAPIPE_LOC) << "Unsupported image format conversion."; diff --git a/mediapipe/calculators/image/image_cropping_calculator.cc b/mediapipe/calculators/image/image_cropping_calculator.cc index 9500941cb..7754c1198 100644 --- a/mediapipe/calculators/image/image_cropping_calculator.cc +++ b/mediapipe/calculators/image/image_cropping_calculator.cc @@ -514,13 +514,7 @@ RectSpec ImageCroppingCalculator::GetCropSpecs(const CalculatorContext* cc, } } - return { - .width = crop_width, - .height = crop_height, - .center_x = x_center, - .center_y = y_center, - .rotation = rotation, - }; + return {crop_width, crop_height, x_center, y_center, rotation}; } ::mediapipe::Status ImageCroppingCalculator::GetBorderModeForOpenCV( diff --git a/mediapipe/calculators/image/image_transformation_calculator.cc b/mediapipe/calculators/image/image_transformation_calculator.cc index 859609139..d771707d7 100644 --- a/mediapipe/calculators/image/image_transformation_calculator.cc +++ b/mediapipe/calculators/image/image_transformation_calculator.cc @@ -392,19 +392,26 @@ REGISTER_CALCULATOR(ImageTransformationCalculator); } cv::Mat scaled_mat; + int output_width = output_width_; + int output_height = output_height_; if (scale_mode_ == mediapipe::ScaleMode_Mode_STRETCH) { - cv::resize(input_mat, scaled_mat, cv::Size(output_width_, output_height_)); + int scale_flag = + input_mat.cols > output_width_ && input_mat.rows > output_height_ + ? cv::INTER_AREA + : cv::INTER_LINEAR; + cv::resize(input_mat, scaled_mat, cv::Size(output_width_, output_height_), + 0, 0, scale_flag); } else { const float scale = std::min(static_cast(output_width_) / input_width, static_cast(output_height_) / input_height); const int target_width = std::round(input_width * scale); const int target_height = std::round(input_height * scale); - + int scale_flag = scale < 1.0f ? 
cv::INTER_AREA : cv::INTER_LINEAR; if (scale_mode_ == mediapipe::ScaleMode_Mode_FIT) { cv::Mat intermediate_mat; cv::resize(input_mat, intermediate_mat, - cv::Size(target_width, target_height)); + cv::Size(target_width, target_height), 0, 0, scale_flag); const int top = (output_height_ - target_height) / 2; const int bottom = output_height_ - target_height - top; const int left = (output_width_ - target_width) / 2; @@ -413,16 +420,13 @@ REGISTER_CALCULATOR(ImageTransformationCalculator); options_.constant_padding() ? cv::BORDER_CONSTANT : cv::BORDER_REPLICATE); } else { - cv::resize(input_mat, scaled_mat, cv::Size(target_width, target_height)); - output_width_ = target_width; - output_height_ = target_height; + cv::resize(input_mat, scaled_mat, cv::Size(target_width, target_height), + 0, 0, scale_flag); + output_width = target_width; + output_height = target_height; } } - int output_width; - int output_height; - ComputeOutputDimensions(input_width, input_height, &output_width, - &output_height); if (cc->Outputs().HasTag("LETTERBOX_PADDING")) { auto padding = absl::make_unique>(); ComputeOutputLetterboxPadding(input_width, input_height, output_width, diff --git a/mediapipe/calculators/tensorflow/BUILD b/mediapipe/calculators/tensorflow/BUILD index f774fe717..ea3fcc715 100644 --- a/mediapipe/calculators/tensorflow/BUILD +++ b/mediapipe/calculators/tensorflow/BUILD @@ -321,7 +321,7 @@ cc_library( "@org_tensorflow//tensorflow/core:framework", ], "//mediapipe:android": [ - "@org_tensorflow//tensorflow/core:android_lib_lite", + "@org_tensorflow//tensorflow/core:portable_tensorflow_lib_lite", ], }), alwayslink = 1, @@ -343,7 +343,7 @@ cc_library( "@org_tensorflow//tensorflow/core:framework", ], "//mediapipe:android": [ - "@org_tensorflow//tensorflow/core:android_lib_lite", + "@org_tensorflow//tensorflow/core:portable_tensorflow_lib_lite", ], }), alwayslink = 1, @@ -449,10 +449,10 @@ cc_library( "@org_tensorflow//tensorflow/core:framework", ], "//mediapipe:android": [ - "@org_tensorflow//tensorflow/core:android_tensorflow_lib_lite_nortti_lite_protos", + "@org_tensorflow//tensorflow/core:portable_tensorflow_lib_lite", ], "//mediapipe:ios": [ - "@org_tensorflow//tensorflow/core:ios_tensorflow_lib", + "@org_tensorflow//tensorflow/core:portable_tensorflow_lib", ], }), alwayslink = 1, @@ -470,10 +470,10 @@ cc_library( "@org_tensorflow//tensorflow/core:core", ], "//mediapipe:android": [ - "@org_tensorflow//tensorflow/core:android_tensorflow_lib_lite_nortti_lite_protos", + "@org_tensorflow//tensorflow/core:portable_tensorflow_lib_lite", ], "//mediapipe:ios": [ - "@org_tensorflow//tensorflow/core:ios_tensorflow_lib", + "@org_tensorflow//tensorflow/core:portable_tensorflow_lib", ], }), ) @@ -496,11 +496,11 @@ cc_library( "@org_tensorflow//tensorflow/core:core", ], "//mediapipe:android": [ - "@org_tensorflow//tensorflow/core:android_tensorflow_lib_lite_nortti_lite_protos", + "@org_tensorflow//tensorflow/core:portable_tensorflow_lib_lite", "//mediapipe/android/file/base", ], "//mediapipe:ios": [ - "@org_tensorflow//tensorflow/core:ios_tensorflow_lib", + "@org_tensorflow//tensorflow/core:portable_tensorflow_lib", "//mediapipe/android/file/base", ], }), @@ -525,11 +525,11 @@ cc_library( "@org_tensorflow//tensorflow/core:core", ], "//mediapipe:android": [ - "@org_tensorflow//tensorflow/core:android_tensorflow_lib_lite_nortti_lite_protos", + "@org_tensorflow//tensorflow/core:portable_tensorflow_lib_lite", "//mediapipe/android/file/base", ], "//mediapipe:ios": [ - 
"@org_tensorflow//tensorflow/core:ios_tensorflow_lib", + "@org_tensorflow//tensorflow/core:portable_tensorflow_lib", "//mediapipe/android/file/base", ], }), @@ -637,7 +637,7 @@ cc_library( "@org_tensorflow//tensorflow/core:framework", ], "//mediapipe:android": [ - "@org_tensorflow//tensorflow/core:android_lib_lite", + "@org_tensorflow//tensorflow/core:portable_tensorflow_lib_lite", ], }), alwayslink = 1, @@ -673,7 +673,7 @@ cc_library( "@org_tensorflow//tensorflow/core:framework", ], "//mediapipe:android": [ - "@org_tensorflow//tensorflow/core:android_lib_lite", + "@org_tensorflow//tensorflow/core:portable_tensorflow_lib_lite", ], }), alwayslink = 1, @@ -1109,11 +1109,11 @@ cc_test( "@org_tensorflow//tensorflow/core:direct_session", ], "//mediapipe:android": [ - "@org_tensorflow//tensorflow/core:android_tensorflow_lib_with_ops_lite_proto_no_rtti_lib", - "@org_tensorflow//tensorflow/core:android_tensorflow_test_lib", + "@org_tensorflow//tensorflow/core:portable_tensorflow_lib", + "@org_tensorflow//tensorflow/core:portable_tensorflow_test_lib", ], "//mediapipe:ios": [ - "@org_tensorflow//tensorflow/core:ios_tensorflow_test_lib", + "@org_tensorflow//tensorflow/core:portable_tensorflow_test_lib", ], }), ) diff --git a/mediapipe/calculators/tflite/BUILD b/mediapipe/calculators/tflite/BUILD index 1932bbbf1..169ef23f5 100644 --- a/mediapipe/calculators/tflite/BUILD +++ b/mediapipe/calculators/tflite/BUILD @@ -198,6 +198,7 @@ cc_test( cc_library( name = "util", hdrs = ["util.h"], + visibility = ["//visibility:public"], alwayslink = 1, ) @@ -525,16 +526,16 @@ cc_test( ":tflite_converter_calculator_cc_proto", "//mediapipe/framework:calculator_framework", "//mediapipe/framework:calculator_runner", - "//mediapipe/framework/deps:file_path", + "//mediapipe/framework/formats:image_format_cc_proto", + "//mediapipe/framework/formats:image_frame", + "//mediapipe/framework/formats:image_frame_opencv", "//mediapipe/framework/formats:matrix", "//mediapipe/framework/port:gtest_main", "//mediapipe/framework/port:integral_types", "//mediapipe/framework/port:parse_text_proto", - "//mediapipe/framework/port:status", "//mediapipe/framework/tool:validate_type", "@com_google_absl//absl/memory", "@org_tensorflow//tensorflow/lite:framework", - "@org_tensorflow//tensorflow/lite/kernels:builtin_ops", ], ) diff --git a/mediapipe/calculators/tflite/ssd_anchors_calculator.cc b/mediapipe/calculators/tflite/ssd_anchors_calculator.cc index c63b5ce94..90d35573e 100644 --- a/mediapipe/calculators/tflite/ssd_anchors_calculator.cc +++ b/mediapipe/calculators/tflite/ssd_anchors_calculator.cc @@ -26,8 +26,12 @@ namespace { float CalculateScale(float min_scale, float max_scale, int stride_index, int num_strides) { - return min_scale + - (max_scale - min_scale) * 1.0 * stride_index / (num_strides - 1.0f); + if (num_strides == 1) { + return (min_scale + max_scale) * 0.5f; + } else { + return min_scale + + (max_scale - min_scale) * 1.0 * stride_index / (num_strides - 1.0f); + } } } // namespace @@ -114,7 +118,7 @@ REGISTER_CALCULATOR(SsdAnchorsCalculator); } int layer_id = 0; - while (layer_id < options.strides_size()) { + while (layer_id < options.num_layers()) { std::vector anchor_height; std::vector anchor_width; std::vector aspect_ratios; diff --git a/mediapipe/calculators/tflite/tflite_converter_calculator.cc b/mediapipe/calculators/tflite/tflite_converter_calculator.cc index 3d5cdff89..76bac09e4 100644 --- a/mediapipe/calculators/tflite/tflite_converter_calculator.cc +++ b/mediapipe/calculators/tflite/tflite_converter_calculator.cc 
@@ -67,10 +67,12 @@ constexpr char kImageFrameTag[] = "IMAGE"; constexpr char kGpuBufferTag[] = "IMAGE_GPU"; constexpr char kTensorsTag[] = "TENSORS"; constexpr char kTensorsGpuTag[] = "TENSORS_GPU"; +constexpr char kMatrixTag[] = "MATRIX"; } // namespace namespace mediapipe { +namespace { #if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE) using ::tflite::gpu::gl::CreateReadWriteShaderStorageBuffer; using ::tflite::gpu::gl::GlProgram; @@ -89,6 +91,8 @@ struct GPUData { }; #endif +} // namespace + // Calculator for normalizing and converting an ImageFrame or Matrix // into a TfLiteTensor (float 32) or a GpuBuffer to a tflite::gpu::GlBuffer // or MTLBuffer. @@ -164,6 +168,9 @@ class TfLiteConverterCalculator : public CalculatorBase { bool initialized_ = false; bool use_gpu_ = false; bool zero_center_ = true; // normalize range to [-1,1] | otherwise [0,1] + bool use_custom_normalization_ = false; + float custom_div_ = -1.0f; + float custom_sub_ = -1.0f; bool flip_vertically_ = false; bool row_major_matrix_ = false; bool use_quantized_tensors_ = false; @@ -175,7 +182,8 @@ REGISTER_CALCULATOR(TfLiteConverterCalculator); CalculatorContract* cc) { // Confirm only one of the input streams is present. RET_CHECK(cc->Inputs().HasTag(kImageFrameTag) ^ - cc->Inputs().HasTag(kGpuBufferTag) ^ cc->Inputs().HasTag("MATRIX")); + cc->Inputs().HasTag(kGpuBufferTag) ^ + cc->Inputs().HasTag(kMatrixTag)); // Confirm only one of the output streams is present. RET_CHECK(cc->Outputs().HasTag(kTensorsTag) ^ @@ -186,8 +194,8 @@ REGISTER_CALCULATOR(TfLiteConverterCalculator); if (cc->Inputs().HasTag(kImageFrameTag)) { cc->Inputs().Tag(kImageFrameTag).Set(); } - if (cc->Inputs().HasTag("MATRIX")) { - cc->Inputs().Tag("MATRIX").Set(); + if (cc->Inputs().HasTag(kMatrixTag)) { + cc->Inputs().Tag(kMatrixTag).Set(); } #if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) if (cc->Inputs().HasTag(kGpuBufferTag)) { @@ -257,6 +265,9 @@ REGISTER_CALCULATOR(TfLiteConverterCalculator); ::mediapipe::Status TfLiteConverterCalculator::Process(CalculatorContext* cc) { if (use_gpu_) { + if (cc->Inputs().Tag(kGpuBufferTag).IsEmpty()) { + return ::mediapipe::OkStatus(); + } if (!initialized_) { MP_RETURN_IF_ERROR(InitGpu(cc)); initialized_ = true; @@ -283,6 +294,9 @@ REGISTER_CALCULATOR(TfLiteConverterCalculator); ::mediapipe::Status TfLiteConverterCalculator::ProcessCPU( CalculatorContext* cc) { if (cc->Inputs().HasTag(kImageFrameTag)) { + if (cc->Inputs().Tag(kImageFrameTag).IsEmpty()) { + return ::mediapipe::OkStatus(); + } // CPU ImageFrame to TfLiteTensor conversion. const auto& image_frame = @@ -361,10 +375,12 @@ REGISTER_CALCULATOR(TfLiteConverterCalculator); cc->Outputs() .Tag(kTensorsTag) .Add(output_tensors.release(), cc->InputTimestamp()); - } else if (cc->Inputs().HasTag("MATRIX")) { + } else if (cc->Inputs().HasTag(kMatrixTag)) { + if (cc->Inputs().Tag(kMatrixTag).IsEmpty()) { + return ::mediapipe::OkStatus(); + } // CPU Matrix to TfLiteTensor conversion. - - const auto& matrix = cc->Inputs().Tag("MATRIX").Get(); + const auto& matrix = cc->Inputs().Tag(kMatrixTag).Get(); const int height = matrix.rows(); const int width = matrix.cols(); const int channels = 1; @@ -614,6 +630,11 @@ REGISTER_CALCULATOR(TfLiteConverterCalculator); // Get data normalization mode. zero_center_ = options.zero_center(); + // Custom div and sub values. + use_custom_normalization_ = options.use_custom_normalization(); + custom_div_ = options.custom_div(); + custom_sub_ = options.custom_sub(); + // Get y-flip mode. 
flip_vertically_ = options.flip_vertically(); @@ -649,7 +670,13 @@ template const int channels_ignored = channels - channels_preserved; float div, sub; - if (zero_center) { + + if (use_custom_normalization_) { + RET_CHECK_GT(custom_div_, 0.0f); + RET_CHECK_GE(custom_sub_, 0.0f); + div = custom_div_; + sub = custom_sub_; + } else if (zero_center) { // [-1,1] div = 127.5f; sub = 1.0f; diff --git a/mediapipe/calculators/tflite/tflite_converter_calculator.proto b/mediapipe/calculators/tflite/tflite_converter_calculator.proto index 2c0d8f4e1..4d468c851 100644 --- a/mediapipe/calculators/tflite/tflite_converter_calculator.proto +++ b/mediapipe/calculators/tflite/tflite_converter_calculator.proto @@ -28,6 +28,16 @@ message TfLiteConverterCalculatorOptions { // Ignored if using quantization. optional bool zero_center = 1 [default = true]; + // Custom settings to override the internal scaling factors `div` and `sub`. + // Both values must be set to non-negative values. Will only take effect on + // CPU AND when |use_custom_normalization| is set to true. When these custom + // values take effect, the |zero_center| setting above will be overriden, and + // the normalized_value will be calculated as: + // normalized_value = input / custom_div - custom_sub. + optional bool use_custom_normalization = 6 [default = false]; + optional float custom_div = 7 [default = -1.0]; + optional float custom_sub = 8 [default = -1.0]; + // Whether the input image should be flipped vertically (along the // y-direction). This is useful, for example, when the input image is defined // with a coordinate system where the origin is at the bottom-left corner diff --git a/mediapipe/calculators/tflite/tflite_converter_calculator_test.cc b/mediapipe/calculators/tflite/tflite_converter_calculator_test.cc index d7d2d5fd1..cecf84e6f 100644 --- a/mediapipe/calculators/tflite/tflite_converter_calculator_test.cc +++ b/mediapipe/calculators/tflite/tflite_converter_calculator_test.cc @@ -19,6 +19,9 @@ #include "mediapipe/calculators/tflite/tflite_converter_calculator.pb.h" #include "mediapipe/framework/calculator_framework.h" #include "mediapipe/framework/calculator_runner.h" +#include "mediapipe/framework/formats/image_format.pb.h" +#include "mediapipe/framework/formats/image_frame.h" +#include "mediapipe/framework/formats/image_frame_opencv.h" #include "mediapipe/framework/formats/matrix.h" #include "mediapipe/framework/port/gtest.h" #include "mediapipe/framework/port/integral_types.h" @@ -28,7 +31,6 @@ #include "tensorflow/lite/interpreter.h" namespace mediapipe { - namespace { constexpr char kTransposeOptionsString[] = @@ -196,4 +198,55 @@ TEST_F(TfLiteConverterCalculatorTest, RandomMatrixRowMajor) { } } +TEST_F(TfLiteConverterCalculatorTest, CustomDivAndSub) { + CalculatorGraph graph; + // Run the calculator and verify that one output is generated. + CalculatorGraphConfig graph_config = + ::mediapipe::ParseTextProtoOrDie(R"( + input_stream: "input_image" + node { + calculator: "TfLiteConverterCalculator" + input_stream: "IMAGE:input_image" + output_stream: "TENSORS:tensor" + options { + [mediapipe.TfLiteConverterCalculatorOptions.ext] { + row_major_matrix: true + use_custom_normalization: true + custom_div: 2.0 + custom_sub: 33.0 + } + } + } + )"); + std::vector output_packets; + tool::AddVectorSink("tensor", &graph_config, &output_packets); + + // Run the graph. 
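  // With use_custom_normalization set in the options above, the CPU path
  // computes input / custom_div - custom_sub, so the GRAY8 pixel value of 200
  // fed in below is expected to come out as 200 / 2.0 - 33.0 = 67.0, which the
  // EXPECT_FLOAT_EQ near the end of this test verifies.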
+ MP_ASSERT_OK(graph.Initialize(graph_config)); + MP_ASSERT_OK(graph.StartRun({})); + auto input_image = absl::make_unique(ImageFormat::GRAY8, 1, 1); + cv::Mat mat = ::mediapipe::formats::MatView(input_image.get()); + mat.at(0, 0) = 200; + MP_ASSERT_OK(graph.AddPacketToInputStream( + "input_image", Adopt(input_image.release()).At(Timestamp(0)))); + + // Wait until the calculator done processing. + MP_ASSERT_OK(graph.WaitUntilIdle()); + EXPECT_EQ(1, output_packets.size()); + + // Get and process results. + const std::vector& tensor_vec = + output_packets[0].Get>(); + EXPECT_EQ(1, tensor_vec.size()); + + const TfLiteTensor* tensor = &tensor_vec[0]; + EXPECT_EQ(kTfLiteFloat32, tensor->type); + EXPECT_FLOAT_EQ(67.0f, *tensor->data.f); + + // Fully close graph at end, otherwise calculator+tensors are destroyed + // after calling WaitUntilDone(). + MP_ASSERT_OK(graph.CloseInputStream("input_image")); + MP_ASSERT_OK(graph.WaitUntilDone()); +} + } // namespace mediapipe diff --git a/mediapipe/calculators/tflite/tflite_inference_calculator.cc b/mediapipe/calculators/tflite/tflite_inference_calculator.cc index c52db9c79..d9aba77c6 100644 --- a/mediapipe/calculators/tflite/tflite_inference_calculator.cc +++ b/mediapipe/calculators/tflite/tflite_inference_calculator.cc @@ -57,7 +57,10 @@ #include "tensorflow/lite/delegates/gpu/metal_delegate.h" #include "tensorflow/lite/delegates/gpu/metal_delegate_internal.h" #endif // iOS + +#if !defined(MEDIAPIPE_EDGE_TPU) #include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h" +#endif // !EDGETPU #if defined(MEDIAPIPE_ANDROID) #include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h" #endif // ANDROID @@ -116,11 +119,13 @@ using ::tflite::gpu::gl::GlBuffer; #endif #if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) +namespace { struct GPUData { int elements = 1; GpuTensor buffer; ::tflite::gpu::BHWC shape; }; +} // namespace #endif // Returns number of threads to configure XNNPACK delegate with. @@ -405,8 +410,11 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator); // 1. Receive pre-processed tensor inputs. if (use_advanced_gpu_api_) { #if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE) + if (cc->Inputs().Tag(kTensorsGpuTag).IsEmpty()) { + return ::mediapipe::OkStatus(); + } const auto& input_tensors = - cc->Inputs().Tag("TENSORS_GPU").Get>(); + cc->Inputs().Tag(kTensorsGpuTag).Get>(); RET_CHECK(!input_tensors.empty()); MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext( [this, &input_tensors]() -> ::mediapipe::Status { @@ -424,6 +432,9 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator); } else if (gpu_input_) { // Read GPU input into SSBO. #if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE) + if (cc->Inputs().Tag(kTensorsGpuTag).IsEmpty()) { + return ::mediapipe::OkStatus(); + } const auto& input_tensors = cc->Inputs().Tag(kTensorsGpuTag).Get>(); RET_CHECK_GT(input_tensors.size(), 0); @@ -439,6 +450,9 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator); return ::mediapipe::OkStatus(); })); #elif defined(MEDIAPIPE_IOS) + if (cc->Inputs().Tag(kTensorsGpuTag).IsEmpty()) { + return ::mediapipe::OkStatus(); + } const auto& input_tensors = cc->Inputs().Tag(kTensorsGpuTag).Get>(); RET_CHECK_GT(input_tensors.size(), 0); @@ -465,6 +479,9 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator); RET_CHECK_FAIL() << "GPU processing not enabled."; #endif } else { + if (cc->Inputs().Tag(kTensorsTag).IsEmpty()) { + return ::mediapipe::OkStatus(); + } // Read CPU input into tensors. 
const auto& input_tensors = cc->Inputs().Tag(kTensorsTag).Get>(); @@ -511,10 +528,10 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator); auto output_tensors = absl::make_unique>(); output_tensors->resize(gpu_data_out_.size()); for (int i = 0; i < gpu_data_out_.size(); ++i) { - output_tensors->at(i) = gpu_data_out_[0]->buffer.MakeRef(); + output_tensors->at(i) = gpu_data_out_[i]->buffer.MakeRef(); } cc->Outputs() - .Tag("TENSORS_GPU") + .Tag(kTensorsGpuTag) .Add(output_tensors.release(), cc->InputTimestamp()); #endif } else if (gpu_output_) { @@ -637,7 +654,7 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator); options.usage = tflite::gpu::InferenceUsage::SUSTAINED_SPEED; tflite_gpu_runner_ = std::make_unique(options); - return tflite_gpu_runner_->InitializeWithModel(model); + return tflite_gpu_runner_->InitializeWithModel(model, op_resolver); } #endif @@ -730,6 +747,7 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator); calculator_opts.delegate().has_xnnpack(); #endif // __EMSCRIPTEN__ +#if !defined(MEDIAPIPE_EDGE_TPU) if (xnnpack_requested) { TfLiteXNNPackDelegateOptions xnnpack_opts{}; xnnpack_opts.num_threads = GetXnnpackNumThreads(calculator_opts); @@ -738,6 +756,7 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator); RET_CHECK_EQ(interpreter_->ModifyGraphWithDelegate(delegate_.get()), kTfLiteOk); } +#endif // !EDGETPU // Return, no need for GPU delegate below. return ::mediapipe::OkStatus(); diff --git a/mediapipe/calculators/tflite/tflite_tensors_to_segmentation_calculator.cc b/mediapipe/calculators/tflite/tflite_tensors_to_segmentation_calculator.cc index a1193cdf2..23a85276d 100644 --- a/mediapipe/calculators/tflite/tflite_tensors_to_segmentation_calculator.cc +++ b/mediapipe/calculators/tflite/tflite_tensors_to_segmentation_calculator.cc @@ -77,7 +77,10 @@ using ::tflite::gpu::gl::GlShader; // Performs optional upscale to REFERENCE_IMAGE dimensions if provided, // otherwise the mask is the same size as input tensor. // -// Produces result as an RGBA image, with the mask in both R & A channels. +// Produces result as an RGBA image, with the mask in both R & A channels. The +// value of each pixel is the probability of the specified class after softmax, +// scaled to 255 on CPU. The class can be specified through the +// |output_layer_index| option. 
// // Inputs: // One of the following TENSORS tags: diff --git a/mediapipe/calculators/util/BUILD b/mediapipe/calculators/util/BUILD index 2aae53d8e..9edd8deba 100644 --- a/mediapipe/calculators/util/BUILD +++ b/mediapipe/calculators/util/BUILD @@ -276,6 +276,41 @@ cc_test( ], ) +cc_library( + name = "clock_timestamp_calculator", + srcs = ["clock_timestamp_calculator.cc"], + visibility = [ + "//visibility:public", + ], + deps = [ + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework:timestamp", + "//mediapipe/framework/deps:clock", + "//mediapipe/framework/port:logging", + "//mediapipe/framework/port:ret_check", + "//mediapipe/framework/port:status", + "@com_google_absl//absl/time", + ], + alwayslink = 1, +) + +cc_library( + name = "clock_latency_calculator", + srcs = ["clock_latency_calculator.cc"], + visibility = [ + "//visibility:public", + ], + deps = [ + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework:timestamp", + "//mediapipe/framework/port:logging", + "//mediapipe/framework/port:ret_check", + "//mediapipe/framework/port:status", + "@com_google_absl//absl/time", + ], + alwayslink = 1, +) + cc_library( name = "annotation_overlay_calculator", srcs = ["annotation_overlay_calculator.cc"], diff --git a/mediapipe/calculators/util/clock_latency_calculator.cc b/mediapipe/calculators/util/clock_latency_calculator.cc new file mode 100644 index 000000000..768abb2a4 --- /dev/null +++ b/mediapipe/calculators/util/clock_latency_calculator.cc @@ -0,0 +1,116 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/time/time.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/port/logging.h" +#include "mediapipe/framework/port/ret_check.h" +#include "mediapipe/framework/port/status.h" + +namespace mediapipe { +namespace { +// Tag name for reference signal. +constexpr char kReferenceTag[] = "REFERENCE"; +} // namespace + +// A calculator that diffs multiple input absl::Time streams against a +// reference Time stream, and outputs the resulting absl::Duration's. Useful +// in combination with ClockTimestampCalculator to be able to determine the +// latency between two different points in a graph. +// +// Inputs: At least one non-reference Time stream is required. +// 0- Time stream 0 +// 1- Time stream 1 +// ... +// N- Time stream N +// REFERENCE_SIGNAL (required): The Time stream by which all others are +// compared. Should be the stream from which our other streams were +// computed, in order to provide meaningful latency results. +// +// Outputs: +// 0- Duration from REFERENCE_SIGNAL to input stream 0 +// 1- Duration from REFERENCE_SIGNAL to input stream 1 +// ... 
+// N- Duration from REFERENCE_SIGNAL to input stream N +// +// Example config: +// node { +// calculator: "ClockLatencyCalculator" +// input_stream: "packet_clocktime_stream_0" +// input_stream: "packet_clocktime_stream_1" +// input_stream: "packet_clocktime_stream_2" +// input_stream: "REFERENCE_SIGNAL: packet_clocktime_stream_reference" +// output_stream: "packet_latency_stream_0" +// output_stream: "packet_latency_stream_1" +// output_stream: "packet_latency_stream_2" +// } +// +class ClockLatencyCalculator : public CalculatorBase { + public: + ClockLatencyCalculator() {} + + static ::mediapipe::Status GetContract(CalculatorContract* cc); + + ::mediapipe::Status Open(CalculatorContext* cc) override; + ::mediapipe::Status Process(CalculatorContext* cc) override; + + private: + int64 num_packet_streams_ = -1; +}; +REGISTER_CALCULATOR(ClockLatencyCalculator); + +::mediapipe::Status ClockLatencyCalculator::GetContract( + CalculatorContract* cc) { + RET_CHECK_GT(cc->Inputs().NumEntries(), 1); + + int64 num_packet_streams = cc->Inputs().NumEntries() - 1; + RET_CHECK_EQ(cc->Outputs().NumEntries(), num_packet_streams); + + for (int64 i = 0; i < num_packet_streams; ++i) { + cc->Inputs().Index(i).Set(); + cc->Outputs().Index(i).Set(); + } + cc->Inputs().Tag(kReferenceTag).Set(); + + return ::mediapipe::OkStatus(); +} + +::mediapipe::Status ClockLatencyCalculator::Open(CalculatorContext* cc) { + // Direct passthrough, as far as timestamp and bounds are concerned. + cc->SetOffset(TimestampDiff(0)); + num_packet_streams_ = cc->Inputs().NumEntries() - 1; + return ::mediapipe::OkStatus(); +} + +::mediapipe::Status ClockLatencyCalculator::Process(CalculatorContext* cc) { + // Get reference time. + RET_CHECK(!cc->Inputs().Tag(kReferenceTag).IsEmpty()); + const absl::Time& reference_time = + cc->Inputs().Tag(kReferenceTag).Get(); + + // Push Duration packets for every input stream we have. + for (int64 i = 0; i < num_packet_streams_; ++i) { + if (!cc->Inputs().Index(i).IsEmpty()) { + const absl::Time& input_stream_time = + cc->Inputs().Index(i).Get(); + cc->Outputs().Index(i).AddPacket( + MakePacket(input_stream_time - reference_time) + .At(cc->InputTimestamp())); + } + } + + return ::mediapipe::OkStatus(); +} + +} // namespace mediapipe diff --git a/mediapipe/calculators/util/clock_timestamp_calculator.cc b/mediapipe/calculators/util/clock_timestamp_calculator.cc new file mode 100644 index 000000000..ea715f8ae --- /dev/null +++ b/mediapipe/calculators/util/clock_timestamp_calculator.cc @@ -0,0 +1,108 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/time/time.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/deps/clock.h" +#include "mediapipe/framework/deps/monotonic_clock.h" +#include "mediapipe/framework/port/logging.h" +#include "mediapipe/framework/port/ret_check.h" +#include "mediapipe/framework/port/status.h" + +namespace mediapipe { +namespace { +// Tag name for clock side packet. 
+constexpr char kClockTag[] = "CLOCK"; +} // namespace + +// A calculator that outputs the current clock time at which it receives input +// packets. Use a separate instance of this calculator for each input stream +// you wish to output a clock time for. +// +// InputSidePacket (Optional): +// CLOCK: A clock to use for querying the current time. +// +// Inputs: +// A single packet stream we wish to get the current clocktime for + +// Outputs: +// A single stream of absl::Time packets, representing the clock time at which +// we received the input stream's packets. + +// Example config: +// node { +// calculator: "ClockTimestampCalculator" +// input_side_packet: "CLOCK:monotonic_clock" +// input_stream: "packet_stream" +// output_stream: "packet_clocktime_stream" +// } +// +class ClockTimestampCalculator : public CalculatorBase { + public: + ClockTimestampCalculator() {} + + static ::mediapipe::Status GetContract(CalculatorContract* cc); + + ::mediapipe::Status Open(CalculatorContext* cc) override; + ::mediapipe::Status Process(CalculatorContext* cc) override; + + private: + // Clock object. + std::shared_ptr<::mediapipe::Clock> clock_; +}; +REGISTER_CALCULATOR(ClockTimestampCalculator); + +::mediapipe::Status ClockTimestampCalculator::GetContract( + CalculatorContract* cc) { + RET_CHECK_EQ(cc->Inputs().NumEntries(), 1); + RET_CHECK_EQ(cc->Outputs().NumEntries(), 1); + + cc->Inputs().Index(0).SetAny(); + cc->Outputs().Index(0).Set(); + + // Optional Clock input side packet. + if (cc->InputSidePackets().HasTag(kClockTag)) { + cc->InputSidePackets() + .Tag(kClockTag) + .Set>(); + } + + return ::mediapipe::OkStatus(); +} + +::mediapipe::Status ClockTimestampCalculator::Open(CalculatorContext* cc) { + // Direct passthrough, as far as timestamp and bounds are concerned. + cc->SetOffset(TimestampDiff(0)); + + // Initialize the clock. + if (cc->InputSidePackets().HasTag(kClockTag)) { + clock_ = cc->InputSidePackets() + .Tag("CLOCK") + .Get>(); + } else { + clock_.reset( + ::mediapipe::MonotonicClock::CreateSynchronizedMonotonicClock()); + } + + return ::mediapipe::OkStatus(); +} + +::mediapipe::Status ClockTimestampCalculator::Process(CalculatorContext* cc) { + // Push the Time packet to output. 
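  // The current clock reading goes out as an absl::Time packet stamped with
  // the incoming packet's timestamp, so a downstream ClockLatencyCalculator
  // can pair it with other clock-time streams at the same timestamps.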
+ auto timestamp_packet = MakePacket(clock_->TimeNow()); + cc->Outputs().Index(0).AddPacket(timestamp_packet.At(cc->InputTimestamp())); + return ::mediapipe::OkStatus(); +} + +} // namespace mediapipe diff --git a/mediapipe/calculators/util/detections_to_render_data_calculator.cc b/mediapipe/calculators/util/detections_to_render_data_calculator.cc index 1077f7687..731994d4f 100644 --- a/mediapipe/calculators/util/detections_to_render_data_calculator.cc +++ b/mediapipe/calculators/util/detections_to_render_data_calculator.cc @@ -27,6 +27,7 @@ namespace mediapipe { namespace { +constexpr char kDetectionTag[] = "DETECTION"; constexpr char kDetectionsTag[] = "DETECTIONS"; constexpr char kDetectionListTag[] = "DETECTION_LIST"; constexpr char kRenderDataTag[] = "RENDER_DATA"; @@ -62,6 +63,7 @@ constexpr float kNumScoreDecimalDigitsMultipler = 100; // Example config: // node { // calculator: "DetectionsToRenderDataCalculator" +// input_stream: "DETECTION:detection" // input_stream: "DETECTIONS:detections" // input_stream: "DETECTION_LIST:detection_list" // output_stream: "RENDER_DATA:render_data" @@ -123,9 +125,13 @@ REGISTER_CALCULATOR(DetectionsToRenderDataCalculator); ::mediapipe::Status DetectionsToRenderDataCalculator::GetContract( CalculatorContract* cc) { RET_CHECK(cc->Inputs().HasTag(kDetectionListTag) || - cc->Inputs().HasTag(kDetectionsTag)) + cc->Inputs().HasTag(kDetectionsTag) || + cc->Inputs().HasTag(kDetectionTag)) << "None of the input streams are provided."; + if (cc->Inputs().HasTag(kDetectionTag)) { + cc->Inputs().Tag(kDetectionTag).Set(); + } if (cc->Inputs().HasTag(kDetectionListTag)) { cc->Inputs().Tag(kDetectionListTag).Set(); } @@ -155,8 +161,10 @@ REGISTER_CALCULATOR(DetectionsToRenderDataCalculator); const bool has_detection_from_vector = cc->Inputs().HasTag(kDetectionsTag) && !cc->Inputs().Tag(kDetectionsTag).Get>().empty(); + const bool has_single_detection = cc->Inputs().HasTag(kDetectionTag) && + !cc->Inputs().Tag(kDetectionTag).IsEmpty(); if (!options.produce_empty_packet() && !has_detection_from_list && - !has_detection_from_vector) { + !has_detection_from_vector && !has_single_detection) { return ::mediapipe::OkStatus(); } @@ -176,6 +184,10 @@ REGISTER_CALCULATOR(DetectionsToRenderDataCalculator); AddDetectionToRenderData(detection, options, render_data.get()); } } + if (has_single_detection) { + AddDetectionToRenderData(cc->Inputs().Tag(kDetectionTag).Get(), + options, render_data.get()); + } cc->Outputs() .Tag(kRenderDataTag) .Add(render_data.release(), cc->InputTimestamp()); diff --git a/mediapipe/calculators/util/landmarks_to_detection_calculator.cc b/mediapipe/calculators/util/landmarks_to_detection_calculator.cc index 5f429cabf..64a7a8cc6 100644 --- a/mediapipe/calculators/util/landmarks_to_detection_calculator.cc +++ b/mediapipe/calculators/util/landmarks_to_detection_calculator.cc @@ -76,7 +76,7 @@ Detection ConvertLandmarksToDetection(const NormalizedLandmarkList& landmarks) { // node { // calculator: "LandmarksToDetectionCalculator" // input_stream: "NORM_LANDMARKS:landmarks" -// output_stream: "DETECTIONS:detections" +// output_stream: "DETECTION:detections" // } class LandmarksToDetectionCalculator : public CalculatorBase { public: diff --git a/mediapipe/calculators/util/non_max_suppression_calculator.cc b/mediapipe/calculators/util/non_max_suppression_calculator.cc index 5836a5a6a..1ea1b3d6b 100644 --- a/mediapipe/calculators/util/non_max_suppression_calculator.cc +++ b/mediapipe/calculators/util/non_max_suppression_calculator.cc @@ -303,12 +303,12 @@ 
class NonMaxSuppressionCalculator : public CalculatorBase { IndexedScores candidates; output_detections->clear(); while (!remained_indexed_scores.empty()) { + const int original_indexed_scores_size = remained_indexed_scores.size(); const auto& detection = detections[remained_indexed_scores[0].first]; if (options_.min_score_threshold() > 0 && detection.score(0) < options_.min_score_threshold()) { break; } - remained.clear(); candidates.clear(); const Location location(detection.location_data()); @@ -365,8 +365,15 @@ class NonMaxSuppressionCalculator : public CalculatorBase { keypoint->set_y(keypoints[i * 2 + 1] / total_score); } } - remained_indexed_scores = std::move(remained); + output_detections->push_back(weighted_detection); + // Breaks the loop if the size of indexed scores doesn't change after an + // iteration. + if (original_indexed_scores_size == remained.size()) { + break; + } else { + remained_indexed_scores = std::move(remained); + } } } diff --git a/mediapipe/docs/android_archive_library.md b/mediapipe/docs/android_archive_library.md index 37bffb39f..f0fef4113 100644 --- a/mediapipe/docs/android_archive_library.md +++ b/mediapipe/docs/android_archive_library.md @@ -2,12 +2,12 @@ ***Experimental Only*** -The MediaPipe Android archive library is a convenient way to use MediaPipe with -Android Studio and Gradle. MediaPipe doesn't publish a general AAR that can be -used by all projects. Instead, developers need to add a mediapipe_aar() target -to generate a custom AAR file for their own projects. This is necessary in order -to include specific resources such as MediaPipe calculators needed for each -project. +The MediaPipe Android Archive (AAR) library is a convenient way to use MediaPipe +with Android Studio and Gradle. MediaPipe doesn't publish a general AAR that can +be used by all projects. Instead, developers need to add a mediapipe_aar() +target to generate a custom AAR file for their own projects. This is necessary +in order to include specific resources such as MediaPipe calculators needed for +each project. ### Steps to build a MediaPipe AAR diff --git a/mediapipe/docs/building_examples.md b/mediapipe/docs/building_examples.md new file mode 100644 index 000000000..73c139fea --- /dev/null +++ b/mediapipe/docs/building_examples.md @@ -0,0 +1,327 @@ +# Building MediaPipe Examples + +* [Android](#android) +* [iOS](#ios) +* [Desktop](#desktop) + +## Android + +### Prerequisite + +* Java Runtime. +* Android SDK release 28.0.3 and above. +* Android NDK r18b and above. + +MediaPipe recommends setting up Android SDK and NDK via Android Studio (and see +below for Android Studio setup). However, if you prefer using MediaPipe without +Android Studio, please run +[`setup_android_sdk_and_ndk.sh`](https://github.com/google/mediapipe/tree/master/setup_android_sdk_and_ndk.sh) +to download and setup Android SDK and NDK before building any Android example +apps. + +If Android SDK and NDK are already installed (e.g., by Android Studio), set +$ANDROID_HOME and $ANDROID_NDK_HOME to point to the installed SDK and NDK. + +```bash +export ANDROID_HOME= +export ANDROID_NDK_HOME= +``` + +In order to use MediaPipe on earlier Android versions, MediaPipe needs to switch +to a lower Android API level. You can achieve this by specifying `api_level = +` in android_ndk_repository() and/or android_sdk_repository() +in the [`WORKSPACE`](https://github.com/google/mediapipe/tree/master/WORKSPACE) file. + +Please verify all the necessary packages are installed. 
+ +* Android SDK Platform API Level 28 or 29 +* Android SDK Build-Tools 28 or 29 +* Android SDK Platform-Tools 28 or 29 +* Android SDK Tools 26.1.1 +* Android NDK 17c or above + +### Option 1: Build with Bazel in Command Line + +1. To build an Android example app, for instance, for MediaPipe Hand, run: + + Note: To reduce the binary size, consider appending `--linkopt="-s"` to the + command below to strip symbols. + + ~~~ + ```bash + bazel build -c opt --config=android_arm64 mediapipe/examples/android/src/java/com/google/mediapipe/apps/handtrackinggpu + ``` + ~~~ + +1. Install it on a device with: + + ```bash + adb install bazel-bin/mediapipe/examples/android/src/java/com/google/mediapipe/apps/handtrackinggpu/handtrackinggpu.apk + ``` + +### Option 2: Build with Bazel in Android Studio + +The MediaPipe project can be imported into Android Studio using the Bazel +plugins. This allows the MediaPipe examples to be built and modified in Android +Studio. + +To incorporate MediaPipe into an existing Android Studio project, see these +[instructions](./android_archive_library.md) that use Android Archive (AAR) and +Gradle. + +The steps below use Android Studio 3.5 to build and install a MediaPipe example +app: + +1. Install and launch Android Studio 3.5. + +2. Select `Configure` | `SDK Manager` | `SDK Platforms`. + + * Verify that Android SDK Platform API Level 28 or 29 is installed. + * Take note of the Android SDK Location, e.g., + `/usr/local/home/Android/Sdk`. + +3. Select `Configure` | `SDK Manager` | `SDK Tools`. + + * Verify that Android SDK Build-Tools 28 or 29 is installed. + * Verify that Android SDK Platform-Tools 28 or 29 is installed. + * Verify that Android SDK Tools 26.1.1 is installed. + * Verify that Android NDK 17c or above is installed. + * Take note of the Android NDK Location, e.g., + `/usr/local/home/Android/Sdk/ndk-bundle` or + `/usr/local/home/Android/Sdk/ndk/20.0.5594570`. + +4. Set environment variables `$ANDROID_HOME` and `$ANDROID_NDK_HOME` to point + to the installed SDK and NDK. + + ```bash + export ANDROID_HOME=/usr/local/home/Android/Sdk + + # If the NDK libraries are installed by a previous version of Android Studio, do + export ANDROID_NDK_HOME=/usr/local/home/Android/Sdk/ndk-bundle + # If the NDK libraries are installed by Android Studio 3.5, do + export ANDROID_NDK_HOME=/usr/local/home/Android/Sdk/ndk/ + ``` + +5. Select `Configure` | `Plugins` install `Bazel`. + +6. On Linux, select `File` | `Settings`| `Bazel settings`. On macos, select + `Android Studio` | `Preferences` | `Bazel settings`. Then, modify `Bazel + binary location` to be the same as the output of `$ which bazel`. + +7. Select `Import Bazel Project`. + + * Select `Workspace`: `/path/to/mediapipe` and select `Next`. + * Select `Generate from BUILD file`: `/path/to/mediapipe/BUILD` and select + `Next`. + * Modify `Project View` to be the following and select `Finish`. + + ``` + directories: + # read project settings, e.g., .bazelrc + . + -mediapipe/objc + -mediapipe/examples/ios + + targets: + //mediapipe/examples/android/...:all + //mediapipe/java/...:all + + android_sdk_platform: android-29 + + sync_flags: + --host_crosstool_top=@bazel_tools//tools/cpp:toolchain + ``` + +8. Select `Bazel` | `Sync` | `Sync project with Build files`. 
+ + Note: Even after doing step 4, if you still see the error: `"no such package + '@androidsdk//': Either the path attribute of android_sdk_repository or the + ANDROID_HOME environment variable must be set."`, please modify the + [`WORKSPACE`](https://github.com/google/mediapipe/tree/master/WORKSPACE) file to point to your + SDK and NDK library locations, as below: + + ``` + android_sdk_repository( + name = "androidsdk", + path = "/path/to/android/sdk" + ) + + android_ndk_repository( + name = "androidndk", + path = "/path/to/android/ndk" + ) + ``` + +9. Connect an Android device to the workstation. + +10. Select `Run...` | `Edit Configurations...`. + + * Select `Templates` | `Bazel Command`. + * Enter Target Expression: + `//mediapipe/examples/android/src/java/com/google/mediapipe/apps/facedetectioncpu` + * Enter Bazel command: `mobile-install`. + * Enter Bazel flags: `-c opt --config=android_arm64`. + * Press the `[+]` button to add the new configuration. + * Select `Run` to run the example app on the connected Android device. + +## iOS + +### Prerequisite + +1. Install [Xcode](https://developer.apple.com/xcode/) and the Command Line + Tools. + + Follow Apple's instructions to obtain the required development certificates + and provisioning profiles for your iOS device. Install the Command Line + Tools by + + ```bash + xcode-select --install + ``` + +2. Install [Bazel](https://bazel.build/). + + We recommend using [Homebrew](https://brew.sh/) to get the latest version. + +3. Set Python 3.7 as the default Python version and install the Python "six" + library. + + To make Mediapipe work with TensorFlow, please set Python 3.7 as the default + Python version and install the Python "six" library. + + ```bash + pip3 install --user six + ``` + +4. Clone the MediaPipe repository. + + ```bash + git clone https://github.com/google/mediapipe.git + ``` + +5. Symlink or copy your provisioning profile to + `mediapipe/mediapipe/provisioning_profile.mobileprovision`. + + ```bash + cd mediapipe + ln -s ~/Downloads/MyProvisioningProfile.mobileprovision mediapipe/provisioning_profile.mobileprovision + ``` + + Tip: You can use this command to see the provisioning profiles you have + previously downloaded using Xcode: `open + ~/Library/MobileDevice/"Provisioning Profiles"`. If there are none, generate + and download a profile on + [Apple's developer site](https://developer.apple.com/account/resources/). + +### Option 1: Build with Bazel in Command Line + +1. Modify the `bundle_id` field of the app's `ios_application` target to use + your own identifier. For instance, for + [MediaPipe Hand](./hand_tracking_mobile_gpu.md), the `bundle_id` is in the + `HandTrackingGpuApp` target in the + [BUILD](https://github.com/google/mediapipe/tree/master/mediapipe/examples/ios/handtrackinggpu/BUILD) + file. + +2. Again using [MediaPipe Hand](./hand_tracking_mobile_gpu.md) for example, + run: + + ```bash + bazel build -c opt --config=ios_arm64 mediapipe/examples/ios/handtrackinggpu:HandTrackingGpuApp + ``` + + You may see a permission request from `codesign` in order to sign the app. + +3. In Xcode, open the `Devices and Simulators` window (command-shift-2). + +4. Make sure your device is connected. You will see a list of installed apps. + Press the "+" button under the list, and select the `.ipa` file built by + Bazel. + +5. You can now run the app on your device. + +### Option 2: Build in Xcode + +Note: This workflow requires a separate tool in addition to Bazel. 
If it fails
+to work for some reason, please resort to the command-line build instructions in
+the previous section.
+
+1. We will use a tool called [Tulsi](https://tulsi.bazel.build/) for generating
+   Xcode projects from Bazel build configurations.
+
+    ```bash
+    # cd out of the mediapipe directory, then:
+    git clone https://github.com/bazelbuild/tulsi.git
+    cd tulsi
+    # remove Xcode version from Tulsi's .bazelrc (see http://github.com/bazelbuild/tulsi#building-and-installing):
+    sed -i .orig '/xcode_version/d' .bazelrc
+    # build and run Tulsi:
+    sh build_and_run.sh
+    ```
+
+    This will install `Tulsi.app` inside the `Applications` directory in your
+    home directory.
+
+2. Open `mediapipe/Mediapipe.tulsiproj` using the Tulsi app.
+
+    Important: If Tulsi displays an error saying "Bazel could not be found",
+    press the "Bazel..." button in the Packages tab and select the `bazel`
+    executable in your Homebrew `/bin/` directory.
+
+3. Select the MediaPipe config in the Configs tab, then press the Generate
+   button below. You will be asked for a location to save the Xcode project.
+   Once the project is generated, it will be opened in Xcode.
+
+4. You can now select any of the MediaPipe demos in the target menu, and build
+   and run them as normal.
+
+    Note: When you ask Xcode to run an app, by default it will use the Debug
+    configuration. Some of our demos are computationally heavy; you may want to
+    use the Release configuration for better performance.
+
+    Tip: To switch build configuration in Xcode, click on the target menu,
+    choose "Edit Scheme...", select the Run action, and switch the Build
+    Configuration from Debug to Release. Note that this is set independently for
+    each target.
+
+## Desktop
+
+### Option 1: Running on CPU
+
+1. To build, for example, [MediaPipe Hand](./hand_tracking_mobile_gpu.md), run:
+
+    ```bash
+    bazel build -c opt --define MEDIAPIPE_DISABLE_GPU=1 mediapipe/examples/desktop/hand_tracking:hand_tracking_cpu
+    ```
+
+    Running the resulting binary (see the next step) opens your webcam as long
+    as it is connected and turned on. Any errors are likely due to the webcam
+    not being accessible.
+
+2. To run the application:
+
+    ```bash
+    GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/hand_tracking/hand_tracking_cpu \
+    --calculator_graph_config_file=mediapipe/graphs/hand_tracking/hand_tracking_desktop_live.pbtxt
+    ```
+
+### Option 2: Running on GPU
+
+Note: This currently works only on Linux. Please first follow
+[OpenGL ES Setup on Linux Desktop](./gpu.md#opengl-es-setup-on-linux-desktop).
+
+1. To build, for example, [MediaPipe Hand](./hand_tracking_mobile_gpu.md), run:
+
+    ```bash
+    bazel build -c opt --copt -DMESA_EGL_NO_X11_HEADERS --copt -DEGL_NO_X11 \
+      mediapipe/examples/desktop/hand_tracking:hand_tracking_gpu
+    ```
+
+    Running the resulting binary (see the next step) opens your webcam as long
+    as it is connected and turned on. Any errors are likely due to the webcam
+    not being accessible, or the GPU drivers not being set up properly.
+
+2.
To run the application: + + ```bash + GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/hand_tracking/hand_tracking_gpu \ + --calculator_graph_config_file=mediapipe/graphs/hand_tracking/hand_tracking_mobile.pbtxt + ``` diff --git a/mediapipe/docs/data/visualizer/sample_trace.binarypb b/mediapipe/docs/data/visualizer/sample_trace.binarypb new file mode 100644 index 000000000..fce47d8e5 Binary files /dev/null and b/mediapipe/docs/data/visualizer/sample_trace.binarypb differ diff --git a/mediapipe/docs/face_detection_mobile_cpu.md b/mediapipe/docs/face_detection_mobile_cpu.md index a68aee1c5..4102a22b9 100644 --- a/mediapipe/docs/face_detection_mobile_cpu.md +++ b/mediapipe/docs/face_detection_mobile_cpu.md @@ -21,7 +21,7 @@ adb install bazel-bin/mediapipe/examples/android/src/java/com/google/mediapipe/a [Source](https://github.com/google/mediapipe/tree/master/mediapipe/examples/ios/facedetectioncpu). -See the general [instructions](./mediapipe_ios_setup.md) for building iOS +See the general [instructions](./building_examples.md#ios) for building iOS examples and generating an Xcode project. This will be the FaceDetectionCpuApp target. diff --git a/mediapipe/docs/face_detection_mobile_gpu.md b/mediapipe/docs/face_detection_mobile_gpu.md index 7a1b6152c..b99350e94 100644 --- a/mediapipe/docs/face_detection_mobile_gpu.md +++ b/mediapipe/docs/face_detection_mobile_gpu.md @@ -21,7 +21,7 @@ adb install bazel-bin/mediapipe/examples/android/src/java/com/google/mediapipe/a [Source](https://github.com/google/mediapipe/tree/master/mediapipe/examples/ios/facedetectiongpu). -See the general [instructions](./mediapipe_ios_setup.md) for building iOS +See the general [instructions](./building_examples.md#ios) for building iOS examples and generating an Xcode project. This will be the FaceDetectionGpuApp target. diff --git a/mediapipe/docs/face_mesh_mobile_gpu.md b/mediapipe/docs/face_mesh_mobile_gpu.md index f85cdabcd..d594a26e4 100644 --- a/mediapipe/docs/face_mesh_mobile_gpu.md +++ b/mediapipe/docs/face_mesh_mobile_gpu.md @@ -40,7 +40,7 @@ adb install bazel-bin/mediapipe/examples/android/src/java/com/google/mediapipe/a [Source](https://github.com/google/mediapipe/tree/master/mediapipe/examples/ios/facemeshgpu). -See the general [instructions](./mediapipe_ios_setup.md) for building iOS +See the general [instructions](./building_examples.md#ios) for building iOS examples and generating an Xcode project. This will be the FaceMeshGpuApp target. diff --git a/mediapipe/docs/hand_detection_mobile_gpu.md b/mediapipe/docs/hand_detection_mobile_gpu.md index 53b4d9905..a052de0b3 100644 --- a/mediapipe/docs/hand_detection_mobile_gpu.md +++ b/mediapipe/docs/hand_detection_mobile_gpu.md @@ -41,7 +41,7 @@ adb install bazel-bin/mediapipe/examples/android/src/java/com/google/mediapipe/a [Source](https://github.com/google/mediapipe/tree/master/mediapipe/examples/ios/handdetectiongpu). -See the general [instructions](./mediapipe_ios_setup.md) for building iOS +See the general [instructions](./building_examples.md#ios) for building iOS examples and generating an Xcode project. This will be the HandDetectionGpuApp target. 
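+
+For reference, building this target from the command line follows the same
+pattern as the other iOS examples in these docs; a sketch for the
+HandDetectionGpuApp target mentioned above (set your own `bundle_id` in its
+BUILD file first, as described in the iOS build instructions):
+
+```bash
+bazel build -c opt --config=ios_arm64 \
+  mediapipe/examples/ios/handdetectiongpu:HandDetectionGpuApp
+```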
diff --git a/mediapipe/docs/hand_tracking_desktop.md b/mediapipe/docs/hand_tracking_desktop.md index b7d587749..f48008109 100644 --- a/mediapipe/docs/hand_tracking_desktop.md +++ b/mediapipe/docs/hand_tracking_desktop.md @@ -129,6 +129,7 @@ node { output_stream: "LANDMARKS:hand_landmarks" output_stream: "NORM_RECT:hand_rect_from_landmarks" output_stream: "PRESENCE:hand_presence" + output_stream: "HANDEDNESS:handedness" } # Caches a hand rectangle fed back from HandLandmarkSubgraph, and upon the @@ -171,6 +172,7 @@ node { input_stream: "LANDMARKS:hand_landmarks" input_stream: "NORM_RECT:hand_rect" input_stream: "DETECTIONS:palm_detections" + input_stream: "HANDEDNESS:handedness" output_stream: "IMAGE:output_video" } diff --git a/mediapipe/docs/hand_tracking_mobile_gpu.md b/mediapipe/docs/hand_tracking_mobile_gpu.md index edc50f9c2..b81a77187 100644 --- a/mediapipe/docs/hand_tracking_mobile_gpu.md +++ b/mediapipe/docs/hand_tracking_mobile_gpu.md @@ -1,725 +1,154 @@ -# Hand Tracking (GPU) +# MediaPipe Hand -This doc focuses on the -[example graph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/hand_tracking/hand_tracking_mobile.pbtxt) -that performs hand tracking with TensorFlow Lite on GPU. It is related to the -[hand detection example](./hand_detection_mobile_gpu.md), and we recommend users -to review the hand detection example first. +## Overview -For overall context on hand detection and hand tracking, please read this -[Google AI Blog post](https://mediapipe.page.link/handgoogleaiblog). +The ability to perceive the shape and motion of hands can be a vital component +in improving the user experience across a variety of technological domains and +platforms. For example, it can form the basis for sign language understanding +and hand gesture control, and can also enable the overlay of digital content and +information on top of the physical world in augmented reality. While coming +naturally to people, robust real-time hand perception is a decidedly challenging +computer vision task, as hands often occlude themselves or each other (e.g. +finger/palm occlusions and hand shakes) and lack high contrast patterns. -![hand_tracking_android_gpu.gif](images/mobile/hand_tracking_android_gpu.gif) - -In the visualization above, the red dots represent the localized hand landmarks, -and the green lines are simply connections between selected landmark pairs for -visualization of the hand skeleton. The red box represents a hand rectangle that -covers the entire hand, derived either from hand detection (see -[hand detection example](./hand_detection_mobile_gpu.md)) or from the pervious -round of hand landmark localization using an ML model (see also -[model card](https://mediapipe.page.link/handmc)). Hand landmark localization is -performed only within the hand rectangle for computational efficiency and -accuracy, and hand detection is only invoked when landmark localization could -not identify hand presence in the previous iteration. - -The example can also run in a mode that localizes hand landmarks in 3D (i.e., -estimating an extra z coordinate): +MediaPipe Hand is a high-fidelity hand and finger tracking solution. It employs +machine learning (ML) to infer 21 3D landmarks of a hand from just a single +frame. Whereas current state-of-the-art approaches rely primarily on powerful +desktop environments for inference, our method achieves real-time performance on +a mobile phone, and even scales to multiple hands. 
We hope that providing this +hand perception functionality to the wider research and development community +will result in an emergence of creative use cases, stimulating new applications +and new research avenues. ![hand_tracking_3d_android_gpu.gif](images/mobile/hand_tracking_3d_android_gpu.gif) -In the visualization above, the localized hand landmarks are represented by dots -in different shades, with the brighter ones denoting landmarks closer to the -camera. - -## Android - -[Source](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/handtrackinggpu) - -An arm64 APK can be -[downloaded here](https://drive.google.com/open?id=1uCjS0y0O0dTDItsMh8x2cf4-l3uHW1vE), -and a version running the 3D mode can be -[downloaded here](https://drive.google.com/open?id=1tGgzOGkcZglJO2i7e8NKSxJgVtJYS3ka). - -To build the app yourself, run: - -```bash -bazel build -c opt --config=android_arm64 mediapipe/examples/android/src/java/com/google/mediapipe/apps/handtrackinggpu -``` - -To build for the 3D mode, run: - -```bash -bazel build -c opt --config=android_arm64 --define 3D=true mediapipe/examples/android/src/java/com/google/mediapipe/apps/handtrackinggpu -``` - -Once the app is built, install it on Android device with: - -```bash -adb install bazel-bin/mediapipe/examples/android/src/java/com/google/mediapipe/apps/handtrackinggpu/handtrackinggpu.apk -``` - -## iOS - -[Source](https://github.com/google/mediapipe/tree/master/mediapipe/examples/ios/handtrackinggpu). - -See the general [instructions](./mediapipe_ios_setup.md) for building iOS -examples and generating an Xcode project. This will be the HandDetectionGpuApp -target. - -To build on the command line: - -```bash -bazel build -c opt --config=ios_arm64 mediapipe/examples/ios/handtrackinggpu:HandTrackingGpuApp -``` - -To build for the 3D mode, run: - -```bash -bazel build -c opt --config=ios_arm64 --define 3D=true mediapipe/examples/ios/handtrackinggpu:HandTrackingGpuApp -``` - -## Graph - -The hand tracking [main graph](#main-graph) internally utilizes a -[hand detection subgraph](#hand-detection-subgraph), a -[hand landmark subgraph](#hand-landmark-subgraph) and a -[renderer subgraph](#renderer-subgraph). - -The subgraphs show up in the main graph visualization as nodes colored in -purple, and the subgraph itself can also be visualized just like a regular -graph. For more information on how to visualize a graph that includes subgraphs, -see the Visualizing Subgraphs section in the -[visualizer documentation](./visualizer.md). - -### Main Graph - -![hand_tracking_mobile_graph](images/mobile/hand_tracking_mobile.png) - -[Source pbtxt file](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/hand_tracking/hand_tracking_mobile.pbtxt) - -```bash -# MediaPipe graph that performs hand tracking with TensorFlow Lite on GPU. -# Used in the examples in -# mediapipe/examples/android/src/java/com/mediapipe/apps/handtrackinggpu and -# mediapipe/examples/ios/handtrackinggpu. - -# Images coming into and out of the graph. -input_stream: "input_video" -output_stream: "output_video" - -# Throttles the images flowing downstream for flow control. It passes through -# the very first incoming image unaltered, and waits for downstream nodes -# (calculators and subgraphs) in the graph to finish their tasks before it -# passes through another image. All images that come in while waiting are -# dropped, limiting the number of in-flight images in most part of the graph to -# 1. 
This prevents the downstream nodes from queuing up incoming images and data -# excessively, which leads to increased latency and memory usage, unwanted in -# real-time mobile applications. It also eliminates unnecessarily computation, -# e.g., the output produced by a node may get dropped downstream if the -# subsequent nodes are still busy processing previous inputs. -node { - calculator: "FlowLimiterCalculator" - input_stream: "input_video" - input_stream: "FINISHED:hand_rect" - input_stream_info: { - tag_index: "FINISHED" - back_edge: true - } - output_stream: "throttled_input_video" -} - -# Caches a hand-presence decision fed back from HandLandmarkSubgraph, and upon -# the arrival of the next input image sends out the cached decision with the -# timestamp replaced by that of the input image, essentially generating a packet -# that carries the previous hand-presence decision. Note that upon the arrival -# of the very first input image, an empty packet is sent out to jump start the -# feedback loop. -node { - calculator: "PreviousLoopbackCalculator" - input_stream: "MAIN:throttled_input_video" - input_stream: "LOOP:hand_presence" - input_stream_info: { - tag_index: "LOOP" - back_edge: true - } - output_stream: "PREV_LOOP:prev_hand_presence" -} - -# Drops the incoming image if HandLandmarkSubgraph was able to identify hand -# presence in the previous image. Otherwise, passes the incoming image through -# to trigger a new round of hand detection in HandDetectionSubgraph. -node { - calculator: "GateCalculator" - input_stream: "throttled_input_video" - input_stream: "DISALLOW:prev_hand_presence" - output_stream: "hand_detection_input_video" - - node_options: { - [type.googleapis.com/mediapipe.GateCalculatorOptions] { - empty_packets_as_allow: true - } - } -} - -# Subgraph that detections hands (see hand_detection_gpu.pbtxt). -node { - calculator: "HandDetectionSubgraph" - input_stream: "hand_detection_input_video" - output_stream: "DETECTIONS:palm_detections" - output_stream: "NORM_RECT:hand_rect_from_palm_detections" -} - -# Subgraph that localizes hand landmarks (see hand_landmark_gpu.pbtxt). -node { - calculator: "HandLandmarkSubgraph" - input_stream: "IMAGE:throttled_input_video" - input_stream: "NORM_RECT:hand_rect" - output_stream: "LANDMARKS:hand_landmarks" - output_stream: "NORM_RECT:hand_rect_from_landmarks" - output_stream: "PRESENCE:hand_presence" -} - -# Caches a hand rectangle fed back from HandLandmarkSubgraph, and upon the -# arrival of the next input image sends out the cached rectangle with the -# timestamp replaced by that of the input image, essentially generating a packet -# that carries the previous hand rectangle. Note that upon the arrival of the -# very first input image, an empty packet is sent out to jump start the -# feedback loop. -node { - calculator: "PreviousLoopbackCalculator" - input_stream: "MAIN:throttled_input_video" - input_stream: "LOOP:hand_rect_from_landmarks" - input_stream_info: { - tag_index: "LOOP" - back_edge: true - } - output_stream: "PREV_LOOP:prev_hand_rect_from_landmarks" -} - -# Merges a stream of hand rectangles generated by HandDetectionSubgraph and that -# generated by HandLandmarkSubgraph into a single output stream by selecting -# between one of the two streams. The formal is selected if the incoming packet -# is not empty, i.e., hand detection is performed on the current image by -# HandDetectionSubgraph (because HandLandmarkSubgraph could not identify hand -# presence in the previous image). 
Otherwise, the latter is selected, which is -# never empty because HandLandmarkSubgraphs processes all images (that went -# through FlowLimiterCaculator). -node { - calculator: "MergeCalculator" - input_stream: "hand_rect_from_palm_detections" - input_stream: "prev_hand_rect_from_landmarks" - output_stream: "hand_rect" -} - -# Subgraph that renders annotations and overlays them on top of the input -# images (see renderer_gpu.pbtxt). -node { - calculator: "RendererSubgraph" - input_stream: "IMAGE:throttled_input_video" - input_stream: "LANDMARKS:hand_landmarks" - input_stream: "NORM_RECT:hand_rect" - input_stream: "DETECTIONS:palm_detections" - output_stream: "IMAGE:output_video" -} -``` - -### Hand Detection Subgraph - -![hand_detection_gpu_subgraph](images/mobile/hand_detection_gpu_subgraph.png) - -[Source pbtxt file](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/hand_tracking/subgraphs/hand_detection_gpu.pbtxt) - -```bash -# MediaPipe hand detection subgraph. - -type: "HandDetectionSubgraph" - -input_stream: "input_video" -output_stream: "DETECTIONS:palm_detections" -output_stream: "NORM_RECT:hand_rect_from_palm_detections" - -# Transforms the input image on GPU to a 256x256 image. To scale the input -# image, the scale_mode option is set to FIT to preserve the aspect ratio, -# resulting in potential letterboxing in the transformed image. -node: { - calculator: "ImageTransformationCalculator" - input_stream: "IMAGE_GPU:input_video" - output_stream: "IMAGE_GPU:transformed_input_video" - output_stream: "LETTERBOX_PADDING:letterbox_padding" - node_options: { - [type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] { - output_width: 256 - output_height: 256 - scale_mode: FIT - } - } -} - -# Generates a single side packet containing a TensorFlow Lite op resolver that -# supports custom ops needed by the model used in this graph. -node { - calculator: "TfLiteCustomOpResolverCalculator" - output_side_packet: "opresolver" - node_options: { - [type.googleapis.com/mediapipe.TfLiteCustomOpResolverCalculatorOptions] { - use_gpu: true - } - } -} - -# Converts the transformed input image on GPU into an image tensor stored as a -# TfLiteTensor. -node { - calculator: "TfLiteConverterCalculator" - input_stream: "IMAGE_GPU:transformed_input_video" - output_stream: "TENSORS_GPU:image_tensor" -} - -# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a -# vector of tensors representing, for instance, detection boxes/keypoints and -# scores. -node { - calculator: "TfLiteInferenceCalculator" - input_stream: "TENSORS_GPU:image_tensor" - output_stream: "TENSORS:detection_tensors" - input_side_packet: "CUSTOM_OP_RESOLVER:opresolver" - node_options: { - [type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] { - model_path: "palm_detection.tflite" - use_gpu: true - } - } -} - -# Generates a single side packet containing a vector of SSD anchors based on -# the specification in the options. 
-node { - calculator: "SsdAnchorsCalculator" - output_side_packet: "anchors" - node_options: { - [type.googleapis.com/mediapipe.SsdAnchorsCalculatorOptions] { - num_layers: 5 - min_scale: 0.1171875 - max_scale: 0.75 - input_size_height: 256 - input_size_width: 256 - anchor_offset_x: 0.5 - anchor_offset_y: 0.5 - strides: 8 - strides: 16 - strides: 32 - strides: 32 - strides: 32 - aspect_ratios: 1.0 - fixed_anchor_size: true - } - } -} - -# Decodes the detection tensors generated by the TensorFlow Lite model, based on -# the SSD anchors and the specification in the options, into a vector of -# detections. Each detection describes a detected object. -node { - calculator: "TfLiteTensorsToDetectionsCalculator" - input_stream: "TENSORS:detection_tensors" - input_side_packet: "ANCHORS:anchors" - output_stream: "DETECTIONS:detections" - node_options: { - [type.googleapis.com/mediapipe.TfLiteTensorsToDetectionsCalculatorOptions] { - num_classes: 1 - num_boxes: 2944 - num_coords: 18 - box_coord_offset: 0 - keypoint_coord_offset: 4 - num_keypoints: 7 - num_values_per_keypoint: 2 - sigmoid_score: true - score_clipping_thresh: 100.0 - reverse_output_order: true - - x_scale: 256.0 - y_scale: 256.0 - h_scale: 256.0 - w_scale: 256.0 - min_score_thresh: 0.7 - } - } -} - -# Performs non-max suppression to remove excessive detections. -node { - calculator: "NonMaxSuppressionCalculator" - input_stream: "detections" - output_stream: "filtered_detections" - node_options: { - [type.googleapis.com/mediapipe.NonMaxSuppressionCalculatorOptions] { - min_suppression_threshold: 0.3 - overlap_type: INTERSECTION_OVER_UNION - algorithm: WEIGHTED - return_empty_detections: true - } - } -} - -# Maps detection label IDs to the corresponding label text ("Palm"). The label -# map is provided in the label_map_path option. -node { - calculator: "DetectionLabelIdToTextCalculator" - input_stream: "filtered_detections" - output_stream: "labeled_detections" - node_options: { - [type.googleapis.com/mediapipe.DetectionLabelIdToTextCalculatorOptions] { - label_map_path: "palm_detection_labelmap.txt" - } - } -} - -# Adjusts detection locations (already normalized to [0.f, 1.f]) on the -# letterboxed image (after image transformation with the FIT scale mode) to the -# corresponding locations on the same image with the letterbox removed (the -# input image to the graph before image transformation). -node { - calculator: "DetectionLetterboxRemovalCalculator" - input_stream: "DETECTIONS:labeled_detections" - input_stream: "LETTERBOX_PADDING:letterbox_padding" - output_stream: "DETECTIONS:palm_detections" -} - -# Extracts image size from the input images. -node { - calculator: "ImagePropertiesCalculator" - input_stream: "IMAGE_GPU:input_video" - output_stream: "SIZE:image_size" -} - -# Converts results of palm detection into a rectangle (normalized by image size) -# that encloses the palm and is rotated such that the line connecting center of -# the wrist and MCP of the middle finger is aligned with the Y-axis of the -# rectangle. -node { - calculator: "DetectionsToRectsCalculator" - input_stream: "DETECTIONS:palm_detections" - input_stream: "IMAGE_SIZE:image_size" - output_stream: "NORM_RECT:palm_rect" - node_options: { - [type.googleapis.com/mediapipe.DetectionsToRectsCalculatorOptions] { - rotation_vector_start_keypoint_index: 0 # Center of wrist. - rotation_vector_end_keypoint_index: 2 # MCP of middle finger. 
- rotation_vector_target_angle_degrees: 90 - output_zero_rect_for_empty_detections: true - } - } -} - -# Expands and shifts the rectangle that contains the palm so that it's likely -# to cover the entire hand. -node { - calculator: "RectTransformationCalculator" - input_stream: "NORM_RECT:palm_rect" - input_stream: "IMAGE_SIZE:image_size" - output_stream: "hand_rect_from_palm_detections" - node_options: { - [type.googleapis.com/mediapipe.RectTransformationCalculatorOptions] { - scale_x: 2.6 - scale_y: 2.6 - shift_y: -0.5 - square_long: true - } - } -} -``` - -### Hand Landmark Subgraph - -![hand_landmark_gpu_subgraph.pbtxt](images/mobile/hand_landmark_gpu_subgraph.png) - -[Source pbtxt file](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/hand_tracking/subgraphs/hand_landmark_gpu.pbtxt) - -```bash -# MediaPipe hand landmark localization subgraph. - -type: "HandLandmarkSubgraph" - -input_stream: "IMAGE:input_video" -input_stream: "NORM_RECT:hand_rect" -output_stream: "LANDMARKS:hand_landmarks" -output_stream: "NORM_RECT:hand_rect_for_next_frame" -output_stream: "PRESENCE:hand_presence" - -# Crops the rectangle that contains a hand from the input image. -node { - calculator: "ImageCroppingCalculator" - input_stream: "IMAGE_GPU:input_video" - input_stream: "NORM_RECT:hand_rect" - output_stream: "IMAGE_GPU:hand_image" -} - -# Transforms the input image on GPU to a 256x256 image. To scale the input -# image, the scale_mode option is set to FIT to preserve the aspect ratio, -# resulting in potential letterboxing in the transformed image. -node: { - calculator: "ImageTransformationCalculator" - input_stream: "IMAGE_GPU:hand_image" - output_stream: "IMAGE_GPU:transformed_hand_image" - output_stream: "LETTERBOX_PADDING:letterbox_padding" - node_options: { - [type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] { - output_width: 256 - output_height: 256 - scale_mode: FIT - } - } -} - -# Converts the transformed input image on GPU into an image tensor stored as a -# TfLiteTensor. -node { - calculator: "TfLiteConverterCalculator" - input_stream: "IMAGE_GPU:transformed_hand_image" - output_stream: "TENSORS_GPU:image_tensor" -} - -# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a -# vector of tensors representing, for instance, detection boxes/keypoints and -# scores. -node { - calculator: "TfLiteInferenceCalculator" - input_stream: "TENSORS_GPU:image_tensor" - output_stream: "TENSORS:output_tensors" - node_options: { - [type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] { - model_path: "hand_landmark.tflite" - use_gpu: true - } - } -} - -# Splits a vector of tensors into multiple vectors. -node { - calculator: "SplitTfLiteTensorVectorCalculator" - input_stream: "output_tensors" - output_stream: "landmark_tensors" - output_stream: "hand_flag_tensor" - node_options: { - [type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] { - ranges: { begin: 0 end: 1 } - ranges: { begin: 1 end: 2 } - } - } -} - -# Converts the hand-flag tensor into a float that represents the confidence -# score of hand presence. -node { - calculator: "TfLiteTensorsToFloatsCalculator" - input_stream: "TENSORS:hand_flag_tensor" - output_stream: "FLOAT:hand_presence_score" -} - -# Applies a threshold to the confidence score to determine whether a hand is -# present. 
-node { - calculator: "ThresholdingCalculator" - input_stream: "FLOAT:hand_presence_score" - output_stream: "FLAG:hand_presence" - node_options: { - [type.googleapis.com/mediapipe.ThresholdingCalculatorOptions] { - threshold: 0.1 - } - } -} - -# Decodes the landmark tensors into a vector of lanmarks, where the landmark -# coordinates are normalized by the size of the input image to the model. -node { - calculator: "TfLiteTensorsToLandmarksCalculator" - input_stream: "TENSORS:landmark_tensors" - output_stream: "NORM_LANDMARKS:landmarks" - node_options: { - [type.googleapis.com/mediapipe.TfLiteTensorsToLandmarksCalculatorOptions] { - num_landmarks: 21 - input_image_width: 256 - input_image_height: 256 - } - } -} - -# Adjusts landmarks (already normalized to [0.f, 1.f]) on the letterboxed hand -# image (after image transformation with the FIT scale mode) to the -# corresponding locations on the same image with the letterbox removed (hand -# image before image transformation). -node { - calculator: "LandmarkLetterboxRemovalCalculator" - input_stream: "LANDMARKS:landmarks" - input_stream: "LETTERBOX_PADDING:letterbox_padding" - output_stream: "LANDMARKS:scaled_landmarks" -} - -# Projects the landmarks from the cropped hand image to the corresponding -# locations on the full image before cropping (input to the graph). -node { - calculator: "LandmarkProjectionCalculator" - input_stream: "NORM_LANDMARKS:scaled_landmarks" - input_stream: "NORM_RECT:hand_rect" - output_stream: "NORM_LANDMARKS:hand_landmarks" -} - -# Extracts image size from the input images. -node { - calculator: "ImagePropertiesCalculator" - input_stream: "IMAGE_GPU:input_video" - output_stream: "SIZE:image_size" -} - -# Converts hand landmarks to a detection that tightly encloses all landmarks. -node { - calculator: "LandmarksToDetectionCalculator" - input_stream: "NORM_LANDMARKS:hand_landmarks" - output_stream: "DETECTION:hand_detection" -} - -# Converts the hand detection into a rectangle (normalized by image size) -# that encloses the hand and is rotated such that the line connecting center of -# the wrist and MCP of the middle finger is aligned with the Y-axis of the -# rectangle. -node { - calculator: "DetectionsToRectsCalculator" - input_stream: "DETECTION:hand_detection" - input_stream: "IMAGE_SIZE:image_size" - output_stream: "NORM_RECT:hand_rect_from_landmarks" - node_options: { - [type.googleapis.com/mediapipe.DetectionsToRectsCalculatorOptions] { - rotation_vector_start_keypoint_index: 0 # Center of wrist. - rotation_vector_end_keypoint_index: 9 # MCP of middle finger. - rotation_vector_target_angle_degrees: 90 - } - } -} - -# Expands the hand rectangle so that in the next video frame it's likely to -# still contain the hand even with some motion. -node { - calculator: "RectTransformationCalculator" - input_stream: "NORM_RECT:hand_rect_from_landmarks" - input_stream: "IMAGE_SIZE:image_size" - output_stream: "hand_rect_for_next_frame" - node_options: { - [type.googleapis.com/mediapipe.RectTransformationCalculatorOptions] { - scale_x: 1.6 - scale_y: 1.6 - square_long: true - } - } -} -``` - -### Renderer Subgraph - -![hand_renderer_gpu_subgraph.pbtxt](images/mobile/hand_renderer_gpu_subgraph.png) - -[Source pbtxt file](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/hand_tracking/subgraphs/renderer_gpu.pbtxt) - -```bash -# MediaPipe hand tracking rendering subgraph. 
- -type: "RendererSubgraph" - -input_stream: "IMAGE:input_image" -input_stream: "DETECTIONS:detections" -input_stream: "LANDMARKS:landmarks" -input_stream: "NORM_RECT:rect" -output_stream: "IMAGE:output_image" - -# Converts detections to drawing primitives for annotation overlay. -node { - calculator: "DetectionsToRenderDataCalculator" - input_stream: "DETECTIONS:detections" - output_stream: "RENDER_DATA:detection_render_data" - node_options: { - [type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] { - thickness: 4.0 - color { r: 0 g: 255 b: 0 } - } - } -} - -# Converts landmarks to drawing primitives for annotation overlay. -node { - calculator: "LandmarksToRenderDataCalculator" - input_stream: "NORM_LANDMARKS:landmarks" - output_stream: "RENDER_DATA:landmark_render_data" - node_options: { - [type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] { - landmark_connections: 0 - landmark_connections: 1 - landmark_connections: 1 - landmark_connections: 2 - landmark_connections: 2 - landmark_connections: 3 - landmark_connections: 3 - landmark_connections: 4 - landmark_connections: 0 - landmark_connections: 5 - landmark_connections: 5 - landmark_connections: 6 - landmark_connections: 6 - landmark_connections: 7 - landmark_connections: 7 - landmark_connections: 8 - landmark_connections: 5 - landmark_connections: 9 - landmark_connections: 9 - landmark_connections: 10 - landmark_connections: 10 - landmark_connections: 11 - landmark_connections: 11 - landmark_connections: 12 - landmark_connections: 9 - landmark_connections: 13 - landmark_connections: 13 - landmark_connections: 14 - landmark_connections: 14 - landmark_connections: 15 - landmark_connections: 15 - landmark_connections: 16 - landmark_connections: 13 - landmark_connections: 17 - landmark_connections: 0 - landmark_connections: 17 - landmark_connections: 17 - landmark_connections: 18 - landmark_connections: 18 - landmark_connections: 19 - landmark_connections: 19 - landmark_connections: 20 - landmark_color { r: 255 g: 0 b: 0 } - connection_color { r: 0 g: 255 b: 0 } - thickness: 4.0 - } - } -} - -# Converts normalized rects to drawing primitives for annotation overlay. -node { - calculator: "RectToRenderDataCalculator" - input_stream: "NORM_RECT:rect" - output_stream: "RENDER_DATA:rect_render_data" - node_options: { - [type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] { - filled: false - color { r: 255 g: 0 b: 0 } - thickness: 4.0 - } - } -} - -# Draws annotations and overlays them on top of the input images. -node { - calculator: "AnnotationOverlayCalculator" - input_stream: "IMAGE_GPU:input_image" - input_stream: "detection_render_data" - input_stream: "landmark_render_data" - input_stream: "rect_render_data" - output_stream: "IMAGE_GPU:output_image" -} -``` +*Fig 1. Tracked 3D hand landmarks are represented by dots in different shades, +with the brighter ones denoting landmarks closer to the camera.* + +## ML Pipeline + +MediaPipe Hand utilizes an ML pipeline consisting of multiple models working +together: A palm detection model that operates on the full image and returns an +oriented hand bounding box. A hand landmark model that operates on the cropped +image region defined by the palm detector and returns high-fidelity 3D hand +keypoints. This architecture is similar to that employed by our recently +released [MediaPipe Face Mesh](./face_mesh_mobile_gpu.md) solution. 
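+
+To make the two-model pipeline above concrete, the graph, its subgraphs and the
+TFLite models they load are all checked into the repository; one quick way to
+locate them from the repo root (paths as referenced elsewhere in this doc):
+
+```bash
+# Main graph and the subgraphs it wires together.
+ls mediapipe/graphs/hand_tracking/hand_tracking_mobile.pbtxt
+ls mediapipe/graphs/hand_tracking/subgraphs/
+# TFLite models used by the palm detection and hand landmark stages.
+ls mediapipe/models/palm_detection.tflite mediapipe/models/hand_landmark.tflite
+```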
+ +Providing the accurately cropped hand image to the hand landmark model +drastically reduces the need for data augmentation (e.g. rotations, translation +and scale) and instead allows the network to dedicate most of its capacity +towards coordinate prediction accuracy. In addition, in our pipeline the crops +can also be generated based on the hand landmarks identified in the previous +frame, and only when the landmark model could no longer identify hand presence +is palm detection invoked to relocalize the hand. + +The pipeline is implemented as a MediaPipe +[graph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/hand_tracking/hand_tracking_mobile.pbtxt), +which internally utilizes a +[palm/hand detection subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/hand_tracking/subgraphs/hand_detection_gpu.pbtxt), +a +[hand landmark subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/hand_tracking/subgraphs/hand_landmark_gpu.pbtxt) +and a +[renderer subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/hand_tracking/subgraphs/renderer_gpu.pbtxt). +For more information on how to visualize a graph and its associated subgraphs, +please see the [visualizer documentation](./visualizer.md). + +## Models + +### Palm Detection Model + +To detect initial hand locations, we designed a +[single-shot detector](https://arxiv.org/abs/1512.02325) model optimized for +mobile real-time uses in a manner similar to the face detection model in +[MediaPipe Face Mesh](./face_mesh_mobile_gpu.md). Detecting hands is a decidedly +complex task: our model has to work across a variety of hand sizes with a large +scale span (~20x) relative to the image frame and be able to detect occluded and +self-occluded hands. Whereas faces have high contrast patterns, e.g., in the eye +and mouth region, the lack of such features in hands makes it comparatively +difficult to detect them reliably from their visual features alone. Instead, +providing additional context, like arm, body, or person features, aids accurate +hand localization. + +Our method addresses the above challenges using different strategies. First, we +train a palm detector instead of a hand detector, since estimating bounding +boxes of rigid objects like palms and fists is significantly simpler than +detecting hands with articulated fingers. In addition, as palms are smaller +objects, the non-maximum suppression algorithm works well even for two-hand +self-occlusion cases, like handshakes. Moreover, palms can be modelled using +square bounding boxes (anchors in ML terminology) ignoring other aspect ratios, +and therefore reducing the number of anchors by a factor of 3-5. Second, an +encoder-decoder feature extractor is used for bigger scene context awareness +even for small objects (similar to the RetinaNet approach). Lastly, we minimize +the focal loss during training to support a large amount of anchors resulting +from the high scale variance. + +With the above techniques, we achieve an average precision of 95.7% in palm +detection. Using a regular cross entropy loss and no decoder gives a baseline of +just 86.22%. + +### Hand Landmark Model + +After the palm detection over the whole image our subsequent hand landmark model +performs precise keypoint localization of 21 3D hand-knuckle coordinates inside +the detected hand regions via regression, that is direct coordinate prediction. 
+The model learns a consistent internal hand pose representation and is robust +even to partially visible hands and self-occlusions. + +To obtain ground truth data, we have manually annotated ~30K real-world images +with 21 3D coordinates, as shown below (we take Z-value from image depth map, if +it exists per corresponding coordinate). To better cover the possible hand poses +and provide additional supervision on the nature of hand geometry, we also +render a high-quality synthetic hand model over various backgrounds and map it +to the corresponding 3D coordinates. + +![hand_crops.png](images/mobile/hand_crops.png) + +*Fig 2. Top: Aligned hand crops passed to the tracking network with ground truth +annotation. Bottom: Rendered synthetic hand images with ground truth +annotation.* + +## Example Apps + +Please see the [general instructions](./building_examples.md) for how to build +MediaPipe examples for different platforms. + +#### Main Example + +* Android: + [Source](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/handtrackinggpu), + [Prebuilt ARM64 APK](https://drive.google.com/open?id=1uCjS0y0O0dTDItsMh8x2cf4-l3uHW1vE) +* iOS: + [Source](https://github.com/google/mediapipe/tree/master/mediapipe/examples/ios/handtrackinggpu) +* Desktop: + [Source](https://github.com/google/mediapipe/tree/master/mediapipe/examples/desktop/hand_tracking) + +#### With Multi-hand Support + +* Android: + [Source](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/multihandtrackinggpu), + [Prebuilt ARM64 APK](https://drive.google.com/open?id=1Wk6V9EVaz1ks_MInPqqVGvvJD01SGXDc) +* iOS: + [Source](https://github.com/google/mediapipe/tree/master/mediapipe/examples/ios/multihandtrackinggpu) +* Desktop: + [Source](https://github.com/google/mediapipe/tree/master/mediapipe/examples/desktop/multi_hand_tracking) + +#### Palm/Hand Detection Only (no landmarks) + +* Android: + [Source](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/handdetectionggpu), + [Prebuilt ARM64 APK](https://drive.google.com/open?id=1qUlTtH7Ydg-wl_H6VVL8vueu2UCTu37E) +* iOS: + [Source](https://github.com/google/mediapipe/tree/master/mediapipe/examples/ios/handdetectiongpu) + +## Resources + +* [Google AI Blog: On-Device, Real-Time Hand Tracking with MediaPipe](https://ai.googleblog.com/2019/08/on-device-real-time-hand-tracking-with.html) +* [TensorFlow Blog: Face and hand tracking in the browser with MediaPipe and + TensorFlow.js](https://blog.tensorflow.org/2020/03/face-and-hand-tracking-in-browser-with-mediapipe-and-tensorflowjs.html) +* Palm detection model: + [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/models/palm_detection.tflite), + [TF.js model](https://tfhub.dev/mediapipe/handdetector/1) +* Hand landmark model: + [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/models/hand_landmark.tflite), + [TF.js model](https://tfhub.dev/mediapipe/handskeleton/1) +* [Model card](https://mediapipe.page.link/handmc) diff --git a/mediapipe/docs/hello_world_android.md b/mediapipe/docs/hello_world_android.md index eb186d657..a83b39a47 100644 --- a/mediapipe/docs/hello_world_android.md +++ b/mediapipe/docs/hello_world_android.md @@ -32,7 +32,7 @@ We will be using the following graph, [`edge_detection_mobile_gpu.pbtxt`]: ``` # MediaPipe graph that performs GPU Sobel edge detection on a live video stream. 
# Used in the examples -# mediapipe/examples/android/src/java/com/mediapipe/apps/edgedetectiongpu. +# mediapipe/examples/android/src/java/com/mediapipe/apps/basic. # mediapipe/examples/ios/edgedetectiongpu. # Images coming into and out of the graph. @@ -80,15 +80,15 @@ applications using `bazel`. Create a new directory where you will create your Android application. For example, the complete code of this tutorial can be found at -`mediapipe/examples/android/src/java/com/google/mediapipe/apps/edgedetectiongpu`. -We will refer to this path as `$APPLICATION_PATH` throughout the codelab. +`mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic`. We +will refer to this path as `$APPLICATION_PATH` throughout the codelab. Note that in the path to the application: -* The application is named `edgedetectiongpu`. +* The application is named `helloworld`. * The `$PACKAGE_PATH` of the application is - `com.google.mediapipe.apps.edgdetectiongpu`. This is used in code snippets in - this tutorial, so please remember to use your own `$PACKAGE_PATH` when you + `com.google.mediapipe.apps.basic`. This is used in code snippets in this + tutorial, so please remember to use your own `$PACKAGE_PATH` when you copy/use the code snippets. Add a file `activity_main.xml` to `$APPLICATION_PATH/res/layout`. This displays @@ -119,7 +119,7 @@ Add a simple `MainActivity.java` to `$APPLICATION_PATH` which loads the content of the `activity_main.xml` layout as shown below: ``` -package com.google.mediapipe.apps.edgedetectiongpu; +package com.google.mediapipe.apps.basic; import android.os.Bundle; import androidx.appcompat.app.AppCompatActivity; @@ -141,7 +141,7 @@ launches `MainActivity` on application start: ``` + package="com.google.mediapipe.apps.basic"> @@ -166,17 +166,8 @@ launches `MainActivity` on application start: ``` -To get `@string/app_name`, we need to add a file `strings.xml` to -`$APPLICATION_PATH/res/values/`: - -``` - - Edge Detection GPU - -``` - -Also, in our application we are using a `Theme.AppCompat` theme in the app, so -we need appropriate theme references. Add `colors.xml` to +In our application we are using a `Theme.AppCompat` theme in the app, so we need +appropriate theme references. Add `colors.xml` to `$APPLICATION_PATH/res/values/`: ``` @@ -204,11 +195,13 @@ Add `styles.xml` to `$APPLICATION_PATH/res/values/`: ``` -To build the application, add a `BUILD` file to `$APPLICATION_PATH`: +To build the application, add a `BUILD` file to `$APPLICATION_PATH`, and +`${appName}` and `${mainActivity}` in the manifest will be replaced by strings +specified in `BUILD` as shown below. ``` android_library( - name = "mediapipe_lib", + name = "basic_lib", srcs = glob(["*.java"]), manifest = "AndroidManifest.xml", resource_files = glob(["res/**"]), @@ -219,34 +212,36 @@ android_library( ) android_binary( - name = "edgedetectiongpu", - aapt_version = "aapt2", + name = "helloworld", manifest = "AndroidManifest.xml", - manifest_values = {"applicationId": "com.google.mediapipe.apps.edgedetectiongpu"}, + manifest_values = { + "applicationId": "com.google.mediapipe.apps.basic", + "appName": "Hello World", + "mainActivity": ".MainActivity", + }, multidex = "native", deps = [ - ":mediapipe_lib", + ":basic_lib", ], ) - ``` The `android_library` rule adds dependencies for `MainActivity`, resource files and `AndroidManifest.xml`. 
-The `android_binary` rule, uses the `mediapipe_lib` Android library generated to +The `android_binary` rule, uses the `basic_lib` Android library generated to build a binary APK for installation on your Android device. To build the app, use the following command: ``` -bazel build -c opt --config=android_arm64 $APPLICATION_PATH +bazel build -c opt --config=android_arm64 $APPLICATION_PATH:helloworld ``` Install the generated APK file using `adb install`. For example: ``` -adb install bazel-bin/$APPLICATION_PATH/edgedetectiongpu.apk +adb install bazel-bin/$APPLICATION_PATH/helloworld.apk ``` Open the application on your device. It should display a screen with the text @@ -438,22 +433,58 @@ visible so that we can start seeing frames from the `previewFrameTexture`. However, before starting the camera, we need to decide which camera we want to use. [`CameraXPreviewHelper`] inherits from [`CameraHelper`] which provides two -options, `FRONT` and `BACK`. We will use `BACK` camera for this application to -perform edge detection on a live scene that we view from the camera. +options, `FRONT` and `BACK`. We can pass in the decision from the `BUILD` file +as metadata such that no code change is required to build a another version of +the app using a different camera. -Add the following line to define `CAMERA_FACING` for our application, +Assuming we want to use `BACK` camera to perform edge detection on a live scene +that we view from the camera, add the metadata into `AndroidManifest.xml`: ``` -private static final CameraHelper.CameraFacing CAMERA_FACING = CameraHelper.CameraFacing.BACK; + ... + + + ``` -`CAMERA_FACING` is a static variable as we will use the same camera throughout -the application from start to finish. +and specify the selection in `BUILD` in the `helloworld` android binary rule +with a new entry in `manifest_values`: + +``` +manifest_values = { + "applicationId": "com.google.mediapipe.apps.basic", + "appName": "Hello World", + "mainActivity": ".MainActivity", + "cameraFacingFront": "False", +}, +``` + +Now, in `MainActivity` to retrieve the metadata specified in `manifest_values`, +add an [`ApplicationInfo`] object: + +``` +private ApplicationInfo applicationInfo; +``` + +In the `onCreate()` function, add: + +``` +try { + applicationInfo = + getPackageManager().getApplicationInfo(getPackageName(), PackageManager.GET_META_DATA); +} catch (NameNotFoundException e) { + Log.e(TAG, "Cannot find application info: " + e); +} +``` Now add the following line at the end of the `startCamera()` function: ``` -cameraHelper.startCamera(this, CAMERA_FACING, /*surfaceTexture=*/ null); +CameraHelper.CameraFacing cameraFacing = + applicationInfo.metaData.getBoolean("cameraFacingFront", false) + ? CameraHelper.CameraFacing.FRONT + : CameraHelper.CameraFacing.BACK; +cameraHelper.startCamera(this, cameraFacing, /*surfaceTexture=*/ null); ``` At this point, the application should build successfully. However, when you run @@ -595,24 +626,13 @@ build rule: MediaPipe graphs are `.pbtxt` files, but to use them in the application, we need to use the `mediapipe_binary_graph` build rule to generate a `.binarypb` file. -We can then use an application specific alias for the graph via the `genrule` -build rule. 
Add the following `genrule` to use an alias for the edge detection -graph: -``` -genrule( - name = "binary_graph", - srcs = ["//mediapipe/graphs/edge_detection:mobile_gpu_binary_graph"], - outs = ["edgedetectiongpu.binarypb"], - cmd = "cp $< $@", -) -``` - -Then in the `mediapipe_lib` build rule, add assets: +In the `helloworld` android binary build rule, add the `mediapipe_binary_graph` +target specific to the graph as an asset: ``` assets = [ - ":binary_graph", + "//mediapipe/graphs/edge_detection:mobile_gpu_binary_graph", ], assets_dir = "", ``` @@ -620,6 +640,26 @@ assets_dir = "", In the `assets` build rule, you can also add other assets such as TensorFlowLite models used in your graph. +In addition, add additional `manifest_values` for properties specific to the +graph, to be later retrieved in `MainActivity`: + +``` +manifest_values = { + "applicationId": "com.google.mediapipe.apps.basic", + "appName": "Hello World", + "mainActivity": ".MainActivity", + "cameraFacingFront": "False", + "binaryGraphName": "mobile_gpu.binarypb", + "inputVideoStreamName": "input_video", + "outputVideoStreamName": "output_video", +}, +``` + +Note that `binaryGraphName` indicates the filename of the binary graph, +determined by the `output_name` field in the `mediapipe_binary_graph` target. +`inputVideoStreamName` and `outputVideoStreamName` are the input and output +video stream name specified in the graph respectively. + Now, the `MainActivity` needs to load the MediaPipe framework. Also, the framework uses OpenCV, so `MainActvity` should also load `OpenCV`. Use the following code in `MainActivity` (inside the class, but not inside any function) @@ -648,15 +688,6 @@ Initialize the asset manager in `onCreate(Bundle)` before initializing AndroidAssetUtil.initializeNativeAssetManager(this); ``` -Declare a static variable with the graph name, the name of the input stream and -the name of the output stream: - -``` -private static final String BINARY_GRAPH_NAME = "edgedetectiongpu.binarypb"; -private static final String INPUT_VIDEO_STREAM_NAME = "input_video"; -private static final String OUTPUT_VIDEO_STREAM_NAME = "output_video"; -``` - Now, we need to setup a [`FrameProcessor`] object that sends camera frames prepared by the `converter` to the MediaPipe graph and runs the graph, prepares the output and then updates the `previewDisplayView` to display the output. Add @@ -673,9 +704,9 @@ processor = new FrameProcessor( this, eglManager.getNativeContext(), - BINARY_GRAPH_NAME, - INPUT_VIDEO_STREAM_NAME, - OUTPUT_VIDEO_STREAM_NAME); + applicationInfo.metaData.getString("binaryGraphName"), + applicationInfo.metaData.getString("inputVideoStreamName"), + applicationInfo.metaData.getString("outputVideoStreamName")); ``` The `processor` needs to consume the converted frames from the `converter` for @@ -712,8 +743,9 @@ feed! Congrats! ![edge_detection_android_gpu_gif](images/mobile/edge_detection_android_gpu.gif) If you ran into any issues, please see the full code of the tutorial -[here](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/edgedetectiongpu). +[here](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic). 
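+
+As a quick recap of the build-install loop used throughout this tutorial, the
+commands introduced above can be strung together as follows (this assumes
+`$APPLICATION_PATH` is set as defined earlier; `adb logcat` is just one way to
+look for errors if the preview stays black):
+
+```bash
+# Rebuild and reinstall the tutorial app after a change.
+bazel build -c opt --config=android_arm64 $APPLICATION_PATH:helloworld
+adb install -r bazel-bin/$APPLICATION_PATH/helloworld.apk
+
+# Watch the device log for MediaPipe or camera related messages.
+adb logcat | grep -iE "mediapipe|camera"
+```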
+[`ApplicationInfo`]:https://developer.android.com/reference/android/content/pm/ApplicationInfo [`AndroidAssetUtil`]:https://github.com/google/mediapipe/tree/master/mediapipe/java/com/google/mediapipe/framework/AndroidAssetUtil.java [Bazel]:https://bazel.build/ [`CameraHelper`]:https://github.com/google/mediapipe/tree/master/mediapipe/java/com/google/mediapipe/components/CameraHelper.java @@ -721,7 +753,6 @@ If you ran into any issues, please see the full code of the tutorial [`CameraXPreviewHelper`]:https://github.com/google/mediapipe/tree/master/mediapipe/java/com/google/mediapipe/components/CameraXPreviewHelper.java [developer options]:https://developer.android.com/studio/debug/dev-options [`edge_detection_mobile_gpu.pbtxt`]:https://github.com/google/mediapipe/tree/master/mediapipe/graphs/object_detection/object_detection_mobile_gpu.pbtxt -[`EdgeDetectionGPU` example]:https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/edgedetectiongpu/ [`EglManager`]:https://github.com/google/mediapipe/tree/master/mediapipe/java/com/google/mediapipe/glutil/EglManager.java [`ExternalTextureConverter`]:https://github.com/google/mediapipe/tree/master/mediapipe/java/com/google/mediapipe/components/ExternalTextureConverter.java [`FrameLayout`]:https://developer.android.com/reference/android/widget/FrameLayout diff --git a/mediapipe/docs/hello_world_ios.md b/mediapipe/docs/hello_world_ios.md index 76cc8afb3..ac9f76885 100644 --- a/mediapipe/docs/hello_world_ios.md +++ b/mediapipe/docs/hello_world_ios.md @@ -183,7 +183,7 @@ bazel build -c opt --config=ios_arm64 mediapipe/examples/ios/edgedetectiongpu:Ed Then, go back to XCode, open Window > Devices and Simulators, select your device, and add the `.ipa` file generated by the command above to your device. -Here is the document on [setting up and compiling](./mediapipe_ios_setup.md) iOS +Here is the document on [setting up and compiling](./building_examples.md#ios) iOS MediaPipe apps. Open the application on your device. Since it is empty, it should display a @@ -348,7 +348,7 @@ responded. 
Add the following code to `viewWillAppear:animated`: ``` [_cameraSource requestCameraAccessWithCompletionHandler:^void(BOOL granted) { if (granted) { - dispatch_queue(_videoQueue, ^{ + dispatch_async(_videoQueue, ^{ [_cameraSource start]; }); } @@ -405,7 +405,7 @@ Declare a static constant with the name of the graph, the input stream and the output stream: ``` -static NSString* const kGraphName = @"android_gpu"; +static NSString* const kGraphName = @"mobile_gpu"; static const char* kInputStream = "input_video"; static const char* kOutputStream = "output_video"; @@ -483,7 +483,7 @@ in our app: NSLog(@"Failed to start graph: %@", error); } - dispatch_queue(_videoQueue, ^{ + dispatch_async(_videoQueue, ^{ [_cameraSource start]; }); } diff --git a/mediapipe/docs/images/hand_tracking_desktop.png b/mediapipe/docs/images/hand_tracking_desktop.png index cfc38f057..30ea34de5 100644 Binary files a/mediapipe/docs/images/hand_tracking_desktop.png and b/mediapipe/docs/images/hand_tracking_desktop.png differ diff --git a/mediapipe/docs/images/mobile/hand_crops.png b/mediapipe/docs/images/mobile/hand_crops.png new file mode 100644 index 000000000..46195aab0 Binary files /dev/null and b/mediapipe/docs/images/mobile/hand_crops.png differ diff --git a/mediapipe/docs/images/mobile/hand_landmark_gpu_subgraph.png b/mediapipe/docs/images/mobile/hand_landmark_gpu_subgraph.png index e40a9169b..2e66d18d6 100644 Binary files a/mediapipe/docs/images/mobile/hand_landmark_gpu_subgraph.png and b/mediapipe/docs/images/mobile/hand_landmark_gpu_subgraph.png differ diff --git a/mediapipe/docs/images/mobile/hand_renderer_gpu_subgraph.png b/mediapipe/docs/images/mobile/hand_renderer_gpu_subgraph.png index 7fd2f5589..a32117252 100644 Binary files a/mediapipe/docs/images/mobile/hand_renderer_gpu_subgraph.png and b/mediapipe/docs/images/mobile/hand_renderer_gpu_subgraph.png differ diff --git a/mediapipe/docs/images/mobile/hand_tracking_mobile.png b/mediapipe/docs/images/mobile/hand_tracking_mobile.png index 3b2063190..fb70f5e66 100644 Binary files a/mediapipe/docs/images/mobile/hand_tracking_mobile.png and b/mediapipe/docs/images/mobile/hand_tracking_mobile.png differ diff --git a/mediapipe/docs/images/visualizer/ios_download_container.png b/mediapipe/docs/images/visualizer/ios_download_container.png new file mode 100644 index 000000000..375b5410f Binary files /dev/null and b/mediapipe/docs/images/visualizer/ios_download_container.png differ diff --git a/mediapipe/docs/images/visualizer/ios_window_devices.png b/mediapipe/docs/images/visualizer/ios_window_devices.png new file mode 100644 index 000000000..c778afeaa Binary files /dev/null and b/mediapipe/docs/images/visualizer/ios_window_devices.png differ diff --git a/mediapipe/docs/images/visualizer/viz_chart_view.png b/mediapipe/docs/images/visualizer/viz_chart_view.png new file mode 100644 index 000000000..f18061397 Binary files /dev/null and b/mediapipe/docs/images/visualizer/viz_chart_view.png differ diff --git a/mediapipe/docs/images/visualizer/viz_click_upload.png b/mediapipe/docs/images/visualizer/viz_click_upload.png new file mode 100644 index 000000000..c2f0ab127 Binary files /dev/null and b/mediapipe/docs/images/visualizer/viz_click_upload.png differ diff --git a/mediapipe/docs/images/visualizer/viz_click_upload_trace_file.png b/mediapipe/docs/images/visualizer/viz_click_upload_trace_file.png new file mode 100644 index 000000000..d1ba8a223 Binary files /dev/null and b/mediapipe/docs/images/visualizer/viz_click_upload_trace_file.png differ diff --git 
a/mediapipe/docs/install.md b/mediapipe/docs/install.md index 78b89135b..7d87fdd5a 100644 --- a/mediapipe/docs/install.md +++ b/mediapipe/docs/install.md @@ -16,18 +16,15 @@ Choose your operating system: - [Installing on Debian and Ubuntu](#installing-on-debian-and-ubuntu) - [Installing on CentOS](#installing-on-centos) - [Installing on macOS](#installing-on-macos) +- [Installing on Windows](#installing-on-windows) - [Installing on Windows Subsystem for Linux (WSL)](#installing-on-windows-subsystem-for-linux-wsl) - [Installing using Docker](#installing-using-docker) -To build and run Android apps: +To build and run Android example apps, see these +[instuctions](./building_examples.md#android). -- [Setting up Android SDK and NDK](#setting-up-android-sdk-and-ndk) -- [Using MediaPipe with Gradle](#using-mediapipe-with-gradle) -- [Using MediaPipe with Bazel](#using-mediapipe-with-bazel) - -To build and run iOS apps: - -- Please see the separate [iOS setup](./mediapipe_ios_setup.md) documentation. +To build and run iOS example apps, see these +[instuctions](./building_examples.md#ios). ### Installing on Debian and Ubuntu @@ -355,6 +352,105 @@ To build and run iOS apps: # Hello World! ``` +### Installing on Windows + +**Disclaimer**: Running MediaPipe on Windows is experimental. + +Note: building MediaPipe Android apps is still not possible on native +Windows. Please do this in WSL instead and see the WSL setup instruction in the +next section. + +1. Install [MSYS2](https://www.msys2.org/) and edit the `%PATH%` environment + variable. + + If MSYS2 is installed to `C:\msys64`, add `C:\msys64\usr\bin` to your + `%PATH%` environment variable. + +2. Install necessary packages. + + ``` + C:\> pacman -S git patch unzip + ``` + +3. Install Python and allow the executable to edit the `%PATH%` environment + variable. + + Download Python Windows executable from + https://www.python.org/downloads/windows/ and install. + +4. Install Visual C++ Build Tools 2019 and WinSDK + + Go to https://visualstudio.microsoft.com/visual-cpp-build-tools, download + build tools, and install Microsoft Visual C++ 2019 Redistributable and + Microsoft Build Tools 2019. + + Download the WinSDK from + https://developer.microsoft.com/en-us/windows/downloads/windows-10-sdk/ and + install. + +5. Install Bazel and add the location of the Bazel executable to the `%PATH%` + environment variable. + + Follow the official + [Bazel documentation](https://docs.bazel.build/versions/master/install-windows.html) + to install Bazel 2.0 or higher. + +6. Set Bazel variables. + + ``` + # Find the exact paths and version numbers from your local version. + C:\> set BAZEL_VS=C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools + C:\> set BAZEL_VC=C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC + C:\> set BAZEL_VC_FULL_VERSION=14.25.28610 + C:\> set BAZEL_WINSDK_FULL_VERSION=10.1.18362.1 + ``` + +7. Checkout MediaPipe repository. + + ``` + C:\Users\Username\mediapipe_repo> git clone https://github.com/google/mediapipe.git + + # Change directory into MediaPipe root directory + C:\Users\Username\mediapipe_repo> cd mediapipe + ``` + +8. Install OpenCV. + + Download the Windows executable from https://opencv.org/releases/ and + install. We currently use OpenCV 3.4.10. Remember to edit the [`WORKSPACE`] + file if OpenCV is not installed at `C:\opencv`. + + ``` + new_local_repository( + name = "windows_opencv", + build_file = "@//third_party:opencv_windows.BUILD", + path = "C:\\\\build", + ) + ``` + +9. 
Run the [Hello World desktop example](./hello_world_desktop.md). + + ``` + C:\Users\Username\mediapipe_repo>bazel build -c opt --define MEDIAPIPE_DISABLE_GPU=1 mediapipe/examples/desktop/hello_world + + C:\Users\Username\mediapipe_repo>set GLOG_logtostderr=1 + + C:\Users\Username\mediapipe_repo>bazel-bin\mediapipe\examples\desktop\hello_world\hello_world.exe + + # should print: + # I20200514 20:43:12.277598 1200 hello_world.cc:56] Hello World! + # I20200514 20:43:12.278597 1200 hello_world.cc:56] Hello World! + # I20200514 20:43:12.279618 1200 hello_world.cc:56] Hello World! + # I20200514 20:43:12.279618 1200 hello_world.cc:56] Hello World! + # I20200514 20:43:12.279618 1200 hello_world.cc:56] Hello World! + # I20200514 20:43:12.279618 1200 hello_world.cc:56] Hello World! + # I20200514 20:43:12.279618 1200 hello_world.cc:56] Hello World! + # I20200514 20:43:12.279618 1200 hello_world.cc:56] Hello World! + # I20200514 20:43:12.279618 1200 hello_world.cc:56] Hello World! + # I20200514 20:43:12.280613 1200 hello_world.cc:56] Hello World! + + ``` + ### Installing on Windows Subsystem for Linux (WSL) Note: The pre-built OpenCV packages don't support cameras in WSL. Unless you @@ -565,150 +661,8 @@ This will use a Docker image that will isolate mediapipe's installation from the docker run -i -t mediapipe:latest ``` --> -### Setting up Android SDK and NDK - -Requirements: - -* Java Runtime. -* Android SDK release 28.0.3 and above. -* Android NDK r17c and above. - -MediaPipe recommends setting up Android SDK and NDK via Android Studio, and see -[next section](#setting-up-android-studio-with-mediapipe) for Android Studio -setup. However, if you prefer using MediaPipe without Android Studio, please run -[`setup_android_sdk_and_ndk.sh`] to download and setup Android SDK and NDK -before building any Android example apps. - -If Android SDK and NDK are already installed (e.g., by Android Studio), set -$ANDROID_HOME and $ANDROID_NDK_HOME to point to the installed SDK and NDK. - -```bash -export ANDROID_HOME= -export ANDROID_NDK_HOME= -``` - -In order to use MediaPipe on earlier Android versions, MediaPipe needs to switch -to a lower Android API level. You can achieve this by specifying `api_level = -` in android_ndk_repository() and/or android_sdk_repository() -in the [`WORKSPACE`] file. - -Please verify all the necessary packages are installed. - -* Android SDK Platform API Level 28 or 29 -* Android SDK Build-Tools 28 or 29 -* Android SDK Platform-Tools 28 or 29 -* Android SDK Tools 26.1.1 -* Android NDK 17c or above - -### Using MediaPipe with Gradle - -MediaPipe can be used within an existing project, such as a Gradle project, -using the MediaPipe AAR target defined in mediapipe_aar.bzl. Please see the -separate [MediaPipe Android Archive Library](./android_archive_library.md) -documentation. - -### Using MediaPipe with Bazel - -The MediaPipe project can be imported to Android Studio using the Bazel plugins. -This allows the MediaPipe examples and demos to be built and modified in Android -Studio. To incorporate MediaPipe into an existing Android Studio project, see: -"Using MediaPipe with Gradle". The steps below use Android Studio 3.5 to build -and install a MediaPipe example app. - -1. Install and launch Android Studio 3.5. - -2. Select `Configure` | `SDK Manager` | `SDK Platforms`. - - * Verify that Android SDK Platform API Level 28 or 29 is installed. - * Take note of the Android SDK Location, e.g., - `/usr/local/home/Android/Sdk`. - -3. Select `Configure` | `SDK Manager` | `SDK Tools`. 
- - * Verify that Android SDK Build-Tools 28 or 29 is installed. - * Verify that Android SDK Platform-Tools 28 or 29 is installed. - * Verify that Android SDK Tools 26.1.1 is installed. - * Verify that Android NDK 17c or above is installed. - * Take note of the Android NDK Location, e.g., - `/usr/local/home/Android/Sdk/ndk-bundle` or - `/usr/local/home/Android/Sdk/ndk/20.0.5594570`. - -4. Set environment variables `$ANDROID_HOME` and `$ANDROID_NDK_HOME` to point - to the installed SDK and NDK. - - ```bash - export ANDROID_HOME=/usr/local/home/Android/Sdk - - # If the NDK libraries are installed by a previous version of Android Studio, do - export ANDROID_NDK_HOME=/usr/local/home/Android/Sdk/ndk-bundle - # If the NDK libraries are installed by Android Studio 3.5, do - export ANDROID_NDK_HOME=/usr/local/home/Android/Sdk/ndk/ - ``` - -5. Select `Configure` | `Plugins` install `Bazel`. - -6. On Linux, select `File` | `Settings`| `Bazel settings`. On macos, select - `Android Studio` | `Preferences` | `Bazel settings`. Then, modify `Bazel - binary location` to be the same as the output of `$ which bazel`. - -7. Select `Import Bazel Project`. - - * Select `Workspace`: `/path/to/mediapipe` and select `Next`. - * Select `Generate from BUILD file`: `/path/to/mediapipe/BUILD` and select - `Next`. - * Modify `Project View` to be the following and select `Finish`. - - ``` - directories: - # read project settings, e.g., .bazelrc - . - -mediapipe/objc - -mediapipe/examples/ios - - targets: - //mediapipe/examples/android/...:all - //mediapipe/java/...:all - - android_sdk_platform: android-29 - - sync_flags: - --host_crosstool_top=@bazel_tools//tools/cpp:toolchain - ``` - -8. Select `Bazel` | `Sync` | `Sync project with Build files`. - - Note: Even after doing step 4, if you still see the error: `"no such package - '@androidsdk//': Either the path attribute of android_sdk_repository or the - ANDROID_HOME environment variable must be set."`, please modify the - **WORKSPACE** file to point to your SDK and NDK library locations, as below: - - ``` - android_sdk_repository( - name = "androidsdk", - path = "/path/to/android/sdk" - ) - - android_ndk_repository( - name = "androidndk", - path = "/path/to/android/ndk" - ) - ``` - -9. Connect an Android device to the workstation. - -10. Select `Run...` | `Edit Configurations...`. - - * Select `Templates` | `Bazel Command`. - * Enter Target Expression: - `//mediapipe/examples/android/src/java/com/google/mediapipe/apps/facedetectioncpu` - * Enter Bazel command: `mobile-install`. - * Enter Bazel flags: `-c opt --config=android_arm64`. - * Press the `[+]` button to add the new configuration. - * Select `Run` to run the example app on the connected Android device. 
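For reference, the Android Studio run configuration in the last step above corresponds to an ordinary Bazel invocation. The sketch below is illustrative only: it assumes `$ANDROID_HOME` and `$ANDROID_NDK_HOME` are exported as described earlier and reuses the face detection CPU example target from step 10; the exact APK filename under `bazel-bin` is an assumption based on the target's package path.

```bash
# Build and deploy the example app in one step (device connected over adb);
# these are the same flags as in the Bazel Command template above.
bazel mobile-install -c opt --config=android_arm64 \
    //mediapipe/examples/android/src/java/com/google/mediapipe/apps/facedetectioncpu

# Alternatively, build the APK first and install it explicitly.
bazel build -c opt --config=android_arm64 \
    //mediapipe/examples/android/src/java/com/google/mediapipe/apps/facedetectioncpu
adb install -r \
    bazel-bin/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facedetectioncpu/facedetectioncpu.apk
```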
- [`WORKSPACE`]: https://github.com/google/mediapipe/tree/master/WORKSPACE [`opencv_linux.BUILD`]: https://github.com/google/mediapipe/tree/master/third_party/opencv_linux.BUILD [`opencv_macos.BUILD`]: https://github.com/google/mediapipe/tree/master/third_party/opencv_macos.BUILD [`ffmpeg_macos.BUILD`]:https://github.com/google/mediapipe/tree/master/third_party/ffmpeg_macos.BUILD [`setup_opencv.sh`]: https://github.com/google/mediapipe/tree/master/setup_opencv.sh -[`setup_android_sdk_and_ndk.sh`]: https://github.com/google/mediapipe/tree/master/setup_android_sdk_and_ndk.sh diff --git a/mediapipe/docs/media_sequence.md b/mediapipe/docs/media_sequence.md index e248aa5e4..10693ea7f 100644 --- a/mediapipe/docs/media_sequence.md +++ b/mediapipe/docs/media_sequence.md @@ -78,25 +78,8 @@ process new data sets, in the documentation of PYTHONPATH="${PYTHONPATH};"+`pwd` ``` - and then you can import the data set in Python. - - ```python - import tensorflow as tf - from mediapipe.examples.desktop.media_sequence.demo_dataset import DemoDataset - demo_data_path = '/tmp/demo_data/' - with tf.Graph().as_default(): - d = DemoDataset(demo_data_path) - dataset = d.as_dataset('test') - # implement additional processing and batching here - dataset_output = dataset.make_one_shot_iterator().get_next() - images = dataset_output['images'] - labels = dataset_output['labels'] - - with tf.Session() as sess: - images_, labels_ = sess.run([images, labels]) - print('The shape of images_ is %s' % str(images_.shape)) - print('The shape of labels_ is %s' % str(labels_.shape)) - ``` + and then you can import the data set in Python using + [read_demo_dataset.py](mediapipe/examples/desktop/media_sequence/read_demo_dataset.py) ### Preparing a practical data set As an example of processing a practical data set, a similar set of commands will diff --git a/mediapipe/docs/mediapipe_ios_setup.md b/mediapipe/docs/mediapipe_ios_setup.md deleted file mode 100644 index 8f8d2880c..000000000 --- a/mediapipe/docs/mediapipe_ios_setup.md +++ /dev/null @@ -1,118 +0,0 @@ -## Setting up MediaPipe for iOS - -1. Install [Xcode](https://developer.apple.com/xcode/) and the Command Line - Tools. - - Follow Apple's instructions to obtain the required development certificates - and provisioning profiles for your iOS device. Install the Command Line - Tools by - - ```bash - xcode-select --install - ``` - -2. Install [Bazel 1.1.0](https://bazel.build/). - - We recommend using [Homebrew](https://brew.sh/): - - ```bash - $ brew install https://raw.githubusercontent.com/bazelbuild/homebrew-tap/f8a0fa981bcb1784a0d0823e14867b844e94fb3d/Formula/bazel.rb - ``` - -3. Set Python 3.7 as the default Python version and install the Python "six" - library. - - To make Mediapipe work with TensorFlow, please set Python 3.7 as the default - Python version and install the Python "six" library. - - ```bash - pip3 install --user six - ``` - -4. Clone the MediaPipe repository. - - ```bash - git clone https://github.com/google/mediapipe.git - ``` - -5. Symlink or copy your provisioning profile to - `mediapipe/mediapipe/provisioning_profile.mobileprovision`. - - ```bash - cd mediapipe - ln -s ~/Downloads/MyProvisioningProfile.mobileprovision mediapipe/provisioning_profile.mobileprovision - ``` - -Tip: You can use this command to see the provisioning profiles you have -previously downloaded using Xcode: `open ~/Library/MobileDevice/"Provisioning Profiles"`. 
-If there are none, generate and download a profile on [Apple's developer site](https://developer.apple.com/account/resources/). - -## Creating an Xcode project - -Note: This workflow requires a separate tool in addition to Bazel. If it fails -to work for any reason, you can always use the command-line build instructions -in the next section. - -1. We will use a tool called [Tulsi](https://tulsi.bazel.build/) for generating Xcode projects from Bazel - build configurations. - - IMPORTANT: At the time of this writing, Tulsi has a small [issue](https://github.com/bazelbuild/tulsi/issues/98) - that keeps it from building with Xcode 10.3. The instructions below apply a - fix from a [pull request](https://github.com/bazelbuild/tulsi/pull/99). - - ```bash - # cd out of the mediapipe directory, then: - git clone https://github.com/bazelbuild/tulsi.git - cd tulsi - # Apply the fix for Xcode 10.3 compatibility: - git fetch origin pull/99/head:xcodefix - git checkout xcodefix - # Now we can build Tulsi. - sh build_and_run.sh - ``` - - This will install Tulsi.app inside the Applications directory inside your - home directory. - -2. Open `mediapipe/Mediapipe.tulsiproj` using the Tulsi app. - - Important: If Tulsi displays an error saying "Bazel could not be found", - press the "Bazel..." button in the Packages tab and select the `bazel` - executable in your homebrew `/bin/` directory. - -3. Select the MediaPipe config in the Configs tab, then press the Generate - button below. You will be asked for a location to save the Xcode project. - Once the project is generated, it will be opened in Xcode. - -4. You can now select any of the MediaPipe demos in the target menu, and build - and run them as normal. - -Note: When you ask Xcode to run an app, by default it will use the Debug -configuration. Some of our demos are computationally heavy; you may want to use -the Release configuration for better performance. - -Tip: To switch build configuration in Xcode, click on the target menu, choose -"Edit Scheme...", select the Run action, and switch the Build Configuration from -Debug to Release. Note that this is set independently for each target. - -## Building an iOS app from the command line - -1. Modify the `bundle_id` field of the app's ios_application rule to use your own identifier, e.g. for [Face Detection GPU App example](./face_detection_mobile_gpu.md), you need to modify the line 26 of the [BUILD file](https://github.com/google/mediapipe/blob/master/mediapipe/examples/ios/facedetectiongpu/BUILD). - -2. Build one of the example apps for iOS. We will be using the - [Face Detection GPU App example](./face_detection_mobile_gpu.md) - - ```bash - cd mediapipe - bazel build --config=ios_arm64 mediapipe/examples/ios/facedetectiongpu:FaceDetectionGpuApp - ``` - - You may see a permission request from `codesign` in order to sign the app. - -3. In Xcode, open the `Devices and Simulators` window (command-shift-2). - -4. Make sure your device is connected. You will see a list of installed apps. - Press the "+" button under the list, and select the `.ipa` file built by - Bazel. - -5. You can now run the app on your device. 
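For quick reference, the command-line flow described above (now covered in `building_examples.md`) boils down to a short sequence. This is a minimal sketch that assumes the provisioning profile has been symlinked and a unique `bundle_id` set as in the earlier steps, and it uses the Face Detection GPU app purely as an example target.

```bash
# From the MediaPipe repository root, after the provisioning profile and
# bundle_id changes described above:
bazel build --config=ios_arm64 \
    mediapipe/examples/ios/facedetectiongpu:FaceDetectionGpuApp

# Bazel writes the signed .ipa under bazel-bin; locate it, then add it through
# the "+" button in Xcode's "Devices and Simulators" window (Cmd-Shift-2).
find bazel-bin/mediapipe/examples/ios/facedetectiongpu -name "*.ipa"
```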
diff --git a/mediapipe/docs/multi_hand_tracking_mobile_gpu.md b/mediapipe/docs/multi_hand_tracking_mobile_gpu.md index 58ae8c38b..8665f2546 100644 --- a/mediapipe/docs/multi_hand_tracking_mobile_gpu.md +++ b/mediapipe/docs/multi_hand_tracking_mobile_gpu.md @@ -41,12 +41,6 @@ To build the app yourself, run: bazel build -c opt --config=android_arm64 mediapipe/examples/android/src/java/com/google/mediapipe/apps/multihandtrackinggpu ``` -To build for the 3D mode, run: - -```bash -bazel build -c opt --config=android_arm64 --define 3D=true mediapipe/examples/android/src/java/com/google/mediapipe/apps/multihandtrackinggpu -``` - Once the app is built, install it on Android device with: ```bash @@ -57,7 +51,7 @@ adb install bazel-bin/mediapipe/examples/android/src/java/com/google/mediapipe/a [Source](https://github.com/google/mediapipe/tree/master/mediapipe/examples/ios/multihandtrackinggpu). -See the general [instructions](./mediapipe_ios_setup.md) for building iOS +See the general [instructions](./building_examples.md#ios) for building iOS examples and generating an Xcode project. This will be the MultiHandTrackingGpuApp target. @@ -67,12 +61,6 @@ To build on the command line: bazel build -c opt --config=ios_arm64 mediapipe/examples/ios/multihandtrackinggpu:MultiHandTrackingGpuApp ``` -To build for the 3D mode, run: - -```bash -bazel build -c opt --config=ios_arm64 --define 3D=true mediapipe/examples/ios/multihandtrackinggpu:MultiHandTrackingGpuApp -``` - ## Graph The multi-hand tracking [main graph](#main-graph) internally utilizes a diff --git a/mediapipe/docs/object_detection_mobile_cpu.md b/mediapipe/docs/object_detection_mobile_cpu.md index cce007414..c167315c2 100644 --- a/mediapipe/docs/object_detection_mobile_cpu.md +++ b/mediapipe/docs/object_detection_mobile_cpu.md @@ -29,7 +29,7 @@ adb install bazel-bin/mediapipe/examples/android/src/java/com/google/mediapipe/a [Source](https://github.com/google/mediapipe/tree/master/mediapipe/examples/ios/handdetectiongpu). -See the general [instructions](./mediapipe_ios_setup.md) for building iOS +See the general [instructions](./building_examples.md#ios) for building iOS examples and generating an Xcode project. This will be the ObjectDetectionCpuApp target. diff --git a/mediapipe/docs/object_detection_mobile_gpu.md b/mediapipe/docs/object_detection_mobile_gpu.md index 031b7a02e..159ef6be7 100644 --- a/mediapipe/docs/object_detection_mobile_gpu.md +++ b/mediapipe/docs/object_detection_mobile_gpu.md @@ -21,7 +21,7 @@ adb install bazel-bin/mediapipe/examples/android/src/java/com/google/mediapipe/a [Source](https://github.com/google/mediapipe/tree/master/mediapipe/examples/ios/objectdetectiongpu). -See the general [instructions](./mediapipe_ios_setup.md) for building iOS +See the general [instructions](./building_examples.md#ios) for building iOS examples and generating an Xcode project. This will be the ObjectDetectionGpuApp target. diff --git a/mediapipe/docs/profiler_config.md b/mediapipe/docs/profiler_config.md new file mode 100644 index 000000000..08ce18f6b --- /dev/null +++ b/mediapipe/docs/profiler_config.md @@ -0,0 +1,74 @@ +# Profiler Configuration Settings + + + +[TOC] + +The following settings are used when setting up [MediaPipe Tracing](tracer.md). +Many of them are advanced and not recommended for general usage. Consult +[MediaPipe Tracing](tracer.md) for a friendlier introduction. + +histogram_interval_size_usec +: Specifies the size of the runtime histogram +intervals (in microseconds) used to generate the histogram of the `Process()` time.
The +last interval extends to +inf. If not specified, the interval is 1000000 usec = 1 sec. + +num_histogram_intervals +: Specifies the number of intervals to generate the +histogram of the `Process()` runtime. If not specified, one interval is used. + +enable_profiler +: If true, the profiler starts profiling when the graph is initialized. + +enable_stream_latency +: If true, the profiler also profiles the stream latency and input-output + latency. No-op if enable_profiler is false. + +use_packet_timestamp_for_added_packet +: If true, the profiler uses the packet timestamp (as production time and source + production time) for packets added by calling + `CalculatorGraph::AddPacketToInputStream()`. If false, it uses the profiler's + clock. + +trace_log_capacity +: The maximum number of trace events buffered in memory. The default value + buffers up to 20000 events. + +trace_event_types_disabled +: Trace event types that are not logged. + +trace_log_path +: The output directory and base-name prefix for trace log files. Log files are + written to: StrCat(trace_log_path, index, "`.binarypb`"). + +trace_log_count +: The number of trace log files retained. The trace log files are named + "`trace_0.log`" through "`trace_k.log`". The default value specifies 2 + output files retained. + +trace_log_interval_usec +: The interval in microseconds between trace log output. The default value + specifies trace log output once every 0.5 sec. + +trace_log_margin_usec +: The interval in microseconds between TimeNow and the highest times included + in trace log output. This margin allows time for events to be appended to + the TraceBuffer. + +trace_log_duration_events +: False specifies an event for each calculator invocation. True specifies a + separate event for each start and finish time. + +trace_log_interval_count +: The number of trace log intervals per file. The total log duration is: + `trace_log_interval_usec * trace_log_file_count * trace_log_interval_count`. + The default value specifies 10 intervals per file. + +trace_log_disabled +: An option to turn ON/OFF writing trace files to disk. Saving trace files to + disk is enabled by default. + +trace_enabled +: If true, tracer timing events are recorded and reported. diff --git a/mediapipe/docs/template_matching_mobile_cpu.md b/mediapipe/docs/template_matching_mobile_cpu.md index d5b87fdfa..78d813442 100644 --- a/mediapipe/docs/template_matching_mobile_cpu.md +++ b/mediapipe/docs/template_matching_mobile_cpu.md @@ -36,7 +36,7 @@ $ bazel build -c opt --define MEDIAPIPE_DISABLE_GPU=1 \ mediapipe/examples/desktop/template_matching:template_matching_tflite $ bazel-bin/mediapipe/examples/desktop/template_matching/template_matching_tflite \ --calculator_graph_config_file=mediapipe/graphs/template_matching/index_building.pbtxt \ - --input_side_packets="file_directory=