Merge branch 'master' into master

Commit fde35cd091 by thuan86, 2021-03-24 15:43:27 +08:00, committed via GitHub.
1168 changed files with 92145 additions and 17715 deletions

@ -5,25 +5,28 @@ common --experimental_repo_remote_exec
# Basic build settings
build --jobs 128
build --define='absl=1'
build --define='absl=1' # for gtest
build --enable_platform_specific_config
# Enable stack traces
test --test_env="GTEST_INSTALL_FAILURE_SIGNAL_HANDLER=1"
# Linux
build:linux --cxxopt=-std=c++14
build:linux --host_cxxopt=-std=c++14
build:linux --cxxopt=-std=c++17
build:linux --host_cxxopt=-std=c++17
build:linux --copt=-w
# windows
build:windows --cxxopt=/std:c++14
build:windows --host_cxxopt=/std:c++14
build:windows --cxxopt=/std:c++17
build:windows --host_cxxopt=/std:c++17
build:windows --copt=/w
# For using M_* math constants on Windows with MSVC.
build:windows --copt=/D_USE_MATH_DEFINES
build:windows --host_copt=/D_USE_MATH_DEFINES
# macOS
build:macos --cxxopt=-std=c++14
build:macos --host_cxxopt=-std=c++14
build:macos --cxxopt=-std=c++17
build:macos --host_cxxopt=-std=c++17
build:macos --copt=-w
# Sets the default Apple platform to macOS.
@ -83,3 +86,9 @@ build:ios_fat --watchos_cpus=armv7k
build:darwin_x86_64 --apple_platform_type=macos
build:darwin_x86_64 --macos_minimum_os=10.12
build:darwin_x86_64 --cpu=darwin_x86_64
# This bazelrc file is meant to be written by a setup script.
try-import %workspace%/.configure.bazelrc
# This bazelrc file can be used for user-specific custom build settings.
try-import %workspace%/.user.bazelrc

.gitignore (2 changed lines)

@ -2,3 +2,5 @@ bazel-*
mediapipe/MediaPipe.xcodeproj
mediapipe/MediaPipe.tulsiproj/*.tulsiconf-user
mediapipe/provisioning_profile.mobileprovision
.configure.bazelrc
.user.bazelrc

@ -54,7 +54,7 @@ RUN pip3 install tf_slim
RUN ln -s /usr/bin/python3 /usr/bin/python
# Install bazel
ARG BAZEL_VERSION=2.0.0
ARG BAZEL_VERSION=3.4.1
RUN mkdir /bazel && \
wget --no-check-certificate -O /bazel/installer.sh "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/b\
azel-${BAZEL_VERSION}-installer-linux-x86_64.sh" && \
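This hunk bumps the Bazel version baked into the Docker image from 2.0.0 to 3.4.1, in line with the WORKSPACE change further down that raises `minimum_bazel_version` to 3.4.0. To rebuild and enter the image after such a change, something like the following should work (a sketch, assuming the Dockerfile sits at the repository root):
```bash
# Rebuild the MediaPipe image and start an interactive container from it.
docker build --tag=mediapipe .
docker run -it --name mediapipe mediapipe:latest
```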

@ -7,5 +7,6 @@ include MANIFEST.in
include README.md
include requirements.txt
recursive-include mediapipe/modules *.tflite *.txt
recursive-include mediapipe/graphs *.binarypb
recursive-include mediapipe/modules *.tflite *.txt *.binarypb
exclude mediapipe/modules/objectron/object_detection_3d_chair_1stage.tflite
exclude mediapipe/modules/objectron/object_detection_3d_sneakers_1stage.tflite
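These MANIFEST.in rules control which non-Python assets ship in the PyPI package: the updated lines additionally bundle the `.binarypb` graphs, while the large single-stage Objectron models stay excluded. One way to double-check what a source distribution would contain (a sketch; it assumes `setup.py sdist` runs in your checkout, and the archive name depends on the package version):
```bash
# Build a source distribution and list the bundled model/graph files.
python3 setup.py sdist
tar -tzf dist/mediapipe-*.tar.gz | grep -E '\.(tflite|binarypb)$' | head
```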

@ -8,48 +8,61 @@ nav_order: 1
--------------------------------------------------------------------------------
## Cross-platform ML solutions made simple
## Live ML anywhere
[MediaPipe](https://google.github.io/mediapipe/) is the simplest way for researchers
and developers to build world-class ML solutions and applications for mobile,
desktop/cloud, web and IoT devices.
[MediaPipe](https://google.github.io/mediapipe/) offers cross-platform, customizable
ML solutions for live and streaming media.
![accelerated.png](docs/images/accelerated_small.png) | ![cross_platform.png](docs/images/cross_platform_small.png)
:------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------:
***End-to-End acceleration***: *built-in fast ML inference and processing accelerated even on common hardware* | ***Build one, deploy anywhere***: *Unified solution works across Android, iOS, desktop/cloud, web and IoT*
***End-to-End acceleration***: *Built-in fast ML inference and processing accelerated even on common hardware* | ***Build once, deploy anywhere***: *Unified solution works across Android, iOS, desktop/cloud, web and IoT*
![ready_to_use.png](docs/images/ready_to_use_small.png) | ![open_source.png](docs/images/open_source_small.png)
***Ready-to-use solutions***: *Cutting-edge ML solutions demonstrating full power of the framework* | ***Free and open source***: *Framework and solutions both under Apache 2.0, fully extensible and customizable*
## ML solutions in MediaPipe
Face Detection | Face Mesh | Iris | Hands | Pose | Hair Segmentation
:----------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------: | :---------------:
[![face_detection](docs/images/mobile/face_detection_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/face_detection) | [![face_mesh](docs/images/mobile/face_mesh_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/face_mesh) | [![iris](docs/images/mobile/iris_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/iris) | [![hand](docs/images/mobile/hand_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/hands) | [![pose](docs/images/mobile/pose_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/pose) | [![hair_segmentation](docs/images/mobile/hair_segmentation_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/hair_segmentation)
Face Detection | Face Mesh | Iris | Hands | Pose | Holistic
:----------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------: | :------:
[![face_detection](docs/images/mobile/face_detection_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/face_detection) | [![face_mesh](docs/images/mobile/face_mesh_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/face_mesh) | [![iris](docs/images/mobile/iris_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/iris) | [![hand](docs/images/mobile/hand_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/hands) | [![pose](docs/images/mobile/pose_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/pose) | [![hair_segmentation](docs/images/mobile/holistic_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/holistic)
Object Detection | Box Tracking | Instant Motion Tracking | Objectron | KNIFT
:----------------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------: | :---:
[![object_detection](docs/images/mobile/object_detection_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/object_detection) | [![box_tracking](docs/images/mobile/object_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/box_tracking) | [![instant_motion_tracking](docs/images/mobile/instant_motion_tracking_android_small.gif)](https://google.github.io/mediapipe/solutions/instant_motion_tracking) | [![objectron](docs/images/mobile/objectron_chair_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/objectron) | [![knift](docs/images/mobile/template_matching_android_cpu_small.gif)](https://google.github.io/mediapipe/solutions/knift)
Hair Segmentation | Object Detection | Box Tracking | Instant Motion Tracking | Objectron | KNIFT
:-------------------------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------: | :---:
[![hair_segmentation](docs/images/mobile/hair_segmentation_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/hair_segmentation) | [![object_detection](docs/images/mobile/object_detection_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/object_detection) | [![box_tracking](docs/images/mobile/object_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/box_tracking) | [![instant_motion_tracking](docs/images/mobile/instant_motion_tracking_android_small.gif)](https://google.github.io/mediapipe/solutions/instant_motion_tracking) | [![objectron](docs/images/mobile/objectron_chair_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/objectron) | [![knift](docs/images/mobile/template_matching_android_cpu_small.gif)](https://google.github.io/mediapipe/solutions/knift)
<!-- []() in the first cell is needed to preserve table formatting in GitHub Pages. -->
<!-- Whenever this table is updated, paste a copy to solutions/solutions.md. -->
[]() | Android | iOS | Desktop | Python | Web | Coral
:---------------------------------------------------------------------------------------- | :-----: | :-: | :-----: | :----: | :-: | :---:
[Face Detection](https://google.github.io/mediapipe/solutions/face_detection) | ✅ | ✅ | ✅ | | ✅ | ✅
[Face Mesh](https://google.github.io/mediapipe/solutions/face_mesh) | ✅ | ✅ | ✅ | | |
[Iris](https://google.github.io/mediapipe/solutions/iris) | ✅ | ✅ | ✅ | | ✅ |
[Hands](https://google.github.io/mediapipe/solutions/hands) | ✅ | ✅ | ✅ | | ✅ |
[]() | [Android](https://google.github.io/mediapipe/getting_started/android) | [iOS](https://google.github.io/mediapipe/getting_started/ios) | [C++](https://google.github.io/mediapipe/getting_started/cpp) | [Python](https://google.github.io/mediapipe/getting_started/python) | [JS](https://google.github.io/mediapipe/getting_started/javascript) | [Coral](https://github.com/google/mediapipe/tree/master/mediapipe/examples/coral/README.md)
:---------------------------------------------------------------------------------------- | :-------------------------------------------------------------: | :-----------------------------------------------------: | :-----------------------------------------------------: | :-----------------------------------------------------------: | :-----------------------------------------------------------: | :--------------------------------------------------------------------:
[Face Detection](https://google.github.io/mediapipe/solutions/face_detection) | ✅ | ✅ | ✅ | | ✅ | ✅
[Face Mesh](https://google.github.io/mediapipe/solutions/face_mesh) | ✅ | ✅ | ✅ | | |
[Iris](https://google.github.io/mediapipe/solutions/iris) | ✅ | ✅ | ✅ | | |
[Hands](https://google.github.io/mediapipe/solutions/hands) | ✅ | ✅ | ✅ | | ✅ |
[Pose](https://google.github.io/mediapipe/solutions/pose) | ✅ | ✅ | ✅ | ✅ | ✅ |
[Hair Segmentation](https://google.github.io/mediapipe/solutions/hair_segmentation) | ✅ | | ✅ | | ✅ |
[Holistic](https://google.github.io/mediapipe/solutions/holistic) | ✅ | ✅ | ✅ | ✅ | ✅ |
[Hair Segmentation](https://google.github.io/mediapipe/solutions/hair_segmentation) | ✅ | | ✅ | | |
[Object Detection](https://google.github.io/mediapipe/solutions/object_detection) | ✅ | ✅ | ✅ | | | ✅
[Box Tracking](https://google.github.io/mediapipe/solutions/box_tracking) | ✅ | ✅ | ✅ | | |
[Instant Motion Tracking](https://google.github.io/mediapipe/solutions/instant_motion_tracking) | ✅ | | | | |
[Objectron](https://google.github.io/mediapipe/solutions/objectron) | ✅ | | | | |
[Objectron](https://google.github.io/mediapipe/solutions/objectron) | ✅ | | | | |
[KNIFT](https://google.github.io/mediapipe/solutions/knift) | ✅ | | | | |
[AutoFlip](https://google.github.io/mediapipe/solutions/autoflip) | | | ✅ | | |
[MediaSequence](https://google.github.io/mediapipe/solutions/media_sequence) | | | ✅ | | |
[YouTube 8M](https://google.github.io/mediapipe/solutions/youtube_8m) | | | ✅ | | |
See also
[MediaPipe Models and Model Cards](https://google.github.io/mediapipe/solutions/models)
for ML models released in MediaPipe.
## MediaPipe in Python
MediaPipe offers customizable Python solutions as a prebuilt Python package on
[PyPI](https://pypi.org/project/mediapipe/), which can be installed simply with
`pip install mediapipe`. It also provides tools for users to build their own
solutions. Please see
[MediaPipe in Python](https://google.github.io/mediapipe/getting_started/python)
for more info.
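As a minimal sketch of that workflow (assuming a Linux or macOS shell with Python 3, and that the installed package exposes `mediapipe.__version__`):
```bash
# Create an isolated environment and install the prebuilt package from PyPI.
python3 -m venv mp_env && source mp_env/bin/activate
pip install mediapipe
# Quick sanity check that the package imports.
python3 -c "import mediapipe as mp; print(mp.__version__)"
```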
## MediaPipe on the Web
MediaPipe on the Web is an effort to run the same ML solutions built for mobile
@ -89,7 +102,13 @@ run code search using
## Publications
* [Instant Motion Tracking With MediaPipe](https://mediapipe.page.link/instant-motion-tracking-blog)
* [MediaPipe Holistic - Simultaneous Face, Hand and Pose Prediction, on Device](https://ai.googleblog.com/2020/12/mediapipe-holistic-simultaneous-face.html)
in Google AI Blog
* [Background Features in Google Meet, Powered by Web ML](https://ai.googleblog.com/2020/10/background-features-in-google-meet.html)
in Google AI Blog
* [MediaPipe 3D Face Transform](https://developers.googleblog.com/2020/09/mediapipe-3d-face-transform.html)
in Google Developers Blog
* [Instant Motion Tracking With MediaPipe](https://developers.googleblog.com/2020/08/instant-motion-tracking-with-mediapipe.html)
in Google Developers Blog
* [BlazePose - On-device Real-time Body Pose Tracking](https://ai.googleblog.com/2020/08/on-device-real-time-body-pose-tracking.html)
in Google AI Blog

@ -10,14 +10,14 @@ http_archive(
sha256 = "1dde365491125a3db70731e25658dfdd3bc5dbdfd11b840b3e987ecf043c7ca0",
)
load("@bazel_skylib//lib:versions.bzl", "versions")
versions.check(minimum_bazel_version = "2.0.0")
versions.check(minimum_bazel_version = "3.4.0")
# ABSL cpp library lts_2020_02_25
# ABSL cpp library lts_2020_09_23
http_archive(
name = "com_google_absl",
urls = [
"https://github.com/abseil/abseil-cpp/archive/20200225.tar.gz",
"https://github.com/abseil/abseil-cpp/archive/20200923.tar.gz",
],
# Remove after https://github.com/abseil/abseil-cpp/issues/326 is solved.
patches = [
@ -26,8 +26,8 @@ http_archive(
patch_args = [
"-p1",
],
strip_prefix = "abseil-cpp-20200225",
sha256 = "728a813291bdec2aa46eab8356ace9f75ac2ed9dfe2df5ab603c4e6c09f1c353"
strip_prefix = "abseil-cpp-20200923",
sha256 = "b3744a4f7a249d5eaf2309daad597631ce77ea62e0fc6abffbab4b4c3dc0fc08"
)
http_archive(
@ -38,8 +38,8 @@ http_archive(
http_archive(
name = "rules_foreign_cc",
strip_prefix = "rules_foreign_cc-master",
url = "https://github.com/bazelbuild/rules_foreign_cc/archive/master.zip",
strip_prefix = "rules_foreign_cc-main",
url = "https://github.com/bazelbuild/rules_foreign_cc/archive/main.zip",
)
load("@rules_foreign_cc//:workspace_definitions.bzl", "rules_foreign_cc_dependencies")
@ -99,7 +99,7 @@ http_archive(
"https://github.com/google/glog/archive/0a2e5931bd5ff22fd3bf8999eb8ce776f159cda6.zip",
],
patches = [
"@//third_party:com_github_glog_glog_9779e5ea6ef59562b030248947f787d1256132ae.diff"
"@//third_party:com_github_glog_glog_9779e5ea6ef59562b030248947f787d1256132ae.diff",
],
patch_args = [
"-p1",
@ -170,15 +170,15 @@ http_archive(
http_archive(
name = "ceres_solver",
url = "https://github.com/ceres-solver/ceres-solver/archive/1.14.0.zip",
url = "https://github.com/ceres-solver/ceres-solver/archive/2.0.0.zip",
patches = [
"@//third_party:ceres_solver_compatibility_fixes.diff"
],
patch_args = [
"-p1",
],
strip_prefix = "ceres-solver-1.14.0",
sha256 = "5ba6d0db4e784621fda44a50c58bb23b0892684692f0c623e2063f9c19f192f1"
strip_prefix = "ceres-solver-2.0.0",
sha256 = "db12d37b4cebb26353ae5b7746c7985e00877baa8e7b12dc4d3a1512252fff3b"
)
http_archive(
@ -324,8 +324,9 @@ maven_install(
"androidx.lifecycle:lifecycle-common:2.2.0",
"androidx.annotation:annotation:aar:1.1.0",
"androidx.appcompat:appcompat:aar:1.1.0-rc01",
"androidx.camera:camera-core:aar:1.0.0-alpha06",
"androidx.camera:camera-camera2:aar:1.0.0-alpha06",
"androidx.camera:camera-core:1.0.0-beta10",
"androidx.camera:camera-camera2:1.0.0-beta10",
"androidx.camera:camera-lifecycle:1.0.0-beta10",
"androidx.constraintlayout:constraintlayout:aar:1.1.3",
"androidx.core:core:aar:1.1.0-rc03",
"androidx.legacy:legacy-support-v4:aar:1.0.0",
@ -337,6 +338,7 @@ maven_install(
"com.google.flogger:flogger-system-backend:0.3.1",
"com.google.flogger:flogger:0.3.1",
"com.google.guava:guava:27.0.1-android",
"com.google.guava:listenablefuture:1.0",
"junit:junit:4.12",
"org.hamcrest:hamcrest-library:1.3",
],
@ -362,9 +364,9 @@ http_archive(
)
#Tensorflow repo should always go after the other external dependencies.
# 2020-08-30
_TENSORFLOW_GIT_COMMIT = "57b009e31e59bd1a7ae85ef8c0232ed86c9b71db"
_TENSORFLOW_SHA256= "de7f5f06204e057383028c7e53f3b352cdf85b3a40981b1a770c9a415a792c0e"
# 2020-12-09
_TENSORFLOW_GIT_COMMIT = "0eadbb13cef1226b1bae17c941f7870734d97f8a"
_TENSORFLOW_SHA256= "4ae06daa5b09c62f31b7bc1f781fd59053f286dd64355830d8c2ac601b795ef0"
http_archive(
name = "org_tensorflow",
urls = [
@ -372,6 +374,7 @@ http_archive(
],
patches = [
"@//third_party:org_tensorflow_compatibility_fixes.diff",
"@//third_party:org_tensorflow_objc_cxx17.diff",
],
patch_args = [
"-p1",

@ -89,7 +89,6 @@ for app in ${apps}; do
fi
target="${app}:${target_name}"
bin="${bin_dir}/${app}/${target_name}.apk"
apk="${out_dir}/${target_name}.apk"
echo "=== Target: ${target}"
@ -99,7 +98,25 @@ for app in ${apps}; do
if [[ $strip == true ]]; then
bazel_flags+=(--linkopt=-s)
fi
fi
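# Objectron ships several category-specific models: build one APK per category,
# selecting everything except the default "shoe" model via --define <category>=true.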
if [[ ${app_name} == "objectdetection3d" ]]; then
categories=("shoe" "chair" "cup" "camera" "shoe_1stage" "chair_1stage")
for category in "${categories[@]}"; do
apk="${out_dir}/${target_name}_${category}.apk"
if [[ $install_only == false ]]; then
bazel_flags_extended=("${bazel_flags[@]}")
if [[ ${category} != "shoe" ]]; then
bazel_flags_extended+=(--define ${category}=true)
fi
bazel "${bazel_flags_extended[@]}"
cp -f "${bin}" "${apk}"
fi
apks+=(${apk})
done
else
apk="${out_dir}/${target_name}.apk"
if [[ $install_only == false ]]; then
if [[ ${app_name} == "templatematchingcpu" ]]; then
switch_to_opencv_4
fi
@ -109,23 +126,9 @@ for app in ${apps}; do
switch_to_opencv_3
fi
fi
if [[ ${app_name} == "objectdetection3d" ]]; then
orig_apk=${apk}
apk="${out_dir}/${target_name}_shoes.apk"
cp -f "${orig_apk}" "${apk}"
apks+=(${apk})
apk="${out_dir}/${target_name}_chairs.apk"
if [[ $install_only == false ]]; then
bazel_flags+=(--define chair=true)
bazel "${bazel_flags[@]}"
cp -f "${bin}" "${apk}"
fi
fi
apks+=(${apk})
fi
done
echo

build_desktop_examples.sh (new file, 109 lines)

@ -0,0 +1,109 @@
#!/bin/bash
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========================================================================
#
# Script to build/run all MediaPipe desktop example apps (with webcam input).
#
# To build and run all apps and store them in out_dir:
# $ ./build_desktop_examples.sh -d out_dir
# Omitting -d and the associated directory saves all generated apps in the
# current directory.
# To build all apps and store them in out_dir:
# $ ./build_desktop_examples.sh -d out_dir -b
# Omitting -d and the associated directory saves all generated apps in the
# current directory.
# To run all apps already stored in out_dir:
# $ ./build_desktop_examples.sh -d out_dir -r
# Omitting -d and the associated directory assumes all apps are in the current
# directory.
set -e
out_dir="."
build_only=false
run_only=false
app_dir="mediapipe/examples/desktop"
bin_dir="bazel-bin"
declare -a default_bazel_flags=(build -c opt --define MEDIAPIPE_DISABLE_GPU=1)
while [[ -n $1 ]]; do
case $1 in
-d)
shift
out_dir=$1
;;
-b)
build_only=true
;;
-r)
run_only=true
;;
*)
echo "Unsupported input argument $1."
exit 1
;;
esac
shift
done
echo "app_dir: $app_dir"
echo "out_dir: $out_dir"
declare -a bazel_flags
apps="${app_dir}/*"
for app in ${apps}; do
if [[ -d "${app}" ]]; then
target_name=${app##*/}
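# Skip targets that don't follow the simple webcam-demo pattern assumed below.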
if [[ "${target_name}" == "autoflip" ||
"${target_name}" == "hello_world" ||
"${target_name}" == "media_sequence" ||
"${target_name}" == "object_detection_3d" ||
"${target_name}" == "template_matching" ||
"${target_name}" == "youtube8m" ]]; then
continue
fi
target="${app}:${target_name}_cpu"
echo "=== Target: ${target}"
if [[ $run_only == false ]]; then
bazel_flags=("${default_bazel_flags[@]}")
bazel_flags+=(${target})
bazel "${bazel_flags[@]}"
cp -f "${bin_dir}/${app}/"*"_cpu" "${out_dir}"
fi
if [[ $build_only == false ]]; then
if [[ ${target_name} == "object_tracking" ]]; then
graph_name="tracking/object_detection_tracking"
elif [[ ${target_name} == "upper_body_pose_tracking" ]]; then
graph_name="pose_tracking/upper_body_pose_tracking"
else
graph_name="${target_name}/${target_name}"
fi
if [[ ${target_name} == "holistic_tracking" ||
${target_name} == "iris_tracking" ||
${target_name} == "pose_tracking" ||
${target_name} == "upper_body_pose_tracking" ]]; then
graph_suffix="cpu"
else
graph_suffix="desktop_live"
fi
GLOG_logtostderr=1 "${out_dir}/${target_name}_cpu" \
--calculator_graph_config_file=mediapipe/graphs/"${graph_name}_${graph_suffix}.pbtxt"
fi
fi
done

@ -67,26 +67,26 @@ class CalculatorBase {
// The subclasses of CalculatorBase must implement GetContract.
// ...
static ::mediapipe::Status GetContract(CalculatorContract* cc);
static absl::Status GetContract(CalculatorContract* cc);
// Open is called before any Process() calls, on a freshly constructed
// calculator. Subclasses may override this method to perform necessary
// setup, and possibly output Packets and/or set output streams' headers.
// ...
virtual ::mediapipe::Status Open(CalculatorContext* cc) {
return ::mediapipe::OkStatus();
virtual absl::Status Open(CalculatorContext* cc) {
return absl::OkStatus();
}
// Processes the incoming inputs. May call the methods on cc to access
// inputs and produce outputs.
// ...
virtual ::mediapipe::Status Process(CalculatorContext* cc) = 0;
virtual absl::Status Process(CalculatorContext* cc) = 0;
// Is called if Open() was called and succeeded. Is called either
// immediately after processing is complete or after a graph run has ended
// (if an error occurred in the graph). ...
virtual ::mediapipe::Status Close(CalculatorContext* cc) {
return ::mediapipe::OkStatus();
virtual absl::Status Close(CalculatorContext* cc) {
return absl::OkStatus();
}
...
@ -199,7 +199,7 @@ name and index number. In the function below input are output are identified:
// c++ Code snippet describing the SomeAudioVideoCalculator GetContract() method
class SomeAudioVideoCalculator : public CalculatorBase {
public:
static ::mediapipe::Status GetContract(CalculatorContract* cc) {
static absl::Status GetContract(CalculatorContract* cc) {
cc->Inputs().Index(0).SetAny();
// SetAny() is used to specify that whatever the type of the
// stream is, it's acceptable. This does not mean that any
@ -209,13 +209,13 @@ class SomeAudioVideoCalculator : public CalculatorBase {
cc->Outputs().Tag("VIDEO").Set<ImageFrame>();
cc->Outputs().Get("AUDIO", 0).Set<Matrix>();
cc->Outputs().Get("AUDIO", 1).Set<Matrix>();
return ::mediapipe::OkStatus();
return absl::OkStatus();
}
```
## Processing
`Process()` called on a non-source node must return `::mediapipe::OkStatus()` to
`Process()` called on a non-source node must return `absl::OkStatus()` to
indicate that all went well, or any other status code to signal an error.
If a non-source calculator returns `tool::StatusStop()`, then this signals the
@ -224,12 +224,12 @@ input streams will be closed (and remaining Packets will propagate through the
graph).
A source node in a graph will continue to have `Process()` called on it as long
as it returns `::mediapipe::OkStatus()`. To indicate that there is no more data
to be generated return `tool::StatusStop()`. Any other status indicates an error
has occurred.
as it returns `absl::OkStatus()`. To indicate that there is no more data to be
generated return `tool::StatusStop()`. Any other status indicates an error has
occurred.
`Close()` returns `::mediapipe::OkStatus()` to indicate success. Any other
status indicates a failure.
`Close()` returns `absl::OkStatus()` to indicate success. Any other status
indicates a failure.
Here is the basic `Process()` function. It uses the `Input()` method (which can
be used only if the calculator has a single input) to request its input data. It
@ -238,13 +238,13 @@ and does the calculations. When done it releases the pointer when adding it to
the output stream.
```c++
::util::Status MyCalculator::Process() {
absl::Status MyCalculator::Process() {
const Matrix& input = Input()->Get<Matrix>();
std::unique_ptr<Matrix> output(new Matrix(input.rows(), input.cols()));
// do your magic here....
// output->row(n) = ...
Output()->Add(output.release(), InputTimestamp());
return ::mediapipe::OkStatus();
return absl::OkStatus();
}
```
@ -312,7 +312,7 @@ namespace mediapipe {
//
class PacketClonerCalculator : public CalculatorBase {
public:
static ::mediapipe::Status GetContract(CalculatorContract* cc) {
static absl::Status GetContract(CalculatorContract* cc) {
const int tick_signal_index = cc->Inputs().NumEntries() - 1;
// cc->Inputs().NumEntries() returns the number of input streams
// for the PacketClonerCalculator
@ -322,10 +322,10 @@ class PacketClonerCalculator : public CalculatorBase {
cc->Outputs().Index(i).SetSameAs(&cc->Inputs().Index(i));
}
cc->Inputs().Index(tick_signal_index).SetAny();
return ::mediapipe::OkStatus();
return absl::OkStatus();
}
::mediapipe::Status Open(CalculatorContext* cc) final {
absl::Status Open(CalculatorContext* cc) final {
tick_signal_index_ = cc->Inputs().NumEntries() - 1;
current_.resize(tick_signal_index_);
// Pass along the header for each stream if present.
@ -336,10 +336,10 @@ class PacketClonerCalculator : public CalculatorBase {
// the header for the input stream of index i
}
}
return ::mediapipe::OkStatus();
return absl::OkStatus();
}
::mediapipe::Status Process(CalculatorContext* cc) final {
absl::Status Process(CalculatorContext* cc) final {
// Store input signals.
for (int i = 0; i < tick_signal_index_; ++i) {
if (!cc->Inputs().Index(i).Value().IsEmpty()) {
@ -364,7 +364,7 @@ class PacketClonerCalculator : public CalculatorBase {
}
}
}
return ::mediapipe::OkStatus();
return absl::OkStatus();
}
private:

@ -66,10 +66,10 @@ calculator derived from base class GlSimpleCalculator. The GPU calculator
// See GlSimpleCalculator for inputs, outputs and input side packets.
class LuminanceCalculator : public GlSimpleCalculator {
public:
::mediapipe::Status GlSetup() override;
::mediapipe::Status GlRender(const GlTexture& src,
absl::Status GlSetup() override;
absl::Status GlRender(const GlTexture& src,
const GlTexture& dst) override;
::mediapipe::Status GlTeardown() override;
absl::Status GlTeardown() override;
private:
GLuint program_ = 0;
@ -77,7 +77,7 @@ class LuminanceCalculator : public GlSimpleCalculator {
};
REGISTER_CALCULATOR(LuminanceCalculator);
::mediapipe::Status LuminanceCalculator::GlRender(const GlTexture& src,
absl::Status LuminanceCalculator::GlRender(const GlTexture& src,
const GlTexture& dst) {
static const GLfloat square_vertices[] = {
-1.0f, -1.0f, // bottom left
@ -128,7 +128,7 @@ REGISTER_CALCULATOR(LuminanceCalculator);
glDeleteVertexArrays(1, &vao);
glDeleteBuffers(2, vbo);
return ::mediapipe::OkStatus();
return absl::OkStatus();
}
```

@ -219,23 +219,23 @@ packet timestamps 0, 1, 2, 3, ...
```c++
class UnitDelayCalculator : public Calculator {
 public:
  static ::util::Status FillExpectations(
  static absl::Status FillExpectations(
      const CalculatorOptions& extendable_options, PacketTypeSet* inputs,
      PacketTypeSet* outputs, PacketTypeSet* input_side_packets) {
    inputs->Index(0)->Set<int>("An integer.");
    outputs->Index(0)->Set<int>("The input delayed by one time unit.");
    return ::mediapipe::OkStatus();
    return absl::OkStatus();
  }
  ::util::Status Open() final {
  absl::Status Open() final {
    Output()->Add(new int(0), Timestamp(0));
    return ::mediapipe::OkStatus();
    return absl::OkStatus();
  }
  ::util::Status Process() final {
  absl::Status Process() final {
    const Packet& packet = Input()->Value();
    Output()->AddPacket(packet.At(packet.Timestamp().NextAllowedInStream()));
    return ::mediapipe::OkStatus();
    return absl::OkStatus();
  }
};
```

@ -0,0 +1,191 @@
---
layout: default
title: MediaPipe on Android
parent: Getting Started
has_children: true
has_toc: false
nav_order: 1
---
# MediaPipe on Android
{: .no_toc }
1. TOC
{:toc}
---
Please follow instructions below to build Android example apps in the supported
MediaPipe [solutions](../solutions/solutions.md). To learn more about these
example apps, start from [Hello World! on Android](./hello_world_android.md). To
incorporate MediaPipe into an existing Android Studio project, see these
[instructions](./android_archive_library.md) that use Android Archive (AAR) and
Gradle.
## Building Android example apps
### Prerequisite
* Install MediaPipe following these [instructions](./install.md).
* Set up the Java Runtime.
* Set up Android SDK release 28.0.3 and above.
* Set up Android NDK r18b and above.
MediaPipe recommends setting up Android SDK and NDK via Android Studio (and see
below for Android Studio setup). However, if you prefer using MediaPipe without
Android Studio, please run
[`setup_android_sdk_and_ndk.sh`](https://github.com/google/mediapipe/blob/master/setup_android_sdk_and_ndk.sh)
to download and setup Android SDK and NDK before building any Android example
apps.
If Android SDK and NDK are already installed (e.g., by Android Studio), set
$ANDROID_HOME and $ANDROID_NDK_HOME to point to the installed SDK and NDK.
```bash
export ANDROID_HOME=<path to the Android SDK>
export ANDROID_NDK_HOME=<path to the Android NDK>
```
In order to use MediaPipe on earlier Android versions, MediaPipe needs to switch
to a lower Android API level. You can achieve this by specifying `api_level =
$YOUR_INTENDED_API_LEVEL` in android_ndk_repository() and/or
android_sdk_repository() in the
[`WORKSPACE`](https://github.com/google/mediapipe/blob/master/WORKSPACE) file.
Please verify all the necessary packages are installed.
* Android SDK Platform API Level 28 or 29
* Android SDK Build-Tools 28 or 29
* Android SDK Platform-Tools 28 or 29
* Android SDK Tools 26.1.1
* Android NDK 17c or above
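One way to confirm these packages are present is to query `sdkmanager` (a sketch; it assumes the SDK command-line tools live at their usual location under `$ANDROID_HOME`):
```bash
# List installed SDK components and look for the platform, build-tools and NDK entries.
"$ANDROID_HOME/tools/bin/sdkmanager" --list | grep -E "platforms;android-2[89]|build-tools;2[89]|ndk"
```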
### Option 1: Build with Bazel in Command Line
Tip: You can run this
[script](https://github.com/google/mediapipe/blob/master/build_android_examples.sh)
to build (and install) all MediaPipe Android example apps.
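For instance (a sketch; it assumes the script takes the same `-d` output-directory flag as the other build scripts in the repository):
```bash
# Build every Android example APK and collect the results under ./apks.
bash build_android_examples.sh -d apks
```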
1. To build an Android example app, build against the corresponding
`android_binary` build target. For instance, for
[MediaPipe Hands](../solutions/hands.md) the target is `handtrackinggpu` in
the
[BUILD](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/handtrackinggpu/BUILD)
file:
Note: To reduce the binary size, consider appending `--linkopt="-s"` to the
command below to strip symbols; a stripped variant is sketched after these steps.
```bash
bazel build -c opt --config=android_arm64 mediapipe/examples/android/src/java/com/google/mediapipe/apps/handtrackinggpu:handtrackinggpu
```
2. Install it on a device with:
```bash
adb install bazel-bin/mediapipe/examples/android/src/java/com/google/mediapipe/apps/handtrackinggpu/handtrackinggpu.apk
```
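As noted in step 1, appending `--linkopt="-s"` strips symbols from the binary; the same target built that way looks like this (a sketch):
```bash
# Hand-tracking example built with symbols stripped to reduce APK size.
bazel build -c opt --config=android_arm64 --linkopt="-s" \
    mediapipe/examples/android/src/java/com/google/mediapipe/apps/handtrackinggpu:handtrackinggpu
```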
### Option 2: Build with Bazel in Android Studio
The MediaPipe project can be imported into Android Studio using the Bazel
plugins. This allows the MediaPipe examples to be built and modified in Android
Studio.
To incorporate MediaPipe into an existing Android Studio project, see these
[instructions](./android_archive_library.md) that use Android Archive (AAR) and
Gradle.
The steps below use Android Studio 3.5 to build and install a MediaPipe example
app:
1. Install and launch Android Studio 3.5.
2. Select `Configure` -> `SDK Manager` -> `SDK Platforms`.
* Verify that Android SDK Platform API Level 28 or 29 is installed.
* Take note of the Android SDK Location, e.g.,
`/usr/local/home/Android/Sdk`.
3. Select `Configure` -> `SDK Manager` -> `SDK Tools`.
* Verify that Android SDK Build-Tools 28 or 29 is installed.
* Verify that Android SDK Platform-Tools 28 or 29 is installed.
* Verify that Android SDK Tools 26.1.1 is installed.
* Verify that Android NDK 17c or above is installed.
* Take note of the Android NDK Location, e.g.,
`/usr/local/home/Android/Sdk/ndk-bundle` or
`/usr/local/home/Android/Sdk/ndk/20.0.5594570`.
4. Set environment variables `$ANDROID_HOME` and `$ANDROID_NDK_HOME` to point
to the installed SDK and NDK.
```bash
export ANDROID_HOME=/usr/local/home/Android/Sdk
# If the NDK libraries are installed by a previous version of Android Studio, do
export ANDROID_NDK_HOME=/usr/local/home/Android/Sdk/ndk-bundle
# If the NDK libraries are installed by Android Studio 3.5, do
export ANDROID_NDK_HOME=/usr/local/home/Android/Sdk/ndk/<version number>
```
5. Select `Configure` -> `Plugins` to install `Bazel`.
6. On Linux, select `File` -> `Settings` -> `Bazel settings`. On macOS, select
`Android Studio` -> `Preferences` -> `Bazel settings`. Then, modify `Bazel
binary location` to be the same as the output of `$ which bazel`.
7. Select `Import Bazel Project`.
* Select `Workspace`: `/path/to/mediapipe` and select `Next`.
* Select `Generate from BUILD file`: `/path/to/mediapipe/BUILD` and select
`Next`.
* Modify `Project View` to be the following and select `Finish`.
```
directories:
# read project settings, e.g., .bazelrc
.
-mediapipe/objc
-mediapipe/examples/ios
targets:
//mediapipe/examples/android/...:all
//mediapipe/java/...:all
android_sdk_platform: android-29
sync_flags:
--host_crosstool_top=@bazel_tools//tools/cpp:toolchain
```
8. Select `Bazel` -> `Sync` -> `Sync project with Build files`.
Note: Even after doing step 4, if you still see the error: `"no such package
'@androidsdk//': Either the path attribute of android_sdk_repository or the
ANDROID_HOME environment variable must be set."`, please modify the
[`WORKSPACE`](https://github.com/google/mediapipe/blob/master/WORKSPACE)
file to point to your SDK and NDK library locations, as below:
```
android_sdk_repository(
name = "androidsdk",
path = "/path/to/android/sdk"
)
android_ndk_repository(
name = "androidndk",
path = "/path/to/android/ndk"
)
```
9. Connect an Android device to the workstation.
10. Select `Run...` -> `Edit Configurations...`.
* Select `Templates` -> `Bazel Command`.
* Enter Target Expression:
`//mediapipe/examples/android/src/java/com/google/mediapipe/apps/handtrackinggpu:handtrackinggpu`
* Enter Bazel command: `mobile-install`.
* Enter Bazel flags: `-c opt --config=android_arm64`.
* Press the `[+]` button to add the new configuration.
* Select `Run` to run the example app on the connected Android device.
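The Run configuration above corresponds roughly to the following command line (a sketch; the Bazel plugin invokes something equivalent on your behalf):
```bash
# Build and install the hand-tracking example directly from a terminal.
bazel mobile-install -c opt --config=android_arm64 \
    //mediapipe/examples/android/src/java/com/google/mediapipe/apps/handtrackinggpu:handtrackinggpu
```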

@ -1,8 +1,9 @@
---
layout: default
title: MediaPipe Android Archive
parent: Getting Started
nav_order: 7
parent: MediaPipe on Android
grand_parent: Getting Started
nav_order: 2
---
# MediaPipe Android Archive
@ -44,7 +45,8 @@ each project.
2. Run the Bazel build command to generate the AAR.
```bash
bazel build -c opt --host_crosstool_top=@bazel_tools//tools/cpp:toolchain --fat_apk_cpu=arm64-v8a,armeabi-v7a \
bazel build -c opt --host_crosstool_top=@bazel_tools//tools/cpp:toolchain \
--fat_apk_cpu=arm64-v8a,armeabi-v7a --strip=ALWAYS \
//path/to/the/aar/build/file:aar_name
```
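The generated archive then lands under `bazel-bin`; copying it into an Android Studio project might look like this (a sketch using the placeholder target above):
```bash
# Copy the freshly built AAR into the Android Studio project's libs/ directory.
cp bazel-bin/path/to/the/aar/build/file/aar_name.aar /path/to/your/app/libs/
```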
@ -85,16 +87,14 @@ each project.
Build the MediaPipe binary graph and copy the assets into
app/src/main/assets, e.g., for the face detection graph, you need to build
and copy
[the binary graph](https://github.com/google/mediapipe/blob/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facedetectiongpu/BUILD#L41),
[the tflite model](https://github.com/google/mediapipe/tree/master/mediapipe/models/face_detection_front.tflite),
[the binary graph](https://github.com/google/mediapipe/blob/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facedetectiongpu/BUILD#L41)
and
[the label map](https://github.com/google/mediapipe/blob/master/mediapipe/models/face_detection_front_labelmap.txt).
[the face detection tflite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection/face_detection_front.tflite).
```bash
bazel build -c opt mediapipe/graphs/face_detection:mobile_gpu_binary_graph
cp bazel-bin/mediapipe/graphs/face_detection/mobile_gpu.binarypb /path/to/your/app/src/main/assets/
cp mediapipe/models/face_detection_front.tflite /path/to/your/app/src/main/assets/
cp mediapipe/models/face_detection_front_labelmap.txt /path/to/your/app/src/main/assets/
cp mediapipe/modules/face_detection/face_detection_front.tflite /path/to/your/app/src/main/assets/
```
![Screenshot](../images/mobile/assets_location.png)
@ -132,9 +132,10 @@ each project.
implementation 'com.google.guava:guava:27.0.1-android'
implementation 'com.google.protobuf:protobuf-java:3.11.4'
// CameraX core library
def camerax_version = "1.0.0-alpha06"
def camerax_version = "1.0.0-beta10"
implementation "androidx.camera:camera-core:$camerax_version"
implementation "androidx.camera:camera-camera2:$camerax_version"
implementation "androidx.camera:camera-lifecycle:$camerax_version"
}
```

@ -2,7 +2,7 @@
layout: default
title: Building MediaPipe Examples
parent: Getting Started
nav_order: 2
nav_exclude: true
---
# Building MediaPipe Examples
@ -12,496 +12,22 @@ nav_order: 2
{:toc}
---
## Android
### Android
### Prerequisite
Please see these [instructions](./android.md).
* Java Runtime.
* Android SDK release 28.0.3 and above.
* Android NDK r18b and above.
### iOS
MediaPipe recommends setting up Android SDK and NDK via Android Studio (and see
below for Android Studio setup). However, if you prefer using MediaPipe without
Android Studio, please run
[`setup_android_sdk_and_ndk.sh`](https://github.com/google/mediapipe/blob/master/setup_android_sdk_and_ndk.sh)
to download and setup Android SDK and NDK before building any Android example
apps.
Please see these [instructions](./ios.md).
If Android SDK and NDK are already installed (e.g., by Android Studio), set
$ANDROID_HOME and $ANDROID_NDK_HOME to point to the installed SDK and NDK.
### Python
```bash
export ANDROID_HOME=<path to the Android SDK>
export ANDROID_NDK_HOME=<path to the Android NDK>
```
Please see these [instructions](./python.md).
In order to use MediaPipe on earlier Android versions, MediaPipe needs to switch
to a lower Android API level. You can achieve this by specifying `api_level =
$YOUR_INTENDED_API_LEVEL` in android_ndk_repository() and/or
android_sdk_repository() in the
[`WORKSPACE`](https://github.com/google/mediapipe/blob/master/WORKSPACE) file.
### JavaScript
Please verify all the necessary packages are installed.
Please see these [instructions](./javascript.md).
* Android SDK Platform API Level 28 or 29
* Android SDK Build-Tools 28 or 29
* Android SDK Platform-Tools 28 or 29
* Android SDK Tools 26.1.1
* Android NDK 17c or above
### C++
### Option 1: Build with Bazel in Command Line
Tip: You can run this
[script](https://github.com/google/mediapipe/blob/master/build_android_examples.sh)
to build (and install) all MediaPipe Android example apps.
1. To build an Android example app, build against the corresponding
`android_binary` build target. For instance, for
[MediaPipe Hands](../solutions/hands.md) the target is `handtrackinggpu` in
the
[BUILD](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/handtrackinggpu/BUILD)
file:
Note: To reduce the binary size, consider appending `--linkopt="-s"` to the
command below to strip symbols.
```bash
bazel build -c opt --config=android_arm64 mediapipe/examples/android/src/java/com/google/mediapipe/apps/handtrackinggpu:handtrackinggpu
```
2. Install it on a device with:
```bash
adb install bazel-bin/mediapipe/examples/android/src/java/com/google/mediapipe/apps/handtrackinggpu/handtrackinggpu.apk
```
### Option 2: Build with Bazel in Android Studio
The MediaPipe project can be imported into Android Studio using the Bazel
plugins. This allows the MediaPipe examples to be built and modified in Android
Studio.
To incorporate MediaPipe into an existing Android Studio project, see these
[instructions](./android_archive_library.md) that use Android Archive (AAR) and
Gradle.
The steps below use Android Studio 3.5 to build and install a MediaPipe example
app:
1. Install and launch Android Studio 3.5.
2. Select `Configure` -> `SDK Manager` -> `SDK Platforms`.
* Verify that Android SDK Platform API Level 28 or 29 is installed.
* Take note of the Android SDK Location, e.g.,
`/usr/local/home/Android/Sdk`.
3. Select `Configure` -> `SDK Manager` -> `SDK Tools`.
* Verify that Android SDK Build-Tools 28 or 29 is installed.
* Verify that Android SDK Platform-Tools 28 or 29 is installed.
* Verify that Android SDK Tools 26.1.1 is installed.
* Verify that Android NDK 17c or above is installed.
* Take note of the Android NDK Location, e.g.,
`/usr/local/home/Android/Sdk/ndk-bundle` or
`/usr/local/home/Android/Sdk/ndk/20.0.5594570`.
4. Set environment variables `$ANDROID_HOME` and `$ANDROID_NDK_HOME` to point
to the installed SDK and NDK.
```bash
export ANDROID_HOME=/usr/local/home/Android/Sdk
# If the NDK libraries are installed by a previous version of Android Studio, do
export ANDROID_NDK_HOME=/usr/local/home/Android/Sdk/ndk-bundle
# If the NDK libraries are installed by Android Studio 3.5, do
export ANDROID_NDK_HOME=/usr/local/home/Android/Sdk/ndk/<version number>
```
5. Select `Configure` -> `Plugins` to install `Bazel`.
6. On Linux, select `File` -> `Settings` -> `Bazel settings`. On macos, select
`Android Studio` -> `Preferences` -> `Bazel settings`. Then, modify `Bazel
binary location` to be the same as the output of `$ which bazel`.
7. Select `Import Bazel Project`.
* Select `Workspace`: `/path/to/mediapipe` and select `Next`.
* Select `Generate from BUILD file`: `/path/to/mediapipe/BUILD` and select
`Next`.
* Modify `Project View` to be the following and select `Finish`.
```
directories:
# read project settings, e.g., .bazelrc
.
-mediapipe/objc
-mediapipe/examples/ios
targets:
//mediapipe/examples/android/...:all
//mediapipe/java/...:all
android_sdk_platform: android-29
sync_flags:
--host_crosstool_top=@bazel_tools//tools/cpp:toolchain
```
8. Select `Bazel` -> `Sync` -> `Sync project with Build files`.
Note: Even after doing step 4, if you still see the error: `"no such package
'@androidsdk//': Either the path attribute of android_sdk_repository or the
ANDROID_HOME environment variable must be set."`, please modify the
[`WORKSPACE`](https://github.com/google/mediapipe/blob/master/WORKSPACE)
file to point to your SDK and NDK library locations, as below:
```
android_sdk_repository(
name = "androidsdk",
path = "/path/to/android/sdk"
)
android_ndk_repository(
name = "androidndk",
path = "/path/to/android/ndk"
)
```
9. Connect an Android device to the workstation.
10. Select `Run...` -> `Edit Configurations...`.
* Select `Templates` -> `Bazel Command`.
* Enter Target Expression:
`//mediapipe/examples/android/src/java/com/google/mediapipe/apps/handtrackinggpu:handtrackinggpu`
* Enter Bazel command: `mobile-install`.
* Enter Bazel flags: `-c opt --config=android_arm64`.
* Press the `[+]` button to add the new configuration.
* Select `Run` to run the example app on the connected Android device.
## iOS
### Prerequisite
1. Install [Xcode](https://developer.apple.com/xcode/), then install the
Command Line Tools using:
```bash
xcode-select --install
```
2. Install [Bazel](https://bazel.build/).
We recommend using [Homebrew](https://brew.sh/) to get the latest version.
3. Set Python 3.7 as the default Python version and install the Python "six"
library. This is needed for TensorFlow.
```bash
pip3 install --user six
```
4. Clone the MediaPipe repository.
```bash
git clone https://github.com/google/mediapipe.git
```
### Set up a bundle ID prefix
All iOS apps must have a bundle ID, and you must have a provisioning profile
that lets you install an app with that ID onto your phone. To avoid clashes
between different MediaPipe users, you need to configure a unique prefix for the
bundle IDs of our iOS demo apps.
If you have a custom provisioning profile, see
[Custom provisioning](#custom-provisioning) below.
Otherwise, run this command to generate a unique prefix:
```bash
python3 mediapipe/examples/ios/link_local_profiles.py
```
### Create an Xcode project
This allows you to edit and debug one of the example apps in Xcode. It also
allows you to make use of automatic provisioning (see later section).
1. We will use a tool called [Tulsi](https://tulsi.bazel.build/) for generating
Xcode projects from Bazel build configurations.
```bash
# cd out of the mediapipe directory, then:
git clone https://github.com/bazelbuild/tulsi.git
cd tulsi
# remove Xcode version from Tulsi's .bazelrc (see http://github.com/bazelbuild/tulsi#building-and-installing):
sed -i .orig '/xcode_version/d' .bazelrc
# build and run Tulsi:
sh build_and_run.sh
```
This will install `Tulsi.app` inside the `Applications` directory in your
home directory.
2. Open `mediapipe/Mediapipe.tulsiproj` using the Tulsi app.
Tip: If Tulsi displays an error saying "Bazel could not be found", press the
"Bazel..." button in the Packages tab and select the `bazel` executable in
your homebrew `/bin/` directory.
3. Select the MediaPipe config in the Configs tab, then press the Generate
button below. You will be asked for a location to save the Xcode project.
Once the project is generated, it will be opened in Xcode.
If you get an error about bundle IDs, see the
[previous section](#set-up-a-bundle-id-prefix).
### Set up provisioning
To install applications on an iOS device, you need a provisioning profile. There
are two options:
1. Automatic provisioning. This allows you to build and install an app to your
personal device. The provisining profile is managed by Xcode, and has to be
updated often (it is valid for about a week).
2. Custom provisioning. This uses a provisioning profile associated with an
Apple developer account. These profiles have a longer validity period and
can target multiple devices, but you need a paid developer account with
Apple to obtain one.
#### Automatic provisioning
1. Create an Xcode project for MediaPipe, as discussed
[earlier](#create-an-xcode-project).
2. In the project navigator in the left sidebar, select the "Mediapipe"
project.
3. Select one of the application targets, e.g. HandTrackingGpuApp.
4. Select the "Signing & Capabilities" tab.
5. Check "Automatically manage signing", and confirm the dialog box.
6. Select "_Your Name_ (Personal Team)" in the Team pop-up menu.
7. This set-up needs to be done once for each application you want to install.
Repeat steps 3-6 as needed.
This generates provisioning profiles for each app you have selected. Now we need
to tell Bazel to use them. We have provided a script to make this easier.
1. In the terminal, to the `mediapipe` directory where you cloned the
repository.
2. Run this command:
```bash
python3 mediapipe/examples/ios/link_local_profiles.py
```
This will find and link the provisioning profile for all applications for which
you have enabled automatic provisioning in Xcode.
Note: once a profile expires, Xcode will generate a new one; you must then run
this script again to link the updated profiles.
#### Custom provisioning
1. Obtain a provisioning profile from Apple.
Tip: You can use this command to see the provisioning profiles you have
previously downloaded using Xcode: `open ~/Library/MobileDevice/"Provisioning
Profiles"`. If there are none, generate and download a profile on
[Apple's developer site](https://developer.apple.com/account/resources/).
1. Symlink or copy your provisioning profile to
`mediapipe/mediapipe/provisioning_profile.mobileprovision`.
```bash
cd mediapipe
ln -s ~/Downloads/MyProvisioningProfile.mobileprovision mediapipe/provisioning_profile.mobileprovision
```
Note: if you had previously set up automatic provisioning, you should remove the
`provisioning_profile.mobileprovision` symlink in each example's directory,
since it will take precedence over the common one. You can also overwrite it
with you own profile if you need a different profile for different apps.
1. Open `mediapipe/examples/ios/bundle_id.bzl`, and change the
`BUNDLE_ID_PREFIX` to a prefix associated with your provisioning profile.
### Build and run an app using Xcode
1. Create the Xcode project, and make sure you have set up either automatic or
custom provisioning.
2. You can now select any of the MediaPipe demos in the target menu, and build
and run them as normal.
Note: When you ask Xcode to run an app, by default it will use the Debug
configuration. Some of our demos are computationally heavy; you may want to use
the Release configuration for better performance.
Tip: To switch build configuration in Xcode, click on the target menu, choose
"Edit Scheme...", select the Run action, and switch the Build Configuration from
Debug to Release. Note that this is set independently for each target.
Tip: On the device, in Settings > General > Device Management, make sure the
developer (yourself) is trusted.
### Build an app using the command line
1. Make sure you have set up either automatic or custom provisioning.
2. Using [MediaPipe Hands](../solutions/hands.md) for example, run:
```bash
bazel build -c opt --config=ios_arm64 mediapipe/examples/ios/handtrackinggpu:HandTrackingGpuApp
```
You may see a permission request from `codesign` in order to sign the app.
Tip: If you are using custom provisioning, you can run this
[script](https://github.com/google/mediapipe/blob/master/build_ios_examples.sh)
to build all MediaPipe iOS example apps.
3. In Xcode, open the `Devices and Simulators` window (command-shift-2).
4. Make sure your device is connected. You will see a list of installed apps.
Press the "+" button under the list, and select the `.ipa` file built by
Bazel.
5. You can now run the app on your device.
Tip: On the device, in Settings > General > Device Management, make sure the
developer (yourself) is trusted.
## Desktop
### Option 1: Running on CPU
1. To build, for example, [MediaPipe Hands](../solutions/hands.md), run:
```bash
bazel build -c opt --define MEDIAPIPE_DISABLE_GPU=1 mediapipe/examples/desktop/hand_tracking:hand_tracking_cpu
```
2. To run the application:
```bash
GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/hand_tracking/hand_tracking_cpu \
--calculator_graph_config_file=mediapipe/graphs/hand_tracking/hand_tracking_desktop_live.pbtxt
```
This will open up your webcam as long as it is connected and on. Any errors
is likely due to your webcam being not accessible.
### Option 2: Running on GPU
Note: This currently works only on Linux, and please first follow
[OpenGL ES Setup on Linux Desktop](./gpu_support.md#opengl-es-setup-on-linux-desktop).
1. To build, for example, [MediaPipe Hands](../solutions/hands.md), run:
```bash
bazel build -c opt --copt -DMESA_EGL_NO_X11_HEADERS --copt -DEGL_NO_X11 \
mediapipe/examples/desktop/hand_tracking:hand_tracking_gpu
```
2. To run the application:
```bash
GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/hand_tracking/hand_tracking_gpu \
--calculator_graph_config_file=mediapipe/graphs/hand_tracking/hand_tracking_mobile.pbtxt
```
This will open up your webcam as long as it is connected and on. Any errors
is likely due to your webcam being not accessible, or GPU drivers not setup
properly.
## Python
MediaPipe Python package is available on
[PyPI](https://pypi.org/project/mediapipe/), and can be installed simply by `pip
install mediapipe` on Linux and macOS, as described below in
[Run in python interpreter](#run-in-python-interpreter) and in this
[colab](https://mediapipe.page.link/mp-py-colab).
### Run in Python interpreter
Using [MediaPipe Pose](../solutions/pose.md) as an example:
```bash
# Activate a Python virtual environment.
$ python3 -m venv mp_env && source mp_env/bin/activate
# Install MediaPipe Python package
(mp_env)$ pip install mediapipe
# Run in Python interpreter
(mp_env)$ python3
>>> import mediapipe as mp
>>> pose_tracker = mp.examples.UpperBodyPoseTracker()
# For image input
>>> pose_landmarks, _ = pose_tracker.run(input_file='/path/to/input/file', output_file='/path/to/output/file')
>>> pose_landmarks, annotated_image = pose_tracker.run(input_file='/path/to/file')
# For live camera input
# (Press Esc within the output image window to stop the run or let it self terminate after 30 seconds.)
>>> pose_tracker.run_live()
# Close the tracker.
>>> pose_tracker.close()
```
Tip: Use command `deactivate` to exit the Python virtual environment.
### Building Python package from source
Follow these steps only if you have local changes and need to build the Python
package from source. Otherwise, we strongly encourage our users to simply run
`pip install mediapipe`, more convenient and much faster.
1. Make sure that Bazel and OpenCV are correctly installed and configured for
MediaPipe. Please see [Installation](./install.md) for how to setup Bazel
and OpenCV for MediaPipe on Linux and macOS.
2. Install the following dependencies.
```bash
# Debian or Ubuntu
$ sudo apt install python3-dev
$ sudo apt install python3-venv
$ sudo apt install -y protobuf-compiler
```
```bash
# macOS
$ brew install protobuf
```
3. Activate a Python virtual environment.
```bash
$ python3 -m venv mp_env && source mp_env/bin/activate
```
4. In the virtual environment, go to the MediaPipe repo directory.
5. Install the required Python packages.
```bash
(mp_env)mediapipe$ pip3 install -r requirements.txt
```
6. Generate and install MediaPipe package.
```bash
(mp_env)mediapipe$ python3 setup.py gen_protos
(mp_env)mediapipe$ python3 setup.py install --link-opencv
```
Please see these [instructions](./cpp.md).

@ -0,0 +1,62 @@
---
layout: default
title: MediaPipe in C++
parent: Getting Started
has_children: true
has_toc: false
nav_order: 5
---
# MediaPipe in C++
{: .no_toc }
1. TOC
{:toc}
---
Please follow instructions below to build C++ command-line example apps in the
supported MediaPipe [solutions](../solutions/solutions.md). To learn more about
these example apps, start from [Hello World! in C++](./hello_world_cpp.md).
## Building C++ command-line example apps
### Option 1: Running on CPU
1. To build, for example, [MediaPipe Hands](../solutions/hands.md), run:
```bash
bazel build -c opt --define MEDIAPIPE_DISABLE_GPU=1 mediapipe/examples/desktop/hand_tracking:hand_tracking_cpu
```
2. To run the application:
```bash
GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/hand_tracking/hand_tracking_cpu \
--calculator_graph_config_file=mediapipe/graphs/hand_tracking/hand_tracking_desktop_live.pbtxt
```
This will open up your webcam as long as it is connected and on. Any errors
are likely due to your webcam not being accessible.
### Option 2: Running on GPU
Note: This currently works only on Linux, and please first follow
[OpenGL ES Setup on Linux Desktop](./gpu_support.md#opengl-es-setup-on-linux-desktop).
1. To build, for example, [MediaPipe Hands](../solutions/hands.md), run:
```bash
bazel build -c opt --copt -DMESA_EGL_NO_X11_HEADERS --copt -DEGL_NO_X11 \
mediapipe/examples/desktop/hand_tracking:hand_tracking_gpu
```
2. To run the application:
```bash
GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/hand_tracking/hand_tracking_gpu \
--calculator_graph_config_file=mediapipe/graphs/hand_tracking/hand_tracking_mobile.pbtxt
```
This will open up your webcam as long as it is connected and on. Any errors
are likely due to your webcam not being accessible, or the GPU drivers not
being set up properly.

View File

@ -2,7 +2,7 @@
layout: default
title: GPU Support
parent: Getting Started
nav_order: 6
nav_order: 7
---
# GPU Support

View File

@ -1,8 +1,9 @@
---
layout: default
title: Hello World! on Android
parent: Getting Started
nav_order: 3
parent: MediaPipe on Android
grand_parent: Getting Started
nav_order: 1
---
# Hello World! on Android
@ -58,7 +59,7 @@ node: {
output_stream: "luma_video"
}
# Applies the Sobel filter to luminance images sotred in RGB format.
# Applies the Sobel filter to luminance images stored in RGB format.
node: {
calculator: "SobelEdgesCalculator"
input_stream: "luma_video"
@ -446,8 +447,8 @@ visible so that we can start seeing frames from the `previewFrameTexture`.
However, before starting the camera, we need to decide which camera we want to
use. [`CameraXPreviewHelper`] inherits from [`CameraHelper`] which provides two
options, `FRONT` and `BACK`. We can pass in the decision from the `BUILD` file
as metadata such that no code change is required to build a another version of
the app using a different camera.
as metadata such that no code change is required to build another version of the
app using a different camera.
Assuming we want to use `BACK` camera to perform edge detection on a live scene
that we view from the camera, add the metadata into `AndroidManifest.xml`:
@ -496,7 +497,7 @@ CameraHelper.CameraFacing cameraFacing =
applicationInfo.metaData.getBoolean("cameraFacingFront", false)
? CameraHelper.CameraFacing.FRONT
: CameraHelper.CameraFacing.BACK;
cameraHelper.startCamera(this, cameraFacing, /*surfaceTexture=*/ null);
cameraHelper.startCamera(this, cameraFacing, /*unusedSurfaceTexture=*/ null);
```
At this point, the application should build successfully. However, when you run

View File

@ -1,11 +1,12 @@
---
layout: default
title: Hello World! on Desktop (C++)
parent: Getting Started
nav_order: 5
title: Hello World! in C++
parent: MediaPipe in C++
grand_parent: Getting Started
nav_order: 1
---
# Hello World! on Desktop (C++)
# Hello World! in C++
{: .no_toc }
1. TOC
@ -43,7 +44,7 @@ nav_order: 5
`PrintHelloWorld()` function, defined in a [`CalculatorGraphConfig`] proto.
```C++
::mediapipe::Status PrintHelloWorld() {
absl::Status PrintHelloWorld() {
// Configures a simple graph, which concatenates 2 PassThroughCalculators.
CalculatorGraphConfig config = ParseTextProtoOrDie<CalculatorGraphConfig>(R"(
input_stream: "in"

View File

@ -1,8 +1,9 @@
---
layout: default
title: Hello World! on iOS
parent: Getting Started
nav_order: 4
parent: MediaPipe on iOS
grand_parent: Getting Started
nav_order: 1
---
# Hello World! on iOS
@ -193,8 +194,7 @@ bazel build -c opt --config=ios_arm64 mediapipe/examples/ios/helloworld:HelloWor
Then, go back to XCode, open Window > Devices and Simulators, select your
device, and add the `.ipa` file generated by the command above to your device.
Here is the document on [setting up and compiling](./building_examples.md#ios)
iOS MediaPipe apps.
Here is the document on [setting up and compiling](./ios.md) iOS MediaPipe apps.
Open the application on your device. Since it is empty, it should display a
blank white screen.
@ -492,6 +492,9 @@ in our app:
if (![self.mediapipeGraph startWithError:&error]) {
NSLog(@"Failed to start graph: %@", error);
}
else if (![self.mediapipeGraph waitUntilIdleWithError:&error]) {
NSLog(@"Failed to complete graph initial run: %@", error);
}
dispatch_async(_videoQueue, ^{
[_cameraSource start];
@ -500,8 +503,9 @@ in our app:
}];
```
Note: It is important to start the graph before starting the camera, so that the
graph is ready to process frames as soon as the camera starts sending them.
Note: It is important to start the graph before starting the camera and wait
until completion, so that the graph is ready to process frames as soon as the
camera starts sending them.
Earlier, when we received frames from the camera in the `processVideoFrame`
function, we displayed them in the `_liveView` using the `_renderer`. Now, we

View File

@ -2,7 +2,7 @@
layout: default
title: Installation
parent: Getting Started
nav_order: 1
nav_order: 6
---
# Installation
@ -12,7 +12,7 @@ nav_order: 1
{:toc}
---
Note: To interoperate with OpenCV, OpenCV 3.x and above are preferred. OpenCV
Note: To interoperate with OpenCV, OpenCV 3.x to 4.1 are preferred. OpenCV
2.x currently works but interoperability support may be deprecated in the
future.
@ -23,39 +23,38 @@ Note: To make Mediapipe work with TensorFlow, please set Python 3.7 as the
default Python version and install the Python "six" library by running `pip3
install --user six`.
Note: To build and run Android example apps, see these
[instructions](./building_examples.md#android). To build and run iOS example
apps, see these [instructions](./building_examples.md#ios).
## Installing on Debian and Ubuntu
1. Checkout MediaPipe repository.
1. Install Bazel.
Follow the official
[Bazel documentation](https://docs.bazel.build/versions/master/install-ubuntu.html)
to install Bazel 3.4 or higher.
For Nvidia Jetson and Raspberry Pi devices with aarch64 Linux, Bazel needs
to be built from source:
```bash
# For Bazel 3.4.1
mkdir $HOME/bazel-3.4.1
cd $HOME/bazel-3.4.1
wget https://github.com/bazelbuild/bazel/releases/download/3.4.1/bazel-3.4.1-dist.zip
sudo apt-get install build-essential openjdk-8-jdk python zip unzip
unzip bazel-3.4.1-dist.zip
env EXTRA_BAZEL_ARGS="--host_javabase=@local_jdk//:jdk" bash ./compile.sh
sudo cp output/bazel /usr/local/bin/
```
2. Checkout MediaPipe repository.
```bash
$ cd $HOME
$ git clone https://github.com/google/mediapipe.git
# Change directory into MediaPipe root directory
$ cd mediapipe
```
2. Install Bazel.
Follow the official
[Bazel documentation](https://docs.bazel.build/versions/master/install-ubuntu.html)
to install Bazel 2.0 or higher.
For Nvidia Jetson and Raspberry Pi devices with ARM Ubuntu, Bazel needs to
be built from source.
```bash
# For Bazel 3.0.0
wget https://github.com/bazelbuild/bazel/releases/download/3.0.0/bazel-3.0.0-dist.zip
sudo apt-get install build-essential openjdk-8-jdk python zip unzip
unzip bazel-3.0.0-dist.zip
env EXTRA_BAZEL_ARGS="--host_javabase=@local_jdk//:jdk" bash ./compile.sh
sudo cp output/bazel /usr/local/bin/
```
3. Install OpenCV and FFmpeg.
Option 1. Use package manager tool to install the pre-compiled OpenCV
@ -174,7 +173,7 @@ apps, see these [instructions](./building_examples.md#ios).
# when building GPU examples.
```
5. Run the [Hello World desktop example](./hello_world_desktop.md).
5. Run the [Hello World! in C++ example](./hello_world_cpp.md).
```bash
$ export GLOG_logtostderr=1
@ -208,7 +207,13 @@ build issues.
**Disclaimer**: Running MediaPipe on CentOS is experimental.
1. Checkout MediaPipe repository.
1. Install Bazel.
Follow the official
[Bazel documentation](https://docs.bazel.build/versions/master/install-redhat.html)
to install Bazel 3.4 or higher.
2. Checkout MediaPipe repository.
```bash
$ git clone https://github.com/google/mediapipe.git
@ -217,12 +222,6 @@ build issues.
$ cd mediapipe
```
2. Install Bazel.
Follow the official
[Bazel documentation](https://docs.bazel.build/versions/master/install-redhat.html)
to install Bazel 2.0 or higher.
3. Install OpenCV.
Option 1. Use package manager tool to install the pre-compiled version.
@ -304,7 +303,7 @@ build issues.
)
```
4. Run the [Hello World desktop example](./hello_world_desktop.md).
4. Run the [Hello World! in C++ example](./hello_world_cpp.md).
```bash
$ export GLOG_logtostderr=1
@ -337,7 +336,13 @@ build issues.
* Install [Xcode](https://developer.apple.com/xcode/) and its Command Line
Tools by `xcode-select --install`.
2. Checkout MediaPipe repository.
2. Install Bazel.
Follow the official
[Bazel documentation](https://docs.bazel.build/versions/master/install-os-x.html#install-with-installer-mac-os-x)
to install Bazel 3.4 or higher.
3. Checkout MediaPipe repository.
```bash
$ git clone https://github.com/google/mediapipe.git
@ -345,19 +350,6 @@ build issues.
$ cd mediapipe
```
3. Install Bazel.
Option 1. Use package manager tool to install Bazel
```bash
$ brew install bazel
# Run 'bazel version' to check version of bazel
```
Option 2. Follow the official
[Bazel documentation](https://docs.bazel.build/versions/master/install-os-x.html#install-with-installer-mac-os-x)
to install Bazel 2.0 or higher.
4. Install OpenCV and FFmpeg.
Option 1. Use HomeBrew package manager tool to install the pre-compiled
@ -427,7 +419,6 @@ build issues.
linkstatic = 1,
visibility = ["//visibility:public"],
)
```
5. Make sure that Python 3 and the Python "six" library are installed.
@ -440,7 +431,7 @@ build issues.
$ pip3 install --user six
```
6. Run the [Hello World desktop example](./hello_world_desktop.md).
6. Run the [Hello World! in C++ example](./hello_world_cpp.md).
```bash
$ export GLOG_logtostderr=1
@ -506,7 +497,7 @@ next section.
Follow the official
[Bazel documentation](https://docs.bazel.build/versions/master/install-windows.html)
to install Bazel 2.0 or higher.
to install Bazel 3.4 or higher.
6. Set Bazel variables.
@ -541,7 +532,7 @@ next section.
)
```
9. Run the [Hello World desktop example](./hello_world_desktop.md).
9. Run the [Hello World! in C++ example](./hello_world_cpp.md).
Note: For building MediaPipe on Windows, please add `--action_env
PYTHON_BIN_PATH="C://path//to//python.exe"` to the build command.
@ -567,7 +558,6 @@ next section.
# I20200514 20:43:12.279618 1200 hello_world.cc:56] Hello World!
# I20200514 20:43:12.279618 1200 hello_world.cc:56] Hello World!
# I20200514 20:43:12.280613 1200 hello_world.cc:56] Hello World!
```
If you run into a build error, please read
@ -607,14 +597,14 @@ cameras. Alternatively, you use a video file as input.
```bash
username@DESKTOP-TMVLBJ1:~$ curl -sLO --retry 5 --retry-max-time 10 \
https://storage.googleapis.com/bazel/3.0.0/release/bazel-3.0.0-installer-linux-x86_64.sh && \
sudo mkdir -p /usr/local/bazel/3.0.0 && \
chmod 755 bazel-3.0.0-installer-linux-x86_64.sh && \
sudo ./bazel-3.0.0-installer-linux-x86_64.sh --prefix=/usr/local/bazel/3.0.0 && \
source /usr/local/bazel/3.0.0/lib/bazel/bin/bazel-complete.bash
https://storage.googleapis.com/bazel/3.4.1/release/bazel-3.4.1-installer-linux-x86_64.sh && \
sudo mkdir -p /usr/local/bazel/3.4.1 && \
chmod 755 bazel-3.4.1-installer-linux-x86_64.sh && \
sudo ./bazel-3.4.1-installer-linux-x86_64.sh --prefix=/usr/local/bazel/3.4.1 && \
source /usr/local/bazel/3.4.1/lib/bazel/bin/bazel-complete.bash
username@DESKTOP-TMVLBJ1:~$ /usr/local/bazel/3.0.0/lib/bazel/bin/bazel version && \
alias bazel='/usr/local/bazel/3.0.0/lib/bazel/bin/bazel'
username@DESKTOP-TMVLBJ1:~$ /usr/local/bazel/3.4.1/lib/bazel/bin/bazel version && \
alias bazel='/usr/local/bazel/3.4.1/lib/bazel/bin/bazel'
```
6. Checkout MediaPipe repository.
@ -675,7 +665,7 @@ cameras. Alternatively, you use a video file as input.
)
```
8. Run the [Hello World desktop example](./hello_world_desktop.md).
8. Run the [Hello World! in C++ example](./hello_world_cpp.md).
```bash
username@DESKTOP-TMVLBJ1:~/mediapipe$ export GLOG_logtostderr=1
@ -731,7 +721,7 @@ This will use a Docker image that will isolate mediapipe's installation from the
# Successfully tagged mediapipe:latest
```
3. Run the [Hello World desktop example](./hello_world_desktop.md).
3. Run the [Hello World! in C++ example](./hello_world_cpp.md).
```bash
$ docker run -it --name mediapipe mediapipe:latest

docs/getting_started/ios.md Normal file
View File

@ -0,0 +1,222 @@
---
layout: default
title: MediaPipe on iOS
parent: Getting Started
has_children: true
has_toc: false
nav_order: 2
---
# MediaPipe on iOS
{: .no_toc }
1. TOC
{:toc}
---
Please follow instructions below to build iOS example apps in the supported
MediaPipe [solutions](../solutions/solutions.md). To learn more about these
example apps, start from, start from
[Hello World! on iOS](./hello_world_ios.md).
## Building iOS example apps
### Prerequisite
1. Install MediaPipe following these [instructions](./install.md).
2. Install [Xcode](https://developer.apple.com/xcode/), then install the
Command Line Tools using:
```bash
xcode-select --install
```
3. Install [Bazel](https://bazel.build/).
We recommend using [Homebrew](https://brew.sh/) to get the latest version.
4. Set Python 3.7 as the default Python version and install the Python "six"
library. This is needed for TensorFlow.
```bash
pip3 install --user six
```
5. Clone the MediaPipe repository.
```bash
git clone https://github.com/google/mediapipe.git
```
### Set up a bundle ID prefix
All iOS apps must have a bundle ID, and you must have a provisioning profile
that lets you install an app with that ID onto your phone. To avoid clashes
between different MediaPipe users, you need to configure a unique prefix for the
bundle IDs of our iOS demo apps.
If you have a custom provisioning profile, see
[Custom provisioning](#custom-provisioning) below.
Otherwise, run this command to generate a unique prefix:
```bash
python3 mediapipe/examples/ios/link_local_profiles.py
```
### Create an Xcode project
This allows you to edit and debug one of the example apps in Xcode. It also
allows you to make use of automatic provisioning (see later section).
1. We will use a tool called [Tulsi](https://tulsi.bazel.build/) for generating
Xcode projects from Bazel build configurations.
```bash
# cd out of the mediapipe directory, then:
git clone https://github.com/bazelbuild/tulsi.git
cd tulsi
# remove Xcode version from Tulsi's .bazelrc (see http://github.com/bazelbuild/tulsi#building-and-installing):
sed -i .orig '/xcode_version/d' .bazelrc
# build and run Tulsi:
sh build_and_run.sh
```
This will install `Tulsi.app` inside the `Applications` directory in your
home directory.
2. Open `mediapipe/Mediapipe.tulsiproj` using the Tulsi app.
Tip: If Tulsi displays an error saying "Bazel could not be found", press the
"Bazel..." button in the Packages tab and select the `bazel` executable in
your homebrew `/bin/` directory.
3. Select the MediaPipe config in the Configs tab, then press the Generate
button below. You will be asked for a location to save the Xcode project.
Once the project is generated, it will be opened in Xcode.
If you get an error about bundle IDs, see the
[previous section](#set-up-a-bundle-id-prefix).
### Set up provisioning
To install applications on an iOS device, you need a provisioning profile. There
are two options:
1. Automatic provisioning. This allows you to build and install an app to your
personal device. The provisioning profile is managed by Xcode, and has to be
updated often (it is valid for about a week).
2. Custom provisioning. This uses a provisioning profile associated with an
Apple developer account. These profiles have a longer validity period and
can target multiple devices, but you need a paid developer account with
Apple to obtain one.
#### Automatic provisioning
1. Create an Xcode project for MediaPipe, as discussed
[earlier](#create-an-xcode-project).
2. In the project navigator in the left sidebar, select the "Mediapipe"
project.
3. Select one of the application targets, e.g. HandTrackingGpuApp.
4. Select the "Signing & Capabilities" tab.
5. Check "Automatically manage signing", and confirm the dialog box.
6. Select "_Your Name_ (Personal Team)" in the Team pop-up menu.
7. This set-up needs to be done once for each application you want to install.
Repeat steps 3-6 as needed.
This generates provisioning profiles for each app you have selected. Now we need
to tell Bazel to use them. We have provided a script to make this easier.
1. In the terminal, go to the `mediapipe` directory where you cloned the
repository.
2. Run this command:
```bash
python3 mediapipe/examples/ios/link_local_profiles.py
```
This will find and link the provisioning profile for all applications for which
you have enabled automatic provisioning in Xcode.
Note: once a profile expires, Xcode will generate a new one; you must then run
this script again to link the updated profiles.
#### Custom provisioning
1. Obtain a provisioning profile from Apple.
Tip: You can use this command to see the provisioning profiles you have
previously downloaded using Xcode: `open ~/Library/MobileDevice/"Provisioning
Profiles"`. If there are none, generate and download a profile on
[Apple's developer site](https://developer.apple.com/account/resources/).
1. Symlink or copy your provisioning profile to
`mediapipe/mediapipe/provisioning_profile.mobileprovision`.
```bash
cd mediapipe
ln -s ~/Downloads/MyProvisioningProfile.mobileprovision mediapipe/provisioning_profile.mobileprovision
```
Note: if you had previously set up automatic provisioning, you should remove the
`provisioning_profile.mobileprovision` symlink in each example's directory,
since it will take precedence over the common one. You can also overwrite it
with your own profile if you need a different profile for different apps.
1. Open `mediapipe/examples/ios/bundle_id.bzl`, and change the
`BUNDLE_ID_PREFIX` to a prefix associated with your provisioning profile.
### Build and run an app using Xcode
1. Create the Xcode project, and make sure you have set up either automatic or
custom provisioning.
2. You can now select any of the MediaPipe demos in the target menu, and build
and run them as normal.
Note: When you ask Xcode to run an app, by default it will use the Debug
configuration. Some of our demos are computationally heavy; you may want to use
the Release configuration for better performance.
Tip: To switch build configuration in Xcode, click on the target menu, choose
"Edit Scheme...", select the Run action, and switch the Build Configuration from
Debug to Release. Note that this is set independently for each target.
Tip: On the device, in Settings > General > Device Management, make sure the
developer (yourself) is trusted.
### Build an app using the command line
1. Make sure you have set up either automatic or custom provisioning.
2. Using [MediaPipe Hands](../solutions/hands.md) for example, run:
```bash
bazel build -c opt --config=ios_arm64 mediapipe/examples/ios/handtrackinggpu:HandTrackingGpuApp
```
You may see a permission request from `codesign` in order to sign the app.
Tip: If you are using custom provisioning, you can run this
[script](https://github.com/google/mediapipe/blob/master/build_ios_examples.sh)
to build all MediaPipe iOS example apps.
3. In Xcode, open the `Devices and Simulators` window (command-shift-2).
4. Make sure your device is connected. You will see a list of installed apps.
Press the "+" button under the list, and select the `.ipa` file built by
Bazel.
5. You can now run the app on your device.
Tip: On the device, in Settings > General > Device Management, make sure the
developer (yourself) is trusted.

View File

@ -0,0 +1,94 @@
---
layout: default
title: MediaPipe in JavaScript
parent: Getting Started
nav_order: 4
---
# MediaPipe in JavaScript
{: .no_toc }
1. TOC
{:toc}
---
## Ready-to-use JavaScript Solutions
MediaPipe currently offers the following solutions:
Solution | NPM Package | Example
----------------- | ----------------------------- | -------
[Face Mesh][F-pg] | [@mediapipe/face_mesh][F-npm] | [mediapipe.dev/demo/face_mesh][F-demo]
[Face Detection][Fd-pg] | [@mediapipe/face_detection][Fd-npm] | [mediapipe.dev/demo/face_detection][Fd-demo]
[Hands][H-pg] | [@mediapipe/hands][H-npm] | [mediapipe.dev/demo/hands][H-demo]
[Holistic][Ho-pg] | [@mediapipe/holistic][Ho-npm] | [mediapipe.dev/demo/holistic][Ho-demo]
[Pose][P-pg] | [@mediapipe/pose][P-npm] | [mediapipe.dev/demo/pose][P-demo]
Click on a solution link above for more information, including API and code
snippets.
The quickest way to get acclimated is to look at the examples above. Each demo
has a link to a [CodePen][codepen] so that you can edit the code and try it
yourself. We have included a number of utility packages to help you get started:
* [@mediapipe/drawing_utils][draw-npm] - Utilities to draw landmarks and
connectors.
* [@mediapipe/camera_utils][cam-npm] - Utilities to operate the camera.
* [@mediapipe/control_utils][ctrl-npm] - Utilities to show sliders and FPS
widgets.
Note: See these demos and more at [MediaPipe on CodePen][codepen]
All of these solutions are staged in [NPM][npm]. You can install any package
locally with `npm install`. Example:
```
npm install @mediapipe/holistic
```
If you would rather not stage these locally, you can rely on a CDN (e.g.,
[jsDelivr](https://www.jsdelivr.com/)). This will allow you to add scripts
directly to your HTML:
```
<head>
<script src="https://cdn.jsdelivr.net/npm/@mediapipe/drawing_utils@0.1/drawing_utils.js" crossorigin="anonymous"></script>
<script src="https://cdn.jsdelivr.net/npm/@mediapipe/holistic@0.1/holistic.js" crossorigin="anonymous"></script>
</head>
```
Note: You can specify version numbers for both NPM and jsDelivr. They are
structured as `<major>.<minor>.<build>`. To prevent breaking changes from
affecting your work, restrict your request to a `<minor>` number, e.g.,
`@mediapipe/holistic@0.1`.
[Ho-pg]: ../solutions/holistic#javascript-solution-api
[F-pg]: ../solutions/face_mesh#javascript-solution-api
[Fd-pg]: ../solutions/face_detection#javascript-solution-api
[H-pg]: ../solutions/hands#javascript-solution-api
[P-pg]: ../solutions/pose#javascript-solution-api
[Ho-npm]: https://www.npmjs.com/package/@mediapipe/holistic
[F-npm]: https://www.npmjs.com/package/@mediapipe/face_mesh
[Fd-npm]: https://www.npmjs.com/package/@mediapipe/face_detection
[H-npm]: https://www.npmjs.com/package/@mediapipe/hands
[P-npm]: https://www.npmjs.com/package/@mediapipe/pose
[draw-npm]: https://www.npmjs.com/package/@mediapipe/pose
[cam-npm]: https://www.npmjs.com/package/@mediapipe/pose
[ctrl-npm]: https://www.npmjs.com/package/@mediapipe/pose
[Ho-jsd]: https://www.jsdelivr.com/package/npm/@mediapipe/holistic
[F-jsd]: https://www.jsdelivr.com/package/npm/@mediapipe/face_mesh
[Fd-jsd]: https://www.jsdelivr.com/package/npm/@mediapipe/face_detection
[H-jsd]: https://www.jsdelivr.com/package/npm/@mediapipe/hands
[P-jsd]: https://www.jsdelivr.com/package/npm/@mediapipe/pose
[Ho-pen]: https://code.mediapipe.dev/codepen/holistic
[F-pen]: https://code.mediapipe.dev/codepen/face_mesh
[Fd-pen]: https://code.mediapipe.dev/codepen/face_detection
[H-pen]: https://code.mediapipe.dev/codepen/hands
[P-pen]: https://code.mediapipe.dev/codepen/pose
[Ho-demo]: https://mediapipe.dev/demo/holistic
[F-demo]: https://mediapipe.dev/demo/face_mesh
[Fd-demo]: https://mediapipe.dev/demo/face_detection
[H-demo]: https://mediapipe.dev/demo/hands
[P-demo]: https://mediapipe.dev/demo/pose
[npm]: https://www.npmjs.com/package/@mediapipe
[codepen]: https://code.mediapipe.dev/codepen

View File

@ -0,0 +1,144 @@
---
layout: default
title: MediaPipe in Python
parent: Getting Started
has_children: true
has_toc: false
nav_order: 3
---
# MediaPipe in Python
{: .no_toc }
1. TOC
{:toc}
---
## Ready-to-use Python Solutions
MediaPipe offers ready-to-use yet customizable Python solutions as a prebuilt
Python package. MediaPipe Python package is available on
[PyPI](https://pypi.org/project/mediapipe/) for Linux, macOS and Windows.
You can, for instance, activate a Python virtual environment:
```bash
$ python3 -m venv mp_env && source mp_env/bin/activate
```
Install the MediaPipe Python package and start the Python interpreter:
```bash
(mp_env)$ pip install mediapipe
(mp_env)$ python3
```
In the Python interpreter, import the package and start using one of the solutions:
```python
import mediapipe as mp
mp_face_mesh = mp.solutions.face_mesh
```
Tip: Use command `deactivate` to later exit the Python virtual environment.
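As a quick check that the installed package works end to end, a solution can be
run on a single image along these lines. This is only a minimal sketch: the
`static_image_mode` option, the `process()` call, the `multi_face_landmarks`
output and the image path follow the Face Mesh solution page and are
assumptions here rather than a complete example.

```python
import cv2
import mediapipe as mp

mp_face_mesh = mp.solutions.face_mesh

# Run Face Mesh once on a single image file (the path is a placeholder).
with mp_face_mesh.FaceMesh(static_image_mode=True) as face_mesh:
  image = cv2.imread('/path/to/image.png')
  results = face_mesh.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
  print(results.multi_face_landmarks)
```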
To learn more about configuration options and usage examples, please find
details in each solution via the links below:
* [MediaPipe Face Detection](../solutions/face_detection#python-solution-api)
* [MediaPipe Face Mesh](../solutions/face_mesh#python-solution-api)
* [MediaPipe Hands](../solutions/hands#python-solution-api)
* [MediaPipe Holistic](../solutions/holistic#python-solution-api)
* [MediaPipe Objectron](../solutions/objectron#python-solution-api)
* [MediaPipe Pose](../solutions/pose#python-solution-api)
## MediaPipe on Google Colab
* [MediaPipe Face Detection Colab](https://mediapipe.page.link/face_detection_py_colab)
* [MediaPipe Face Mesh Colab](https://mediapipe.page.link/face_mesh_py_colab)
* [MediaPipe Hands Colab](https://mediapipe.page.link/hands_py_colab)
* [MediaPipe Holistic Colab](https://mediapipe.page.link/holistic_py_colab)
* [MediaPipe Objectron Colab](https://mediapipe.page.link/objectron_py_colab)
* [MediaPipe Pose Colab](https://mediapipe.page.link/pose_py_colab)
* [MediaPipe Pose Classification Colab (Basic)](https://mediapipe.page.link/pose_classification_basic)
* [MediaPipe Pose Classification Colab (Extended)](https://mediapipe.page.link/pose_classification_extended)
## MediaPipe Python Framework
The ready-to-use solutions are built upon the MediaPipe Python framework, which
can be used by advanced users to run their own MediaPipe graphs in Python.
Please see [here](./python_framework.md) for more info.
## Building MediaPipe Python Package
Follow the steps below only if you have local changes and need to build the
Python package from source. Otherwise, we strongly encourage our users to simply
run `pip install mediapipe` to use the ready-to-use solutions, which is more
convenient and much faster.
MediaPipe PyPI currently doesn't provide aarch64 Python wheel
files. For building and using MediaPipe Python on aarch64 Linux systems such as
Nvidia Jetson and Raspberry Pi, please read
[here](https://github.com/jiuqiant/mediapipe-python-aarch64).
1. Make sure that Bazel and OpenCV are correctly installed and configured for
MediaPipe. Please see [Installation](./install.md) for how to setup Bazel
and OpenCV for MediaPipe on Linux and macOS.
2. Install the following dependencies.
Debian or Ubuntu:
```bash
$ sudo apt install python3-dev
$ sudo apt install python3-venv
$ sudo apt install -y protobuf-compiler
# If you need to build opencv from source.
$ sudo apt install cmake
```
macOS:
```bash
$ brew install protobuf
# If you need to build opencv from source.
$ brew install cmake
```
Windows:
Download the latest protoc win64 zip from
[the Protobuf GitHub repo](https://github.com/protocolbuffers/protobuf/releases),
unzip the file, and copy the protoc.exe executable to a preferred
location. Please ensure that location is added into the Path environment
variable.
3. Activate a Python virtual environment.
```bash
$ python3 -m venv mp_env && source mp_env/bin/activate
```
4. In the virtual environment, go to the MediaPipe repo directory.
5. Install the required Python packages.
```bash
(mp_env)mediapipe$ pip3 install -r requirements.txt
```
6. Generate and install MediaPipe package.
```bash
(mp_env)mediapipe$ python3 setup.py gen_protos
(mp_env)mediapipe$ python3 setup.py install --link-opencv
```
or
```bash
(mp_env)mediapipe$ python3 setup.py gen_protos
(mp_env)mediapipe$ python3 setup.py bdist_wheel
```

View File

@ -0,0 +1,268 @@
---
layout: default
title: MediaPipe Python Framework
parent: MediaPipe in Python
grand_parent: Getting Started
nav_order: 1
---
# MediaPipe Python Framework
{: .no_toc }
1. TOC
{:toc}
---
The MediaPipe Python framework grants direct access to the core components of
the MediaPipe C++ framework such as Timestamp, Packet, and CalculatorGraph,
whereas the
[ready-to-use Python solutions](./python.md#ready-to-use-python-solutions) hide
the technical details of the framework and simply return the readable model
inference results back to the callers.
MediaPipe framework sits on top of
[the pybind11 library](https://pybind11.readthedocs.io/en/stable/index.html).
The C++ core framework is exposed in Python via a C++/Python language binding.
The content below assumes that the reader already has a basic understanding of
the MediaPipe C++ framework. Otherwise, you can find useful information in
[Framework Concepts](../framework_concepts/framework_concepts.md).
### Packet
The packet is the basic data flow unit in MediaPipe. A packet consists of a
numeric timestamp and a shared pointer to an immutable payload. In Python, a
MediaPipe packet can be created by calling one of the packet creator methods in
the
[`mp.packet_creator`](https://github.com/google/mediapipe/tree/master/mediapipe/python/pybind/packet_creator.cc)
module. Correspondingly, the packet payload can be retrieved by using one of the
packet getter methods in the
[`mp.packet_getter`](https://github.com/google/mediapipe/tree/master/mediapipe/python/pybind/packet_getter.cc)
module. Note that the packet payload becomes **immutable** after packet
creation. Thus, the modification of the retrieved packet content doesn't affect
the actual payload in the packet. MediaPipe framework Python API supports the
most commonly used data types of MediaPipe (e.g., ImageFrame, Matrix, Protocol
Buffers, and the primitive data types) in the core binding. The comprehensive
table below shows the type mappings between the Python and the C++ data type
along with the packet creator and the content getter method for each data type
supported by the MediaPipe Python framework API.
Python Data Type | C++ Data Type | Packet Creator | Content Getter
------------------------------------ | ----------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------ | --------------
bool | bool | create_bool(True) | get_bool(packet)
int or np.intc | int_t | create_int(1) | get_int(packet)
int or np.int8 | int8_t | create_int8(2**7-1) | get_int(packet)
int or np.int16 | int16_t | create_int16(2**15-1) | get_int(packet)
int or np.int32 | int32_t | create_int32(2**31-1) | get_int(packet)
int or np.int64 | int64_t | create_int64(2**63-1) | get_int(packet)
int or np.uint8 | uint8_t | create_uint8(2**8-1) | get_uint(packet)
int or np.uint16 | uint16_t | create_uint16(2**16-1) | get_uint(packet)
int or np.uint32 | uint32_t | create_uint32(2**32-1) | get_uint(packet)
int or np.uint64 | uint64_t | create_uint64(2**64-1) | get_uint(packet)
float or np.float32 | float | create_float(1.1) | get_float(packet)
float or np.double | double | create_double(1.1) | get_float(packet)
str (UTF-8) | std::string | create_string('abc') | get_str(packet)
bytes | std::string | create_string(b'\xd0\xd0\xd0') | get_bytes(packet)
mp.Packet | mp::Packet | create_packet(p) | get_packet(packet)
List\[bool\] | std::vector\<bool\> | create_bool_vector(\[True, False\]) | get_bool_list(packet)
List\[int\] or List\[np.intc\] | int\[\] | create_int_array(\[1, 2, 3\]) | get_int_list(packet, size=10)
List\[int\] or List\[np.intc\] | std::vector\<int\> | create_int_vector(\[1, 2, 3\]) | get_int_list(packet)
List\[float\] or List\[np.float\] | float\[\] | create_float_array(\[0.1, 0.2\]) | get_float_list(packet, size=10)
List\[float\] or List\[np.float\] | std::vector\<float\> | create_float_vector(\[0.1, 0.2\]) | get_float_list(packet, size=10)
List\[str\] | std::vector\<std::string\> | create_string_vector(\['a'\]) | get_str_list(packet)
List\[mp.Packet\] | std::vector\<mp::Packet\> | create_packet_vector(<br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;\[packet1, packet2\]) | get_packet_list(p)
Mapping\[str, Packet\] | std::map<std::string, Packet> | create_string_to_packet_map(<br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;{'a': packet1, 'b': packet2}) | get_str_to_packet_dict(packet)
np.ndarray<br>(cv.mat and PIL.Image) | mp::ImageFrame | create_image_frame(<br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;format=ImageFormat.SRGB,<br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;data=mat) | get_image_frame(packet)
np.ndarray | mp::Matrix | create_matrix(data) | get_matrix(packet)
Google Proto Message | Google Proto Message | create_proto(proto) | get_proto(packet)
List\[Proto\] | std::vector\<Proto\> | create_proto_vector(proto_list) | get_proto_list(packet)
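For instance, a string packet can be created, timestamped, and read back as
follows; this is a minimal sketch using only the creator and getter methods
listed in the table above.

```python
import mediapipe as mp

# Create an immutable packet holding a string payload and assign timestamp 100.
packet = mp.packet_creator.create_string('hello world').at(100)

# Retrieve the payload; the packet content itself stays immutable.
payload = mp.packet_getter.get_str(packet)
print(payload)           # hello world
print(packet.timestamp)  # the timestamp assigned above
```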
It's not uncommon that users create custom C++ classes and send those into
the graphs and calculators. To allow the custom classes to be used in Python
with MediaPipe, you may extend the Packet API for a new data type in the
following steps:
1. Write the pybind11
[class binding code](https://pybind11.readthedocs.io/en/stable/advanced/classes.html)
or
[a custom type caster](https://pybind11.readthedocs.io/en/stable/advanced/cast/custom.html?highlight=custom%20type%20caster)
for the custom type in a cc file.
```c++
#include "path/to/my_type/header/file.h"
#include "pybind11/pybind11.h"
namespace py = pybind11;
PYBIND11_MODULE(my_type_binding, m) {
// Write binding code or a custom type caster for MyType.
py::class_<MyType>(m, "MyType")
.def(py::init<>())
.def(...);
}
```
2. Create a new packet creator and getter method of the custom type in a
separate cc file.
```c++
#include "path/to/my_type/header/file.h"
#include "mediapipe/framework/packet.h"
#include "pybind11/pybind11.h"
namespace mediapipe {
namespace py = pybind11;
PYBIND11_MODULE(my_packet_methods, m) {
m.def(
"create_my_type",
[](const MyType& my_type) { return MakePacket<MyType>(my_type); });
m.def(
"get_my_type",
[](const Packet& packet) {
if (!packet.ValidateAsType<MyType>().ok()) {
  PyErr_SetString(PyExc_ValueError, "Packet data type mismatch.");
  throw py::error_already_set();
}
return packet.Get<MyType>();
});
}
}  // namespace mediapipe
```
3. Add two bazel build rules for the custom type binding and the new packet
methods in the BUILD file.
```
load("@pybind11_bazel//:build_defs.bzl", "pybind_extension")
pybind_extension(
name = "my_type_binding",
srcs = ["my_type_binding.cc"],
deps = [":my_type"],
)
pybind_extension(
name = "my_packet_methods",
srcs = ["my_packet_methods.cc"],
deps = [
":my_type",
"//mediapipe/framework:packet"
],
)
```
4. Build the pybind extension targets (with the suffix `.so`) with Bazel and move the generated dynamic libraries into one of the `$LD_LIBRARY_PATH` directories.
5. Use the binding modules in Python.
```python
import my_type_binding
import my_packet_methods
packet = my_packet_methods.create_my_type(my_type_binding.MyType())
my_type = my_packet_methods.get_my_type(packet)
```
### Timestamp
Each packet contains a timestamp that is in units of microseconds. In Python,
the Packet API provides a convenience method `packet.at()` to define the numeric
timestamp of a packet. More generally, `packet.timestamp` is the packet class
property for accessing the underlying timestamp. To convert a Unix epoch time
to a MediaPipe timestamp,
[the Timestamp API](https://github.com/google/mediapipe/tree/master/mediapipe/python/pybind/timestamp.cc)
offers the method `mp.Timestamp.from_seconds()`.
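A short sketch of both approaches, `packet.at()` with a raw microsecond value
and `mp.Timestamp.from_seconds()` for Unix epoch time:

```python
import time

import mediapipe as mp

# Assign an explicit microsecond timestamp when creating a packet.
packet = mp.packet_creator.create_int(42).at(1000)
print(packet.timestamp)

# Convert the current Unix epoch time (in seconds) to a MediaPipe timestamp.
ts = mp.Timestamp.from_seconds(time.time())
print(ts)
```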
### ImageFrame
ImageFrame is the container for storing an image or a video frame. Formats
supported by ImageFrame are listed in
[the ImageFormat enum](https://github.com/google/mediapipe/tree/master/mediapipe/python/pybind/image_frame.cc#l=170).
Pixels are encoded row-major with interleaved color components, and ImageFrame
supports uint8, uint16, and float as its data types. MediaPipe provides
[an ImageFrame Python API](https://github.com/google/mediapipe/tree/master/mediapipe/python/pybind/image_frame.cc)
to access the ImageFrame C++ class. In Python, the easiest way to retrieve the
pixel data is to call `image_frame.numpy_view()` to get a numpy ndarray. Note
that the returned numpy ndarray, a reference to the internal pixel data, is
unwritable. If the callers need to modify the numpy ndarray, it's required to
explicitly call a copy operation to obtain a copy. When MediaPipe takes a numpy
ndarray to make an ImageFrame, it assumes that the data is stored contiguously.
Correspondingly, the pixel data of an ImageFrame will be realigned to be
contiguous when it's returned to the Python side.
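For example, the round trip from a numpy array to an ImageFrame packet and back
can be sketched as follows; the keyword arguments follow the packet creator
signature listed in the table above and may differ slightly between releases.

```python
import numpy as np

import mediapipe as mp

# A contiguous RGB image stored row-major as uint8.
rgb = np.zeros((480, 640, 3), dtype=np.uint8)

# Wrap the array into an ImageFrame packet and read it back.
packet = mp.packet_creator.create_image_frame(format=mp.ImageFormat.SRGB, data=rgb)
image_frame = mp.packet_getter.get_image_frame(packet)

# numpy_view() returns a read-only reference to the internal pixel data;
# make an explicit copy before modifying the pixels.
pixels = np.copy(image_frame.numpy_view())
pixels[0, 0, :] = 255
```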
### Graph
In MediaPipe, all processing takes place within the context of a
CalculatorGraph.
[The CalculatorGraph Python API](https://github.com/google/mediapipe/tree/master/mediapipe/python/pybind/calculator_graph.cc)
is a direct binding to the C++ CalculatorGraph class. The major difference is
that the CalculatorGraph Python API raises a Python error instead of returning
a non-OK Status when an error occurs. Therefore, as a Python user, you can
handle the exceptions as you normally do (see the sketch after the steps
below). The life cycle of a CalculatorGraph contains
three stages: initialization and setup, graph run, and graph shutdown.
1. Initialize a CalculatorGraph with a CalculatorGraphConfig protobuf or binary
protobuf file, and provide callback method(s) to observe the output
stream(s).
Option 1. Initialize a CalculatorGraph with a CalculatorGraphConfig protobuf
or its text representation, and observe the output stream(s):
```python
import mediapipe as mp
config_text = """
input_stream: 'in_stream'
output_stream: 'out_stream'
node {
calculator: 'PassThroughCalculator'
input_stream: 'in_stream'
output_stream: 'out_stream'
}
"""
graph = mp.CalculatorGraph(graph_config=config_text)
output_packets = []
graph.observe_output_stream(
'out_stream',
lambda stream_name, packet:
output_packets.append(mp.packet_getter.get_str(packet)))
```
Option 2. Initialize a CalculatorGraph with a binary protobuf file, and
observe the output stream(s).
```python
import mediapipe as mp
# resources dependency
graph = mp.CalculatorGraph(
binary_graph=os.path.join(
resources.GetRunfilesDir(), 'path/to/your/graph.binarypb'))
graph.observe_output_stream(
'out_stream',
lambda stream_name, packet: print(f'Get {packet} from {stream_name}'))
```
2. Start the graph run and feed packets into the graph.
```python
graph.start_run()
graph.add_packet_to_input_stream(
'in_stream', mp.packet_creator.create_str('abc').at(0))
rgb_img = cv2.cvtColor(cv2.imread('/path/to/your/image.png'), cv2.COLOR_BGR2RGB)
graph.add_packet_to_input_stream(
'in_stream',
mp.packet_creator.create_image_frame(format=mp.ImageFormat.SRGB,
data=rgb_img).at(1))
```
3. Close the graph when finished. You may restart the graph for another graph
run after the call to `close()`.
```python
graph.close()
```
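As noted above, errors from the underlying C++ graph surface as Python
exceptions, so a complete run can be wrapped in ordinary exception handling. A
minimal sketch combining the steps above (the broad `except` clause is only for
illustration):

```python
import mediapipe as mp

config_text = """
input_stream: 'in_stream'
output_stream: 'out_stream'
node {
  calculator: 'PassThroughCalculator'
  input_stream: 'in_stream'
  output_stream: 'out_stream'
}
"""

try:
  graph = mp.CalculatorGraph(graph_config=config_text)
  graph.observe_output_stream(
      'out_stream', lambda stream_name, packet: None)
  graph.start_run()
  graph.add_packet_to_input_stream(
      'in_stream', mp.packet_creator.create_str('hello').at(0))
  graph.close()
except Exception as e:
  # Any non-OK status from the underlying C++ graph surfaces here.
  print('Graph error:', e)
```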
The Python script can be run with your local Python runtime.

(Binary image assets and several large files were added or updated in this commit; their diffs are suppressed in this view.)

View File

@ -8,48 +8,61 @@ nav_order: 1
--------------------------------------------------------------------------------
## Cross-platform ML solutions made simple
## Live ML anywhere
[MediaPipe](https://google.github.io/mediapipe/) is the simplest way for researchers
and developers to build world-class ML solutions and applications for mobile,
desktop/cloud, web and IoT devices.
[MediaPipe](https://google.github.io/mediapipe/) offers cross-platform, customizable
ML solutions for live and streaming media.
![accelerated.png](images/accelerated_small.png) | ![cross_platform.png](images/cross_platform_small.png)
:------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------:
***End-to-End acceleration***: *built-in fast ML inference and processing accelerated even on common hardware* | ***Build one, deploy anywhere***: *Unified solution works across Android, iOS, desktop/cloud, web and IoT*
***End-to-End acceleration***: *Built-in fast ML inference and processing accelerated even on common hardware* | ***Build once, deploy anywhere***: *Unified solution works across Android, iOS, desktop/cloud, web and IoT*
![ready_to_use.png](images/ready_to_use_small.png) | ![open_source.png](images/open_source_small.png)
***Ready-to-use solutions***: *Cutting-edge ML solutions demonstrating full power of the framework* | ***Free and open source***: *Framework and solutions both under Apache 2.0, fully extensible and customizable*
## ML solutions in MediaPipe
Face Detection | Face Mesh | Iris | Hands | Pose | Hair Segmentation
:----------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------: | :---------------:
[![face_detection](images/mobile/face_detection_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/face_detection) | [![face_mesh](images/mobile/face_mesh_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/face_mesh) | [![iris](images/mobile/iris_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/iris) | [![hand](images/mobile/hand_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/hands) | [![pose](images/mobile/pose_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/pose) | [![hair_segmentation](images/mobile/hair_segmentation_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/hair_segmentation)
Face Detection | Face Mesh | Iris | Hands | Pose | Holistic
:----------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------: | :------:
[![face_detection](images/mobile/face_detection_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/face_detection) | [![face_mesh](images/mobile/face_mesh_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/face_mesh) | [![iris](images/mobile/iris_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/iris) | [![hand](images/mobile/hand_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/hands) | [![pose](images/mobile/pose_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/pose) | [![hair_segmentation](images/mobile/holistic_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/holistic)
Object Detection | Box Tracking | Instant Motion Tracking | Objectron | KNIFT
:----------------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------: | :---:
[![object_detection](images/mobile/object_detection_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/object_detection) | [![box_tracking](images/mobile/object_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/box_tracking) | [![instant_motion_tracking](images/mobile/instant_motion_tracking_android_small.gif)](https://google.github.io/mediapipe/solutions/instant_motion_tracking) | [![objectron](images/mobile/objectron_chair_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/objectron) | [![knift](images/mobile/template_matching_android_cpu_small.gif)](https://google.github.io/mediapipe/solutions/knift)
Hair Segmentation | Object Detection | Box Tracking | Instant Motion Tracking | Objectron | KNIFT
:-------------------------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------: | :---:
[![hair_segmentation](images/mobile/hair_segmentation_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/hair_segmentation) | [![object_detection](images/mobile/object_detection_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/object_detection) | [![box_tracking](images/mobile/object_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/box_tracking) | [![instant_motion_tracking](images/mobile/instant_motion_tracking_android_small.gif)](https://google.github.io/mediapipe/solutions/instant_motion_tracking) | [![objectron](images/mobile/objectron_chair_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/objectron) | [![knift](images/mobile/template_matching_android_cpu_small.gif)](https://google.github.io/mediapipe/solutions/knift)
<!-- []() in the first cell is needed to preserve table formatting in GitHub Pages. -->
<!-- Whenever this table is updated, paste a copy to solutions/solutions.md. -->
[]() | Android | iOS | Desktop | Python | Web | Coral
:---------------------------------------------------------------------------------------- | :-----: | :-: | :-----: | :----: | :-: | :---:
[Face Detection](https://google.github.io/mediapipe/solutions/face_detection) | ✅ | ✅ | ✅ | | ✅ | ✅
[Face Mesh](https://google.github.io/mediapipe/solutions/face_mesh) | ✅ | ✅ | ✅ | | |
[Iris](https://google.github.io/mediapipe/solutions/iris) | ✅ | ✅ | ✅ | | ✅ |
[Hands](https://google.github.io/mediapipe/solutions/hands) | ✅ | ✅ | ✅ | | ✅ |
[]() | [Android](https://google.github.io/mediapipe/getting_started/android) | [iOS](https://google.github.io/mediapipe/getting_started/ios) | [C++](https://google.github.io/mediapipe/getting_started/cpp) | [Python](https://google.github.io/mediapipe/getting_started/python) | [JS](https://google.github.io/mediapipe/getting_started/javascript) | [Coral](https://github.com/google/mediapipe/tree/master/mediapipe/examples/coral/README.md)
:---------------------------------------------------------------------------------------- | :-------------------------------------------------------------: | :-----------------------------------------------------: | :-----------------------------------------------------: | :-----------------------------------------------------------: | :-----------------------------------------------------------: | :--------------------------------------------------------------------:
[Face Detection](https://google.github.io/mediapipe/solutions/face_detection) | ✅ | ✅ | ✅ | | ✅ | ✅
[Face Mesh](https://google.github.io/mediapipe/solutions/face_mesh) | ✅ | ✅ | ✅ | | |
[Iris](https://google.github.io/mediapipe/solutions/iris) | ✅ | ✅ | ✅ | | |
[Hands](https://google.github.io/mediapipe/solutions/hands) | ✅ | ✅ | ✅ | | ✅ |
[Pose](https://google.github.io/mediapipe/solutions/pose) | ✅ | ✅ | ✅ | ✅ | ✅ |
[Hair Segmentation](https://google.github.io/mediapipe/solutions/hair_segmentation) | ✅ | | ✅ | | ✅ |
[Holistic](https://google.github.io/mediapipe/solutions/holistic) | ✅ | ✅ | ✅ | ✅ | ✅ |
[Hair Segmentation](https://google.github.io/mediapipe/solutions/hair_segmentation) | ✅ | | ✅ | | |
[Object Detection](https://google.github.io/mediapipe/solutions/object_detection) | ✅ | ✅ | ✅ | | | ✅
[Box Tracking](https://google.github.io/mediapipe/solutions/box_tracking) | ✅ | ✅ | ✅ | | |
[Instant Motion Tracking](https://google.github.io/mediapipe/solutions/instant_motion_tracking) | ✅ | | | | |
[Objectron](https://google.github.io/mediapipe/solutions/objectron) | ✅ | | | | |
[Objectron](https://google.github.io/mediapipe/solutions/objectron) | ✅ | | | | |
[KNIFT](https://google.github.io/mediapipe/solutions/knift) | ✅ | | | | |
[AutoFlip](https://google.github.io/mediapipe/solutions/autoflip) | | | ✅ | | |
[MediaSequence](https://google.github.io/mediapipe/solutions/media_sequence) | | | ✅ | | |
[YouTube 8M](https://google.github.io/mediapipe/solutions/youtube_8m) | | | ✅ | | |
See also
[MediaPipe Models and Model Cards](https://google.github.io/mediapipe/solutions/models)
for ML models released in MediaPipe.
## MediaPipe in Python
MediaPipe offers customizable Python solutions as a prebuilt Python package on
[PyPI](https://pypi.org/project/mediapipe/), which can be installed simply with
`pip install mediapipe`. It also provides tools for users to build their own
solutions. Please see
[MediaPipe in Python](https://google.github.io/mediapipe/getting_started/python)
for more info.
## MediaPipe on the Web
MediaPipe on the Web is an effort to run the same ML solutions built for mobile
@ -89,7 +102,13 @@ run code search using
## Publications
* [Instant Motion Tracking With MediaPipe](https://mediapipe.page.link/instant-motion-tracking-blog)
* [MediaPipe Holistic - Simultaneous Face, Hand and Pose Prediction, on Device](https://ai.googleblog.com/2020/12/mediapipe-holistic-simultaneous-face.html)
in Google AI Blog
* [Background Features in Google Meet, Powered by Web ML](https://ai.googleblog.com/2020/10/background-features-in-google-meet.html)
in Google AI Blog
* [MediaPipe 3D Face Transform](https://developers.googleblog.com/2020/09/mediapipe-3d-face-transform.html)
in Google Developers Blog
* [Instant Motion Tracking With MediaPipe](https://developers.googleblog.com/2020/08/instant-motion-tracking-with-mediapipe.html)
in Google Developers Blog
* [BlazePose - On-device Real-time Body Pose Tracking](https://ai.googleblog.com/2020/08/on-device-real-time-body-pose-tracking.html)
in Google AI Blog

View File

@ -2,14 +2,20 @@
layout: default
title: AutoFlip (Saliency-aware Video Cropping)
parent: Solutions
nav_order: 12
nav_order: 13
---
# AutoFlip: Saliency-aware Video Cropping
{: .no_toc }
<details close markdown="block">
<summary>
Table of contents
</summary>
{: .text-delta }
1. TOC
{:toc}
</details>
---
## Overview

View File

@ -2,14 +2,20 @@
layout: default
title: Box Tracking
parent: Solutions
nav_order: 8
nav_order: 9
---
# MediaPipe Box Tracking
{: .no_toc }
<details close markdown="block">
<summary>
Table of contents
</summary>
{: .text-delta }
1. TOC
{:toc}
</details>
---
## Overview
@ -105,9 +111,8 @@ new detections to remove obsolete or duplicated boxes.
## Example Apps
Please first see general instructions for
[Android](../getting_started/building_examples.md#android), [iOS](../getting_started/building_examples.md#ios)
and [desktop](../getting_started/building_examples.md#desktop) on how to build MediaPipe
examples.
[Android](../getting_started/android.md), [iOS](../getting_started/ios.md) and
[desktop](../getting_started/cpp.md) on how to build MediaPipe examples.
Note: To visualize a graph, copy the graph and paste it into
[MediaPipe Visualizer](https://viz.mediapipe.dev/). For more information on how

View File

@ -8,8 +8,14 @@ nav_order: 1
# MediaPipe Face Detection
{: .no_toc }
<details close markdown="block">
<summary>
Table of contents
</summary>
{: .text-delta }
1. TOC
{:toc}
</details>
---
## Overview
@ -33,12 +39,174 @@ section.
![face_detection_android_gpu.gif](../images/mobile/face_detection_android_gpu.gif)
## Solution APIs
### Configuration Options
Naming style and availability may differ slightly across platforms/languages.
#### min_detection_confidence
Minimum confidence value (`[0.0, 1.0]`) from the face detection model for the
detection to be considered successful. Defaults to `0.5`.
### Output
Naming style may differ slightly across platforms/languages.
#### detections
Collection of detected faces, where each face is represented as a detection
proto message that contains a bounding box and 6 key points (right eye, left
eye, nose tip, mouth center, right ear tragion, and left ear tragion). The
bounding box is composed of `xmin` and `width` (both normalized to `[0.0, 1.0]`
by the image width) and `ymin` and `height` (both normalized to `[0.0, 1.0]` by
the image height). Each key point is composed of `x` and `y`, which are
normalized to `[0.0, 1.0]` by the image width and height respectively.
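As a rough sketch of how these fields can be read in Python, assuming the
underlying detection message follows the `location_data.relative_bounding_box`
/ `relative_keypoints` layout implied above (these field paths are an
assumption here, not spelled out by the solution API):

```python
# `detection` is one element of results.detections, as produced by the
# Python example in the next section.
box = detection.location_data.relative_bounding_box
print('bbox:', box.xmin, box.ymin, box.width, box.height)

for keypoint in detection.location_data.relative_keypoints:
  print('keypoint:', keypoint.x, keypoint.y)
```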
### Python Solution API
Please first follow general [instructions](../getting_started/python.md) to
install MediaPipe Python package, then learn more in the companion
[Python Colab](#resources) and the following usage example.
Supported configuration options:
* [min_detection_confidence](#min_detection_confidence)
```python
import cv2
import mediapipe as mp
mp_face_detection = mp.solutions.face_detection
# drawing_utils provides mp_drawing.draw_detection used below.
mp_drawing = mp.solutions.drawing_utils
# For static images:
with mp_face_detection.FaceDetection(
min_detection_confidence=0.5) as face_detection:
for idx, file in enumerate(file_list):
image = cv2.imread(file)
# Convert the BGR image to RGB and process it with MediaPipe Face Detection.
results = face_detection.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
# Draw face detections of each face.
if not results.detections:
continue
annotated_image = image.copy()
for detection in results.detections:
print('Nose tip:')
print(mp_face_detection.get_key_point(
detection, mp_face_detection.FaceKeyPoint.NOSE_TIP))
mp_drawing.draw_detection(annotated_image, detection)
cv2.imwrite('/tmp/annotated_image' + str(idx) + '.png', annotated_image)
# For webcam input:
cap = cv2.VideoCapture(0)
with mp_face_detection.FaceDetection(
min_detection_confidence=0.5) as face_detection:
while cap.isOpened():
success, image = cap.read()
if not success:
print("Ignoring empty camera frame.")
# If loading a video, use 'break' instead of 'continue'.
continue
# Flip the image horizontally for a later selfie-view display, and convert
# the BGR image to RGB.
image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)
# To improve performance, optionally mark the image as not writeable to
# pass by reference.
image.flags.writeable = False
results = face_detection.process(image)
# Draw the face detection annotations on the image.
image.flags.writeable = True
image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
if results.detections:
for detection in results.detections:
mp_drawing.draw_detection(image, detection)
cv2.imshow('MediaPipe Face Detection', image)
if cv2.waitKey(5) & 0xFF == 27:
break
cap.release()
```
### JavaScript Solution API
Please first see general [introduction](../getting_started/javascript.md) on
MediaPipe in JavaScript, then learn more in the companion [web demo](#resources)
and the following usage example.
Supported configuration options:
* [minDetectionConfidence](#min_detection_confidence)
```html
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<script src="https://cdn.jsdelivr.net/npm/@mediapipe/camera_utils/camera_utils.js" crossorigin="anonymous"></script>
<script src="https://cdn.jsdelivr.net/npm/@mediapipe/control_utils/control_utils.js" crossorigin="anonymous"></script>
<script src="https://cdn.jsdelivr.net/npm/@mediapipe/drawing_utils/drawing_utils.js" crossorigin="anonymous"></script>
<script src="https://cdn.jsdelivr.net/npm/@mediapipe/face_detection/face_detection.js" crossorigin="anonymous"></script>
</head>
<body>
<div class="container">
<video class="input_video"></video>
<canvas class="output_canvas" width="1280px" height="720px"></canvas>
</div>
</body>
</html>
```
```javascript
<script type="module">
const videoElement = document.getElementsByClassName('input_video')[0];
const canvasElement = document.getElementsByClassName('output_canvas')[0];
const canvasCtx = canvasElement.getContext('2d');
function onResults(results) {
// Draw the overlays.
canvasCtx.save();
canvasCtx.clearRect(0, 0, canvasElement.width, canvasElement.height);
canvasCtx.drawImage(
results.image, 0, 0, canvasElement.width, canvasElement.height);
if (results.detections.length > 0) {
drawingUtils.drawRectangle(
canvasCtx, results.detections[0].boundingBox,
{color: 'blue', lineWidth: 4, fillColor: '#00000000'});
drawingUtils.drawLandmarks(canvasCtx, results.detections[0].landmarks, {
color: 'red',
radius: 5,
});
}
canvasCtx.restore();
}
const faceDetection = new FaceDetection({locateFile: (file) => {
  return `https://cdn.jsdelivr.net/npm/@mediapipe/face_detection/${file}`;
}});
faceDetection.setOptions({
minDetectionConfidence: 0.5
});
faceDetection.onResults(onResults);
const camera = new Camera(videoElement, {
onFrame: async () => {
await faceDetection.send({image: videoElement});
},
width: 1280,
height: 720
});
camera.start();
</script>
```
## Example Apps
Please first see general instructions for
[Android](../getting_started/building_examples.md#android), [iOS](../getting_started/building_examples.md#ios)
and [desktop](../getting_started/building_examples.md#desktop) on how to build MediaPipe
examples.
[Android](../getting_started/android.md), [iOS](../getting_started/ios.md) and
[desktop](../getting_started/cpp.md) on how to build MediaPipe examples.
Note: To visualize a graph, copy the graph and paste it into
[MediaPipe Visualizer](https://viz.mediapipe.dev/). For more information on how
@ -102,9 +270,6 @@ to cross-compile and run MediaPipe examples on the
[BlazeFace: Sub-millisecond Neural Face Detection on Mobile GPUs](https://arxiv.org/abs/1907.05047)
([presentation](https://docs.google.com/presentation/d/1YCtASfnYyZtH-41QvnW5iZxELFnf0MF-pPWSLGj8yjQ/present?slide=id.g5bc8aeffdd_1_0))
([poster](https://drive.google.com/file/d/1u6aB6wxDY7X2TmeUUKgFydulNtXkb3pu/view))
* For front-facing/selfie camera:
[TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/models/face_detection_front.tflite),
[TFLite model quantized for EdgeTPU/Coral](https://github.com/google/mediapipe/tree/master/mediapipe/examples/coral/models/face-detector-quantized_edgetpu.tflite)
* For back-facing camera:
[TFLite model ](https://github.com/google/mediapipe/tree/master/mediapipe/models/face_detection_back.tflite)
* [Model card](https://mediapipe.page.link/blazeface-mc)
* [Models and model cards](./models.md#face_detection)
* [Web demo](https://code.mediapipe.dev/codepen/face_detection)
* [Python Colab](https://mediapipe.page.link/face_detection_py_colab)

View File

@ -8,8 +8,14 @@ nav_order: 2
# MediaPipe Face Mesh
{: .no_toc }
<details close markdown="block">
<summary>
Table of contents
</summary>
{: .text-delta }
1. TOC
{:toc}
</details>
---
## Overview
@ -19,13 +25,18 @@ landmarks in real-time even on mobile devices. It employs machine learning (ML)
to infer the 3D surface geometry, requiring only a single camera input without
the need for a dedicated depth sensor. Utilizing lightweight model architectures
together with GPU acceleration throughout the pipeline, the solution delivers
real-time performance critical for live experiences. The core of the solution is
the same as what powers
[YouTube Stories](https://youtube-creators.googleblog.com/2018/11/introducing-more-ways-to-share-your.html)'
creator effects, the
[Augmented Faces API in ARCore](https://developers.google.com/ar/develop/java/augmented-faces/)
and the
[ML Kit Face Contour Detection API](https://firebase.google.com/docs/ml-kit/face-detection-concepts#contours).
real-time performance critical for live experiences.
Additionally, the solution is bundled with the Face Geometry module that bridges
the gap between the face landmark estimation and useful real-time augmented
reality (AR) applications. It establishes a metric 3D space and uses the face
landmark screen positions to estimate face geometry within that space. The face
geometry data consists of common 3D geometry primitives, including a face pose
transformation matrix and a triangular face mesh. Under the hood, a lightweight
statistical analysis method called
[Procrustes Analysis](https://en.wikipedia.org/wiki/Procrustes_analysis) is
employed to drive a robust, performant and portable logic. The analysis runs on
CPU and has a minimal speed/memory footprint on top of the ML model inference.
![face_mesh_ar_effects.gif](../images/face_mesh_ar_effects.gif) |
:-------------------------------------------------------------: |
@ -67,15 +78,15 @@ Note: To visualize a graph, copy the graph and paste it into
to visualize its associated subgraphs, please see
[visualizer documentation](../tools/visualizer.md).
## Models
### Models
### Face Detection Model
#### Face Detection Model
The face detector is the same [BlazeFace](https://arxiv.org/abs/1907.05047)
model used in [MediaPipe Face Detection](./face_detection.md). Please refer to
[MediaPipe Face Detection](./face_detection.md) for details.
### Face Landmark Model
#### Face Landmark Model
For 3D face landmarks we employed transfer learning and trained a network with
several objectives: the network simultaneously predicts 3D landmark coordinates
@ -98,20 +109,336 @@ You can find more information about the face landmark model in this
![face_mesh_android_gpu.gif](../images/mobile/face_mesh_android_gpu.gif) |
:------------------------------------------------------------------------: |
*Fig 2. Output of MediaPipe Face Mesh: the red box indicates the cropped area as input to the landmark model, the red dots represent the 468 landmarks in 3D, and the green lines connecting landmarks illustrate the contours around the eyes, eyebrows, lips and the entire face.* |
*Fig 2. Face landmarks: the red box indicates the cropped area as input to the landmark model, the red dots represent the 468 landmarks in 3D, and the green lines connecting landmarks illustrate the contours around the eyes, eyebrows, lips and the entire face.* |
## Face Geometry Module
The [Face Landmark Model](#face-landmark-model) performs a single-camera face landmark
detection in the screen coordinate space: the X- and Y- coordinates are
normalized screen coordinates, while the Z coordinate is relative and is scaled
as the X coordinate under the
[weak perspective projection camera model](https://en.wikipedia.org/wiki/3D_projection#Weak_perspective_projection).
This format is well-suited for some applications, however it does not directly
enable the full spectrum of augmented reality (AR) features like aligning a
virtual 3D object with a detected face.
The
[Face Geometry module](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_geometry)
moves away from the screen coordinate space towards a metric 3D space and
provides necessary primitives to handle a detected face as a regular 3D object.
By design, you'll be able to use a perspective camera to project the final 3D
scene back into the screen coordinate space with a guarantee that the face
landmark positions are not changed.
### Key Concepts
#### Metric 3D Space
The **Metric 3D space** established within the Face Geometry module is a
right-handed orthonormal metric 3D coordinate space. Within the space, there is
a **virtual perspective camera** located at the space origin and pointed in the
negative direction of the Z-axis. In the current pipeline, it is assumed that
the input camera frames are observed by exactly this virtual camera and
therefore its parameters are later used to convert the screen landmark
coordinates back into the Metric 3D space. The *virtual camera parameters* can
be set freely, however for better results it is advised to set them as close to
the *real physical camera parameters* as possible.
![face_geometry_metric_3d_space.gif](../images/face_geometry_metric_3d_space.gif) |
:----------------------------------------------------------------------------: |
*Fig 3. A visualization of multiple key elements in the Metric 3D space.* |
#### Canonical Face Model
The **Canonical Face Model** is a static 3D model of a human face, which follows
the 468 3D face landmark topology of the
[Face Landmark Model](#face-landmark-model). The model bears two important
functions:
- **Defines metric units**: the scale of the canonical face model defines the
metric units of the Metric 3D space. A metric unit used by the
[default canonical face model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_geometry/data/canonical_face_model.fbx)
is a centimeter;
- **Bridges static and runtime spaces**: the face pose transformation matrix
is - in fact - a linear map from the canonical face model into the runtime
face landmark set estimated on each frame. This way, virtual 3D assets
modeled around the canonical face model can be aligned with a tracked face
by applying the face pose transformation matrix to them.
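
To make the second point concrete, the sketch below (plain NumPy, not part of
the MediaPipe API) applies a face pose transformation matrix, assumed here to
be a 4x4 matrix acting on homogeneous coordinates, to vertices defined in the
canonical face model space:

```python
import numpy as np

def apply_face_pose(pose_matrix, canonical_vertices):
  """Maps (N, 3) canonical-space vertices into the Metric 3D space."""
  num_vertices = canonical_vertices.shape[0]
  # Append a homogeneous coordinate of 1 to every vertex.
  homogeneous = np.hstack([canonical_vertices, np.ones((num_vertices, 1))])
  transformed = homogeneous @ pose_matrix.T
  return transformed[:, :3]

# An identity pose leaves the canonical vertices where they are.
vertices = np.array([[0.0, 0.0, 0.0], [1.0, 2.0, 3.0]])
assert np.allclose(apply_face_pose(np.eye(4), vertices), vertices)
```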
### Components
#### Geometry Pipeline
The **Geometry Pipeline** is a key component, which is responsible for
estimating face geometry objects within the Metric 3D space. On each frame, the
following steps are executed in the given order:
- Face landmark screen coordinates are converted into the Metric 3D space
coordinates;
- Face pose transformation matrix is estimated as a rigid linear mapping from
the canonical face metric landmark set into the runtime face metric landmark
set in a way that minimizes a difference between the two;
- A face mesh is created using the runtime face metric landmarks as the vertex
positions (XYZ), while both the vertex texture coordinates (UV) and the
triangular topology are inherited from the canonical face model.
The geometry pipeline is implemented as a MediaPipe
[calculator](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_geometry/geometry_pipeline_calculator.cc).
For your convenience, the face geometry pipeline calculator is bundled together
with corresponding metadata into a unified MediaPipe
[subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_geometry/face_geometry_from_landmarks.pbtxt).
The face geometry format is defined as a Protocol Buffer
[message](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_geometry/protos/face_geometry.proto).
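
The second step above, estimating the pose matrix as a rigid mapping that
minimizes the difference between the canonical and runtime landmark sets, is an
instance of the classic orthogonal Procrustes problem. The actual calculator
may differ in details (e.g. landmark weighting or scale handling), but an
illustrative Kabsch-style least-squares fit can be sketched as follows:

```python
import numpy as np

def fit_rigid_transform(canonical_points, runtime_points):
  """Least-squares rigid fit (rotation + translation) between (N, 3) sets.

  Illustrative only; returns a 4x4 matrix mapping the canonical landmark set
  onto the runtime landmark set.
  """
  canonical_center = canonical_points.mean(axis=0)
  runtime_center = runtime_points.mean(axis=0)
  a = canonical_points - canonical_center
  b = runtime_points - runtime_center
  # Kabsch algorithm: SVD of the cross-covariance matrix.
  u, _, vt = np.linalg.svd(a.T @ b)
  d = np.sign(np.linalg.det(vt.T @ u.T))
  rotation = vt.T @ np.diag([1.0, 1.0, d]) @ u.T
  translation = runtime_center - rotation @ canonical_center
  pose_matrix = np.eye(4)
  pose_matrix[:3, :3] = rotation
  pose_matrix[:3, 3] = translation
  return pose_matrix
```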
#### Effect Renderer
The **Effect Renderer** is a component, which serves as a working example of a
face effect renderer. It targets the *OpenGL ES 2.0* API to enable a real-time
performance on mobile devices and supports the following rendering modes:
- **3D object rendering mode**: a virtual object is aligned with a detected
face to emulate an object attached to the face (example: glasses);
- **Face mesh rendering mode**: a texture is stretched on top of the face mesh
surface to emulate a face painting technique.
In both rendering modes, the face mesh is first rendered as an occluder straight
into the depth buffer. This step helps to create a more believable effect via
hiding invisible elements behind the face surface.
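
As an illustration of this occluder trick (only a sketch: the real renderer is
a C++ calculator targeting OpenGL ES 2.0, and the draw callbacks and current GL
context here are assumed to be supplied by the application):

```python
from OpenGL.GL import (GL_COLOR_BUFFER_BIT, GL_DEPTH_BUFFER_BIT, GL_DEPTH_TEST,
                       GL_FALSE, GL_TRUE, glClear, glColorMask, glDepthMask,
                       glEnable)

def render_frame(draw_face_mesh, draw_effect):
  glEnable(GL_DEPTH_TEST)
  glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT)
  # 1. Render the face mesh into the depth buffer only (color writes off),
  #    so it acts as an invisible occluder.
  glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE)
  glDepthMask(GL_TRUE)
  draw_face_mesh()
  # 2. Render the effect normally; fragments behind the face surface fail the
  #    depth test and stay hidden.
  glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE)
  draw_effect()
```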
The effect renderer is implemented as a MediaPipe
[calculator](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_geometry/effect_renderer_calculator.cc).
| ![face_geometry_renderer.gif](../images/face_geometry_renderer.gif) |
| :---------------------------------------------------------------------: |
| *Fig 4. An example of face effects rendered by the Face Geometry Effect Renderer.* |
## Solution APIs
### Configuration Options
Naming style and availability may differ slightly across platforms/languages.
#### static_image_mode
If set to `false`, the solution treats the input images as a video stream. It
will try to detect faces in the first input images, and upon a successful
detection further localizes the face landmarks. In subsequent images, once all
[max_num_faces](#max_num_faces) faces are detected and the corresponding face
landmarks are localized, it simply tracks those landmarks without invoking
another detection until it loses track of any of the faces. This reduces latency
and is ideal for processing video frames. If set to `true`, face detection runs
on every input image, ideal for processing a batch of static, possibly
unrelated, images. Defaults to `false`.
#### max_num_faces
Maximum number of faces to detect. Defaults to `1`.
#### min_detection_confidence
Minimum confidence value (`[0.0, 1.0]`) from the face detection model for the
detection to be considered successful. Defaults to `0.5`.
#### min_tracking_confidence
Minimum confidence value (`[0.0, 1.0]`) from the landmark-tracking model for the
face landmarks to be considered tracked successfully, or otherwise face
detection will be invoked automatically on the next input image. Setting it to a
higher value can increase robustness of the solution, at the expense of a higher
latency. Ignored if [static_image_mode](#static_image_mode) is `true`, where
face detection simply runs on every image. Defaults to `0.5`.
### Output
Naming style may differ slightly across platforms/languages.
#### multi_face_landmarks
Collection of detected/tracked faces, where each face is represented as a list
of 468 face landmarks and each landmark is composed of `x`, `y` and `z`. `x` and
`y` are normalized to `[0.0, 1.0]` by the image width and height respectively.
`z` represents the landmark depth with the depth at center of the head being the
origin, and the smaller the value the closer the landmark is to the camera. The
magnitude of `z` uses roughly the same scale as `x`.
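
For illustration, a small sketch (the helper name is ours) of converting these
normalized landmarks into pixel coordinates for an image of known size:

```python
def face_landmarks_to_pixels(results, image_width, image_height):
  """Returns, per detected face, a list of (x_px, y_px, z) tuples."""
  all_faces = []
  if not results.multi_face_landmarks:
    return all_faces
  for face_landmarks in results.multi_face_landmarks:
    points = []
    for landmark in face_landmarks.landmark:
      # x/y are normalized by image width/height; z keeps its relative scale.
      points.append((landmark.x * image_width,
                     landmark.y * image_height,
                     landmark.z))
    all_faces.append(points)
  return all_faces
```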
### Python Solution API
Please first follow general [instructions](../getting_started/python.md) to
install MediaPipe Python package, then learn more in the companion
[Python Colab](#resources) and the following usage example.
Supported configuration options:
* [static_image_mode](#static_image_mode)
* [max_num_faces](#max_num_faces)
* [min_detection_confidence](#min_detection_confidence)
* [min_tracking_confidence](#min_tracking_confidence)
```python
import cv2
import mediapipe as mp
mp_drawing = mp.solutions.drawing_utils
mp_face_mesh = mp.solutions.face_mesh
# For static images:
drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1)
with mp_face_mesh.FaceMesh(
static_image_mode=True,
max_num_faces=1,
min_detection_confidence=0.5) as face_mesh:
for idx, file in enumerate(file_list):
image = cv2.imread(file)
# Convert the BGR image to RGB before processing.
results = face_mesh.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
# Print and draw face mesh landmarks on the image.
if not results.multi_face_landmarks:
continue
annotated_image = image.copy()
for face_landmarks in results.multi_face_landmarks:
print('face_landmarks:', face_landmarks)
mp_drawing.draw_landmarks(
image=annotated_image,
landmark_list=face_landmarks,
connections=mp_face_mesh.FACE_CONNECTIONS,
landmark_drawing_spec=drawing_spec,
connection_drawing_spec=drawing_spec)
cv2.imwrite('/tmp/annotated_image' + str(idx) + '.png', annotated_image)
# For webcam input:
drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1)
cap = cv2.VideoCapture(0)
with mp_face_mesh.FaceMesh(
min_detection_confidence=0.5,
min_tracking_confidence=0.5) as face_mesh:
while cap.isOpened():
success, image = cap.read()
if not success:
print("Ignoring empty camera frame.")
# If loading a video, use 'break' instead of 'continue'.
continue
# Flip the image horizontally for a later selfie-view display, and convert
# the BGR image to RGB.
image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)
# To improve performance, optionally mark the image as not writeable to
# pass by reference.
image.flags.writeable = False
results = face_mesh.process(image)
# Draw the face mesh annotations on the image.
image.flags.writeable = True
image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
if results.multi_face_landmarks:
for face_landmarks in results.multi_face_landmarks:
mp_drawing.draw_landmarks(
image=image,
landmark_list=face_landmarks,
connections=mp_face_mesh.FACE_CONNECTIONS,
landmark_drawing_spec=drawing_spec,
connection_drawing_spec=drawing_spec)
cv2.imshow('MediaPipe FaceMesh', image)
if cv2.waitKey(5) & 0xFF == 27:
break
cap.release()
```
### JavaScript Solution API
Please first see general [introduction](../getting_started/javascript.md) on
MediaPipe in JavaScript, then learn more in the companion [web demo](#resources)
and the following usage example.
Supported configuration options:
* [maxNumFaces](#max_num_faces)
* [minDetectionConfidence](#min_detection_confidence)
* [minTrackingConfidence](#min_tracking_confidence)
```html
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<script src="https://cdn.jsdelivr.net/npm/@mediapipe/camera_utils/camera_utils.js" crossorigin="anonymous"></script>
<script src="https://cdn.jsdelivr.net/npm/@mediapipe/control_utils/control_utils.js" crossorigin="anonymous"></script>
<script src="https://cdn.jsdelivr.net/npm/@mediapipe/drawing_utils/drawing_utils.js" crossorigin="anonymous"></script>
<script src="https://cdn.jsdelivr.net/npm/@mediapipe/face_mesh/face_mesh.js" crossorigin="anonymous"></script>
</head>
<body>
<div class="container">
<video class="input_video"></video>
<canvas class="output_canvas" width="1280px" height="720px"></canvas>
</div>
</body>
</html>
```
```javascript
<script type="module">
const videoElement = document.getElementsByClassName('input_video')[0];
const canvasElement = document.getElementsByClassName('output_canvas')[0];
const canvasCtx = canvasElement.getContext('2d');
function onResults(results) {
canvasCtx.save();
canvasCtx.clearRect(0, 0, canvasElement.width, canvasElement.height);
canvasCtx.drawImage(
results.image, 0, 0, canvasElement.width, canvasElement.height);
if (results.multiFaceLandmarks) {
for (const landmarks of results.multiFaceLandmarks) {
drawConnectors(canvasCtx, landmarks, FACEMESH_TESSELATION,
{color: '#C0C0C070', lineWidth: 1});
drawConnectors(canvasCtx, landmarks, FACEMESH_RIGHT_EYE, {color: '#FF3030'});
drawConnectors(canvasCtx, landmarks, FACEMESH_RIGHT_EYEBROW, {color: '#FF3030'});
drawConnectors(canvasCtx, landmarks, FACEMESH_LEFT_EYE, {color: '#30FF30'});
drawConnectors(canvasCtx, landmarks, FACEMESH_LEFT_EYEBROW, {color: '#30FF30'});
drawConnectors(canvasCtx, landmarks, FACEMESH_FACE_OVAL, {color: '#E0E0E0'});
drawConnectors(canvasCtx, landmarks, FACEMESH_LIPS, {color: '#E0E0E0'});
}
}
canvasCtx.restore();
}
const faceMesh = new FaceMesh({locateFile: (file) => {
return `https://cdn.jsdelivr.net/npm/@mediapipe/face_mesh/${file}`;
}});
faceMesh.setOptions({
maxNumFaces: 1,
minDetectionConfidence: 0.5,
minTrackingConfidence: 0.5
});
faceMesh.onResults(onResults);
const camera = new Camera(videoElement, {
onFrame: async () => {
await faceMesh.send({image: videoElement});
},
width: 1280,
height: 720
});
camera.start();
</script>
```
## Example Apps
Please first see general instructions for
[Android](../getting_started/building_examples.md#android), [iOS](../getting_started/building_examples.md#ios) and
[desktop](../getting_started/building_examples.md#desktop) on how to build MediaPipe examples.
[Android](../getting_started/android.md), [iOS](../getting_started/ios.md) and
[desktop](../getting_started/cpp.md) on how to build MediaPipe examples.
Note: To visualize a graph, copy the graph and paste it into
[MediaPipe Visualizer](https://viz.mediapipe.dev/). For more information on how
to visualize its associated subgraphs, please see
[visualizer documentation](../tools/visualizer.md).
### Mobile
### Face Landmark Example
Face landmark example showcases real-time, cross-platform face landmark
detection. For visual reference, please refer to *Fig. 2*.
#### Mobile
* Graph:
[`mediapipe/graphs/face_mesh/face_mesh_mobile.pbtxt`](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/face_mesh/face_mesh_mobile.pbtxt)
@ -127,7 +454,7 @@ it, for Android modify `NUM_FACES` in
and for iOS modify `kNumFaces` in
[FaceMeshGpuViewController.mm](https://github.com/google/mediapipe/tree/master/mediapipe/examples/ios/facemeshgpu/FaceMeshGpuViewController.mm).
### Desktop
#### Desktop
* Running on CPU
* Graph:
@ -143,18 +470,37 @@ and for iOS modify `kNumFaces` in
Tip: Maximum number of faces to detect/process is set to 1 by default. To change
it, in the graph file modify the option of `ConstantSidePacketCalculator`.
### Face Effect Example
Face effect example showcases real-time mobile face effect application use case
for the Face Mesh solution. To enable a better user experience, this example
only works for a single face. For visual reference, please refer to *Fig. 4*.
#### Mobile
* Graph:
[`mediapipe/graphs/face_effect/face_effect_gpu.pbtxt`](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/face_effect/face_effect_gpu.pbtxt)
* Android target:
[(or download prebuilt ARM64 APK)](https://drive.google.com/file/d/1ccnaDnffEuIXriBZr2SK_Eu4FpO7K44s)
[`mediapipe/examples/android/src/java/com/google/mediapipe/apps/faceeffect`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/faceeffect/BUILD)
* iOS target:
[`mediapipe/examples/ios/faceeffect`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/ios/faceeffect/BUILD)
## Resources
* Google AI Blog:
[Real-Time AR Self-Expression with Machine Learning](https://ai.googleblog.com/2019/03/real-time-ar-self-expression-with.html)
* TensorFlow Blog:
[Face and hand tracking in the browser with MediaPipe and TensorFlow.js](https://blog.tensorflow.org/2020/03/face-and-hand-tracking-in-browser-with-mediapipe-and-tensorflowjs.html)
* Google Developers Blog:
[MediaPipe 3D Face Transform](https://developers.googleblog.com/2020/09/mediapipe-3d-face-transform.html)
* Paper:
[Real-time Facial Surface Geometry from Monocular Video on Mobile GPUs](https://arxiv.org/abs/1907.06724)
([poster](https://docs.google.com/presentation/d/1-LWwOMO9TzEVdrZ1CS1ndJzciRHfYDJfbSxH_ke_JRg/present?slide=id.g5986dd4b4c_4_212))
* Face detection model:
[TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection/face_detection_front.tflite)
* Face landmark model:
[TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_landmark/face_landmark.tflite),
[TF.js model](https://tfhub.dev/mediapipe/facemesh/1)
* [Model card](https://mediapipe.page.link/facemesh-mc)
* Canonical face model:
[FBX](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_geometry/data/canonical_face_model.fbx),
[OBJ](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_geometry/data/canonical_face_model.obj),
[UV visualization](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_geometry/data/canonical_face_model_uv_visualization.png)
* [Models and model cards](./models.md#face_mesh)
* [Web demo](https://code.mediapipe.dev/codepen/face_mesh)
* [Python Colab](https://mediapipe.page.link/face_mesh_py_colab)

View File

@ -2,14 +2,20 @@
layout: default
title: Hair Segmentation
parent: Solutions
nav_order: 6
nav_order: 7
---
# MediaPipe Hair Segmentation
{: .no_toc }
<details close markdown="block">
<summary>
Table of contents
</summary>
{: .text-delta }
1. TOC
{:toc}
</details>
---
![hair_segmentation_android_gpu_gif](../images/mobile/hair_segmentation_android_gpu.gif)
@ -17,9 +23,8 @@ nav_order: 6
## Example Apps
Please first see general instructions for
[Android](../getting_started/building_examples.md#android), [iOS](../getting_started/building_examples.md#ios)
and [desktop](../getting_started/building_examples.md#desktop) on how to build MediaPipe
examples.
[Android](../getting_started/android.md), [iOS](../getting_started/ios.md) and
[desktop](../getting_started/cpp.md) on how to build MediaPipe examples.
Note: To visualize a graph, copy the graph and paste it into
[MediaPipe Visualizer](https://viz.mediapipe.dev/). For more information on how
@ -54,5 +59,4 @@ Please refer to [these instructions](../index.md#mediapipe-on-the-web).
[Real-time Hair segmentation and recoloring on Mobile GPUs](https://arxiv.org/abs/1907.06740)
([presentation](https://drive.google.com/file/d/1C8WYlWdDRNtU1_pYBvkkG5Z5wqYqf0yj/view))
([supplementary video](https://drive.google.com/file/d/1LPtM99Ch2ogyXYbDNpEqnUfhFq0TfLuf/view))
* [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/models/hair_segmentation.tflite)
* [Model card](https://mediapipe.page.link/hairsegmentation-mc)
* [Models and model cards](./models.md#hair_segmentation)

View File

@ -8,8 +8,14 @@ nav_order: 4
# MediaPipe Hands
{: .no_toc }
<details close markdown="block">
<summary>
Table of contents
</summary>
{: .text-delta }
1. TOC
{:toc}
</details>
---
## Overview
@ -55,13 +61,21 @@ frame, and only when the landmark model could no longer identify hand presence
is palm detection invoked to relocalize the hand.
The pipeline is implemented as a MediaPipe
[graph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/hand_tracking/hand_tracking_mobile.pbtxt),
which internally utilizes a
[palm/hand detection subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/hand_tracking/subgraphs/hand_detection_gpu.pbtxt),
a
[hand landmark subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/hand_tracking/subgraphs/hand_landmark_gpu.pbtxt)
and a
[renderer subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/hand_tracking/subgraphs/renderer_gpu.pbtxt).
[graph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/hand_tracking/hand_tracking_mobile.pbtxt)
that uses a
[hand landmark tracking subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/modules/hand_landmark/hand_landmark_tracking_gpu.pbtxt)
from the
[hand landmark module](https://github.com/google/mediapipe/tree/master/mediapipe/modules/hand_landmark),
and renders using a dedicated
[hand renderer subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/hand_tracking/subgraphs/hand_renderer_gpu.pbtxt).
The
[hand landmark tracking subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/modules/hand_landmark/hand_landmark_tracking_gpu.pbtxt)
internally uses a
[hand landmark subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/modules/hand_landmark/hand_landmark_gpu.pbtxt)
from the same module and a
[palm detection subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/modules/palm_detection/palm_detection_gpu.pbtxt)
from the
[palm detection module](https://github.com/google/mediapipe/tree/master/mediapipe/modules/palm_detection).
Note: To visualize a graph, copy the graph and paste it into
[MediaPipe Visualizer](https://viz.mediapipe.dev/). For more information on how
@ -77,13 +91,14 @@ To detect initial hand locations, we designed a
mobile real-time uses in a manner similar to the face detection model in
[MediaPipe Face Mesh](./face_mesh.md). Detecting hands is a decidedly complex
task: our
[model](https://github.com/google/mediapipe/tree/master/mediapipe/models/palm_detection.tflite) has
to work across a variety of hand sizes with a large scale span (~20x) relative
to the image frame and be able to detect occluded and self-occluded hands.
Whereas faces have high contrast patterns, e.g., in the eye and mouth region,
the lack of such features in hands makes it comparatively difficult to detect
them reliably from their visual features alone. Instead, providing additional
context, like arm, body, or person features, aids accurate hand localization.
[model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/palm_detection/palm_detection.tflite)
has to work across a variety of hand sizes with a large scale span (~20x)
relative to the image frame and be able to detect occluded and self-occluded
hands. Whereas faces have high contrast patterns, e.g., in the eye and mouth
region, the lack of such features in hands makes it comparatively difficult to
detect them reliably from their visual features alone. Instead, providing
additional context, like arm, body, or person features, aids accurate hand
localization.
Our method addresses the above challenges using different strategies. First, we
train a palm detector instead of a hand detector, since estimating bounding
@ -105,7 +120,7 @@ just 86.22%.
### Hand Landmark Model
After the palm detection over the whole image our subsequent hand landmark
[model](https://github.com/google/mediapipe/tree/master/mediapipe/models/hand_landmark.tflite)
[model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/hand_landmark/hand_landmark.tflite)
performs precise keypoint localization of 21 3D hand-knuckle coordinates inside
the detected hand regions via regression, that is direct coordinate prediction.
The model learns a consistent internal hand pose representation and is robust
@ -118,16 +133,236 @@ and provide additional supervision on the nature of hand geometry, we also
render a high-quality synthetic hand model over various backgrounds and map it
to the corresponding 3D coordinates.
| ![hand_crops.png](../images/mobile/hand_crops.png) |
| :-------------------------------------------------------------------------: |
| *Fig 2. Top: Aligned hand crops passed to the tracking network with ground truth annotation. Bottom: Rendered synthetic hand images with ground truth annotation.* |
![hand_landmarks.png](../images/mobile/hand_landmarks.png) |
:--------------------------------------------------------: |
*Fig 2. 21 hand landmarks.* |
![hand_crops.png](../images/mobile/hand_crops.png) |
:-------------------------------------------------------------------------: |
*Fig 3. Top: Aligned hand crops passed to the tracking network with ground truth annotation. Bottom: Rendered synthetic hand images with ground truth annotation.* |
## Solution APIs
### Configuration Options
Naming style and availability may differ slightly across platforms/languages.
#### static_image_mode
If set to `false`, the solution treats the input images as a video stream. It
will try to detect hands in the first input images, and upon a successful
detection further localizes the hand landmarks. In subsequent images, once all
[max_num_hands](#max_num_hands) hands are detected and the corresponding hand
landmarks are localized, it simply tracks those landmarks without invoking
another detection until it loses track of any of the hands. This reduces latency
and is ideal for processing video frames. If set to `true`, hand detection runs
on every input image, ideal for processing a batch of static, possibly
unrelated, images. Defaults to `false`.
#### max_num_hands
Maximum number of hands to detect. Defaults to `2`.
#### min_detection_confidence
Minimum confidence value (`[0.0, 1.0]`) from the hand detection model for the
detection to be considered successful. Defaults to `0.5`.
#### min_tracking_confidence
Minimum confidence value (`[0.0, 1.0]`) from the landmark-tracking model for the
hand landmarks to be considered tracked successfully, or otherwise hand
detection will be invoked automatically on the next input image. Setting it to a
higher value can increase robustness of the solution, at the expense of a higher
latency. Ignored if [static_image_mode](#static_image_mode) is `true`, where
hand detection simply runs on every image. Defaults to `0.5`.
### Output
Naming style may differ slightly across platforms/languages.
#### multi_hand_landmarks
Collection of detected/tracked hands, where each hand is represented as a list
of 21 hand landmarks and each landmark is composed of `x`, `y` and `z`. `x` and
`y` are normalized to `[0.0, 1.0]` by the image width and height respectively.
`z` represents the landmark depth with the depth at the wrist being the origin,
and the smaller the value the closer the landmark is to the camera. The
magnitude of `z` uses roughly the same scale as `x`.
#### multi_handedness
Collection of handedness of the detected/tracked hands (i.e. is it a left or
right hand). Each hand is composed of `label` and `score`. `label` is a string
of value either `"Left"` or `"Right"`. `score` is the estimated probability of
the predicted handedness and is always greater than or equal to `0.5` (and the
opposite handedness has an estimated probability of `1 - score`).
Note that handedness is determined assuming the input image is mirrored, i.e.,
taken with a front-facing/selfie camera with images flipped horizontally. If it
is not the case, please swap the handedness output in the application.
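
For illustration, a small sketch (the helper name is ours) that pairs the two
outputs above and converts one landmark to pixel coordinates; it assumes
`mp_hands = mp.solutions.hands`, as in the example below:

```python
def summarize_hands(results, image_width, image_height):
  if not results.multi_hand_landmarks:
    return
  for hand_landmarks, handedness in zip(results.multi_hand_landmarks,
                                        results.multi_handedness):
    # Each handedness entry is a classification list with a single item.
    label = handedness.classification[0].label  # "Left" or "Right"
    score = handedness.classification[0].score  # always >= 0.5
    wrist = hand_landmarks.landmark[mp_hands.HandLandmark.WRIST]
    print(f'{label} hand ({score:.2f}): wrist at '
          f'({wrist.x * image_width:.1f}, {wrist.y * image_height:.1f})')
```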
### Python Solution API
Please first follow general [instructions](../getting_started/python.md) to
install MediaPipe Python package, then learn more in the companion
[Python Colab](#resources) and the following usage example.
Supported configuration options:
* [static_image_mode](#static_image_mode)
* [max_num_hands](#max_num_hands)
* [min_detection_confidence](#min_detection_confidence)
* [min_tracking_confidence](#min_tracking_confidence)
```python
import cv2
import mediapipe as mp
mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands
# For static images:
with mp_hands.Hands(
static_image_mode=True,
max_num_hands=2,
min_detection_confidence=0.5) as hands:
for idx, file in enumerate(file_list):
# Read an image, flip it around y-axis for correct handedness output (see
# above).
image = cv2.flip(cv2.imread(file), 1)
# Convert the BGR image to RGB before processing.
results = hands.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
# Print handedness and draw hand landmarks on the image.
print('Handedness:', results.multi_handedness)
if not results.multi_hand_landmarks:
continue
image_height, image_width, _ = image.shape
annotated_image = image.copy()
for hand_landmarks in results.multi_hand_landmarks:
print('hand_landmarks:', hand_landmarks)
print(
f'Index finger tip coordinates: (',
f'{hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].x * image_width}, '
f'{hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].y * image_height})'
)
mp_drawing.draw_landmarks(
annotated_image, hand_landmarks, mp_hands.HAND_CONNECTIONS)
cv2.imwrite(
'/tmp/annotated_image' + str(idx) + '.png', cv2.flip(annotated_image, 1))
# For webcam input:
cap = cv2.VideoCapture(0)
with mp_hands.Hands(
min_detection_confidence=0.5,
min_tracking_confidence=0.5) as hands:
while cap.isOpened():
success, image = cap.read()
if not success:
print("Ignoring empty camera frame.")
# If loading a video, use 'break' instead of 'continue'.
continue
# Flip the image horizontally for a later selfie-view display, and convert
# the BGR image to RGB.
image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)
# To improve performance, optionally mark the image as not writeable to
# pass by reference.
image.flags.writeable = False
results = hands.process(image)
# Draw the hand annotations on the image.
image.flags.writeable = True
image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
if results.multi_hand_landmarks:
for hand_landmarks in results.multi_hand_landmarks:
mp_drawing.draw_landmarks(
image, hand_landmarks, mp_hands.HAND_CONNECTIONS)
cv2.imshow('MediaPipe Hands', image)
if cv2.waitKey(5) & 0xFF == 27:
break
cap.release()
```
### JavaScript Solution API
Please first see general [introduction](../getting_started/javascript.md) on
MediaPipe in JavaScript, then learn more in the companion [web demo](#resources),
a [fun application](#resources), and the following usage example.
Supported configuration options:
* [maxNumHands](#max_num_hands)
* [minDetectionConfidence](#min_detection_confidence)
* [minTrackingConfidence](#min_tracking_confidence)
```html
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<script src="https://cdn.jsdelivr.net/npm/@mediapipe/camera_utils/camera_utils.js" crossorigin="anonymous"></script>
<script src="https://cdn.jsdelivr.net/npm/@mediapipe/control_utils/control_utils.js" crossorigin="anonymous"></script>
<script src="https://cdn.jsdelivr.net/npm/@mediapipe/drawing_utils/drawing_utils.js" crossorigin="anonymous"></script>
<script src="https://cdn.jsdelivr.net/npm/@mediapipe/hands/hands.js" crossorigin="anonymous"></script>
</head>
<body>
<div class="container">
<video class="input_video"></video>
<canvas class="output_canvas" width="1280px" height="720px"></canvas>
</div>
</body>
</html>
```
```javascript
<script type="module">
const videoElement = document.getElementsByClassName('input_video')[0];
const canvasElement = document.getElementsByClassName('output_canvas')[0];
const canvasCtx = canvasElement.getContext('2d');
function onResults(results) {
canvasCtx.save();
canvasCtx.clearRect(0, 0, canvasElement.width, canvasElement.height);
canvasCtx.drawImage(
results.image, 0, 0, canvasElement.width, canvasElement.height);
if (results.multiHandLandmarks) {
for (const landmarks of results.multiHandLandmarks) {
drawConnectors(canvasCtx, landmarks, HAND_CONNECTIONS,
{color: '#00FF00', lineWidth: 5});
drawLandmarks(canvasCtx, landmarks, {color: '#FF0000', lineWidth: 2});
}
}
canvasCtx.restore();
}
const hands = new Hands({locateFile: (file) => {
return `https://cdn.jsdelivr.net/npm/@mediapipe/hands/${file}`;
}});
hands.setOptions({
maxNumHands: 2,
minDetectionConfidence: 0.5,
minTrackingConfidence: 0.5
});
hands.onResults(onResults);
const camera = new Camera(videoElement, {
onFrame: async () => {
await hands.send({image: videoElement});
},
width: 1280,
height: 720
});
camera.start();
</script>
```
## Example Apps
Please first see general instructions for
[Android](../getting_started/building_examples.md#android), [iOS](../getting_started/building_examples.md#ios)
and [desktop](../getting_started/building_examples.md#desktop) on how to build MediaPipe
examples.
[Android](../getting_started/android.md), [iOS](../getting_started/ios.md) and
[desktop](../getting_started/cpp.md) on how to build MediaPipe examples.
Note: To visualize a graph, copy the graph and paste it into
[MediaPipe Visualizer](https://viz.mediapipe.dev/). For more information on how
@ -146,34 +381,11 @@ to visualize its associated subgraphs, please see
* iOS target:
[`mediapipe/examples/ios/handtrackinggpu:HandTrackingGpuApp`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/ios/handtrackinggpu/BUILD)
#### With Multi-hand Support
* Graph:
[`mediapipe/graphs/hand_tracking/multi_hand_tracking_mobile.pbtxt`](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/hand_tracking/multi_hand_tracking_mobile.pbtxt)
* Android target:
[(or download prebuilt ARM64 APK)](https://drive.google.com/open?id=1Wk6V9EVaz1ks_MInPqqVGvvJD01SGXDc)
[`mediapipe/examples/android/src/java/com/google/mediapipe/apps/multihandtrackinggpu:multihandtrackinggpu`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/multihandtrackinggpu/BUILD)
* iOS target:
[`mediapipe/examples/ios/multihandtrackinggpu:MultiHandTrackingGpuApp`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/ios/multihandtrackinggpu/BUILD)
There are two key differences between this graph and that in the
[main example](#main-example) (which handles only one hand):
1. There is a `NormalizedRectVectorHasMinSize` calculator, that checks if in
input vector of `NormalizedRect` objects has a minimum size equal to `N`. In
this graph, if the vector contains fewer than `N` objects,
`MultiHandDetection` subgraph runs. Otherwise, the `GateCalculator` doesn't
send any image packets to the `MultiHandDetection` subgraph. This way, the
main graph is efficient in that it avoids running the costly hand detection
step when there are already `N` hands in the frame.
2. The `MergeCalculator` has been replaced by the `AssociationNormRect`
calculator. This `AssociationNormRect` takes as input a vector of
`NormalizedRect` objects from the `MultiHandDetection` subgraph on the
current frame, and a vector of `NormalizedRect` objects from the
`MultiHandLandmark` subgraph from the previous frame, and performs an
association operation between these objects. This calculator ensures that
the output vector doesn't contain overlapping regions based on the specified
`min_similarity_threshold`.
Tip: Maximum number of hands to detect/process is set to 2 by default. To change
it, for Android modify `NUM_HANDS` in
[MainActivity.java](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/handtrackinggpu/MainActivity.java),
and for iOS modify `kNumHands` in
[HandTrackingViewController.mm](https://github.com/google/mediapipe/tree/master/mediapipe/examples/ios/handtrackinggpu/HandTrackingViewController.mm).
#### Palm/Hand Detection Only (no landmarks)
@ -187,8 +399,6 @@ There are two key differences between this graph and that in the
### Desktop
#### Main Example
* Running on CPU
* Graph:
[`mediapipe/graphs/hand_tracking/hand_tracking_desktop_live.pbtxt`](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/hand_tracking/hand_tracking_desktop_live.pbtxt)
@ -196,26 +406,12 @@ There are two key differences between this graph and that in the
[`mediapipe/examples/desktop/hand_tracking:hand_tracking_cpu`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/desktop/hand_tracking/BUILD)
* Running on GPU
* Graph:
[`mediapipe/graphs/hand_tracking/hand_tracking_mobile.pbtxt`](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/hand_tracking/hand_tracking_mobile.pbtxt)
[`mediapipe/graphs/hand_tracking/hand_tracking_desktop_live_gpu.pbtxt`](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/hand_tracking/hand_tracking_desktop_gpu.pbtxt)
* Target:
[`mediapipe/examples/desktop/hand_tracking:hand_tracking_gpu`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/desktop/hand_tracking/BUILD)
#### With Multi-hand Support
* Running on CPU
* Graph:
[`mediapipe/graphs/hand_tracking/multi_hand_tracking_desktop_live.pbtxt`](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/hand_tracking/multi_hand_tracking_desktop_live)
* Target:
[`mediapipe/examples/desktop/multi_hand_tracking:multi_hand_tracking_cpu`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/desktop/multi_hand_tracking/BUILD)
* Running on GPU
* Graph:
[`mediapipe/graphs/hand_tracking/multi_hand_tracking_mobile.pbtxt`](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/hand_tracking/multi_hand_tracking_mobile.pbtxt)
* Target:
[`mediapipe/examples/desktop/multi_hand_tracking:multi_hand_tracking_gpu`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/desktop/multi_hand_tracking/BUILD)
### Web
Please refer to [these instructions](../index.md#mediapipe-on-the-web).
Tip: Maximum number of hands to detect/process is set to 2 by default. To change
it, in the graph file modify the option of `ConstantSidePacketCalculator`.
## Resources
@ -226,10 +422,7 @@ Please refer to [these instructions](../index.md#mediapipe-on-the-web).
* Paper:
[MediaPipe Hands: On-device Real-time Hand Tracking](https://arxiv.org/abs/2006.10214)
([presentation](https://www.youtube.com/watch?v=I-UOrvxxXEk))
* Palm detection model:
[TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/models/palm_detection.tflite),
[TF.js model](https://tfhub.dev/mediapipe/handdetector/1)
* Hand landmark model:
[TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/models/hand_landmark.tflite),
[TF.js model](https://tfhub.dev/mediapipe/handskeleton/1)
* [Model card](https://mediapipe.page.link/handmc)
* [Models and model cards](./models.md#hands)
* [Web demo](https://code.mediapipe.dev/codepen/hands)
* [Fun application](https://code.mediapipe.dev/codepen/defrost)
* [Python Colab](https://mediapipe.page.link/hands_py_colab)

412
docs/solutions/holistic.md Normal file
View File

@ -0,0 +1,412 @@
---
layout: default
title: Holistic
parent: Solutions
nav_order: 6
---
# MediaPipe Holistic
{: .no_toc }
<details close markdown="block">
<summary>
Table of contents
</summary>
{: .text-delta }
1. TOC
{:toc}
</details>
---
## Overview
Live perception of simultaneous [human pose](./pose.md),
[face landmarks](./face_mesh.md), and [hand tracking](./hands.md) in real-time
on mobile devices can enable various modern life applications: fitness and sport
analysis, gesture control and sign language recognition, augmented reality
try-on and effects. MediaPipe already offers fast and accurate, yet separate,
solutions for these tasks. Combining them all in real-time into a semantically
consistent end-to-end solution is a uniquely difficult problem requiring
simultaneous inference of multiple, dependent neural networks.
![holistic_sports_and_gestures_example.gif](../images/mobile/holistic_sports_and_gestures_example.gif) |
:----------------------------------------------------------------------------------------------------: |
*Fig 1. Example of MediaPipe Holistic.* |
## ML Pipeline
The MediaPipe Holistic pipeline integrates separate models for
[pose](./pose.md), [face](./face_mesh.md) and [hand](./hands.md) components,
each of which are optimized for their particular domain. However, because of
their different specializations, the input to one component is not well-suited
for the others. The pose estimation model, for example, takes a lower, fixed
resolution video frame (256x256) as input. But if one were to crop the hand and
face regions from that image to pass to their respective models, the image
resolution would be too low for accurate articulation. Therefore, we designed
MediaPipe Holistic as a multi-stage pipeline, which treats the different regions
using a region appropriate image resolution.
First, we estimate the human pose (top of Fig 2) with [BlazePose](./pose.md)'s
pose detector and subsequent landmark model. Then, using the inferred pose
landmarks we derive three regions of interest (ROI) crops for each hand (2x) and
the face, and employ a re-crop model to improve the ROI. We then crop the
full-resolution input frame to these ROIs and apply task-specific face and hand
models to estimate their corresponding landmarks. Finally, we merge all
landmarks with those of the pose model to yield the full 540+ landmarks.
![holistic_pipeline_example.jpg](../images/mobile/holistic_pipeline_example.jpg) |
:------------------------------------------------------------------------------: |
*Fig 2. MediaPipe Holistic Pipeline Overview.* |
To streamline the identification of ROIs for face and hands, we utilize a
tracking approach similar to the one we use for standalone
[face](./face_mesh.md) and [hand](./hands.md) pipelines. It assumes that the
object doesn't move significantly between frames and uses estimation from the
previous frame as a guide to the object region on the current one. However,
during fast movements, the tracker can lose the target, which requires the
detector to re-localize it in the image. MediaPipe Holistic uses
[pose](./pose.md) prediction (on every frame) as an additional ROI prior to
reduce the response time of the pipeline when reacting to fast movements. This
also enables the model to retain semantic consistency across the body and its
parts by preventing a mixup between left and right hands or body parts of one
person in the frame with another.
In addition, the resolution of the input frame to the pose model is low enough
that the resulting ROIs for face and hands are still too inaccurate to guide the
re-cropping of those regions, which require a precise input crop to remain
lightweight. To close this accuracy gap, we use lightweight face and hand
re-crop models that play the role of
[spatial transformers](https://arxiv.org/abs/1506.02025) and cost only ~10% of
the corresponding model's inference time.
The pipeline is implemented as a MediaPipe
[graph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/holistic_tracking/holistic_tracking_gpu.pbtxt)
that uses a
[holistic landmark subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/modules/holistic_landmark/holistic_landmark_gpu.pbtxt)
from the
[holistic landmark module](https://github.com/google/mediapipe/tree/master/mediapipe/modules/holistic_landmark)
and renders using a dedicated
[holistic renderer subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/holistic_tracking/holistic_tracking_to_render_data.pbtxt).
The
[holistic landmark subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/modules/holistic_landmark/holistic_landmark_gpu.pbtxt)
internally uses a
[pose landmark module](https://github.com/google/mediapipe/tree/master/mediapipe/modules/pose_landmark)
,
[hand landmark module](https://github.com/google/mediapipe/tree/master/mediapipe/modules/hand_landmark)
and
[face landmark module](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_landmark/).
Please check them for implementation details.
Note: To visualize a graph, copy the graph and paste it into
[MediaPipe Visualizer](https://viz.mediapipe.dev/). For more information on how
to visualize its associated subgraphs, please see
[visualizer documentation](../tools/visualizer.md).
## Models
### Landmark Models
MediaPipe Holistic utilizes the pose, face and hand landmark models in
[MediaPipe Pose](./pose.md), [MediaPipe Face Mesh](./face_mesh.md) and
[MediaPipe Hands](./hands.md) respectively to generate a total of 543 landmarks
(33 pose landmarks, 468 face landmarks, and 21 hand landmarks per hand).
### Hand Recrop Model
For cases when the accuracy of the pose model is too low to produce
sufficiently accurate hand ROIs, we run an additional lightweight hand re-crop
model that plays the role of a
[spatial transformer](https://arxiv.org/abs/1506.02025) and costs only ~10% of
the hand model's inference time.
## Solution APIs
### Cross-platform Configuration Options
Naming style and availability may differ slightly across platforms/languages.
#### static_image_mode
If set to `false`, the solution treats the input images as a video stream. It
will try to detect the most prominent person in the very first images, and upon
a successful detection further localizes the pose and other landmarks. In
subsequent images, it then simply tracks those landmarks without invoking
another detection until it loses track, which reduces computation and latency.
If set to `true`, person detection runs on every input image, ideal for
processing a batch of static, possibly unrelated, images. Defaults to `false`.
#### upper_body_only
If set to `true`, the solution outputs only the 25 upper-body pose landmarks
(535 in total) instead of the full set of 33 pose landmarks (543 in total). Note
that upper-body-only prediction may be more accurate for use cases where the
lower-body parts are mostly out of view. Defaults to `false`.
#### smooth_landmarks
If set to `true`, the solution filters pose landmarks across different input
images to reduce jitter, but ignored if [static_image_mode](#static_image_mode)
is also set to `true`. Defaults to `true`.
#### min_detection_confidence
Minimum confidence value (`[0.0, 1.0]`) from the person-detection model for the
detection to be considered successful. Defaults to `0.5`.
#### min_tracking_confidence
Minimum confidence value (`[0.0, 1.0]`) from the landmark-tracking model for the
pose landmarks to be considered tracked successfully, or otherwise person
detection will be invoked automatically on the next input image. Setting it to a
higher value can increase robustness of the solution, at the expense of a higher
latency. Ignored if [static_image_mode](#static_image_mode) is `true`, where
person detection simply runs on every image. Defaults to `0.5`.
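
These options map directly onto the constructor arguments of the Python
Solution API below. For instance, a sketch restricting the output to the upper
body (the full example afterwards keeps the defaults):

```python
import mediapipe as mp

mp_holistic = mp.solutions.holistic

# Upper-body-only configuration; the other options are shown with defaults.
holistic = mp_holistic.Holistic(
    static_image_mode=False,
    upper_body_only=True,
    smooth_landmarks=True,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5)
# ... call holistic.process(rgb_image) on frames, then:
holistic.close()
```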
### Output
Naming style may differ slightly across platforms/languages.
#### pose_landmarks
A list of pose landmarks. Each landmark consists of the following:
* `x` and `y`: Landmark coordinates normalized to `[0.0, 1.0]` by the image
width and height respectively.
* `z`: Should be discarded as currently the model is not fully trained to
predict depth, but this is something on the roadmap.
* `visibility`: A value in `[0.0, 1.0]` indicating the likelihood of the
landmark being visible (present and not occluded) in the image.
#### face_landmarks
A list of 468 face landmarks. Each landmark consists of `x`, `y` and `z`. `x`
and `y` are normalized to `[0.0, 1.0]` by the image width and height
respectively. `z` represents the landmark depth with the depth at center of the
head being the origin, and the smaller the value the closer the landmark is to
the camera. The magnitude of `z` uses roughly the same scale as `x`.
#### left_hand_landmarks
A list of 21 hand landmarks on the left hand. Each landmark consists of `x`, `y`
and `z`. `x` and `y` are normalized to `[0.0, 1.0]` by the image width and
height respectively. `z` represents the landmark depth with the depth at the
wrist being the origin, and the smaller the value the closer the landmark is to
the camera. The magnitude of `z` uses roughly the same scale as `x`.
#### right_hand_landmarks
A list of 21 hand landmarks on the right hand, in the same representation as
[left_hand_landmarks](#left_hand_landmarks).
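
For illustration, a small sketch (the helper name is ours) that keeps only the
pose landmarks likely to be visible, converted to pixel coordinates:

```python
def visible_pose_points(results, image_width, image_height,
                        min_visibility=0.5):
  """Returns (x_px, y_px) for pose landmarks above a visibility threshold."""
  points = []
  if not results.pose_landmarks:
    return points
  for landmark in results.pose_landmarks.landmark:
    # Skip landmarks that are likely absent or occluded.
    if landmark.visibility < min_visibility:
      continue
    points.append((landmark.x * image_width, landmark.y * image_height))
  return points
```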
### Python Solution API
Please first follow general [instructions](../getting_started/python.md) to
install MediaPipe Python package, then learn more in the companion
[Python Colab](#resources) and the following usage example.
Supported configuration options:
* [static_image_mode](#static_image_mode)
* [upper_body_only](#upper_body_only)
* [smooth_landmarks](#smooth_landmarks)
* [min_detection_confidence](#min_detection_confidence)
* [min_tracking_confidence](#min_tracking_confidence)
```python
import cv2
import mediapipe as mp
mp_drawing = mp.solutions.drawing_utils
mp_holistic = mp.solutions.holistic
# For static images:
with mp_holistic.Holistic(static_image_mode=True) as holistic:
for idx, file in enumerate(file_list):
image = cv2.imread(file)
image_height, image_width, _ = image.shape
# Convert the BGR image to RGB before processing.
results = holistic.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
if results.pose_landmarks:
print(
f'Nose coordinates: ('
f'{results.pose_landmarks.landmark[mp_holistic.PoseLandmark.NOSE].x * image_width}, '
f'{results.pose_landmarks.landmark[mp_holistic.PoseLandmark.NOSE].y * image_height})'
)
# Draw pose, left and right hands, and face landmarks on the image.
annotated_image = image.copy()
mp_drawing.draw_landmarks(
annotated_image, results.face_landmarks, mp_holistic.FACE_CONNECTIONS)
mp_drawing.draw_landmarks(
annotated_image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS)
mp_drawing.draw_landmarks(
annotated_image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS)
# Use mp_holistic.UPPER_BODY_POSE_CONNECTIONS for drawing below when
# upper_body_only is set to True.
mp_drawing.draw_landmarks(
annotated_image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS)
cv2.imwrite('/tmp/annotated_image' + str(idx) + '.png', annotated_image)
# For webcam input:
cap = cv2.VideoCapture(0)
with mp_holistic.Holistic(
min_detection_confidence=0.5,
min_tracking_confidence=0.5) as holistic:
while cap.isOpened():
success, image = cap.read()
if not success:
print("Ignoring empty camera frame.")
# If loading a video, use 'break' instead of 'continue'.
continue
# Flip the image horizontally for a later selfie-view display, and convert
# the BGR image to RGB.
image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)
# To improve performance, optionally mark the image as not writeable to
# pass by reference.
image.flags.writeable = False
results = holistic.process(image)
# Draw landmark annotation on the image.
image.flags.writeable = True
image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
mp_drawing.draw_landmarks(
image, results.face_landmarks, mp_holistic.FACE_CONNECTIONS)
mp_drawing.draw_landmarks(
image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS)
mp_drawing.draw_landmarks(
image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS)
mp_drawing.draw_landmarks(
image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS)
cv2.imshow('MediaPipe Holistic', image)
if cv2.waitKey(5) & 0xFF == 27:
break
cap.release()
```
### JavaScript Solution API
Please first see general [introduction](../getting_started/javascript.md) on
MediaPipe in JavaScript, then learn more in the companion [web demo](#resources)
and the following usage example.
Supported configuration options:
* [upperBodyOnly](#upper_body_only)
* [smoothLandmarks](#smooth_landmarks)
* [minDetectionConfidence](#min_detection_confidence)
* [minTrackingConfidence](#min_tracking_confidence)
```html
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<script src="https://cdn.jsdelivr.net/npm/@mediapipe/camera_utils/camera_utils.js" crossorigin="anonymous"></script>
<script src="https://cdn.jsdelivr.net/npm/@mediapipe/control_utils/control_utils.js" crossorigin="anonymous"></script>
<script src="https://cdn.jsdelivr.net/npm/@mediapipe/drawing_utils/drawing_utils.js" crossorigin="anonymous"></script>
<script src="https://cdn.jsdelivr.net/npm/@mediapipe/holistic/holistic.js" crossorigin="anonymous"></script>
</head>
<body>
<div class="container">
<video class="input_video"></video>
<canvas class="output_canvas" width="1280px" height="720px"></canvas>
</div>
</body>
</html>
```
```javascript
<script type="module">
const videoElement = document.getElementsByClassName('input_video')[0];
const canvasElement = document.getElementsByClassName('output_canvas')[0];
const canvasCtx = canvasElement.getContext('2d');
function onResults(results) {
canvasCtx.save();
canvasCtx.clearRect(0, 0, canvasElement.width, canvasElement.height);
canvasCtx.drawImage(
results.image, 0, 0, canvasElement.width, canvasElement.height);
drawConnectors(canvasCtx, results.poseLandmarks, POSE_CONNECTIONS,
{color: '#00FF00', lineWidth: 4});
drawLandmarks(canvasCtx, results.poseLandmarks,
{color: '#FF0000', lineWidth: 2});
drawConnectors(canvasCtx, results.faceLandmarks, FACEMESH_TESSELATION,
{color: '#C0C0C070', lineWidth: 1});
drawConnectors(canvasCtx, results.leftHandLandmarks, HAND_CONNECTIONS,
{color: '#CC0000', lineWidth: 5});
drawLandmarks(canvasCtx, results.leftHandLandmarks,
{color: '#00FF00', lineWidth: 2});
drawConnectors(canvasCtx, results.rightHandLandmarks, HAND_CONNECTIONS,
{color: '#00CC00', lineWidth: 5});
drawLandmarks(canvasCtx, results.rightHandLandmarks,
{color: '#FF0000', lineWidth: 2});
canvasCtx.restore();
}
const holistic = new Holistic({locateFile: (file) => {
return `https://cdn.jsdelivr.net/npm/@mediapipe/holistic/${file}`;
}});
holistic.setOptions({
upperBodyOnly: false,
smoothLandmarks: true,
minDetectionConfidence: 0.5,
minTrackingConfidence: 0.5
});
holistic.onResults(onResults);
const camera = new Camera(videoElement, {
onFrame: async () => {
await holistic.send({image: videoElement});
},
width: 1280,
height: 720
});
camera.start();
</script>
```
## Example Apps
Please first see general instructions for
[Android](../getting_started/android.md), [iOS](../getting_started/ios.md), and
[desktop](../getting_started/cpp.md) on how to build MediaPipe examples.
Note: To visualize a graph, copy the graph and paste it into
[MediaPipe Visualizer](https://viz.mediapipe.dev/). For more information on how
to visualize its associated subgraphs, please see
[visualizer documentation](../tools/visualizer.md).
### Mobile
* Graph:
[`mediapipe/graphs/holistic_tracking/holistic_tracking_gpu.pbtxt`](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/holistic_tracking/holistic_tracking_gpu.pbtxt)
* Android target:
[(or download prebuilt ARM64 APK)](https://drive.google.com/file/d/1o-Trp2GIRitA0OvmZWUQjVMa476xpfgK/view?usp=sharing)
[`mediapipe/examples/android/src/java/com/google/mediapipe/apps/holistictrackinggpu:holistictrackinggpu`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/holistictrackinggpu/BUILD)
* iOS target:
[`mediapipe/examples/ios/holistictrackinggpu:HolisticTrackingGpuApp`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/ios/holistictrackinggpu/BUILD)
### Desktop
Please first see general instructions for [desktop](../getting_started/cpp.md)
on how to build MediaPipe examples.
* Running on CPU
* Graph:
[`mediapipe/graphs/holistic_tracking/holistic_tracking_cpu.pbtxt`](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/holistic_tracking/holistic_tracking_cpu.pbtxt)
* Target:
[`mediapipe/examples/desktop/holistic_tracking:holistic_tracking_cpu`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/desktop/holistic_tracking/BUILD)
* Running on GPU
* Graph:
[`mediapipe/graphs/holistic_tracking/holistic_tracking_gpu.pbtxt`](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/holistic_tracking/holistic_tracking_gpu.pbtxt)
* Target:
[`mediapipe/examples/desktop/holistic_tracking:holistic_tracking_gpu`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/desktop/holistic_tracking/BUILD)
## Resources
* Google AI Blog:
[MediaPipe Holistic - Simultaneous Face, Hand and Pose Prediction, on Device](https://ai.googleblog.com/2020/12/mediapipe-holistic-simultaneous-face.html)
* [Models and model cards](./models.md#holistic)
* [Web demo](https://code.mediapipe.dev/codepen/holistic)
* [Python Colab](https://mediapipe.page.link/holistic_py_colab)
View File
@ -2,14 +2,20 @@
layout: default
title: Instant Motion Tracking
parent: Solutions
nav_order: 9
nav_order: 10
---
# MediaPipe Instant Motion Tracking
{: .no_toc }
<details close markdown="block">
<summary>
Table of contents
</summary>
{: .text-delta }
1. TOC
{:toc}
</details>
---
## Overview
@ -104,18 +110,36 @@ and connected camera.
## Example Apps
Please first see general instructions for
[Android](../getting_started/building_examples.md#android) on how to build
MediaPipe examples.
[Android](../getting_started/android.md) on how to build MediaPipe examples.
* Graph: [mediapipe/graphs/instant_motion_tracking/instant_motion_tracking.pbtxt](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/instant_motion_tracking/instant_motion_tracking.pbtxt)
* Android target (or download prebuilt [ARM64 APK](https://drive.google.com/file/d/1KnaBBoKpCHR73nOBJ4fL_YdWVTAcwe6L/view?usp=sharing)):
[`mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking:instantmotiontracking`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/BUILD)
* Assets rendered by the [GlAnimationOverlayCalculator](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/object_detection_3d/calculators/gl_animation_overlay_calculator.cc) must be preprocessed into an OpenGL-ready custom .uuu format. This can be done
for user assets as follows:
> First run
>
> ```shell
> ./mediapipe/graphs/object_detection_3d/obj_parser/obj_cleanup.sh [INPUT_DIR] [INTERMEDIATE_OUTPUT_DIR]
> ```
> and then run
>
> ```build
> bazel run -c opt mediapipe/graphs/object_detection_3d/obj_parser:ObjParser -- input_dir=[INTERMEDIATE_OUTPUT_DIR] output_dir=[OUTPUT_DIR]
> ```
> INPUT_DIR should be the folder with initial asset .obj files to be processed,
> and OUTPUT_DIR is the folder where the processed asset .uuu file will be placed.
>
> Note: ObjParser combines all .obj files found in the given directory into a
> single .uuu animation file, using the order given by sorting the filenames alphanumerically. Also the ObjParser directory inputs must be given as
> absolute paths, not relative paths. See parser utility library at [`mediapipe/graphs/object_detection_3d/obj_parser/`](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/object_detection_3d/obj_parser/) for more details.
## Resources
* Google Developers Blog:
[Instant Motion Tracking With MediaPipe](https://mediapipe.page.link/instant-motion-tracking-blog)
[Instant Motion Tracking With MediaPipe](https://developers.googleblog.com/2020/08/instant-motion-tracking-with-mediapipe.html)
* Google AI Blog:
[The Instant Motion Tracking Behind Motion Stills AR](https://ai.googleblog.com/2018/02/the-instant-motion-tracking-behind.html)
* Paper:
View File
@ -8,8 +8,14 @@ nav_order: 3
# MediaPipe Iris
{: .no_toc }
<details close markdown="block">
<summary>
Table of contents
</summary>
{: .text-delta }
1. TOC
{:toc}
</details>
---
## Overview
@ -116,10 +122,8 @@ along with some simple geometric arguments. For more details please refer to our
## Example Apps
Please first see general instructions for
[Android](../getting_started/building_examples.md#android),
[iOS](../getting_started/building_examples.md#ios) and
[desktop](../getting_started/building_examples.md#desktop) on how to build
MediaPipe examples.
[Android](../getting_started/android.md), [iOS](../getting_started/ios.md) and
[desktop](../getting_started/cpp.md) on how to build MediaPipe examples.
Note: To visualize a graph, copy the graph and paste it into
[MediaPipe Visualizer](https://viz.mediapipe.dev/). For more information on how
@ -140,9 +144,8 @@ to visualize its associated subgraphs, please see
#### Live Camera Input
Please first see general instructions for
[desktop](../getting_started/building_examples.md#desktop) on how to build
MediaPipe examples.
Please first see general instructions for [desktop](../getting_started/cpp.md)
on how to build MediaPipe examples.
* Running on CPU
* Graph:
@ -199,11 +202,4 @@ Please refer to [these instructions](../index.md#mediapipe-on-the-web).
* Paper:
[Real-time Pupil Tracking from Monocular Video for Digital Puppetry](https://arxiv.org/abs/2006.11341)
([presentation](https://youtu.be/cIhXkiiapQI))
* Face detection model:
[TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection/face_detection_front.tflite)
* Face landmark model:
[TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_landmark/face_landmark.tflite),
[TF.js model](https://tfhub.dev/mediapipe/facemesh/1)
* Iris landmark model:
[TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/iris_landmark/iris_landmark.tflite)
* [Model card](https://mediapipe.page.link/iris-mc)
* [Models and model cards](./models.md#iris)
View File
@ -2,14 +2,20 @@
layout: default
title: KNIFT (Template-based Feature Matching)
parent: Solutions
nav_order: 11
nav_order: 12
---
# MediaPipe KNIFT
{: .no_toc }
<details close markdown="block">
<summary>
Table of contents
</summary>
{: .text-delta }
1. TOC
{:toc}
</details>
---
## Overview
@ -67,7 +73,7 @@ you'd like to use your own template images, see
![template_matching_mobile_template.jpg](../images/mobile/template_matching_mobile_template.jpg)
Please first see general instructions for
[Android](../getting_started/building_examples.md#android) on how to build MediaPipe examples.
[Android](../getting_started/android.md) on how to build MediaPipe examples.
Note: To visualize a graph, copy the graph and paste it into
[MediaPipe Visualizer](https://viz.mediapipe.dev/). For more information on how
@ -139,7 +145,4 @@ to run regular TFLite inference.
* Google Developers Blog:
[MediaPipe KNIFT: Template-based feature matching](https://developers.googleblog.com/2020/04/mediapipe-knift-template-based-feature-matching.html)
* [TFLite model for up to 200 keypoints](https://github.com/google/mediapipe/tree/master/mediapipe/models/knift_float.tflite)
* [TFLite model for up to 400 keypoints](https://github.com/google/mediapipe/tree/master/mediapipe/models/knift_float_400.tflite)
* [TFLite model for up to 1000 keypoints](https://github.com/google/mediapipe/tree/master/mediapipe/models/knift_float_1k.tflite)
* [Model card](https://mediapipe.page.link/knift-mc)
* [Models and model cards](./models.md#knift)
View File
@ -2,14 +2,20 @@
layout: default
title: Dataset Preparation with MediaSequence
parent: Solutions
nav_order: 13
nav_order: 14
---
# Dataset Preparation with MediaSequence
{: .no_toc }
<details close markdown="block">
<summary>
Table of contents
</summary>
{: .text-delta }
1. TOC
{:toc}
</details>
---
## Overview
90
docs/solutions/models.md Normal file
View File
@ -0,0 +1,90 @@
---
layout: default
title: Models and Model Cards
parent: Solutions
nav_order: 30
---
# MediaPipe Models and Model Cards
{: .no_toc }
1. TOC
{:toc}
---
### [Face Detection](https://google.github.io/mediapipe/solutions/face_detection)
* Face detection model for front-facing/selfie camera:
[TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/models/face_detection_front.tflite),
[TFLite model quantized for EdgeTPU/Coral](https://github.com/google/mediapipe/tree/master/mediapipe/examples/coral/models/face-detector-quantized_edgetpu.tflite)
* Face detection model for back-facing camera:
[TFLite model ](https://github.com/google/mediapipe/tree/master/mediapipe/models/face_detection_back.tflite)
* [Model card](https://mediapipe.page.link/blazeface-mc)
### [Face Mesh](https://google.github.io/mediapipe/solutions/face_mesh)
* Face landmark model:
[TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_landmark/face_landmark.tflite),
[TF.js model](https://tfhub.dev/mediapipe/facemesh/1)
* [Model card](https://mediapipe.page.link/facemesh-mc)
### [Iris](https://google.github.io/mediapipe/solutions/iris)
* Iris landmark model:
[TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/iris_landmark/iris_landmark.tflite)
* [Model card](https://mediapipe.page.link/iris-mc)
### [Hands](https://google.github.io/mediapipe/solutions/hands)
* Palm detection model:
[TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/palm_detection/palm_detection.tflite),
[TF.js model](https://tfhub.dev/mediapipe/handdetector/1)
* Hand landmark model:
[TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/hand_landmark/hand_landmark.tflite),
[TFLite model (sparse)](https://github.com/google/mediapipe/tree/master/mediapipe/modules/hand_landmark/hand_landmark_sparse.tflite),
[TF.js model](https://tfhub.dev/mediapipe/handskeleton/1)
* [Model card](https://mediapipe.page.link/handmc), [Model card (sparse)](https://mediapipe.page.link/handmc-sparse)
### [Pose](https://google.github.io/mediapipe/solutions/pose)
* Pose detection model:
[TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/pose_detection/pose_detection.tflite)
* Full-body pose landmark model:
[TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/pose_landmark/pose_landmark_full_body.tflite)
* Upper-body pose landmark model:
[TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/pose_landmark/pose_landmark_upper_body.tflite)
* [Model card](https://mediapipe.page.link/blazepose-mc)
### [Holistic](https://google.github.io/mediapipe/solutions/holistic)
* Hand recrop model:
[TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/holistic_landmark/hand_recrop.tflite)
### [Hair Segmentation](https://google.github.io/mediapipe/solutions/hair_segmentation)
* [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/models/hair_segmentation.tflite)
* [Model card](https://mediapipe.page.link/hairsegmentation-mc)
### [Object Detection](https://google.github.io/mediapipe/solutions/object_detection)
* [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/models/ssdlite_object_detection.tflite)
* [TFLite model quantized for EdgeTPU/Coral](https://github.com/google/mediapipe/tree/master/mediapipe/examples/coral/models/object-detector-quantized_edgetpu.tflite)
* [TensorFlow model](https://github.com/google/mediapipe/tree/master/mediapipe/models/object_detection_saved_model)
* [Model information](https://github.com/google/mediapipe/tree/master/mediapipe/models/object_detection_saved_model/README.md)
### [Objectron](https://google.github.io/mediapipe/solutions/objectron)
* [TFLite model for shoes](https://github.com/google/mediapipe/tree/master/mediapipe/modules/objectron/object_detection_3d_sneakers.tflite)
* [TFLite model for chairs](https://github.com/google/mediapipe/tree/master/mediapipe/modules/objectron/object_detection_3d_chair.tflite)
* [TFLite model for cameras](https://github.com/google/mediapipe/tree/master/mediapipe/modules/objectron/object_detection_3d_camera.tflite)
* [TFLite model for cups](https://github.com/google/mediapipe/tree/master/mediapipe/modules/objectron/object_detection_3d_cup.tflite)
* [Single-stage TFLite model for shoes](https://github.com/google/mediapipe/tree/master/mediapipe/modules/objectron/object_detection_3d_sneakers_1stage.tflite)
* [Single-stage TFLite model for chairs](https://github.com/google/mediapipe/tree/master/mediapipe/modules/objectron/object_detection_3d_chair_1stage.tflite)
* [Model card](https://mediapipe.page.link/objectron-mc)
### [KNIFT](https://google.github.io/mediapipe/solutions/knift)
* [TFLite model for up to 200 keypoints](https://github.com/google/mediapipe/tree/master/mediapipe/models/knift_float.tflite)
* [TFLite model for up to 400 keypoints](https://github.com/google/mediapipe/tree/master/mediapipe/models/knift_float_400.tflite)
* [TFLite model for up to 1000 keypoints](https://github.com/google/mediapipe/tree/master/mediapipe/models/knift_float_1k.tflite)
* [Model card](https://mediapipe.page.link/knift-mc)
View File
@ -2,14 +2,20 @@
layout: default
title: Object Detection
parent: Solutions
nav_order: 7
nav_order: 8
---
# MediaPipe Object Detection
{: .no_toc }
<details close markdown="block">
<summary>
Table of contents
</summary>
{: .text-delta }
1. TOC
{:toc}
</details>
---
![object_detection_android_gpu.gif](../images/mobile/object_detection_android_gpu.gif)
@ -24,8 +30,8 @@ to visualize its associated subgraphs, please see
### Mobile
Please first see general instructions for
[Android](../getting_started/building_examples.md#android) and
[iOS](../getting_started/building_examples.md#ios) on how to build MediaPipe examples.
[Android](../getting_started/android.md) and [iOS](../getting_started/ios.md) on
how to build MediaPipe examples.
#### GPU Pipeline
@ -56,8 +62,8 @@ same configuration as the GPU pipeline, runs entirely on CPU.
#### Live Camera Input
Please first see general instructions for
[desktop](../getting_started/building_examples.md#desktop) on how to build MediaPipe examples.
Please first see general instructions for [desktop](../getting_started/cpp.md)
on how to build MediaPipe examples.
* Graph:
[`mediapipe/graphs/object_detection/object_detection_desktop_live.pbtxt`](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/object_detection/object_detection_desktop_live.pbtxt)
@ -144,7 +150,4 @@ to cross-compile and run MediaPipe examples on the
## Resources
* [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/models/ssdlite_object_detection.tflite)
* [TFLite model quantized for EdgeTPU/Coral](https://github.com/google/mediapipe/tree/master/mediapipe/examples/coral/models/object-detector-quantized_edgetpu.tflite)
* [TensorFlow model](https://github.com/google/mediapipe/tree/master/mediapipe/models/object_detection_saved_model)
* [Model information](https://github.com/google/mediapipe/tree/master/mediapipe/models/object_detection_saved_model/README.md)
* [Models and model cards](./models.md#object_detection)
View File
@ -2,26 +2,31 @@
layout: default
title: Objectron (3D Object Detection)
parent: Solutions
nav_order: 10
nav_order: 11
---
# MediaPipe Objectron
{: .no_toc }
<details close markdown="block">
<summary>
Table of contents
</summary>
{: .text-delta }
1. TOC
{:toc}
</details>
---
## Overview
MediaPipe Objectron is a mobile real-time 3D object detection solution for
everyday objects. It detects objects in 2D images, and estimates their poses and
sizes through a machine learning (ML) model, trained on a newly created 3D
dataset.
everyday objects. It detects objects in 2D images, and estimates their poses
through a machine learning (ML) model, trained on the [Objectron dataset](https://github.com/google-research-datasets/Objectron).
![objectron_shoe_android_gpu.gif](../images/mobile/objectron_shoe_android_gpu.gif) | ![objectron_chair_android_gpu.gif](../images/mobile/objectron_chair_android_gpu.gif)
:--------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------:
*Fig 1(a). Objectron for Shoes.* | *Fig 1(b). Objectron for Chairs.*
![objectron_shoe_android_gpu.gif](../images/mobile/objectron_shoe_android_gpu.gif) | ![objectron_chair_android_gpu.gif](../images/mobile/objectron_chair_android_gpu.gif) | ![objectron_camera_android_gpu.gif](../images/mobile/objectron_camera_android_gpu.gif) | ![objectron_cup_android_gpu.gif](../images/mobile/objectron_cup_android_gpu.gif)
:--------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------:
*Fig 1a. Shoe Objectron* | *Fig 1b. Chair Objectron* | *Fig 1c. Camera Objectron* | *Fig 1d. Cup Objectron*
Object detection is an extensively studied computer vision problem, but most of
the research has focused on
@ -85,15 +90,42 @@ able to increase the accuracy by about 10%.
:-------------------------------------------------------------------------------------------: |
*Fig 4. An example of AR synthetic data generation. The virtual white-brown cereal box is rendered into the real scene, next to the real blue book.* |
## ML Model for 3D Object Detection
## ML Pipelines for 3D Object Detection
We built two ML pipelines to predict the 3D bounding box of an object from a
single RGB image: one is a two-stage pipeline and the other is a single-stage
pipeline. The two-stage pipeline is 3x faster than the single-stage pipeline
with similar or better accuracy. The single-stage pipeline is good at detecting
multiple objects, whereas the two-stage pipeline is good for a single dominant
object.
### Two-stage Pipeline
Our two-stage pipeline is illustrated by the diagram in Fig 5. The first stage
uses an object detector to find the 2D crop of the object. The second stage
takes the image crop and estimates the 3D bounding box. At the same time, it
also computes the 2D crop of the object for the next frame, such that the object
detector does not need to run every frame.
![objectron_network_architecture.png](../images/objectron_2stage_network_architecture.png) |
:----------------------------------------------------------------------------------------: |
*Fig 5. Network architecture and post-processing for two-stage 3D object detection.* |
We can use any 2D object detector for the first stage. In this solution, we use
[TensorFlow Object Detection](https://github.com/tensorflow/models/tree/master/research/object_detection) trained
with the [Open Images dataset](https://storage.googleapis.com/openimages/web/index.html).
The second-stage 3D bounding box predictor we released runs at 83 FPS on an
Adreno 650 mobile GPU.
### Single-stage Pipeline
![objectron_network_architecture.png](../images/objectron_network_architecture.png) |
:---------------------------------------------------------------------------------: |
*Fig 5. Network architecture and post-processing for 3D object detection.* |
*Fig 6. Network architecture and post-processing for single-stage 3D object detection.* |
We [built a single-stage model](https://arxiv.org/abs/2003.03522) to predict the
pose and physical size of an object from a single RGB image. The model backbone
has an encoder-decoder architecture, built upon
Our [single-stage pipeline](https://arxiv.org/abs/2003.03522) is illustrated by
the diagram in Fig 6. The model backbone has an encoder-decoder architecture,
built upon
[MobileNetv2](https://ai.googleblog.com/2018/04/mobilenetv2-next-generation-of-on.html).
We employ a multi-task learning approach, jointly predicting an object's shape
with detection and regression. The shape task predicts the object's shape
@ -114,9 +146,9 @@ size of the object. The model is light enough to run real-time on mobile devices
![objectron_sample_network_results.png](../images/objectron_sample_network_results.png) |
:-------------------------------------------------------------------------------------: |
*Fig 6. Sample results of our network — (Left) original 2D image with estimated bounding boxes, (Middle) object detection by Gaussian distribution, (Right) predicted segmentation mask.* |
*Fig 7. Sample results of our network — (Left) original 2D image with estimated bounding boxes, (Middle) object detection by Gaussian distribution, (Right) predicted segmentation mask.* |
## Detection and Tracking Pipeline
#### Detection and Tracking
When the model is applied to every frame captured by the mobile device, it can
suffer from jitter due to the ambiguity of the 3D bounding box estimated in each
@ -130,11 +162,11 @@ temporally consistent, reducing the jitter.
The Objectron 3D object detection and tracking pipeline is implemented as a
MediaPipe
[graph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/object_detection_3d/shoe_classic_occlusion_tracking.pbtxt),
[graph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/object_detection_3d/object_occlusion_tracking_1stage.pbtxt),
which internally uses a
[detection subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/object_detection_3d/subgraphs/objectron_detection_gpu.pbtxt)
[detection subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/modules/objectron/objectron_detection_1stage_gpu.pbtxt)
and a
[tracking subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/object_detection_3d/subgraphs/objectron_tracking_gpu.pbtxt).
[tracking subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/modules/objectron/objectron_tracking_1stage_gpu.pbtxt).
The detection subgraph performs ML inference only once every few frames to
reduce computation load, and decodes the output tensor to a FrameAnnotation that
contains nine keypoints: the 3D bounding box's center and its eight vertices.
@ -147,43 +179,357 @@ new detection becomes available from the detection subgraph, the tracking
subgraph is also responsible for consolidation between the detection and
tracking results, based on the area of overlap.
## Objectron Dataset
We also released our [Objectron dataset](http://objectron.dev), with which we
trained our 3D object detection models. The technical details of the Objectron
dataset, including usage and tutorials, are available on
the [dataset website](https://github.com/google-research-datasets/Objectron/).
## Solution APIs
### Cross-platform Configuration Options
Naming style and availability may differ slightly across platforms/languages.
#### static_image_mode
If set to `false`, the solution treats the input images as a video stream. It
will try to detect objects in the very first images, and upon successful
detection further localizes the 3D bounding box landmarks. In subsequent images,
once all [max_num_objects](#max_num_objects) objects are detected and the
corresponding 3D bounding box landmarks are localized, it simply tracks those
landmarks without invoking another detection until it loses track of any of the
objects. This reduces latency and is ideal for processing video frames. If set
to `true`, object detection runs on every input image, which is ideal for
processing a batch of static, possibly unrelated, images. Defaults to `false`.
#### max_num_objects
Maximum number of objects to detect. Defaults to `5`.
#### min_detection_confidence
Minimum confidence value (`[0.0, 1.0]`) from the object-detection model for the
detection to be considered successful. Defaults to `0.5`.
#### min_tracking_confidence
Minimum confidence value (`[0.0, 1.0]`) from the landmark-tracking model for the
3D bounding box landmarks to be considered tracked successfully, or otherwise
object detection will be invoked automatically on the next input image. Setting
it to a higher value can increase robustness of the solution, at the expense of
a higher latency. Ignored if [static_image_mode](#static_image_mode) is `true`,
where object detection simply runs on every image. Defaults to `0.99`.
#### model_name
Name of the model to use for predicting 3D bounding box landmarks. Currently supports
`{'Shoe', 'Chair', 'Cup', 'Camera'}`.
#### focal_length
Camera focal length `(fx, fy)`, defined by default in
[NDC space](#ndc-space). To use focal length `(fx_pixel, fy_pixel)` in
[pixel space](#pixel-space), users should provide `image_size` = `(image_width,
image_height)` to enable conversions inside the API. For further details about
NDC and pixel space, please see [Coordinate Systems](#coordinate-systems).
#### principal_point
Camera principal point `(px, py)`, defined by default in
[NDC space](#ndc-space). To use principal point `(px_pixel, py_pixel)` in
[pixel space](#pixel-space), users should provide `image_size` = `(image_width,
image_height)` to enable conversions inside the API. For further details about
NDC and pixel space, please see [Coordinate Systems](#coordinate-systems).
#### image_size
(**Optional**) Size `(image_width, image_height)` of the input image, **ONLY**
needed when using `focal_length` and `principal_point` in pixel space.
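For example, a minimal sketch of passing pixel-space camera parameters together
with `image_size` (the numeric values below are placeholders, not
recommendations):

```python
import mediapipe as mp

objectron = mp.solutions.objectron.Objectron(
    static_image_mode=False,
    model_name='Shoe',
    focal_length=(600.0, 600.0),      # (fx_pixel, fy_pixel)
    principal_point=(320.0, 240.0),   # (px_pixel, py_pixel)
    image_size=(640, 480))            # (image_width, image_height), required here
```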
### Output
<!-- Naming style may differ slightly across platforms/languages. -->
#### detected_objects
A list of detected 3D bounding boxes. Each 3D bounding box consists of the
following:
* `landmarks_2d` : 2D landmarks of the object's 3D bounding box. The landmark
coordinates are normalized to `[0.0, 1.0]` by the image width and height
respectively.
* `landmarks_3d` : 3D landmarks of the object's 3D bounding box. The landmark
coordinates are represented in [camera coordinate](#camera-coordinate)
frame.
* `rotation` : rotation matrix from object coordinate frame to camera
coordinate frame.
* `translation` : translation vector from object coordinate frame to camera
coordinate frame.
* `scale` : relative scale of the object along `x`, `y` and `z` directions.
## Python Solution API
Please first follow general [instructions](../getting_started/python.md) to
install MediaPipe Python package, then learn more in the companion
[Python Colab](#resources) and the following usage example.
Supported configuration options:
* [static_image_mode](#static_image_mode)
* [max_num_objects](#max_num_objects)
* [min_detection_confidence](#min_detection_confidence)
* [min_tracking_confidence](#min_tracking_confidence)
* [model_name](#model_name)
* [focal_length](#focal_length)
* [principal_point](#principal_point)
* [image_size](#image_size)
```python
import cv2
import mediapipe as mp
mp_drawing = mp.solutions.drawing_utils
mp_objectron = mp.solutions.objectron
# For static images:
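# file_list is assumed to be a predefined list of paths to the input image files.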
with mp_objectron.Objectron(static_image_mode=True,
max_num_objects=5,
min_detection_confidence=0.5,
model_name='Shoe') as objectron:
for idx, file in enumerate(file_list):
image = cv2.imread(file)
# Convert the BGR image to RGB and process it with MediaPipe Objectron.
results = objectron.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
# Draw box landmarks.
if not results.detected_objects:
print(f'No box landmarks detected on {file}')
continue
print(f'Box landmarks of {file}:')
annotated_image = image.copy()
for detected_object in results.detected_objects:
mp_drawing.draw_landmarks(
annotated_image, detected_object.landmarks_2d, mp_objectron.BOX_CONNECTIONS)
mp_drawing.draw_axis(annotated_image, detected_object.rotation,
detected_object.translation)
cv2.imwrite('/tmp/annotated_image' + str(idx) + '.png', annotated_image)
# For webcam input:
cap = cv2.VideoCapture(0)
with mp_objectron.Objectron(static_image_mode=False,
max_num_objects=5,
min_detection_confidence=0.5,
min_tracking_confidence=0.99,
model_name='Shoe') as objectron:
while cap.isOpened():
success, image = cap.read()
if not success:
print("Ignoring empty camera frame.")
# If loading a video, use 'break' instead of 'continue'.
continue
# Convert the BGR image to RGB.
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# To improve performance, optionally mark the image as not writeable to
# pass by reference.
image.flags.writeable = False
results = objectron.process(image)
# Draw the box landmarks on the image.
image.flags.writeable = True
image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
if results.detected_objects:
for detected_object in results.detected_objects:
mp_drawing.draw_landmarks(
image, detected_object.landmarks_2d, mp_objectron.BOX_CONNECTIONS)
mp_drawing.draw_axis(image, detected_object.rotation,
detected_object.translation)
cv2.imshow('MediaPipe Objectron', image)
if cv2.waitKey(5) & 0xFF == 27:
break
cap.release()
```
## Example Apps
Please first see general instructions for
[Android](../getting_started/building_examples.md#android) and
[iOS](../getting_started/building_examples.md#ios) on how to build MediaPipe examples.
[Android](../getting_started/android.md) and [iOS](../getting_started/ios.md) on
how to build MediaPipe examples.
Note: To visualize a graph, copy the graph and paste it into
[MediaPipe Visualizer](https://viz.mediapipe.dev/). For more information on how
to visualize its associated subgraphs, please see
[visualizer documentation](../tools/visualizer.md).
### Objectron for Shoes
### Two-stage Objectron
* Graph:
[`mediapipe/graphs/object_detection_3d/shoe_classic_occlusion_tracking.pbtxt`](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/object_detection_3d/shoe_classic_occlusion_tracking.pbtxt)
* Android target:
[(or download prebuilt ARM64 APK)](https://drive.google.com/open?id=1S0K4hbWt3o31FfQ4QU3Rz7IHrvOUMx1d)
[`mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d:objectdetection3d`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/BUILD)
* iOS target: Not available
[`mediapipe/graphs/object_detection_3d/object_occlusion_tracking.pbtxt`](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/object_detection_3d/object_occlusion_tracking.pbtxt)
### Objectron for Chairs
* Graph:
[`mediapipe/graphs/hair_segmentation/hair_segmentation_mobile_gpu.pbtxt`](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/object_detection_3d/chair_classic_occlusion_tracking.pbtxt)
* Android target:
[(or download prebuilt ARM64 APK)](https://drive.google.com/open?id=1MM8K-13bXLCVS1EHQ-KgkVyEahEPrKej)
[`mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d:objectdetection3d`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/BUILD)
and add `--define chair=true` to the build command, i.e.,
[`mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d:objectdetection3d`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/BUILD).
Build for **shoes** (default) with:
[(or download prebuilt ARM64 APK)](https://drive.google.com/file/d/1ANW9WDOCb8QO1r8gDC03A4UgrPkICdPP/view?usp=sharing)
```bash
bazel build -c opt --config android_arm64 mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d:objectdetection3d
```
Build for **chairs** with:
[(or download prebuilt ARM64 APK)](https://drive.google.com/file/d/1lcUv1TBnv_SxnKSQwdOqbdLa9mkaTJHy/view?usp=sharing)
```bash
bazel build -c opt --config android_arm64 --define chair=true mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d:objectdetection3d
```
Build for **cups** with:
[(or download prebuilt ARM64 APK)](https://drive.google.com/file/d/1bf77KDkowwrduleiC9B1M1XnEhjnOQbX/view?usp=sharing)
```bash
bazel build -c opt --config android_arm64 --define cup=true mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d:objectdetection3d
```
Build for **cameras** with:
[(or download prebuilt ARM64 APK)](https://drive.google.com/file/d/1GM7lPO-s5URVxIzQur1bLsionEJs3yIl/view?usp=sharing)
```bash
bazel build -c opt --config android_arm64 --define camera=true mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d:objectdetection3d
```
* iOS target: Not available
### Single-stage Objectron
* Graph:
[`mediapipe/graphs/object_detection_3d/object_occlusion_tracking_1stage.pbtxt`](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/object_detection_3d/object_occlusion_tracking_1stage.pbtxt)
* Android target:
[`mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d:objectdetection3d`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/BUILD).
Build with **single-stage** model for **shoes** with:
[(or download prebuilt ARM64 APK)](https://drive.google.com/file/d/1MvaEg4dkvKN8jAU1Z2GtudyXi1rQHYsE/view?usp=sharing)
```bash
bazel build -c opt --config android_arm64 --define shoe_1stage=true mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d:objectdetection3d
```
Build with **single-stage** model for **chairs** with:
[(or download prebuilt ARM64 APK)](https://drive.google.com/file/d/1GJL4z3jr-wD1jMHGd4NBfOG-Yoq5t167/view?usp=sharing)
```bash
bazel build -c opt --config android_arm64 --define chair_1stage=true mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d:objectdetection3d
```
* iOS target: Not available
### Assets
Example app bounding boxes are rendered with [GlAnimationOverlayCalculator](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/object_detection_3d/calculators/gl_animation_overlay_calculator.cc) by parsing the sequenced .obj file
format into a custom .uuu format. This can be done for user assets as follows:
> First run
>
> ```shell
> ./mediapipe/graphs/object_detection_3d/obj_parser/obj_cleanup.sh [INPUT_DIR] [INTERMEDIATE_OUTPUT_DIR]
> ```
> and then run
>
> ```build
> bazel run -c opt mediapipe/graphs/object_detection_3d/obj_parser:ObjParser -- input_dir=[INTERMEDIATE_OUTPUT_DIR] output_dir=[OUTPUT_DIR]
> ```
> INPUT_DIR should be the folder with initial asset .obj files to be processed,
> and OUTPUT_DIR is the folder where the processed asset .uuu file will be placed.
>
> Note: ObjParser combines all .obj files found in the given directory into a
> single .uuu animation file, using the order given by sorting the filenames alphanumerically. Also the ObjParser directory inputs must be given as
> absolute paths, not relative paths. See parser utility library at [`mediapipe/graphs/object_detection_3d/obj_parser/`](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/object_detection_3d/obj_parser/) for more details.
### Coordinate Systems
#### Object Coordinate
Each object has its own object coordinate frame. We use the below object
coordinate definition, with `+x` pointing right, `+y` pointing up and `+z`
pointing front, and the origin at the center of the 3D bounding box.
![box_coordinate.svg](../images/box_coordinate.svg)
#### Camera Coordinate
A 3D object is parameterized by its `scale`, `rotation`, and `translation` with
regard to the camera coordinate frame. In this API we use the below camera
coordinate definition, with `+x` pointing right, `+y` pointing up and `-z`
pointing to the scene.
![camera_coordinate.svg](../images/camera_coordinate.svg)
To work with box landmarks, one can first derive the landmark coordinates in the
object frame by scaling an origin-centered unit box with `scale`, and then
transform them to the camera frame by applying `rotation` and `translation`:
```
landmarks_3d = rotation * scale * unit_box + translation
```
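A minimal NumPy sketch of this transform, assuming `rotation`, `translation` and
`scale` are arrays of shapes `(3, 3)`, `(3,)` and `(3,)` as provided for each
detected object (the corner ordering of the unit box here is illustrative only):

```python
import numpy as np

# Eight corners of an origin-centered unit box in the object frame.
unit_box = np.array([[x, y, z] for x in (-0.5, 0.5)
                               for y in (-0.5, 0.5)
                               for z in (-0.5, 0.5)])  # shape (8, 3)

def box_landmarks_3d(rotation, translation, scale):
  # Scale along x/y/z in the object frame, then rotate and translate into the
  # camera frame, i.e. rotation * (scale * unit_box) + translation.
  return (unit_box * scale) @ rotation.T + translation
```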
#### NDC Space
In this API we use
[NDC(normalized device coordinates)](http://www.songho.ca/opengl/gl_projectionmatrix.html)
as an intermediate space when projecting points from 3D to 2D. In NDC space,
`x`, `y` are confined to `[-1, 1]`.
![ndc_coordinate.svg](../images/ndc_coordinate.svg)
By default the camera parameters `(fx, fy)` and `(px, py)` are defined in NDC
space. Given a 3D point `(X, Y, Z)` in camera coordinates, one can project it to
NDC space as follows:
```
x_ndc = -fx * X / Z + px
y_ndc = -fy * Y / Z + py
z_ndc = 1 / Z
```
#### Pixel Space
In this API we set the upper-left corner of an image as the origin of the pixel
coordinate system. One can convert from NDC space to pixel space as follows:
```
x_pixel = (1 + x_ndc) / 2.0 * image_width
y_pixel = (1 - y_ndc) / 2.0 * image_height
```
Alternatively, one can project directly from camera coordinates to pixel
coordinates with camera parameters `(fx_pixel, fy_pixel)` and `(px_pixel,
py_pixel)` defined in pixel space as follows:
```
x_pixel = -fx_pixel * X / Z + px_pixel
y_pixel = fy_pixel * Y / Z + py_pixel
```
Conversion of camera parameters from pixel space to NDC space:
```
fx = fx_pixel * 2.0 / image_width
fy = fy_pixel * 2.0 / image_height
```
```
px = -px_pixel * 2.0 / image_width + 1.0
py = -py_pixel * 2.0 / image_height + 1.0
```
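Putting the conventions above together, here is a small self-contained sketch
(with placeholder camera parameters and a placeholder 3D point) that converts
pixel-space parameters to NDC, projects a camera-space point to NDC and then to
pixel coordinates, and compares against the direct pixel-space projection:

```python
image_width, image_height = 640, 480          # placeholder image size
fx_pixel, fy_pixel = 600.0, 600.0             # placeholder focal length (pixels)
px_pixel, py_pixel = 320.0, 240.0             # placeholder principal point (pixels)

# Camera parameters: pixel space -> NDC space.
fx = fx_pixel * 2.0 / image_width
fy = fy_pixel * 2.0 / image_height
px = -px_pixel * 2.0 / image_width + 1.0
py = -py_pixel * 2.0 / image_height + 1.0

X, Y, Z = 0.1, -0.05, -1.0                    # a 3D point in camera coordinates

# Camera coordinates -> NDC.
x_ndc = -fx * X / Z + px
y_ndc = -fy * Y / Z + py

# NDC -> pixel coordinates (origin at the upper-left corner).
x_pixel = (1 + x_ndc) / 2.0 * image_width
y_pixel = (1 - y_ndc) / 2.0 * image_height

# With these (centered) example parameters, the direct pixel-space projection agrees.
print((x_pixel, y_pixel))                     # (380.0, 270.0)
print((-fx_pixel * X / Z + px_pixel,
       fy_pixel * Y / Z + py_pixel))          # (380.0, 270.0)
```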
## Resources
* Google AI Blog:
[Announcing the Objectron Dataset](https://ai.googleblog.com/2020/11/announcing-objectron-dataset.html)
* Google AI Blog:
[Real-Time 3D Object Detection on Mobile Devices with MediaPipe](https://ai.googleblog.com/2020/03/real-time-3d-object-detection-on-mobile.html)
* Paper: [MobilePose: Real-Time Pose Estimation for Unseen Objects with Weak
@ -191,5 +537,5 @@ to visualize its associated subgraphs, please see
* Paper:
[Instant 3D Object Tracking with Applications in Augmented Reality](https://drive.google.com/open?id=1O_zHmlgXIzAdKljp20U_JUkEHOGG52R8)
([presentation](https://www.youtube.com/watch?v=9ndF1AIo7h0))
* [TFLite model for shoes](https://github.com/google/mediapipe/tree/master/mediapipe/models/object_detection_3d_sneakers.tflite)
* [TFLite model for chairs](https://github.com/google/mediapipe/tree/master/mediapipe/models/object_detection_3d_chair.tflite)
* [Models and model cards](./models.md#objectron)
* [Python Colab](https://mediapipe.page.link/objectron_py_colab)
View File
@ -2,34 +2,43 @@
layout: default
title: Pose
parent: Solutions
has_children: true
has_toc: false
nav_order: 5
---
# MediaPipe BlazePose
# MediaPipe Pose
{: .no_toc }
<details close markdown="block">
<summary>
Table of contents
</summary>
{: .text-delta }
1. TOC
{:toc}
</details>
---
## Overview
Human pose estimation from video plays a critical role in various applications
such as quantifying physical exercises, sign language recognition, and full-body
gesture control. For example, it can form the basis for yoga, dance, and fitness
applications. It can also enable the overlay of digital content and information
on top of the physical world in augmented reality.
such as [quantifying physical exercises](./pose_classification.md), sign
language recognition, and full-body gesture control. For example, it can form
the basis for yoga, dance, and fitness applications. It can also enable the
overlay of digital content and information on top of the physical world in
augmented reality.
MediaPipe Pose is a ML solution for high-fidelity upper-body pose tracking,
inferring 25 2D upper-body landmarks from RGB video frames utilizing our
MediaPipe Pose is a ML solution for high-fidelity body pose tracking, inferring
33 3D landmarks on the whole body (or 25 upper-body landmarks) from RGB video
frames utilizing our
[BlazePose](https://ai.googleblog.com/2020/08/on-device-real-time-body-pose-tracking.html)
research. Current state-of-the-art approaches rely primarily on powerful desktop
research that also powers the
[ML Kit Pose Detection API](https://developers.google.com/ml-kit/vision/pose-detection).
Current state-of-the-art approaches rely primarily on powerful desktop
environments for inference, whereas our method achieves real-time performance on
most modern [mobile phones](#mobile), [desktops/laptops](#desktop), in
[python](#python) and even on the [web](#web). A variant of MediaPipe Pose that
performs full-body pose tracking on mobile phones will be included in an
upcoming release of
[ML Kit](https://developers.google.com/ml-kit/early-access/pose-detection).
[python](#python-solution-api) and even on the [web](#javascript-solution-api).
![pose_tracking_upper_body_example.gif](../images/mobile/pose_tracking_upper_body_example.gif) |
:--------------------------------------------------------------------------------------------: |
@ -40,23 +49,24 @@ upcoming release of
The solution utilizes a two-step detector-tracker ML pipeline, proven to be
effective in our [MediaPipe Hands](./hands.md) and
[MediaPipe Face Mesh](./face_mesh.md) solutions. Using a detector, the pipeline
first locates the pose region-of-interest (ROI) within the frame. The tracker
subsequently predicts the pose landmarks within the ROI using the ROI-cropped
frame as input. Note that for video use cases the detector is invoked only as
needed, i.e., for the very first frame and when the tracker could no longer
identify body pose presence in the previous frame. For other frames the pipeline
simply derives the ROI from the previous frames pose landmarks.
first locates the person/pose region-of-interest (ROI) within the frame. The
tracker subsequently predicts the pose landmarks within the ROI using the
ROI-cropped frame as input. Note that for video use cases the detector is
invoked only as needed, i.e., for the very first frame and when the tracker
could no longer identify body pose presence in the previous frame. For other
frames the pipeline simply derives the ROI from the previous frames pose
landmarks.
The pipeline is implemented as a MediaPipe
[graph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/pose_tracking/upper_body_pose_tracking_gpu.pbtxt)
[graph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/pose_tracking/pose_tracking_gpu.pbtxt)
that uses a
[pose landmark subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/modules/pose_landmark/pose_landmark_upper_body_gpu.pbtxt)
[pose landmark subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/modules/pose_landmark/pose_landmark_gpu.pbtxt)
from the
[pose landmark module](https://github.com/google/mediapipe/tree/master/mediapipe/modules/pose_landmark)
and renders using a dedicated
[upper-body pose renderer subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/pose_tracking/subgraphs/upper_body_pose_renderer_gpu.pbtxt).
[pose renderer subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/pose_tracking/subgraphs/pose_renderer_gpu.pbtxt).
The
[pose landmark subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/modules/pose_landmark/pose_landmark_upper_body_gpu.pbtxt)
[pose landmark subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/modules/pose_landmark/pose_landmark_gpu.pbtxt)
internally uses a
[pose detection subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/modules/pose_detection/pose_detection_gpu.pbtxt)
from the
@ -69,7 +79,7 @@ to visualize its associated subgraphs, please see
## Models
### Pose Detection Model (BlazePose Detector)
### Person/pose Detection Model (BlazePose Detector)
The detector is inspired by our own lightweight
[BlazeFace](https://arxiv.org/abs/1907.05047) model, used in
@ -85,28 +95,240 @@ hip midpoints.
:----------------------------------------------------------------------------------------------------: |
*Fig 2. Vitruvian man aligned via two virtual keypoints predicted by BlazePose detector in addition to the face bounding box.* |
### Pose Landmark Model (BlazePose Tracker)
### Pose Landmark Model (BlazePose GHUM 3D)
The landmark model currently included in MediaPipe Pose predicts the location of
25 upper-body landmarks (see figure below), with three degrees of freedom each
(x, y location and visibility), plus two virtual alignment keypoints. It shares
the same architecture as the full-body version that predicts 33 landmarks,
described in more detail in the
[BlazePose Google AI Blog](https://ai.googleblog.com/2020/08/on-device-real-time-body-pose-tracking.html)
and in this [paper](https://arxiv.org/abs/2006.10204).
The landmark model in MediaPipe Pose comes in two versions: a full-body model
that predicts the location of 33 pose landmarks (see figure below), and an
upper-body version that only predicts the first 25. The latter may be more
accurate than the former in scenarios where the lower-body parts are mostly out
of view.
![pose_tracking_upper_body_landmarks.png](../images/mobile/pose_tracking_upper_body_landmarks.png) |
:------------------------------------------------------------------------------------------------: |
*Fig 3. 25 upper-body pose landmarks.* |
Please find more detail in the
[BlazePose Google AI Blog](https://ai.googleblog.com/2020/08/on-device-real-time-body-pose-tracking.html),
this [paper](https://arxiv.org/abs/2006.10204) and
[the model card](./models.md#pose), and the attributes in each landmark
[below](#pose_landmarks).
![pose_tracking_full_body_landmarks.png](../images/mobile/pose_tracking_full_body_landmarks.png) |
:----------------------------------------------------------------------------------------------: |
*Fig 3. 33 pose landmarks.* |
## Solution APIs
### Cross-platform Configuration Options
Naming style and availability may differ slightly across platforms/languages.
#### static_image_mode
If set to `false`, the solution treats the input images as a video stream. It
will try to detect the most prominent person in the very first images, and upon
a successful detection further localizes the pose landmarks. In subsequent
images, it then simply tracks those landmarks without invoking another detection
until it loses track, which reduces computation and latency. If set to `true`,
person detection runs on every input image, which is ideal for processing a
batch of static, possibly unrelated, images. Defaults to `false`.
#### upper_body_only
If set to `true`, the solution outputs only the 25 upper-body pose landmarks.
Otherwise, it outputs the full set of 33 pose landmarks. Note that
upper-body-only prediction may be more accurate for use cases where the
lower-body parts are mostly out of view. Defaults to `false`.
#### smooth_landmarks
If set to `true`, the solution filters pose landmarks across different input
images to reduce jitter. This is ignored if
[static_image_mode](#static_image_mode) is also set to `true`. Defaults to
`true`.
#### min_detection_confidence
Minimum confidence value (`[0.0, 1.0]`) from the person-detection model for the
detection to be considered successful. Defaults to `0.5`.
#### min_tracking_confidence
Minimum confidence value (`[0.0, 1.0]`) from the landmark-tracking model for the
pose landmarks to be considered tracked successfully, or otherwise person
detection will be invoked automatically on the next input image. Setting it to a
higher value can increase robustness of the solution, at the expense of a higher
latency. Ignored if [static_image_mode](#static_image_mode) is `true`, where
person detection simply runs on every image. Defaults to `0.5`.
### Output
Naming style may differ slightly across platforms/languages.
#### pose_landmarks
A list of pose landmarks. Each landmark consists of the following:
* `x` and `y`: Landmark coordinates normalized to `[0.0, 1.0]` by the image
width and height respectively.
* `z`: Represents the landmark depth with the depth at the midpoint of hips
being the origin, and the smaller the value the closer the landmark is to
the camera. The magnitude of `z` uses roughly the same scale as `x`.
Note: `z` is predicted only in full-body mode, and should be discarded when
[upper_body_only](#upper_body_only) is `true`.
* `visibility`: A value in `[0.0, 1.0]` indicating the likelihood of the
landmark being visible (present and not occluded) in the image.
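For example, the attributes above can be consumed as follows (a minimal sketch,
assuming `results` comes from `Pose.process()` and `image_width`/`image_height`
are known; the visibility threshold is an arbitrary illustration):

```python
VISIBILITY_THRESHOLD = 0.5  # Hypothetical threshold; tune per application.
if results.pose_landmarks:
  # Keep only landmarks that are likely visible, converted to pixel coordinates.
  visible_points = [
      (lm.x * image_width, lm.y * image_height)
      for lm in results.pose_landmarks.landmark
      if lm.visibility > VISIBILITY_THRESHOLD]
```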
### Python Solution API
Please first follow general [instructions](../getting_started/python.md) to
install MediaPipe Python package, then learn more in the companion
[Python Colab](#resources) and the following usage example.
Supported configuration options:
* [static_image_mode](#static_image_mode)
* [upper_body_only](#upper_body_only)
* [smooth_landmarks](#smooth_landmarks)
* [min_detection_confidence](#min_detection_confidence)
* [min_tracking_confidence](#min_tracking_confidence)
```python
import cv2
import mediapipe as mp
mp_drawing = mp.solutions.drawing_utils
mp_pose = mp.solutions.pose
# For static images:
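# file_list is assumed to be a predefined list of paths to the input image files.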
with mp_pose.Pose(
static_image_mode=True, min_detection_confidence=0.5) as pose:
for idx, file in enumerate(file_list):
image = cv2.imread(file)
image_height, image_width, _ = image.shape
# Convert the BGR image to RGB before processing.
results = pose.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
if not results.pose_landmarks:
continue
print(
f'Nose coordinates: ('
f'{results.pose_landmarks.landmark[mp_pose.PoseLandmark.NOSE].x * image_width}, '
f'{results.pose_landmarks.landmark[mp_pose.PoseLandmark.NOSE].y * image_height})'
)
# Draw pose landmarks on the image.
annotated_image = image.copy()
# Use mp_pose.UPPER_BODY_POSE_CONNECTIONS for drawing below when
# upper_body_only is set to True.
mp_drawing.draw_landmarks(
annotated_image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)
cv2.imwrite('/tmp/annotated_image' + str(idx) + '.png', annotated_image)
# For webcam input:
cap = cv2.VideoCapture(0)
with mp_pose.Pose(
min_detection_confidence=0.5,
min_tracking_confidence=0.5) as pose:
while cap.isOpened():
success, image = cap.read()
if not success:
print("Ignoring empty camera frame.")
# If loading a video, use 'break' instead of 'continue'.
continue
# Flip the image horizontally for a later selfie-view display, and convert
# the BGR image to RGB.
image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)
# To improve performance, optionally mark the image as not writeable to
# pass by reference.
image.flags.writeable = False
results = pose.process(image)
# Draw the pose annotation on the image.
image.flags.writeable = True
image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
mp_drawing.draw_landmarks(
image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)
cv2.imshow('MediaPipe Pose', image)
if cv2.waitKey(5) & 0xFF == 27:
break
cap.release()
```
### JavaScript Solution API
Please first see general [introduction](../getting_started/javascript.md) on
MediaPipe in JavaScript, then learn more in the companion [web demo](#resources)
and the following usage example.
Supported configuration options:
* [upperBodyOnly](#upper_body_only)
* [smoothLandmarks](#smooth_landmarks)
* [minDetectionConfidence](#min_detection_confidence)
* [minTrackingConfidence](#min_tracking_confidence)
```html
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<script src="https://cdn.jsdelivr.net/npm/@mediapipe/camera_utils/camera_utils.js" crossorigin="anonymous"></script>
<script src="https://cdn.jsdelivr.net/npm/@mediapipe/control_utils/control_utils.js" crossorigin="anonymous"></script>
<script src="https://cdn.jsdelivr.net/npm/@mediapipe/drawing_utils/drawing_utils.js" crossorigin="anonymous"></script>
<script src="https://cdn.jsdelivr.net/npm/@mediapipe/pose/pose.js" crossorigin="anonymous"></script>
</head>
<body>
<div class="container">
<video class="input_video"></video>
<canvas class="output_canvas" width="1280px" height="720px"></canvas>
</div>
</body>
</html>
```
```javascript
<script type="module">
const videoElement = document.getElementsByClassName('input_video')[0];
const canvasElement = document.getElementsByClassName('output_canvas')[0];
const canvasCtx = canvasElement.getContext('2d');
function onResults(results) {
canvasCtx.save();
canvasCtx.clearRect(0, 0, canvasElement.width, canvasElement.height);
canvasCtx.drawImage(
results.image, 0, 0, canvasElement.width, canvasElement.height);
drawConnectors(canvasCtx, results.poseLandmarks, POSE_CONNECTIONS,
{color: '#00FF00', lineWidth: 4});
drawLandmarks(canvasCtx, results.poseLandmarks,
{color: '#FF0000', lineWidth: 2});
canvasCtx.restore();
}
const pose = new Pose({locateFile: (file) => {
return `https://cdn.jsdelivr.net/npm/@mediapipe/pose/${file}`;
}});
pose.setOptions({
upperBodyOnly: false,
smoothLandmarks: true,
minDetectionConfidence: 0.5,
minTrackingConfidence: 0.5
});
pose.onResults(onResults);
const camera = new Camera(videoElement, {
onFrame: async () => {
await pose.send({image: videoElement});
},
width: 1280,
height: 720
});
camera.start();
</script>
```
## Example Apps
Please first see general instructions for
[Android](../getting_started/building_examples.md#android),
[iOS](../getting_started/building_examples.md#ios),
[desktop](../getting_started/building_examples.md#desktop) and
[Python](../getting_started/building_examples.md#python) on how to build
MediaPipe examples.
[Android](../getting_started/android.md), [iOS](../getting_started/ios.md), and
[desktop](../getting_started/cpp.md) on how to build MediaPipe examples.
Note: To visualize a graph, copy the graph and paste it into
[MediaPipe Visualizer](https://viz.mediapipe.dev/). For more information on how
@ -115,6 +337,18 @@ to visualize its associated subgraphs, please see
### Mobile
#### Main Example
* Graph:
[`mediapipe/graphs/pose_tracking/pose_tracking_gpu.pbtxt`](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/pose_tracking/pose_tracking_gpu.pbtxt)
* Android target:
[(or download prebuilt ARM64 APK)](https://drive.google.com/file/d/17GFIrqEJS6W8UHKXlYevTtSCLxN9pWlY/view?usp=sharing)
[`mediapipe/examples/android/src/java/com/google/mediapipe/apps/posetrackinggpu:posetrackinggpu`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/posetrackinggpu/BUILD)
* iOS target:
[`mediapipe/examples/ios/posetrackinggpu:PoseTrackingGpuApp`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/ios/posetrackinggpu/BUILD)
#### Upper-body Only
* Graph:
[`mediapipe/graphs/pose_tracking/upper_body_pose_tracking_gpu.pbtxt`](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/pose_tracking/upper_body_pose_tracking_gpu.pbtxt)
* Android target:
@ -125,9 +359,23 @@ to visualize its associated subgraphs, please see
### Desktop
Please first see general instructions for
[desktop](../getting_started/building_examples.md#desktop) on how to build
MediaPipe examples.
Please first see general instructions for [desktop](../getting_started/cpp.md)
on how to build MediaPipe examples.
#### Main Example
* Running on CPU
* Graph:
[`mediapipe/graphs/pose_tracking/pose_tracking_cpu.pbtxt`](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/pose_tracking/pose_tracking_cpu.pbtxt)
* Target:
[`mediapipe/examples/desktop/pose_tracking:pose_tracking_cpu`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/desktop/pose_tracking/BUILD)
* Running on GPU
* Graph:
[`mediapipe/graphs/pose_tracking/pose_tracking_gpu.pbtxt`](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/pose_tracking/pose_tracking_gpu.pbtxt)
* Target:
[`mediapipe/examples/desktop/pose_tracking:pose_tracking_gpu`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/desktop/pose_tracking/BUILD)
#### Upper-body Only
* Running on CPU
* Graph:
@ -140,48 +388,6 @@ MediaPipe examples.
* Target:
[`mediapipe/examples/desktop/upper_body_pose_tracking:upper_body_pose_tracking_gpu`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/desktop/upper_body_pose_tracking/BUILD)
### Python
MediaPipe Python package is available on
[PyPI](https://pypi.org/project/mediapipe/), and can be installed simply by `pip
install mediapipe` on Linux and macOS, as described below and in this
[colab](https://mediapipe.page.link/mp-py-colab). If you do need to build the
Python package from source, see
[additional instructions](../getting_started/building_examples.md#python).
```bash
# Activate a Python virtual environment.
$ python3 -m venv mp_env && source mp_env/bin/activate
# Install MediaPipe Python package
(mp_env)$ pip install mediapipe
# Run in Python interpreter
(mp_env)$ python3
>>> import mediapipe as mp
>>> pose_tracker = mp.examples.UpperBodyPoseTracker()
# For image input
>>> pose_landmarks, _ = pose_tracker.run(input_file='/path/to/input/file', output_file='/path/to/output/file')
>>> pose_landmarks, annotated_image = pose_tracker.run(input_file='/path/to/file')
# To print out the pose landmarks, you can simply do "print(pose_landmarks)".
# However, the data points can be more accessible with the following approach.
>>> [print('x is', data_point.x, 'y is', data_point.y, 'z is', data_point.z, 'visibility is', data_point.visibility) for data_point in pose_landmarks.landmark]
# For live camera input
# (Press Esc within the output image window to stop the run or let it self terminate after 30 seconds.)
>>> pose_tracker.run_live()
# Close the tracker.
>>> pose_tracker.close()
```
Tip: Use command `deactivate` to exit the Python virtual environment.
### Web
Please refer to [these instructions](../index.md#mediapipe-on-the-web).
## Resources
* Google AI Blog:
@ -189,8 +395,6 @@ Please refer to [these instructions](../index.md#mediapipe-on-the-web).
* Paper:
[BlazePose: On-device Real-time Body Pose Tracking](https://arxiv.org/abs/2006.10204)
([presentation](https://youtu.be/YPpUOTRn5tA))
* Pose detection model:
[TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/pose_detection/pose_detection.tflite)
* Upper-body pose landmark model:
[TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/pose_landmark/pose_landmark_upper_body.tflite)
* [Model card](https://mediapipe.page.link/blazepose-mc)
* [Models and model cards](./models.md#pose)
* [Web demo](https://code.mediapipe.dev/codepen/pose)
* [Python Colab](https://mediapipe.page.link/pose_py_colab)
View File
@ -0,0 +1,142 @@
---
layout: default
title: Pose Classification
parent: Pose
grand_parent: Solutions
nav_order: 1
---
# Pose Classification
{: .no_toc }
<details close markdown="block">
<summary>
Table of contents
</summary>
{: .text-delta }
1. TOC
{:toc}
</details>
---
## Overview
One of the applications
[BlazePose](https://ai.googleblog.com/2020/08/on-device-real-time-body-pose-tracking.html)
can enable is fitness, more specifically pose classification and repetition
counting. In this section we provide basic guidance on building a custom pose
classifier with the help of the [Colabs](#colabs) and wrapping it in a simple
[fitness app](https://mediapipe.page.link/mlkit-pose-classification-demo-app)
powered by [ML Kit](https://developers.google.com/ml-kit). Push-ups and squats
are used for demonstration as two of the most common exercises.
![pose_classification_pushups_and_squats.gif](../images/mobile/pose_classification_pushups_and_squats.gif) |
:--------------------------------------------------------------------------------------------------------: |
*Fig 1. Pose classification and repetition counting with MediaPipe Pose.* |
We picked the
[k-nearest neighbors algorithm](https://en.wikipedia.org/wiki/K-nearest_neighbors_algorithm)
(k-NN) as the classifier. It's simple and easy to start with. The algorithm
determines the object's class based on the closest samples in the training set.
**To build it, one needs to:**
1. Collect image samples of the target exercises and run pose prediction on
them,
2. Convert obtained pose landmarks to a representation suitable for the k-NN
classifier and form a training set using these [Colabs](#colabs),
3. Perform the classification itself followed by repetition counting (e.g., in
the
[ML Kit demo app](https://mediapipe.page.link/mlkit-pose-classification-demo-app)).
## Training Set
To build a good classifier, appropriate samples should be collected for the
training set: roughly a few hundred samples for each terminal state of each
exercise (e.g., the "up" and "down" positions for push-ups). It's important that
the collected samples cover different camera angles, environment conditions,
body shapes, and exercise variations.
![pose_classification_pushups_un_and_down_samples.jpg](../images/mobile/pose_classification_pushups_un_and_down_samples.jpg) |
:--------------------------------------------------------------------------------------------------------------------------: |
*Fig 2. Two terminal states of push-ups.* |
To transform samples into a k-NN classifier training set, both
[`Pose Classification Colab (Basic)`] and
[`Pose Classification Colab (Extended)`] can be used. They use the
[Python Solution API](./pose.md#python-solution-api) to run the BlazePose models
on given images and dump predicted pose landmarks to a CSV file. Additionally,
the [`Pose Classification Colab (Extended)`] provides useful tools to find
outliers (e.g., wrongly predicted poses) and underrepresented classes (e.g., not
covering all camera angles) by classifying each sample against the entire
training set. After that, you'll be able to test the classifier on an arbitrary
video right in the Colab.
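For illustration, the snippet below is a minimal sketch of the same idea using the
[Python Solution API](./pose.md#python-solution-api) directly: run BlazePose on a
list of labeled images and dump the predicted landmarks to a CSV file. The
function name, CSV layout, and file names are assumptions for this example; the
Colabs define their own format.
```python
import csv
import cv2
import mediapipe as mp

mp_pose = mp.solutions.pose

def dump_landmarks_to_csv(image_paths, labels, output_csv='training_set.csv'):
  """Runs BlazePose on each labeled image and writes one CSV row per sample."""
  with mp_pose.Pose(static_image_mode=True) as pose, \
      open(output_csv, 'w', newline='') as csv_file:
    writer = csv.writer(csv_file)
    for path, label in zip(image_paths, labels):
      image = cv2.imread(path)
      if image is None:
        continue  # Skip unreadable files.
      results = pose.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
      if not results.pose_landmarks:
        continue  # Skip images where no pose was detected.
      row = [path, label]
      for landmark in results.pose_landmarks.landmark:
        row += [landmark.x, landmark.y, landmark.z, landmark.visibility]
      writer.writerow(row)
```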
## Classification
Code of the classifier is available both in the
[`Pose Classification Colab (Extended)`] and in the
[ML Kit demo app](https://mediapipe.page.link/mlkit-pose-classification-demo-app).
Please refer to them for details of the approach described below.
The k-NN algorithm used for pose classification requires a feature vector
representation of each sample and a metric to compute the distance between two
such vectors to find the nearest pose samples to a target one.
To convert pose landmarks to a feature vector, we use pairwise distances between
predefined lists of pose joints, such as the distances between wrist and
shoulder, ankle and hip, and the two wrists. Since the algorithm relies on
distances, all poses are normalized to have the same torso size and vertical
torso orientation before the conversion.
![pose_classification_pairwise_distances.png](../images/mobile/pose_classification_pairwise_distances.png) |
:--------------------------------------------------------------------------------------------------------: |
*Fig 3. Main pairwise distances used for the pose feature vector.* |
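As a rough sketch of this conversion (not the exact embedding used in the
Colabs), the landmarks can be scaled to a common torso size and turned into a
vector of pairwise distances with NumPy. The landmark indices follow the
33-point BlazePose topology, the set of joint pairs is illustrative, and the
vertical-orientation normalization is omitted for brevity.
```python
import numpy as np

# Illustrative subset of the 33 BlazePose landmark indices.
LEFT_SHOULDER, RIGHT_SHOULDER = 11, 12
LEFT_WRIST, RIGHT_WRIST = 15, 16
LEFT_HIP, RIGHT_HIP = 23, 24
LEFT_ANKLE, RIGHT_ANKLE = 27, 28

def normalize_pose(landmarks):
  """Centers the pose on the hip center and scales it by the torso size."""
  hip_center = (landmarks[LEFT_HIP] + landmarks[RIGHT_HIP]) / 2
  shoulder_center = (landmarks[LEFT_SHOULDER] + landmarks[RIGHT_SHOULDER]) / 2
  torso_size = np.linalg.norm(shoulder_center - hip_center)
  return (landmarks - hip_center) / torso_size

def pose_to_embedding(landmarks):
  """Converts an (N, 3) array of landmarks into a pairwise-distance vector."""
  normalized = normalize_pose(np.asarray(landmarks, dtype=np.float32))
  joint_pairs = [
      (LEFT_WRIST, LEFT_SHOULDER), (RIGHT_WRIST, RIGHT_SHOULDER),
      (LEFT_ANKLE, LEFT_HIP), (RIGHT_ANKLE, RIGHT_HIP),
      (LEFT_WRIST, RIGHT_WRIST), (LEFT_ANKLE, RIGHT_ANKLE),
  ]
  return np.array([np.linalg.norm(normalized[a] - normalized[b])
                   for a, b in joint_pairs])
```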
To get a better classification result, k-NN search is invoked twice with
different distance metrics (see the sketch following this list):
* First, to filter out samples that are almost the same as the target one but
  have only a few very different values in the feature vector (which indicates
  differently bent joints and thus a different pose class), minimum
  per-coordinate distance is used as the distance metric,
* Then average per-coordinate distance is used to find the nearest pose
  cluster among those from the first search.
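The snippet below is a minimal sketch of this two-pass search over embedded
training samples. Reading the first pass as keeping the candidates whose
worst-case (maximum absolute) per-coordinate difference is smallest, which
discards samples that differ sharply in just a few joints, is an interpretation
on our part; the function name and the `top_n_by_*` counts are illustrative.
```python
import numpy as np
from collections import Counter

def classify_pose(embedding, sample_embeddings, sample_classes,
                  top_n_by_max=30, top_n_by_mean=10):
  """Two-pass k-NN over pose embeddings; returns per-class vote counts."""
  diffs = np.abs(np.asarray(sample_embeddings) - embedding)
  # Pass 1: drop samples that differ sharply in even a few coordinates.
  kept = np.argsort(diffs.max(axis=1))[:top_n_by_max]
  # Pass 2: rank the survivors by average per-coordinate distance.
  nearest = kept[np.argsort(diffs[kept].mean(axis=1))[:top_n_by_mean]]
  return Counter(sample_classes[i] for i in nearest)
```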
Finally, we apply
[exponential moving average](https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average)
(EMA) smoothing to level any noise from pose prediction or classification. To do
that, we search not only for the nearest pose cluster, but we calculate a
probability for each of them and use it for smoothing over time.
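As a sketch, assuming the classifier returns per-class vote counts as above, EMA
smoothing of the normalized counts could look like this (the smoothing factor is
illustrative and would be tuned per application):
```python
class EMASmoothing:
  """Exponential moving average over per-class probabilities."""

  def __init__(self, alpha=0.2):
    self.alpha = alpha
    self.smoothed = {}

  def update(self, class_counts):
    """Blends the latest normalized counts into the running estimate."""
    total = sum(class_counts.values()) or 1
    for name in set(self.smoothed) | set(class_counts):
      probability = class_counts.get(name, 0) / total
      previous = self.smoothed.get(name, probability)
      self.smoothed[name] = (self.alpha * probability +
                             (1 - self.alpha) * previous)
    return dict(self.smoothed)
```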
## Repetition Counting
To count the repetitions, the algorithm monitors the probability of a target
pose class. Let's take push-ups with their "up" and "down" terminal states:
* When the probability of the "down" pose class passes a certain threshold for
the first time, the algorithm marks that the "down" pose class is entered.
* Once the probability drops below the threshold, the algorithm marks that the
"down" pose class has been exited and increases the counter.
To avoid phantom counts when the probability fluctuates around the threshold
(e.g., when the user pauses between the "up" and "down" states), the threshold
used to detect when the state is exited is slightly lower than the one used to
detect when the state is entered. This creates a hysteresis interval in which
neither the pose class nor the counter can change.
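A minimal counter implementing this hysteresis is sketched below; the target
class name and the two thresholds (expressed here as smoothed probabilities) are
illustrative.
```python
class RepetitionCounter:
  """Counts repetitions of a target pose class using enter/exit hysteresis."""

  def __init__(self, class_name='pushups_down',
               enter_threshold=0.8, exit_threshold=0.7):
    self.class_name = class_name
    self.enter_threshold = enter_threshold
    self.exit_threshold = exit_threshold
    self.pose_entered = False
    self.count = 0

  def __call__(self, smoothed_probabilities):
    confidence = smoothed_probabilities.get(self.class_name, 0.0)
    if not self.pose_entered:
      # Enter the target state only when confidence passes the higher threshold.
      self.pose_entered = confidence > self.enter_threshold
    elif confidence < self.exit_threshold:
      # Count one repetition when confidence drops below the lower threshold.
      self.count += 1
      self.pose_entered = False
    return self.count
```
In a live loop, the counter would be invoked once per frame with the smoothed
probabilities, e.g. `counter(smoother.update(votes))`.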
## Future Work
We are actively working on improving BlazePose GHUM 3D's Z prediction. This will
allow us to use joint angles in the feature vectors, which are more natural and
easier to configure (although distances can still be useful for detecting
touches between body parts), and to perform rotation normalization of poses,
reducing the number of camera angles required for accurate k-NN classification.
## Colabs
* [`Pose Classification Colab (Basic)`]
* [`Pose Classification Colab (Extended)`]
[`Pose Classification Colab (Basic)`]: https://mediapipe.page.link/pose_classification_basic
[`Pose Classification Colab (Extended)`]: https://mediapipe.page.link/pose_classification_extended
View File
@ -16,19 +16,24 @@ has_toc: false
<!-- []() in the first cell is needed to preserve table formatting in GitHub Pages. -->
<!-- Whenever this table is updated, paste a copy to ../external_index.md. -->
[]() | Android | iOS | Desktop | Python | Web | Coral
:---------------------------------------------------------------------------------------- | :-----: | :-: | :-----: | :----: | :-: | :---:
[Face Detection](https://google.github.io/mediapipe/solutions/face_detection) | ✅ | ✅ | ✅ | | ✅ | ✅
[Face Mesh](https://google.github.io/mediapipe/solutions/face_mesh) | ✅ | ✅ | ✅ | | |
[Iris](https://google.github.io/mediapipe/solutions/iris) | ✅ | ✅ | ✅ | | ✅ |
[Hands](https://google.github.io/mediapipe/solutions/hands) | ✅ | ✅ | ✅ | | ✅ |
[]() | [Android](https://google.github.io/mediapipe/getting_started/android) | [iOS](https://google.github.io/mediapipe/getting_started/ios) | [C++](https://google.github.io/mediapipe/getting_started/cpp) | [Python](https://google.github.io/mediapipe/getting_started/python) | [JS](https://google.github.io/mediapipe/getting_started/javascript) | [Coral](https://github.com/google/mediapipe/tree/master/mediapipe/examples/coral/README.md)
:---------------------------------------------------------------------------------------- | :-------------------------------------------------------------: | :-----------------------------------------------------: | :-----------------------------------------------------: | :-----------------------------------------------------------: | :-----------------------------------------------------------: | :--------------------------------------------------------------------:
[Face Detection](https://google.github.io/mediapipe/solutions/face_detection) | ✅ | ✅ | ✅ | | ✅ | ✅
[Face Mesh](https://google.github.io/mediapipe/solutions/face_mesh) | ✅ | ✅ | ✅ | | |
[Iris](https://google.github.io/mediapipe/solutions/iris) | ✅ | ✅ | ✅ | | |
[Hands](https://google.github.io/mediapipe/solutions/hands) | ✅ | ✅ | ✅ | | ✅ |
[Pose](https://google.github.io/mediapipe/solutions/pose) | ✅ | ✅ | ✅ | ✅ | ✅ |
[Hair Segmentation](https://google.github.io/mediapipe/solutions/hair_segmentation) | ✅ | | ✅ | | ✅ |
[Holistic](https://google.github.io/mediapipe/solutions/holistic) | ✅ | ✅ | ✅ | ✅ | ✅ |
[Hair Segmentation](https://google.github.io/mediapipe/solutions/hair_segmentation) | ✅ | | ✅ | | |
[Object Detection](https://google.github.io/mediapipe/solutions/object_detection) | ✅ | ✅ | ✅ | | | ✅
[Box Tracking](https://google.github.io/mediapipe/solutions/box_tracking) | ✅ | ✅ | ✅ | | |
[Instant Motion Tracking](https://google.github.io/mediapipe/solutions/instant_motion_tracking) | ✅ | | | | |
[Objectron](https://google.github.io/mediapipe/solutions/objectron) | ✅ | | | | |
[Objectron](https://google.github.io/mediapipe/solutions/objectron) | ✅ | | | | |
[KNIFT](https://google.github.io/mediapipe/solutions/knift) | ✅ | | | | |
[AutoFlip](https://google.github.io/mediapipe/solutions/autoflip) | | | ✅ | | |
[MediaSequence](https://google.github.io/mediapipe/solutions/media_sequence) | | | ✅ | | |
[YouTube 8M](https://google.github.io/mediapipe/solutions/youtube_8m) | | | ✅ | | |
See also
[MediaPipe Models and Model Cards](https://google.github.io/mediapipe/solutions/models)
for ML models released in MediaPipe.
View File
@ -2,14 +2,20 @@
layout: default
title: YouTube-8M Feature Extraction and Model Inference
parent: Solutions
nav_order: 14
nav_order: 15
---
# YouTube-8M Feature Extraction and Model Inference
{: .no_toc }
<details close markdown="block">
<summary>
Table of contents
</summary>
{: .text-delta }
1. TOC
{:toc}
</details>
---
MediaPipe is a useful and general framework for media processing that can assist
View File
@ -26,9 +26,10 @@ To enable tracing and profiling of a mediapipe graph:
1. The profiling library must be linked to the framework.
2. Tracing and profiling must be enabled in the graph configuration.
The profiling library is linked to the framework by default. If needed,
the profiling library can be omitted from the framework using the bazel
command line option: `--define MEDIAPIPE_PROFILING=0`.
The profiling library is linked to the framework by default for Desktop.
If needed, it can be omitted from the framework using the bazel command line
option: `--define MEDIAPIPE_PROFILING=0`. For other platforms, you can use the
bazel command line option `--define MEDIAPIPE_PROFILING=1` to link it.
To enable tracing and profiling, the `CalculatorGraphConfig` (in
[calculator.proto](https://github.com/google/mediapipe/tree/master/mediapipe/framework/calculator.proto))
@ -38,6 +39,7 @@ is a simple setup that turns on tracing and keeps 100 seconds of timing events:
```
profiler_config {
trace_enabled: true
enable_profiler: true
trace_log_interval_count: 200
}
```
@ -71,6 +73,9 @@ MediaPipe will emit data into a pre-specified directory:
You can open the Download Container. Logs will be located in `application
container/.xcappdata/AppData/Documents/`
If Xcode shows empty content for the downloaded container file, you can
right-click it and select 'Show Package Contents' in Finder. Logs
will be located in 'AppData/Documents/'.
![iOS Download Container](../images/visualizer/ios_download_container.png)
@ -144,6 +149,7 @@ we record ten intervals of half a second each. This can be overridden by adding
```bash
profiler_config {
trace_enabled: true
enable_profiler: true
trace_log_path: "/sdcard/profiles/"
}
```
View File
@ -37,7 +37,7 @@ The graph can be modified by adding and editing code in the Editor view.
![New Button](../images/upload_button.png)
* Pressing the "Upload" button will prompt the user to select a local PBTXT
file, which will everwrite the current code within the editor.
file, which will overwrite the current code within the editor.
* Alternatively, code can be pasted directly into the editor window.
View File
@ -2,34 +2,40 @@
"additionalFilePaths" : [
"/BUILD",
"mediapipe/BUILD",
"mediapipe/objc/BUILD",
"mediapipe/framework/BUILD",
"mediapipe/gpu/BUILD",
"mediapipe/objc/testing/app/BUILD",
"mediapipe/examples/ios/common/BUILD",
"mediapipe/examples/ios/helloworld/BUILD",
"mediapipe/examples/ios/facedetectioncpu/BUILD",
"mediapipe/examples/ios/facedetectiongpu/BUILD",
"mediapipe/examples/ios/faceeffect/BUILD",
"mediapipe/examples/ios/facemeshgpu/BUILD",
"mediapipe/examples/ios/handdetectiongpu/BUILD",
"mediapipe/examples/ios/handtrackinggpu/BUILD",
"mediapipe/examples/ios/helloworld/BUILD",
"mediapipe/examples/ios/holistictrackinggpu/BUILD",
"mediapipe/examples/ios/iristrackinggpu/BUILD",
"mediapipe/examples/ios/multihandtrackinggpu/BUILD",
"mediapipe/examples/ios/objectdetectioncpu/BUILD",
"mediapipe/examples/ios/objectdetectiongpu/BUILD",
"mediapipe/examples/ios/upperbodyposetrackinggpu/BUILD"
"mediapipe/examples/ios/objectdetectiontrackinggpu/BUILD",
"mediapipe/examples/ios/posetrackinggpu/BUILD",
"mediapipe/examples/ios/upperbodyposetrackinggpu/BUILD",
"mediapipe/framework/BUILD",
"mediapipe/gpu/BUILD",
"mediapipe/objc/BUILD",
"mediapipe/objc/testing/app/BUILD"
],
"buildTargets" : [
"//mediapipe/examples/ios/helloworld:HelloWorldApp",
"//mediapipe/examples/ios/facedetectioncpu:FaceDetectionCpuApp",
"//mediapipe/examples/ios/facedetectiongpu:FaceDetectionGpuApp",
"//mediapipe/examples/ios/faceeffect:FaceEffectApp",
"//mediapipe/examples/ios/facemeshgpu:FaceMeshGpuApp",
"//mediapipe/examples/ios/handdetectiongpu:HandDetectionGpuApp",
"//mediapipe/examples/ios/handtrackinggpu:HandTrackingGpuApp",
"//mediapipe/examples/ios/helloworld:HelloWorldApp",
"//mediapipe/examples/ios/holistictrackinggpu:HolisticTrackingGpuApp",
"//mediapipe/examples/ios/iristrackinggpu:IrisTrackingGpuApp",
"//mediapipe/examples/ios/multihandtrackinggpu:MultiHandTrackingGpuApp",
"//mediapipe/examples/ios/objectdetectioncpu:ObjectDetectionCpuApp",
"//mediapipe/examples/ios/objectdetectiongpu:ObjectDetectionGpuApp",
"//mediapipe/examples/ios/objectdetectiontrackinggpu:ObjectDetectionTrackingGpuApp",
"//mediapipe/examples/ios/posetrackinggpu:PoseTrackingGpuApp",
"//mediapipe/examples/ios/upperbodyposetrackinggpu:UpperBodyPoseTrackingGpuApp",
"//mediapipe/objc:mediapipe_framework_ios"
],
@ -87,15 +93,18 @@
"mediapipe/examples/ios",
"mediapipe/examples/ios/common",
"mediapipe/examples/ios/common/Base.lproj",
"mediapipe/examples/ios/helloworld",
"mediapipe/examples/ios/facedetectioncpu",
"mediapipe/examples/ios/facedetectiongpu",
"mediapipe/examples/ios/faceeffect",
"mediapipe/examples/ios/faceeffect/Base.lproj",
"mediapipe/examples/ios/handdetectiongpu",
"mediapipe/examples/ios/handtrackinggpu",
"mediapipe/examples/ios/helloworld",
"mediapipe/examples/ios/holistictrackinggpu",
"mediapipe/examples/ios/iristrackinggpu",
"mediapipe/examples/ios/multihandtrackinggpu",
"mediapipe/examples/ios/objectdetectioncpu",
"mediapipe/examples/ios/objectdetectiongpu",
"mediapipe/examples/ios/posetrackinggpu",
"mediapipe/examples/ios/upperbodyposetrackinggpu",
"mediapipe/framework",
"mediapipe/framework/deps",
@ -110,6 +119,7 @@
"mediapipe/graphs",
"mediapipe/graphs/edge_detection",
"mediapipe/graphs/face_detection",
"mediapipe/graphs/face_geometry",
"mediapipe/graphs/hand_tracking",
"mediapipe/graphs/object_detection",
"mediapipe/graphs/pose_tracking",
View File
@ -9,18 +9,21 @@
"packages" : [
"",
"mediapipe",
"mediapipe/objc",
"mediapipe/examples/ios",
"mediapipe/examples/ios/facedetectioncpu",
"mediapipe/examples/ios/facedetectiongpu",
"mediapipe/examples/ios/faceeffect",
"mediapipe/examples/ios/facemeshgpu",
"mediapipe/examples/ios/handdetectiongpu",
"mediapipe/examples/ios/handtrackinggpu",
"mediapipe/examples/ios/holistictrackinggpu",
"mediapipe/examples/ios/iristrackinggpu",
"mediapipe/examples/ios/multihandtrackinggpu",
"mediapipe/examples/ios/objectdetectioncpu",
"mediapipe/examples/ios/objectdetectiongpu",
"mediapipe/examples/ios/upperbodyposetrackinggpu"
"mediapipe/examples/ios/objectdetectiontrackinggpu",
"mediapipe/examples/ios/posetrackinggpu",
"mediapipe/examples/ios/upperbodyposetrackinggpu",
"mediapipe/objc"
],
"projectName" : "Mediapipe",
"workspaceRoot" : "../.."
View File
@ -1,4 +1,4 @@
# Copyright 2019 The MediaPipe Authors.
# Copyright 2019, 2021 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -167,7 +167,7 @@ cc_library(
"//mediapipe/util:time_series_util",
"@com_google_absl//absl/strings",
"@com_google_audio_tools//audio/dsp:resampler",
"@com_google_audio_tools//audio/dsp:resampler_rational_factor",
"@com_google_audio_tools//audio/dsp:resampler_q",
"@eigen_archive//:eigen",
],
alwayslink = 1,
@ -242,6 +242,7 @@ cc_test(
"//mediapipe/framework:calculator_runner",
"//mediapipe/framework/deps:file_path",
"//mediapipe/framework/formats:time_series_header_cc_proto",
"//mediapipe/framework/port:commandlineflags",
"//mediapipe/framework/port:gtest_main",
"//mediapipe/framework/port:parse_text_proto",
],
View File
@ -48,18 +48,17 @@ namespace mediapipe {
// TODO: support decoding multiple streams.
class AudioDecoderCalculator : public CalculatorBase {
public:
static ::mediapipe::Status GetContract(CalculatorContract* cc);
static absl::Status GetContract(CalculatorContract* cc);
::mediapipe::Status Open(CalculatorContext* cc) override;
::mediapipe::Status Process(CalculatorContext* cc) override;
::mediapipe::Status Close(CalculatorContext* cc) override;
absl::Status Open(CalculatorContext* cc) override;
absl::Status Process(CalculatorContext* cc) override;
absl::Status Close(CalculatorContext* cc) override;
private:
std::unique_ptr<AudioDecoder> decoder_;
};
::mediapipe::Status AudioDecoderCalculator::GetContract(
CalculatorContract* cc) {
absl::Status AudioDecoderCalculator::GetContract(CalculatorContract* cc) {
cc->InputSidePackets().Tag("INPUT_FILE_PATH").Set<std::string>();
if (cc->InputSidePackets().HasTag("OPTIONS")) {
cc->InputSidePackets().Tag("OPTIONS").Set<mediapipe::AudioDecoderOptions>();
@ -68,10 +67,10 @@ class AudioDecoderCalculator : public CalculatorBase {
if (cc->Outputs().HasTag("AUDIO_HEADER")) {
cc->Outputs().Tag("AUDIO_HEADER").SetNone();
}
return ::mediapipe::OkStatus();
return absl::OkStatus();
}
::mediapipe::Status AudioDecoderCalculator::Open(CalculatorContext* cc) {
absl::Status AudioDecoderCalculator::Open(CalculatorContext* cc) {
const std::string& input_file_path =
cc->InputSidePackets().Tag("INPUT_FILE_PATH").Get<std::string>();
const auto& decoder_options =
@ -88,10 +87,10 @@ class AudioDecoderCalculator : public CalculatorBase {
cc->Outputs().Tag("AUDIO_HEADER").SetHeader(Adopt(header.release()));
}
cc->Outputs().Tag("AUDIO_HEADER").Close();
return ::mediapipe::OkStatus();
return absl::OkStatus();
}
::mediapipe::Status AudioDecoderCalculator::Process(CalculatorContext* cc) {
absl::Status AudioDecoderCalculator::Process(CalculatorContext* cc) {
Packet data;
int options_index = -1;
auto status = decoder_->GetData(&options_index, &data);
@ -101,7 +100,7 @@ class AudioDecoderCalculator : public CalculatorBase {
return status;
}
::mediapipe::Status AudioDecoderCalculator::Close(CalculatorContext* cc) {
absl::Status AudioDecoderCalculator::Close(CalculatorContext* cc) {
return decoder_->Close();
}
View File
@ -15,6 +15,7 @@
#include "mediapipe/framework/calculator_runner.h"
#include "mediapipe/framework/deps/file_path.h"
#include "mediapipe/framework/formats/time_series_header.pb.h"
#include "mediapipe/framework/port/commandlineflags.h"
#include "mediapipe/framework/port/gmock.h"
#include "mediapipe/framework/port/gtest.h"
#include "mediapipe/framework/port/parse_text_proto.h"
View File
@ -38,7 +38,7 @@ static bool SafeMultiply(int x, int y, int* result) {
}
} // namespace
::mediapipe::Status BasicTimeSeriesCalculatorBase::GetContract(
absl::Status BasicTimeSeriesCalculatorBase::GetContract(
CalculatorContract* cc) {
cc->Inputs().Index(0).Set<Matrix>(
// Input stream with TimeSeriesHeader.
@ -46,10 +46,10 @@ static bool SafeMultiply(int x, int y, int* result) {
cc->Outputs().Index(0).Set<Matrix>(
// Output stream with TimeSeriesHeader.
);
return ::mediapipe::OkStatus();
return absl::OkStatus();
}
::mediapipe::Status BasicTimeSeriesCalculatorBase::Open(CalculatorContext* cc) {
absl::Status BasicTimeSeriesCalculatorBase::Open(CalculatorContext* cc) {
TimeSeriesHeader input_header;
MP_RETURN_IF_ERROR(time_series_util::FillTimeSeriesHeaderIfValid(
cc->Inputs().Index(0).Header(), &input_header));
@ -57,11 +57,13 @@ static bool SafeMultiply(int x, int y, int* result) {
auto output_header = new TimeSeriesHeader(input_header);
MP_RETURN_IF_ERROR(MutateHeader(output_header));
cc->Outputs().Index(0).SetHeader(Adopt(output_header));
return ::mediapipe::OkStatus();
cc->SetOffset(0);
return absl::OkStatus();
}
::mediapipe::Status BasicTimeSeriesCalculatorBase::Process(
CalculatorContext* cc) {
absl::Status BasicTimeSeriesCalculatorBase::Process(CalculatorContext* cc) {
const Matrix& input = cc->Inputs().Index(0).Get<Matrix>();
MP_RETURN_IF_ERROR(time_series_util::IsMatrixShapeConsistentWithHeader(
input, cc->Inputs().Index(0).Header().Get<TimeSeriesHeader>()));
@ -71,12 +73,12 @@ static bool SafeMultiply(int x, int y, int* result) {
*output, cc->Outputs().Index(0).Header().Get<TimeSeriesHeader>()));
cc->Outputs().Index(0).Add(output.release(), cc->InputTimestamp());
return ::mediapipe::OkStatus();
return absl::OkStatus();
}
::mediapipe::Status BasicTimeSeriesCalculatorBase::MutateHeader(
absl::Status BasicTimeSeriesCalculatorBase::MutateHeader(
TimeSeriesHeader* output_header) {
return ::mediapipe::OkStatus();
return absl::OkStatus();
}
// Calculator to sum an input time series across channels. This is
@ -86,9 +88,9 @@ static bool SafeMultiply(int x, int y, int* result) {
class SumTimeSeriesAcrossChannelsCalculator
: public BasicTimeSeriesCalculatorBase {
protected:
::mediapipe::Status MutateHeader(TimeSeriesHeader* output_header) final {
absl::Status MutateHeader(TimeSeriesHeader* output_header) final {
output_header->set_num_channels(1);
return ::mediapipe::OkStatus();
return absl::OkStatus();
}
Matrix ProcessMatrix(const Matrix& input_matrix) final {
@ -104,9 +106,9 @@ REGISTER_CALCULATOR(SumTimeSeriesAcrossChannelsCalculator);
class AverageTimeSeriesAcrossChannelsCalculator
: public BasicTimeSeriesCalculatorBase {
protected:
::mediapipe::Status MutateHeader(TimeSeriesHeader* output_header) final {
absl::Status MutateHeader(TimeSeriesHeader* output_header) final {
output_header->set_num_channels(1);
return ::mediapipe::OkStatus();
return absl::OkStatus();
}
Matrix ProcessMatrix(const Matrix& input_matrix) final {
@ -122,7 +124,7 @@ REGISTER_CALCULATOR(AverageTimeSeriesAcrossChannelsCalculator);
// Options proto: None.
class SummarySaiToPitchogramCalculator : public BasicTimeSeriesCalculatorBase {
protected:
::mediapipe::Status MutateHeader(TimeSeriesHeader* output_header) final {
absl::Status MutateHeader(TimeSeriesHeader* output_header) final {
if (output_header->num_channels() != 1) {
return tool::StatusInvalid(
absl::StrCat("Expected single-channel input, got ",
@ -131,7 +133,7 @@ class SummarySaiToPitchogramCalculator : public BasicTimeSeriesCalculatorBase {
output_header->set_num_channels(output_header->num_samples());
output_header->set_num_samples(1);
output_header->set_sample_rate(output_header->packet_rate());
return ::mediapipe::OkStatus();
return absl::OkStatus();
}
Matrix ProcessMatrix(const Matrix& input_matrix) final {
@ -160,7 +162,7 @@ REGISTER_CALCULATOR(ReverseChannelOrderCalculator);
// Options proto: None.
class FlattenPacketCalculator : public BasicTimeSeriesCalculatorBase {
protected:
::mediapipe::Status MutateHeader(TimeSeriesHeader* output_header) final {
absl::Status MutateHeader(TimeSeriesHeader* output_header) final {
const int num_input_channels = output_header->num_channels();
const int num_input_samples = output_header->num_samples();
RET_CHECK(num_input_channels >= 0)
@ -174,7 +176,7 @@ class FlattenPacketCalculator : public BasicTimeSeriesCalculatorBase {
output_header->set_num_channels(output_num_channels);
output_header->set_num_samples(1);
output_header->set_sample_rate(output_header->packet_rate());
return ::mediapipe::OkStatus();
return absl::OkStatus();
}
Matrix ProcessMatrix(const Matrix& input_matrix) final {
@ -253,10 +255,10 @@ REGISTER_CALCULATOR(DivideByMeanAcrossChannelsCalculator);
// Options proto: None.
class MeanCalculator : public BasicTimeSeriesCalculatorBase {
protected:
::mediapipe::Status MutateHeader(TimeSeriesHeader* output_header) final {
absl::Status MutateHeader(TimeSeriesHeader* output_header) final {
output_header->set_num_samples(1);
output_header->set_sample_rate(output_header->packet_rate());
return ::mediapipe::OkStatus();
return absl::OkStatus();
}
Matrix ProcessMatrix(const Matrix& input_matrix) final {
@ -272,10 +274,10 @@ REGISTER_CALCULATOR(MeanCalculator);
// Options proto: None.
class StandardDeviationCalculator : public BasicTimeSeriesCalculatorBase {
protected:
::mediapipe::Status MutateHeader(TimeSeriesHeader* output_header) final {
absl::Status MutateHeader(TimeSeriesHeader* output_header) final {
output_header->set_num_samples(1);
output_header->set_sample_rate(output_header->packet_rate());
return ::mediapipe::OkStatus();
return absl::OkStatus();
}
Matrix ProcessMatrix(const Matrix& input_matrix) final {
@ -293,9 +295,9 @@ REGISTER_CALCULATOR(StandardDeviationCalculator);
// Options proto: None.
class CovarianceCalculator : public BasicTimeSeriesCalculatorBase {
protected:
::mediapipe::Status MutateHeader(TimeSeriesHeader* output_header) final {
absl::Status MutateHeader(TimeSeriesHeader* output_header) final {
output_header->set_num_samples(output_header->num_channels());
return ::mediapipe::OkStatus();
return absl::OkStatus();
}
Matrix ProcessMatrix(const Matrix& input_matrix) final {
@ -313,9 +315,9 @@ REGISTER_CALCULATOR(CovarianceCalculator);
// Options proto: None.
class L2NormCalculator : public BasicTimeSeriesCalculatorBase {
protected:
::mediapipe::Status MutateHeader(TimeSeriesHeader* output_header) final {
absl::Status MutateHeader(TimeSeriesHeader* output_header) final {
output_header->set_num_channels(1);
return ::mediapipe::OkStatus();
return absl::OkStatus();
}
Matrix ProcessMatrix(const Matrix& input_matrix) final {
@ -385,12 +387,12 @@ REGISTER_CALCULATOR(ElementwiseSquareCalculator);
// Options proto: None.
class FirstHalfSlicerCalculator : public BasicTimeSeriesCalculatorBase {
protected:
::mediapipe::Status MutateHeader(TimeSeriesHeader* output_header) final {
absl::Status MutateHeader(TimeSeriesHeader* output_header) final {
const int num_input_samples = output_header->num_samples();
RET_CHECK(num_input_samples >= 0)
<< "FirstHalfSlicerCalculator: num_input_samples < 0";
output_header->set_num_samples(num_input_samples / 2);
return ::mediapipe::OkStatus();
return absl::OkStatus();
}
Matrix ProcessMatrix(const Matrix& input_matrix) final {
View File
@ -28,16 +28,16 @@ namespace mediapipe {
class BasicTimeSeriesCalculatorBase : public CalculatorBase {
public:
static ::mediapipe::Status GetContract(CalculatorContract* cc);
::mediapipe::Status Open(CalculatorContext* cc) override;
::mediapipe::Status Process(CalculatorContext* cc) override;
static absl::Status GetContract(CalculatorContract* cc);
absl::Status Open(CalculatorContext* cc) final;
absl::Status Process(CalculatorContext* cc) final;
protected:
// Open() calls this method to mutate the output stream header. The input
// to this function will contain a copy of the input stream header, so
// subclasses that do not need to mutate the header do not need to override
// it.
virtual ::mediapipe::Status MutateHeader(TimeSeriesHeader* output_header);
virtual absl::Status MutateHeader(TimeSeriesHeader* output_header);
// Process() calls this method on each packet to compute the output matrix.
virtual Matrix ProcessMatrix(const Matrix& input_matrix) = 0;
View File
@ -66,7 +66,7 @@ std::string PortableDebugString(const TimeSeriesHeader& header) {
// rows corresponding to the new feature space).
class FramewiseTransformCalculatorBase : public CalculatorBase {
public:
static ::mediapipe::Status GetContract(CalculatorContract* cc) {
static absl::Status GetContract(CalculatorContract* cc) {
cc->Inputs().Index(0).Set<Matrix>(
// Sequence of Matrices, each column describing a particular time frame,
// each row a feature dimension, with TimeSeriesHeader.
@ -75,11 +75,11 @@ class FramewiseTransformCalculatorBase : public CalculatorBase {
// Sequence of Matrices, each column describing a particular time frame,
// each row a feature dimension, with TimeSeriesHeader.
);
return ::mediapipe::OkStatus();
return absl::OkStatus();
}
::mediapipe::Status Open(CalculatorContext* cc) override;
::mediapipe::Status Process(CalculatorContext* cc) override;
absl::Status Open(CalculatorContext* cc) final;
absl::Status Process(CalculatorContext* cc) final;
int num_output_channels(void) { return num_output_channels_; }
@ -90,7 +90,7 @@ class FramewiseTransformCalculatorBase : public CalculatorBase {
private:
// Takes header and options, and sets up state including calling
// set_num_output_channels() on the base object.
virtual ::mediapipe::Status ConfigureTransform(const TimeSeriesHeader& header,
virtual absl::Status ConfigureTransform(const TimeSeriesHeader& header,
CalculatorContext* cc) = 0;
// Takes a vector<double> corresponding to an input frame, and
@ -102,23 +102,23 @@ class FramewiseTransformCalculatorBase : public CalculatorBase {
int num_output_channels_;
};
::mediapipe::Status FramewiseTransformCalculatorBase::Open(
CalculatorContext* cc) {
absl::Status FramewiseTransformCalculatorBase::Open(CalculatorContext* cc) {
TimeSeriesHeader input_header;
MP_RETURN_IF_ERROR(time_series_util::FillTimeSeriesHeaderIfValid(
cc->Inputs().Index(0).Header(), &input_header));
::mediapipe::Status status = ConfigureTransform(input_header, cc);
absl::Status status = ConfigureTransform(input_header, cc);
auto output_header = new TimeSeriesHeader(input_header);
output_header->set_num_channels(num_output_channels_);
cc->Outputs().Index(0).SetHeader(Adopt(output_header));
cc->SetOffset(0);
return status;
}
::mediapipe::Status FramewiseTransformCalculatorBase::Process(
CalculatorContext* cc) {
absl::Status FramewiseTransformCalculatorBase::Process(CalculatorContext* cc) {
const Matrix& input = cc->Inputs().Index(0).Get<Matrix>();
const int num_frames = input.cols();
std::unique_ptr<Matrix> output(new Matrix(num_output_channels_, num_frames));
@ -145,7 +145,7 @@ class FramewiseTransformCalculatorBase : public CalculatorBase {
}
cc->Outputs().Index(0).Add(output.release(), cc->InputTimestamp());
return ::mediapipe::OkStatus();
return absl::OkStatus();
}
// Calculator wrapper around the dsp/mfcc/mfcc.cc routine.
@ -170,12 +170,12 @@ class FramewiseTransformCalculatorBase : public CalculatorBase {
// }
class MfccCalculator : public FramewiseTransformCalculatorBase {
public:
static ::mediapipe::Status GetContract(CalculatorContract* cc) {
static absl::Status GetContract(CalculatorContract* cc) {
return FramewiseTransformCalculatorBase::GetContract(cc);
}
private:
::mediapipe::Status ConfigureTransform(const TimeSeriesHeader& header,
absl::Status ConfigureTransform(const TimeSeriesHeader& header,
CalculatorContext* cc) override {
MfccCalculatorOptions mfcc_options = cc->Options<MfccCalculatorOptions>();
mfcc_.reset(new audio_dsp::Mfcc());
@ -194,7 +194,7 @@ class MfccCalculator : public FramewiseTransformCalculatorBase {
// audio_dsp::MelFilterBank needs to know this to
// correctly interpret the spectrogram bins.
if (!header.has_audio_sample_rate()) {
return ::mediapipe::InvalidArgumentError(
return absl::InvalidArgumentError(
absl::StrCat("No audio_sample_rate in input TimeSeriesHeader ",
PortableDebugString(header)));
}
@ -203,9 +203,9 @@ class MfccCalculator : public FramewiseTransformCalculatorBase {
mfcc_->Initialize(input_length, header.audio_sample_rate());
if (initialized) {
return ::mediapipe::OkStatus();
return absl::OkStatus();
} else {
return ::mediapipe::Status(mediapipe::StatusCode::kInternal,
return absl::Status(absl::StatusCode::kInternal,
"Mfcc::Initialize returned uninitialized");
}
}
@ -228,12 +228,12 @@ REGISTER_CALCULATOR(MfccCalculator);
// if you ask for too many channels.
class MelSpectrumCalculator : public FramewiseTransformCalculatorBase {
public:
static ::mediapipe::Status GetContract(CalculatorContract* cc) {
static absl::Status GetContract(CalculatorContract* cc) {
return FramewiseTransformCalculatorBase::GetContract(cc);
}
private:
::mediapipe::Status ConfigureTransform(const TimeSeriesHeader& header,
absl::Status ConfigureTransform(const TimeSeriesHeader& header,
CalculatorContext* cc) override {
MelSpectrumCalculatorOptions mel_spectrum_options =
cc->Options<MelSpectrumCalculatorOptions>();
@ -245,7 +245,7 @@ class MelSpectrumCalculator : public FramewiseTransformCalculatorBase {
// audio_dsp::MelFilterBank needs to know this to
// correctly interpret the spectrogram bins.
if (!header.has_audio_sample_rate()) {
return ::mediapipe::InvalidArgumentError(
return absl::InvalidArgumentError(
absl::StrCat("No audio_sample_rate in input TimeSeriesHeader ",
PortableDebugString(header)));
}
@ -255,9 +255,9 @@ class MelSpectrumCalculator : public FramewiseTransformCalculatorBase {
mel_spectrum_options.max_frequency_hertz());
if (initialized) {
return ::mediapipe::OkStatus();
return absl::OkStatus();
} else {
return ::mediapipe::Status(mediapipe::StatusCode::kInternal,
return absl::Status(absl::StatusCode::kInternal,
"mfcc::Initialize returned uninitialized");
}
}
View File
@ -84,7 +84,7 @@ class FramewiseTransformCalculatorTest
num_samples_per_packet_ = GenerateRandomNonnegInputStream(kNumPackets);
}
::mediapipe::Status Run() { return this->RunGraph(); }
absl::Status Run() { return this->RunGraph(); }
void CheckResults(int expected_num_channels) {
const auto& output_header =
View File
@ -1,4 +1,4 @@
// Copyright 2019 The MediaPipe Authors.
// Copyright 2019, 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@ -16,22 +16,18 @@
#include "mediapipe/calculators/audio/rational_factor_resample_calculator.h"
#include "audio/dsp/resampler_rational_factor.h"
#include "audio/dsp/resampler_q.h"
using audio_dsp::DefaultResamplingKernel;
using audio_dsp::RationalFactorResampler;
using audio_dsp::Resampler;
namespace mediapipe {
::mediapipe::Status RationalFactorResampleCalculator::Process(
CalculatorContext* cc) {
absl::Status RationalFactorResampleCalculator::Process(CalculatorContext* cc) {
return ProcessInternal(cc->Inputs().Index(0).Get<Matrix>(), false, cc);
}
::mediapipe::Status RationalFactorResampleCalculator::Close(
CalculatorContext* cc) {
absl::Status RationalFactorResampleCalculator::Close(CalculatorContext* cc) {
if (initial_timestamp_ == Timestamp::Unstarted()) {
return ::mediapipe::OkStatus();
return absl::OkStatus();
}
Matrix empty_input_frame(num_channels_, 0);
return ProcessInternal(empty_input_frame, true, cc);
@ -40,11 +36,8 @@ namespace mediapipe {
namespace {
void CopyChannelToVector(const Matrix& matrix, int channel,
std::vector<float>* vec) {
vec->clear();
vec->reserve(matrix.cols());
for (int sample = 0; sample < matrix.cols(); ++sample) {
vec->push_back(matrix(channel, sample));
}
vec->resize(matrix.cols());
Eigen::Map<Eigen::ArrayXf>(vec->data(), vec->size()) = matrix.row(channel);
}
void CopyVectorToChannel(const std::vector<float>& vec, Matrix* matrix,
@ -53,17 +46,14 @@ void CopyVectorToChannel(const std::vector<float>& vec, Matrix* matrix,
matrix->resize(matrix->rows(), vec.size());
} else {
CHECK_EQ(vec.size(), matrix->cols());
}
CHECK_LT(channel, matrix->rows());
}
for (int sample = 0; sample < matrix->cols(); ++sample) {
(*matrix)(channel, sample) = vec[sample];
}
matrix->row(channel) =
Eigen::Map<const Eigen::ArrayXf>(vec.data(), vec.size());
}
} // namespace
::mediapipe::Status RationalFactorResampleCalculator::Open(
CalculatorContext* cc) {
absl::Status RationalFactorResampleCalculator::Open(CalculatorContext* cc) {
RationalFactorResampleCalculatorOptions resample_options =
cc->Options<RationalFactorResampleCalculatorOptions>();
@ -88,7 +78,7 @@ void CopyVectorToChannel(const std::vector<float>& vec, Matrix* matrix,
resample_options);
if (!r) {
LOG(ERROR) << "Failed to initialize resampler.";
return ::mediapipe::UnknownError("Failed to initialize resampler.");
return absl::UnknownError("Failed to initialize resampler.");
}
}
}
@ -106,10 +96,10 @@ void CopyVectorToChannel(const std::vector<float>& vec, Matrix* matrix,
initial_timestamp_ = Timestamp::Unstarted();
check_inconsistent_timestamps_ =
resample_options.check_inconsistent_timestamps();
return ::mediapipe::OkStatus();
return absl::OkStatus();
}
::mediapipe::Status RationalFactorResampleCalculator::ProcessInternal(
absl::Status RationalFactorResampleCalculator::ProcessInternal(
const Matrix& input_frame, bool should_flush, CalculatorContext* cc) {
if (initial_timestamp_ == Timestamp::Unstarted()) {
initial_timestamp_ = cc->InputTimestamp();
@ -131,7 +121,7 @@ void CopyVectorToChannel(const std::vector<float>& vec, Matrix* matrix,
*output_frame = input_frame;
} else {
if (!Resample(input_frame, output_frame.get(), should_flush)) {
return ::mediapipe::UnknownError("Resample() failed.");
return absl::UnknownError("Resample() failed.");
}
}
cumulative_output_samples_ += output_frame->cols();
@ -139,7 +129,7 @@ void CopyVectorToChannel(const std::vector<float>& vec, Matrix* matrix,
if (output_frame->cols() > 0) {
cc->Outputs().Index(0).Add(output_frame.release(), output_timestamp);
}
return ::mediapipe::OkStatus();
return absl::OkStatus();
}
bool RationalFactorResampleCalculator::Resample(const Matrix& input_frame,
@ -167,25 +157,28 @@ RationalFactorResampleCalculator::ResamplerFromOptions(
std::unique_ptr<Resampler<float>> resampler;
const auto& rational_factor_options =
options.resampler_rational_factor_options();
std::unique_ptr<DefaultResamplingKernel> kernel;
audio_dsp::QResamplerParams params;
if (rational_factor_options.has_radius() &&
rational_factor_options.has_cutoff() &&
rational_factor_options.has_kaiser_beta()) {
kernel = absl::make_unique<DefaultResamplingKernel>(
source_sample_rate, target_sample_rate,
rational_factor_options.radius(), rational_factor_options.cutoff(),
rational_factor_options.kaiser_beta());
} else {
kernel = absl::make_unique<DefaultResamplingKernel>(source_sample_rate,
target_sample_rate);
// Convert RationalFactorResampler kernel parameters to QResampler
// settings.
params.filter_radius_factor =
rational_factor_options.radius() *
std::min(1.0, target_sample_rate / source_sample_rate);
params.cutoff_proportion = 2 * rational_factor_options.cutoff() /
std::min(source_sample_rate, target_sample_rate);
params.kaiser_beta = rational_factor_options.kaiser_beta();
}
// Set large enough so that the resampling factor between common sample
// rates (e.g. 8kHz, 16kHz, 22.05kHz, 32kHz, 44.1kHz, 48kHz) is exact, and
// that any factor is represented with error less than 0.025%.
const int kMaxDenominator = 2000;
resampler = absl::make_unique<RationalFactorResampler<float>>(
*kernel, kMaxDenominator);
params.max_denominator = 2000;
// NOTE: QResampler supports multichannel resampling, so the code might be
// simplified using a single instance rather than one per channel.
resampler = absl::make_unique<audio_dsp::QResampler<float>>(
source_sample_rate, target_sample_rate, /*num_channels=*/1, params);
if (resampler != nullptr && !resampler->Valid()) {
resampler = std::unique_ptr<Resampler<float>>();
}
View File
@ -1,4 +1,4 @@
// Copyright 2019 The MediaPipe Authors.
// Copyright 2019, 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@ -36,28 +36,31 @@ namespace mediapipe {
// stream's sampling rate is specified by target_sample_rate in the
// RationalFactorResampleCalculatorOptions. The output time series may have
// a varying number of samples per frame.
//
// NOTE: This calculator uses QResampler, despite the name, which supersedes
// RationalFactorResampler.
class RationalFactorResampleCalculator : public CalculatorBase {
public:
struct TestAccess;
static ::mediapipe::Status GetContract(CalculatorContract* cc) {
static absl::Status GetContract(CalculatorContract* cc) {
cc->Inputs().Index(0).Set<Matrix>(
// Single input stream with TimeSeriesHeader.
);
cc->Outputs().Index(0).Set<Matrix>(
// Resampled stream with TimeSeriesHeader.
);
return ::mediapipe::OkStatus();
return absl::OkStatus();
}
// Returns FAIL if the input stream header is invalid or if the
// resampler cannot be initialized.
::mediapipe::Status Open(CalculatorContext* cc) override;
absl::Status Open(CalculatorContext* cc) override;
// Resamples a packet of TimeSeries data. Returns FAIL if the
// resampler state becomes inconsistent.
::mediapipe::Status Process(CalculatorContext* cc) override;
absl::Status Process(CalculatorContext* cc) override;
// Flushes any remaining state. Returns FAIL if the resampler state
// becomes inconsistent.
::mediapipe::Status Close(CalculatorContext* cc) override;
absl::Status Close(CalculatorContext* cc) override;
protected:
typedef audio_dsp::Resampler<float> ResamplerType;
@ -72,8 +75,8 @@ class RationalFactorResampleCalculator : public CalculatorBase {
// Does Timestamp bookkeeping and resampling common to Process() and
// Close(). Returns FAIL if the resampler state becomes
// inconsistent.
::mediapipe::Status ProcessInternal(const Matrix& input_frame,
bool should_flush, CalculatorContext* cc);
absl::Status ProcessInternal(const Matrix& input_frame, bool should_flush,
CalculatorContext* cc);
// Uses the internal resampler_ objects to actually resample each
// row of the input TimeSeries. Returns false if the resampler
View File
@ -1,4 +1,4 @@
// Copyright 2019 The MediaPipe Authors.
// Copyright 2019, 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@ -18,6 +18,8 @@ package mediapipe;
import "mediapipe/framework/calculator.proto";
// NOTE: This calculator uses QResampler, despite the name, which supersedes
// RationalFactorResampler.
message RationalFactorResampleCalculatorOptions {
extend CalculatorOptions {
optional RationalFactorResampleCalculatorOptions ext = 259760074;
@ -27,8 +29,7 @@ message RationalFactorResampleCalculatorOptions {
// stream. Required. Must be greater than 0.
optional double target_sample_rate = 1;
// Parameters for initializing the RationalFactorResampler. See
// RationalFactorResampler for more details.
// Parameters for initializing QResampler. See QResampler for more details.
message ResamplerRationalFactorOptions {
// Kernel radius in units of input samples.
optional double radius = 1;
View File
@ -80,7 +80,7 @@ class RationalFactorResampleCalculatorTest
}
// Initializes and runs the test graph.
::mediapipe::Status Run(double output_sample_rate) {
absl::Status Run(double output_sample_rate) {
options_.set_target_sample_rate(output_sample_rate);
InitializeGraph();
@ -120,7 +120,6 @@ class RationalFactorResampleCalculatorTest
// The exact number of expected samples may vary based on the implementation
// of the resampler since the exact value is not an integer.
// TODO: Reduce this offset to + 1 once cl/185829520 is submitted.
const double expected_num_output_samples = num_input_samples_ * factor;
EXPECT_LE(ceil(expected_num_output_samples), num_output_samples);
EXPECT_GE(ceil(expected_num_output_samples) + 11, num_output_samples);
View File
@ -66,7 +66,7 @@ namespace mediapipe {
// analysis frame will advance from its predecessor by the same time step.
class SpectrogramCalculator : public CalculatorBase {
public:
static ::mediapipe::Status GetContract(CalculatorContract* cc) {
static absl::Status GetContract(CalculatorContract* cc) {
cc->Inputs().Index(0).Set<Matrix>(
// Input stream with TimeSeriesHeader.
);
@ -96,26 +96,34 @@ class SpectrogramCalculator : public CalculatorBase {
);
}
}
return ::mediapipe::OkStatus();
return absl::OkStatus();
}
// Returns FAIL if the input stream header is invalid.
::mediapipe::Status Open(CalculatorContext* cc) override;
absl::Status Open(CalculatorContext* cc) override;
// Outputs at most one packet consisting of a single Matrix with one or
// more columns containing the spectral values from as many input frames
// as are completed by the input samples. Always returns OK.
::mediapipe::Status Process(CalculatorContext* cc) override;
absl::Status Process(CalculatorContext* cc) override;
// Performs zero-padding and processing of any remaining samples
// if pad_final_packet is set.
// Returns OK.
::mediapipe::Status Close(CalculatorContext* cc) override;
absl::Status Close(CalculatorContext* cc) override;
private:
Timestamp CurrentOutputTimestamp(CalculatorContext* cc) {
if (use_local_timestamp_) {
return cc->InputTimestamp();
const Timestamp now = cc->InputTimestamp();
if (now == Timestamp::Done()) {
// During Close the timestamp is not available, send an estimate.
return last_local_output_timestamp_ +
round(last_completed_frames_ * frame_step_samples() *
Timestamp::kTimestampUnitsPerSecond / input_sample_rate_);
}
last_local_output_timestamp_ = now;
return now;
}
return CumulativeOutputTimestamp();
}
@ -138,17 +146,20 @@ class SpectrogramCalculator : public CalculatorBase {
// Convert the output of the spectrogram object into a Matrix (or an
// Eigen::MatrixXcf if complex-valued output is requested) and pass to
// MediaPipe output.
::mediapipe::Status ProcessVector(const Matrix& input_stream,
CalculatorContext* cc);
absl::Status ProcessVector(const Matrix& input_stream, CalculatorContext* cc);
// Templated function to process either real- or complex-output spectrogram.
template <class OutputMatrixType>
::mediapipe::Status ProcessVectorToOutput(
absl::Status ProcessVectorToOutput(
const Matrix& input_stream,
const OutputMatrixType postprocess_output_fn(const OutputMatrixType&),
CalculatorContext* cc);
// Use the MediaPipe timestamp instead of the estimated one. Useful when the
// data is intermittent.
bool use_local_timestamp_;
Timestamp last_local_output_timestamp_;
double input_sample_rate_;
bool pad_final_packet_;
int frame_duration_samples_;
@ -157,6 +168,9 @@ class SpectrogramCalculator : public CalculatorBase {
int64 cumulative_input_samples_;
// How many frames we've emitted, used for calculating output time stamps.
int64 cumulative_completed_frames_;
// How many frames were emitted last, used for estimating the timestamp on
// Close when use_local_timestamp_ is true;
int64 last_completed_frames_;
Timestamp initial_input_timestamp_;
int num_input_channels_;
// How many frequency bins we emit (=N_FFT/2 + 1).
@ -177,7 +191,7 @@ REGISTER_CALCULATOR(SpectrogramCalculator);
// Factor to convert ln(magnitude_squared) to deciBels = 10.0/ln(10.0).
const float SpectrogramCalculator::kLnPowerToDb = 4.342944819032518;
::mediapipe::Status SpectrogramCalculator::Open(CalculatorContext* cc) {
absl::Status SpectrogramCalculator::Open(CalculatorContext* cc) {
SpectrogramCalculatorOptions spectrogram_options =
cc->Options<SpectrogramCalculatorOptions>();
@ -271,11 +285,20 @@ const float SpectrogramCalculator::kLnPowerToDb = 4.342944819032518;
Adopt(multichannel_output_header.release()));
}
cumulative_completed_frames_ = 0;
last_completed_frames_ = 0;
initial_input_timestamp_ = Timestamp::Unstarted();
return ::mediapipe::OkStatus();
if (use_local_timestamp_) {
// Inform the framework that the calculator will output packets at the same
// timestamps as input packets to enable packet queueing optimizations. The
// final packet (emitted from Close()) does not follow this rule but it's
// sufficient that its timestamp is strictly greater than the timestamp of
// the previous packet.
cc->SetOffset(0);
}
return absl::OkStatus();
}
::mediapipe::Status SpectrogramCalculator::Process(CalculatorContext* cc) {
absl::Status SpectrogramCalculator::Process(CalculatorContext* cc) {
if (initial_input_timestamp_ == Timestamp::Unstarted()) {
initial_input_timestamp_ = cc->InputTimestamp();
}
@ -291,7 +314,7 @@ const float SpectrogramCalculator::kLnPowerToDb = 4.342944819032518;
}
template <class OutputMatrixType>
::mediapipe::Status SpectrogramCalculator::ProcessVectorToOutput(
absl::Status SpectrogramCalculator::ProcessVectorToOutput(
const Matrix& input_stream,
const OutputMatrixType postprocess_output_fn(const OutputMatrixType&),
CalculatorContext* cc) {
@ -311,7 +334,7 @@ template <class OutputMatrixType>
if (!spectrogram_generators_[channel]->ComputeSpectrogram(
input_vector, &output_vectors)) {
return ::mediapipe::Status(mediapipe::StatusCode::kInternal,
return absl::Status(absl::StatusCode::kInternal,
"Spectrogram returned failure");
}
if (channel == 0) {
@ -354,12 +377,19 @@ template <class OutputMatrixType>
CurrentOutputTimestamp(cc));
}
cumulative_completed_frames_ += output_vectors.size();
last_completed_frames_ = output_vectors.size();
if (!use_local_timestamp_) {
// In non-local timestamp mode the timestamp of the next packet will be
// equal to CumulativeOutputTimestamp(). Inform the framework about this
// fact to enable packet queueing optimizations.
cc->Outputs().Index(0).SetNextTimestampBound(CumulativeOutputTimestamp());
}
return ::mediapipe::OkStatus();
}
return absl::OkStatus();
}
::mediapipe::Status SpectrogramCalculator::ProcessVector(
const Matrix& input_stream, CalculatorContext* cc) {
absl::Status SpectrogramCalculator::ProcessVector(const Matrix& input_stream,
CalculatorContext* cc) {
switch (output_type_) {
// These blocks deliberately ignore clang-format to preserve the
// "silhouette" of the different cases.
@ -394,13 +424,13 @@ template <class OutputMatrixType>
}
// clang-format on
default: {
return ::mediapipe::Status(mediapipe::StatusCode::kInvalidArgument,
return absl::Status(absl::StatusCode::kInvalidArgument,
"Unrecognized spectrogram output type.");
}
}
}
::mediapipe::Status SpectrogramCalculator::Close(CalculatorContext* cc) {
absl::Status SpectrogramCalculator::Close(CalculatorContext* cc) {
if (cumulative_input_samples_ > 0 && pad_final_packet_) {
// We can flush any remaining samples by sending frame_step_samples - 1
// zeros to the Process method, and letting it do its thing,
@ -416,7 +446,7 @@ template <class OutputMatrixType>
Matrix::Zero(num_input_channels_, required_padding_samples), cc);
}
return ::mediapipe::OkStatus();
return absl::OkStatus();
}
} // namespace mediapipe
View File
@ -50,7 +50,7 @@ class SpectrogramCalculatorTest
}
// Initializes and runs the test graph.
::mediapipe::Status Run() {
absl::Status Run() {
// Now that options are set, we can set up some internal constants.
frame_duration_samples_ =
round(options_.frame_duration_seconds() * input_sample_rate_);
View File
@ -41,17 +41,17 @@ namespace mediapipe {
// }
class StabilizedLogCalculator : public CalculatorBase {
public:
static ::mediapipe::Status GetContract(CalculatorContract* cc) {
static absl::Status GetContract(CalculatorContract* cc) {
cc->Inputs().Index(0).Set<Matrix>(
// Input stream with TimeSeriesHeader.
);
cc->Outputs().Index(0).Set<Matrix>(
// Output stabilized log stream with TimeSeriesHeader.
);
return ::mediapipe::OkStatus();
return absl::OkStatus();
}
::mediapipe::Status Open(CalculatorContext* cc) override {
absl::Status Open(CalculatorContext* cc) override {
StabilizedLogCalculatorOptions stabilized_log_calculator_options =
cc->Options<StabilizedLogCalculatorOptions>();
@ -70,23 +70,23 @@ class StabilizedLogCalculator : public CalculatorBase {
cc->Outputs().Index(0).SetHeader(
Adopt(new TimeSeriesHeader(input_header)));
}
return ::mediapipe::OkStatus();
return absl::OkStatus();
}
::mediapipe::Status Process(CalculatorContext* cc) override {
absl::Status Process(CalculatorContext* cc) override {
auto input_matrix = cc->Inputs().Index(0).Get<Matrix>();
if (input_matrix.array().isNaN().any()) {
return ::mediapipe::InvalidArgumentError("NaN input to log operation.");
return absl::InvalidArgumentError("NaN input to log operation.");
}
if (check_nonnegativity_) {
if (input_matrix.minCoeff() < 0.0) {
return ::mediapipe::OutOfRangeError("Negative input to log operation.");
return absl::OutOfRangeError("Negative input to log operation.");
}
}
std::unique_ptr<Matrix> output_frame(new Matrix(
output_scale_ * (input_matrix.array() + stabilizer_).log().matrix()));
cc->Outputs().Index(0).Add(output_frame.release(), cc->InputTimestamp());
return ::mediapipe::OkStatus();
return absl::OkStatus();
}
private:


@ -66,26 +66,26 @@ namespace mediapipe {
// cumulative_completed_samples / sample_rate_.
class TimeSeriesFramerCalculator : public CalculatorBase {
public:
static ::mediapipe::Status GetContract(CalculatorContract* cc) {
static absl::Status GetContract(CalculatorContract* cc) {
cc->Inputs().Index(0).Set<Matrix>(
// Input stream with TimeSeriesHeader.
);
cc->Outputs().Index(0).Set<Matrix>(
// Fixed length time series Packets with TimeSeriesHeader.
);
return ::mediapipe::OkStatus();
return absl::OkStatus();
}
// Returns FAIL if the input stream header is invalid.
::mediapipe::Status Open(CalculatorContext* cc) override;
absl::Status Open(CalculatorContext* cc) override;
// Outputs as many framed packets as possible given the accumulated
// input. Always returns OK.
::mediapipe::Status Process(CalculatorContext* cc) override;
absl::Status Process(CalculatorContext* cc) override;
// Flushes any remaining samples in a zero-padded packet. Always
// returns OK.
::mediapipe::Status Close(CalculatorContext* cc) override;
absl::Status Close(CalculatorContext* cc) override;
private:
// Adds input data to the internal buffer.
@ -134,7 +134,6 @@ class TimeSeriesFramerCalculator : public CalculatorBase {
// emulate_fractional_frame_overlap is true.
double average_frame_step_samples_;
int samples_still_to_drop_;
int64 cumulative_input_samples_;
int64 cumulative_output_frames_;
// "Completed" samples are samples that are no longer needed because
// the framer has completely stepped past them (taking into account
@ -163,8 +162,6 @@ void TimeSeriesFramerCalculator::EnqueueInput(CalculatorContext* cc) {
sample_buffer_.emplace_back(std::make_pair(
input_frame.col(i), CurrentSampleTimestamp(cc->InputTimestamp(), i)));
}
cumulative_input_samples_ += input_frame.cols();
}
void TimeSeriesFramerCalculator::FrameOutput(CalculatorContext* cc) {
@ -203,9 +200,15 @@ void TimeSeriesFramerCalculator::FrameOutput(CalculatorContext* cc) {
++cumulative_output_frames_;
cumulative_completed_samples_ += frame_step_samples;
}
if (!use_local_timestamp_) {
// In non-local timestamp mode the timestamp of the next packet will be
// equal to CumulativeOutputTimestamp(). Inform the framework about this
// fact to enable packet queueing optimizations.
cc->Outputs().Index(0).SetNextTimestampBound(CumulativeOutputTimestamp());
}
}
::mediapipe::Status TimeSeriesFramerCalculator::Process(CalculatorContext* cc) {
absl::Status TimeSeriesFramerCalculator::Process(CalculatorContext* cc) {
if (initial_input_timestamp_ == Timestamp::Unstarted()) {
initial_input_timestamp_ = cc->InputTimestamp();
current_timestamp_ = initial_input_timestamp_;
@ -214,10 +217,10 @@ void TimeSeriesFramerCalculator::FrameOutput(CalculatorContext* cc) {
EnqueueInput(cc);
FrameOutput(cc);
return ::mediapipe::OkStatus();
return absl::OkStatus();
}
::mediapipe::Status TimeSeriesFramerCalculator::Close(CalculatorContext* cc) {
absl::Status TimeSeriesFramerCalculator::Close(CalculatorContext* cc) {
while (samples_still_to_drop_ > 0 && !sample_buffer_.empty()) {
sample_buffer_.pop_front();
--samples_still_to_drop_;
@ -234,10 +237,10 @@ void TimeSeriesFramerCalculator::FrameOutput(CalculatorContext* cc) {
CurrentOutputTimestamp());
}
return ::mediapipe::OkStatus();
return absl::OkStatus();
}
::mediapipe::Status TimeSeriesFramerCalculator::Open(CalculatorContext* cc) {
absl::Status TimeSeriesFramerCalculator::Open(CalculatorContext* cc) {
TimeSeriesFramerCalculatorOptions framer_options =
cc->Options<TimeSeriesFramerCalculatorOptions>();
@ -286,7 +289,6 @@ void TimeSeriesFramerCalculator::FrameOutput(CalculatorContext* cc) {
}
cc->Outputs().Index(0).SetHeader(Adopt(output_header));
cumulative_completed_samples_ = 0;
cumulative_input_samples_ = 0;
cumulative_output_frames_ = 0;
samples_still_to_drop_ = 0;
initial_input_timestamp_ = Timestamp::Unstarted();
@ -317,7 +319,7 @@ void TimeSeriesFramerCalculator::FrameOutput(CalculatorContext* cc) {
}
use_local_timestamp_ = framer_options.use_local_timestamp();
return ::mediapipe::OkStatus();
return absl::OkStatus();
}
} // namespace mediapipe


@ -69,7 +69,7 @@ class TimeSeriesFramerCalculatorTest
}
// Initializes and runs the test graph.
::mediapipe::Status Run() {
absl::Status Run() {
InitializeGraph();
FillInputHeader();
@ -441,7 +441,7 @@ class TimeSeriesFramerCalculatorTimestampingTest
}
}
::mediapipe::Status RunTimestampTest() {
absl::Status RunTimestampTest() {
InitializeGraph();
InitializeInputForTimeStampingTest();
FillInputHeader();


@ -13,181 +13,131 @@
# limitations under the License.
#
load("//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library")
load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library")
licenses(["notice"])
package(default_visibility = ["//visibility:private"])
proto_library(
mediapipe_proto_library(
name = "concatenate_vector_calculator_proto",
srcs = ["concatenate_vector_calculator.proto"],
visibility = ["//visibility:public"],
deps = ["//mediapipe/framework:calculator_proto"],
deps = [
"//mediapipe/framework:calculator_options_proto",
"//mediapipe/framework:calculator_proto",
],
)
proto_library(
mediapipe_proto_library(
name = "dequantize_byte_array_calculator_proto",
srcs = ["dequantize_byte_array_calculator.proto"],
visibility = ["//visibility:public"],
deps = ["//mediapipe/framework:calculator_proto"],
deps = [
"//mediapipe/framework:calculator_options_proto",
"//mediapipe/framework:calculator_proto",
],
)
proto_library(
mediapipe_proto_library(
name = "packet_cloner_calculator_proto",
srcs = ["packet_cloner_calculator.proto"],
visibility = ["//visibility:public"],
deps = ["//mediapipe/framework:calculator_proto"],
deps = [
"//mediapipe/framework:calculator_options_proto",
"//mediapipe/framework:calculator_proto",
],
)
proto_library(
mediapipe_proto_library(
name = "packet_resampler_calculator_proto",
srcs = ["packet_resampler_calculator.proto"],
visibility = ["//visibility:public"],
deps = ["//mediapipe/framework:calculator_proto"],
deps = [
"//mediapipe/framework:calculator_options_proto",
"//mediapipe/framework:calculator_proto",
],
)
proto_library(
mediapipe_proto_library(
name = "packet_thinner_calculator_proto",
srcs = ["packet_thinner_calculator.proto"],
visibility = ["//visibility:public"],
deps = ["//mediapipe/framework:calculator_proto"],
deps = [
"//mediapipe/framework:calculator_options_proto",
"//mediapipe/framework:calculator_proto",
],
)
proto_library(
mediapipe_proto_library(
name = "split_vector_calculator_proto",
srcs = ["split_vector_calculator.proto"],
visibility = ["//visibility:public"],
deps = ["//mediapipe/framework:calculator_proto"],
deps = [
"//mediapipe/framework:calculator_options_proto",
"//mediapipe/framework:calculator_proto",
],
)
proto_library(
mediapipe_proto_library(
name = "quantize_float_vector_calculator_proto",
srcs = ["quantize_float_vector_calculator.proto"],
visibility = ["//visibility:public"],
deps = ["//mediapipe/framework:calculator_proto"],
deps = [
"//mediapipe/framework:calculator_options_proto",
"//mediapipe/framework:calculator_proto",
],
)
proto_library(
mediapipe_proto_library(
name = "sequence_shift_calculator_proto",
srcs = ["sequence_shift_calculator.proto"],
visibility = ["//visibility:public"],
deps = [
"//mediapipe/framework:calculator_options_proto",
"//mediapipe/framework:calculator_proto",
],
)
proto_library(
mediapipe_proto_library(
name = "gate_calculator_proto",
srcs = ["gate_calculator.proto"],
visibility = ["//visibility:public"],
deps = [
"//mediapipe/framework:calculator_options_proto",
"//mediapipe/framework:calculator_proto",
],
)
proto_library(
mediapipe_proto_library(
name = "constant_side_packet_calculator_proto",
srcs = ["constant_side_packet_calculator.proto"],
visibility = ["//visibility:public"],
deps = [
"//mediapipe/framework:calculator_options_proto",
"//mediapipe/framework:calculator_proto",
"//mediapipe/framework/formats:classification_proto",
],
)
mediapipe_proto_library(
name = "clip_vector_size_calculator_proto",
srcs = ["clip_vector_size_calculator.proto"],
visibility = ["//visibility:public"],
deps = [
"//mediapipe/framework:calculator_options_proto",
"//mediapipe/framework:calculator_proto",
],
)
proto_library(
name = "clip_vector_size_calculator_proto",
srcs = ["clip_vector_size_calculator.proto"],
mediapipe_proto_library(
name = "flow_limiter_calculator_proto",
srcs = ["flow_limiter_calculator.proto"],
visibility = ["//visibility:public"],
deps = ["//mediapipe/framework:calculator_proto"],
)
mediapipe_cc_proto_library(
name = "packet_cloner_calculator_cc_proto",
srcs = ["packet_cloner_calculator.proto"],
cc_deps = ["//mediapipe/framework:calculator_cc_proto"],
visibility = ["//visibility:public"],
deps = [":packet_cloner_calculator_proto"],
)
mediapipe_cc_proto_library(
name = "packet_resampler_calculator_cc_proto",
srcs = ["packet_resampler_calculator.proto"],
cc_deps = ["//mediapipe/framework:calculator_cc_proto"],
visibility = ["//visibility:public"],
deps = [":packet_resampler_calculator_proto"],
)
mediapipe_cc_proto_library(
name = "packet_thinner_calculator_cc_proto",
srcs = ["packet_thinner_calculator.proto"],
cc_deps = ["//mediapipe/framework:calculator_cc_proto"],
visibility = ["//visibility:public"],
deps = [":packet_thinner_calculator_proto"],
)
mediapipe_cc_proto_library(
name = "split_vector_calculator_cc_proto",
srcs = ["split_vector_calculator.proto"],
cc_deps = ["//mediapipe/framework:calculator_cc_proto"],
visibility = ["//visibility:public"],
deps = [":split_vector_calculator_proto"],
)
mediapipe_cc_proto_library(
name = "concatenate_vector_calculator_cc_proto",
srcs = ["concatenate_vector_calculator.proto"],
cc_deps = ["//mediapipe/framework:calculator_cc_proto"],
visibility = ["//visibility:public"],
deps = [":concatenate_vector_calculator_proto"],
)
mediapipe_cc_proto_library(
name = "clip_vector_size_calculator_cc_proto",
srcs = ["clip_vector_size_calculator.proto"],
cc_deps = ["//mediapipe/framework:calculator_cc_proto"],
visibility = ["//visibility:public"],
deps = [":clip_vector_size_calculator_proto"],
)
mediapipe_cc_proto_library(
name = "dequantize_byte_array_calculator_cc_proto",
srcs = ["dequantize_byte_array_calculator.proto"],
cc_deps = ["//mediapipe/framework:calculator_cc_proto"],
visibility = ["//visibility:public"],
deps = [":dequantize_byte_array_calculator_proto"],
)
mediapipe_cc_proto_library(
name = "quantize_float_vector_calculator_cc_proto",
srcs = ["quantize_float_vector_calculator.proto"],
cc_deps = ["//mediapipe/framework:calculator_cc_proto"],
visibility = ["//visibility:public"],
deps = [":quantize_float_vector_calculator_proto"],
)
mediapipe_cc_proto_library(
name = "sequence_shift_calculator_cc_proto",
srcs = ["sequence_shift_calculator.proto"],
cc_deps = ["//mediapipe/framework:calculator_cc_proto"],
visibility = ["//visibility:public"],
deps = [":sequence_shift_calculator_proto"],
)
mediapipe_cc_proto_library(
name = "gate_calculator_cc_proto",
srcs = ["gate_calculator.proto"],
cc_deps = ["//mediapipe/framework:calculator_cc_proto"],
visibility = ["//visibility:public"],
deps = [":gate_calculator_proto"],
)
mediapipe_cc_proto_library(
name = "constant_side_packet_calculator_cc_proto",
srcs = ["constant_side_packet_calculator.proto"],
cc_deps = ["//mediapipe/framework:calculator_cc_proto"],
visibility = ["//visibility:public"],
deps = [":constant_side_packet_calculator_proto"],
deps = [
"//mediapipe/framework:calculator_options_proto",
"//mediapipe/framework:calculator_proto",
],
)
cc_library(
@ -196,6 +146,7 @@ cc_library(
visibility = ["//visibility:public"],
deps = [
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/api2:node",
"//mediapipe/framework/port:logging",
"//mediapipe/framework/port:status",
],
@ -286,15 +237,28 @@ cc_library(
name = "concatenate_vector_calculator",
srcs = ["concatenate_vector_calculator.cc"],
hdrs = ["concatenate_vector_calculator.h"],
copts = select({
# Needed for "//mediapipe/framework/formats:tensor" compatibility on Apple
# platforms for Metal pulled in via the tensor.h header.
"//mediapipe:apple": [
"-x objective-c++",
"-fobjc-arc", # enable reference-counting
],
"//conditions:default": [],
}),
visibility = ["//visibility:public"],
deps = [
":concatenate_vector_calculator_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/api2:node",
"//mediapipe/framework/api2:port",
"//mediapipe/framework/formats:classification_cc_proto",
"//mediapipe/framework/formats:landmark_cc_proto",
"//mediapipe/framework/formats:tensor",
"//mediapipe/framework/port:integral_types",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"//mediapipe/framework:calculator_framework",
"//mediapipe/util:render_data_cc_proto",
"@org_tensorflow//tensorflow/lite:framework",
] + select({
"//mediapipe/gpu:disable_gpu": [],
@ -325,6 +289,7 @@ cc_library(
deps = [
":concatenate_vector_calculator_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/api2:node",
"//mediapipe/framework/formats:landmark_cc_proto",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
@ -432,6 +397,7 @@ cc_library(
],
deps = [
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/api2:node",
"//mediapipe/framework/port:status",
],
alwayslink = 1,
@ -445,7 +411,7 @@ cc_library(
],
deps = [
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework:timestamp",
"//mediapipe/framework/api2:node",
"//mediapipe/framework/formats:matrix",
"//mediapipe/framework/port:status",
"@eigen_archive//:eigen",
@ -461,7 +427,7 @@ cc_library(
],
deps = [
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework:timestamp",
"//mediapipe/framework/api2:node",
"//mediapipe/framework/formats:matrix",
"//mediapipe/framework/port:status",
"@eigen_archive//:eigen",
@ -477,6 +443,7 @@ cc_library(
],
deps = [
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/api2:node",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/stream_handler:mux_input_stream_handler",
],
@ -589,6 +556,7 @@ cc_library(
],
deps = [
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/api2:node",
"//mediapipe/framework/port:ret_check",
],
alwayslink = 1,
@ -645,6 +613,7 @@ cc_library(
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework:packet",
"//mediapipe/framework:timestamp",
"//mediapipe/framework/api2:node",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"//mediapipe/framework/stream_handler:immediate_input_stream_handler",
@ -657,6 +626,7 @@ cc_library(
srcs = ["flow_limiter_calculator.cc"],
visibility = ["//visibility:public"],
deps = [
":flow_limiter_calculator_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework:packet",
"//mediapipe/framework:timestamp",
@ -832,6 +802,7 @@ cc_test(
srcs = ["flow_limiter_calculator_test.cc"],
deps = [
":flow_limiter_calculator",
":flow_limiter_calculator_cc_proto",
"//mediapipe/calculators/core:counting_source_calculator",
"//mediapipe/calculators/core:pass_through_calculator",
"//mediapipe/framework:calculator_framework",
@ -843,6 +814,8 @@ cc_test(
"//mediapipe/framework/port:integral_types",
"//mediapipe/framework/port:parse_text_proto",
"//mediapipe/framework/stream_handler:immediate_input_stream_handler",
"//mediapipe/framework/tool:simulation_clock",
"//mediapipe/framework/tool:simulation_clock_executor",
"//mediapipe/framework/tool:sink",
"@com_google_absl//absl/time",
],
@ -852,14 +825,23 @@ cc_library(
name = "split_vector_calculator",
srcs = ["split_vector_calculator.cc"],
hdrs = ["split_vector_calculator.h"],
copts = select({
"//mediapipe:apple": [
"-x objective-c++",
"-fobjc-arc", # enable reference-counting
],
"//conditions:default": [],
}),
visibility = ["//visibility:public"],
deps = [
":split_vector_calculator_cc_proto",
"//mediapipe/framework/formats:detection_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/formats:landmark_cc_proto",
"//mediapipe/framework/formats:classification_cc_proto",
"//mediapipe/framework/formats:rect_cc_proto",
"//mediapipe/framework/formats:matrix",
"//mediapipe/framework/formats:tensor",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"//mediapipe/util:resource_util",
@ -984,6 +966,7 @@ cc_library(
deps = [
":sequence_shift_calculator_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/api2:node",
"//mediapipe/framework/port:status",
],
alwayslink = 1,
@ -1033,6 +1016,7 @@ cc_library(
visibility = ["//visibility:public"],
deps = [
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/api2:node",
"//mediapipe/framework/formats:matrix",
"//mediapipe/framework/port:integral_types",
"//mediapipe/framework/port:logging",
@ -1068,6 +1052,7 @@ cc_library(
visibility = ["//visibility:public"],
deps = [
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/api2:node",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
],
@ -1121,6 +1106,7 @@ cc_library(
":constant_side_packet_calculator_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework:collection_item_id",
"//mediapipe/framework/formats:classification_cc_proto",
"//mediapipe/framework/port:integral_types",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",


@ -12,11 +12,13 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/framework/api2/node.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/canonical_errors.h"
#include "mediapipe/framework/port/logging.h"
namespace mediapipe {
namespace api2 {
// Attach the header from a stream or side input to another stream.
//
@ -42,49 +44,41 @@ namespace mediapipe {
// output_stream: "audio_with_header"
// }
//
class AddHeaderCalculator : public CalculatorBase {
class AddHeaderCalculator : public Node {
public:
static ::mediapipe::Status GetContract(CalculatorContract* cc) {
bool has_side_input = false;
bool has_header_stream = false;
if (cc->InputSidePackets().HasTag("HEADER")) {
cc->InputSidePackets().Tag("HEADER").SetAny();
has_side_input = true;
}
if (cc->Inputs().HasTag("HEADER")) {
cc->Inputs().Tag("HEADER").SetNone();
has_header_stream = true;
}
if (has_side_input == has_header_stream) {
return mediapipe::InvalidArgumentError(
static constexpr Input<NoneType>::Optional kHeader{"HEADER"};
static constexpr SideInput<AnyType>::Optional kHeaderSide{"HEADER"};
static constexpr Input<AnyType> kData{"DATA"};
static constexpr Output<SameType<kData>> kOut{""};
MEDIAPIPE_NODE_CONTRACT(kHeader, kHeaderSide, kData, kOut);
static absl::Status UpdateContract(CalculatorContract* cc) {
if (kHeader(cc).IsConnected() == kHeaderSide(cc).IsConnected()) {
return absl::InvalidArgumentError(
"Header must be provided via exactly one of side input and input "
"stream");
}
cc->Inputs().Tag("DATA").SetAny();
cc->Outputs().Index(0).SetSameAs(&cc->Inputs().Tag("DATA"));
return ::mediapipe::OkStatus();
return absl::OkStatus();
}
::mediapipe::Status Open(CalculatorContext* cc) override {
Packet header;
if (cc->InputSidePackets().HasTag("HEADER")) {
header = cc->InputSidePackets().Tag("HEADER");
}
if (cc->Inputs().HasTag("HEADER")) {
header = cc->Inputs().Tag("HEADER").Header();
}
absl::Status Open(CalculatorContext* cc) override {
const PacketBase& header =
kHeader(cc).IsConnected() ? kHeader(cc).Header() : kHeaderSide(cc);
if (!header.IsEmpty()) {
cc->Outputs().Index(0).SetHeader(header);
kOut(cc).SetHeader(header);
}
cc->SetOffset(TimestampDiff(0));
return ::mediapipe::OkStatus();
cc->SetOffset(0);
return absl::OkStatus();
}
::mediapipe::Status Process(CalculatorContext* cc) override {
cc->Outputs().Index(0).AddPacket(cc->Inputs().Tag("DATA").Value());
return ::mediapipe::OkStatus();
absl::Status Process(CalculatorContext* cc) override {
kOut(cc).Send(kData(cc).packet());
return absl::OkStatus();
}
};
REGISTER_CALCULATOR(AddHeaderCalculator);
MEDIAPIPE_REGISTER_NODE(AddHeaderCalculator);
} // namespace api2
} // namespace mediapipe
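
The AddHeaderCalculator rewrite above is representative of the api2 migration pattern repeated throughout this commit: ports become constexpr members, GetContract collapses into MEDIAPIPE_NODE_CONTRACT (plus an optional UpdateContract), and packets move through port accessors instead of cc->Inputs()/cc->Outputs(). A minimal sketch of that pattern follows; the node name and tags are hypothetical and chosen only for illustration, not part of this commit.

#include "mediapipe/framework/api2/node.h"
#include "mediapipe/framework/calculator_framework.h"

namespace mediapipe {
namespace api2 {

// Hypothetical example node: forwards its single input packet to its single
// output, using the declarative api2 style shown in AddHeaderCalculator.
class PassThroughExampleNode : public Node {
 public:
  static constexpr Input<AnyType> kIn{"IN"};
  static constexpr Output<SameType<kIn>> kOut{"OUT"};
  MEDIAPIPE_NODE_CONTRACT(kIn, kOut);

  absl::Status Process(CalculatorContext* cc) override {
    if (!kIn(cc).IsEmpty()) {
      // Sending the incoming packet preserves its payload and timestamp.
      kOut(cc).Send(kIn(cc).packet());
    }
    return absl::OkStatus();
  }
};
MEDIAPIPE_REGISTER_NODE(PassThroughExampleNode);

}  // namespace api2
}  // namespace mediapipe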


@ -153,7 +153,7 @@ TEST_F(AddHeaderCalculatorTest, UsingBothSideInputAndStream) {
}
// Run should fail because header can only be provided one way.
EXPECT_EQ(runner.Run().code(), ::mediapipe::InvalidArgumentError("").code());
EXPECT_EQ(runner.Run().code(), absl::InvalidArgumentError("").code());
}
} // namespace mediapipe


@ -42,22 +42,22 @@ REGISTER_CALCULATOR(BeginLoopIntegerCalculator);
class IncrementCalculator : public CalculatorBase {
public:
static ::mediapipe::Status GetContract(CalculatorContract* cc) {
static absl::Status GetContract(CalculatorContract* cc) {
cc->Inputs().Index(0).Set<int>();
cc->Outputs().Index(0).Set<int>();
return ::mediapipe::OkStatus();
return absl::OkStatus();
}
::mediapipe::Status Open(CalculatorContext* cc) override {
absl::Status Open(CalculatorContext* cc) override {
cc->SetOffset(TimestampDiff(0));
return ::mediapipe::OkStatus();
return absl::OkStatus();
}
::mediapipe::Status Process(CalculatorContext* cc) override {
absl::Status Process(CalculatorContext* cc) override {
const int& input_int = cc->Inputs().Index(0).Get<int>();
auto output_int = absl::make_unique<int>(input_int + 1);
cc->Outputs().Index(0).Add(output_int.release(), cc->InputTimestamp());
return ::mediapipe::OkStatus();
return absl::OkStatus();
}
};
@ -166,19 +166,19 @@ TEST_F(BeginEndLoopCalculatorGraphTest, MultipleVectors) {
// bound update.
class PassThroughOrEmptyVectorCalculator : public CalculatorBase {
public:
static ::mediapipe::Status GetContract(CalculatorContract* cc) {
static absl::Status GetContract(CalculatorContract* cc) {
cc->SetProcessTimestampBounds(true);
cc->Inputs().Index(0).Set<std::vector<int>>();
cc->Outputs().Index(0).Set<std::vector<int>>();
return ::mediapipe::OkStatus();
return absl::OkStatus();
}
::mediapipe::Status Open(CalculatorContext* cc) override {
absl::Status Open(CalculatorContext* cc) override {
cc->SetOffset(TimestampDiff(0));
return ::mediapipe::OkStatus();
return absl::OkStatus();
}
::mediapipe::Status Process(CalculatorContext* cc) override {
absl::Status Process(CalculatorContext* cc) override {
if (!cc->Inputs().Index(0).IsEmpty()) {
cc->Outputs().Index(0).AddPacket(cc->Inputs().Index(0).Value());
} else {
@ -186,7 +186,7 @@ class PassThroughOrEmptyVectorCalculator : public CalculatorBase {
MakePacket<std::vector<int>>(std::vector<int>())
.At(cc->InputTimestamp()));
}
return ::mediapipe::OkStatus();
return absl::OkStatus();
}
};
@ -311,24 +311,24 @@ TEST_F(BeginEndLoopCalculatorGraphProcessingEmptyPacketsTest, MultipleVectors) {
class MultiplierCalculator : public CalculatorBase {
public:
static ::mediapipe::Status GetContract(CalculatorContract* cc) {
static absl::Status GetContract(CalculatorContract* cc) {
cc->Inputs().Index(0).Set<int>();
cc->Inputs().Index(1).Set<int>();
cc->Outputs().Index(0).Set<int>();
return ::mediapipe::OkStatus();
return absl::OkStatus();
}
::mediapipe::Status Open(CalculatorContext* cc) override {
absl::Status Open(CalculatorContext* cc) override {
cc->SetOffset(TimestampDiff(0));
return ::mediapipe::OkStatus();
return absl::OkStatus();
}
::mediapipe::Status Process(CalculatorContext* cc) override {
absl::Status Process(CalculatorContext* cc) override {
const int& input_int = cc->Inputs().Index(0).Get<int>();
const int& multiplier_int = cc->Inputs().Index(1).Get<int>();
auto output_int = absl::make_unique<int>(input_int * multiplier_int);
cc->Outputs().Index(0).Add(output_int.release(), cc->InputTimestamp());
return ::mediapipe::OkStatus();
return absl::OkStatus();
}
};


@ -61,7 +61,7 @@ class BeginLoopCalculator : public CalculatorBase {
using ItemT = typename IterableT::value_type;
public:
static ::mediapipe::Status GetContract(CalculatorContract* cc) {
static absl::Status GetContract(CalculatorContract* cc) {
// The below enables processing of timestamp bound updates, and that enables
// correct timestamp propagation by the companion EndLoopCalculator.
//
@ -106,10 +106,10 @@ class BeginLoopCalculator : public CalculatorBase {
}
}
return ::mediapipe::OkStatus();
return absl::OkStatus();
}
::mediapipe::Status Process(CalculatorContext* cc) final {
absl::Status Process(CalculatorContext* cc) final {
Timestamp last_timestamp = loop_internal_timestamp_;
if (!cc->Inputs().Tag("ITERABLE").IsEmpty()) {
const IterableT& collection =
@ -139,7 +139,7 @@ class BeginLoopCalculator : public CalculatorBase {
.AddPacket(MakePacket<Timestamp>(cc->InputTimestamp())
.At(Timestamp(loop_internal_timestamp_ - 1)));
return ::mediapipe::OkStatus();
return absl::OkStatus();
}
private:


@ -33,7 +33,7 @@ namespace mediapipe {
// input_stream: "input_vector"
// output_stream: "output_vector"
// options {
// [mediapipe.ClipIntVectorSizeCalculatorOptions.ext] {
// [mediapipe.ClipVectorSizeCalculatorOptions.ext] {
// max_vec_size: 5
// }
// }
@ -43,13 +43,13 @@ namespace mediapipe {
template <typename T>
class ClipVectorSizeCalculator : public CalculatorBase {
public:
static ::mediapipe::Status GetContract(CalculatorContract* cc) {
static absl::Status GetContract(CalculatorContract* cc) {
RET_CHECK(cc->Inputs().NumEntries() == 1);
RET_CHECK(cc->Outputs().NumEntries() == 1);
if (cc->Options<::mediapipe::ClipVectorSizeCalculatorOptions>()
.max_vec_size() < 1) {
return ::mediapipe::InternalError(
return absl::InternalError(
"max_vec_size should be greater than or equal to 1.");
}
@ -60,10 +60,10 @@ class ClipVectorSizeCalculator : public CalculatorBase {
cc->InputSidePackets().Index(0).Set<int>();
}
return ::mediapipe::OkStatus();
return absl::OkStatus();
}
::mediapipe::Status Open(CalculatorContext* cc) override {
absl::Status Open(CalculatorContext* cc) override {
cc->SetOffset(TimestampDiff(0));
max_vec_size_ = cc->Options<::mediapipe::ClipVectorSizeCalculatorOptions>()
.max_vec_size();
@ -72,23 +72,23 @@ class ClipVectorSizeCalculator : public CalculatorBase {
!cc->InputSidePackets().Index(0).IsEmpty()) {
max_vec_size_ = cc->InputSidePackets().Index(0).Get<int>();
}
return ::mediapipe::OkStatus();
return absl::OkStatus();
}
::mediapipe::Status Process(CalculatorContext* cc) override {
absl::Status Process(CalculatorContext* cc) override {
if (max_vec_size_ < 1) {
return ::mediapipe::InternalError(
return absl::InternalError(
"max_vec_size should be greater than or equal to 1.");
}
if (cc->Inputs().Index(0).IsEmpty()) {
return ::mediapipe::OkStatus();
return absl::OkStatus();
}
return ClipVectorSize<T>(std::is_copy_constructible<T>(), cc);
}
template <typename U>
::mediapipe::Status ClipVectorSize(std::true_type, CalculatorContext* cc) {
absl::Status ClipVectorSize(std::true_type, CalculatorContext* cc) {
auto output = absl::make_unique<std::vector<U>>();
const std::vector<U>& input_vector =
cc->Inputs().Index(0).Get<std::vector<U>>();
@ -100,24 +100,23 @@ class ClipVectorSizeCalculator : public CalculatorBase {
}
}
cc->Outputs().Index(0).Add(output.release(), cc->InputTimestamp());
return ::mediapipe::OkStatus();
return absl::OkStatus();
}
template <typename U>
::mediapipe::Status ClipVectorSize(std::false_type, CalculatorContext* cc) {
absl::Status ClipVectorSize(std::false_type, CalculatorContext* cc) {
return ConsumeAndClipVectorSize<T>(std::is_move_constructible<U>(), cc);
}
template <typename U>
::mediapipe::Status ConsumeAndClipVectorSize(std::true_type,
CalculatorContext* cc) {
absl::Status ConsumeAndClipVectorSize(std::true_type, CalculatorContext* cc) {
auto output = absl::make_unique<std::vector<U>>();
::mediapipe::StatusOr<std::unique_ptr<std::vector<U>>> input_status =
absl::StatusOr<std::unique_ptr<std::vector<U>>> input_status =
cc->Inputs().Index(0).Value().Consume<std::vector<U>>();
if (input_status.ok()) {
std::unique_ptr<std::vector<U>> input_vector =
std::move(input_status).ValueOrDie();
std::move(input_status).value();
auto begin_it = input_vector->begin();
auto end_it = input_vector->end();
if (max_vec_size_ < input_vector->size()) {
@ -129,13 +128,13 @@ class ClipVectorSizeCalculator : public CalculatorBase {
return input_status.status();
}
cc->Outputs().Index(0).Add(output.release(), cc->InputTimestamp());
return ::mediapipe::OkStatus();
return absl::OkStatus();
}
template <typename U>
::mediapipe::Status ConsumeAndClipVectorSize(std::false_type,
absl::Status ConsumeAndClipVectorSize(std::false_type,
CalculatorContext* cc) {
return ::mediapipe::InternalError(
return absl::InternalError(
"Cannot copy or move input vectors and clip their size.");
}
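
The template above only becomes a usable calculator once it is instantiated and registered for a concrete element type, and its max_vec_size can be overridden at graph-construction time through the optional (untagged) input side packet handled in Open(). A hedged sketch, assuming a hypothetical int instantiation and placeholder stream/side-packet names:

#include "mediapipe/calculators/core/clip_vector_size_calculator.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/parse_text_proto.h"

namespace mediapipe {

// Hypothetical instantiation for illustration only; the real targets register
// their own concrete element types in separate source files.
typedef ClipVectorSizeCalculator<int> ClipIntVectorSizeExampleCalculator;
REGISTER_CALCULATOR(ClipIntVectorSizeExampleCalculator);

CalculatorGraphConfig MakeClipVectorSizeExampleConfig() {
  // The input side packet, when supplied, overrides options.max_vec_size.
  return ParseTextProtoOrDie<CalculatorGraphConfig>(R"pb(
    input_stream: "input_vector"
    input_side_packet: "max_vec_size_override"
    node {
      calculator: "ClipIntVectorSizeExampleCalculator"
      input_stream: "input_vector"
      input_side_packet: "max_vec_size_override"
      output_stream: "clipped_vector"
      options {
        [mediapipe.ClipVectorSizeCalculatorOptions.ext] { max_vec_size: 5 }
      }
    }
  )pb");
}

}  // namespace mediapipe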


@ -18,6 +18,8 @@ package mediapipe;
import "mediapipe/framework/calculator.proto";
option objc_class_prefix = "MediaPipe";
message ClipVectorSizeCalculatorOptions {
extend CalculatorOptions {
optional ClipVectorSizeCalculatorOptions ext = 274674998;


@ -1,4 +1,4 @@
// Copyright 2019 The MediaPipe Authors.
// Copyright 2019-2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@ -20,14 +20,16 @@
namespace mediapipe {
// Example config:
//
// node {
// calculator: "ConcatenateDetectionVectorCalculator"
// input_stream: "detection_vector_1"
// input_stream: "detection_vector_2"
// output_stream: "concatenated_detection_vector"
// }
//
typedef ConcatenateVectorCalculator<::mediapipe::Detection>
ConcatenateDetectionVectorCalculator;
REGISTER_CALCULATOR(ConcatenateDetectionVectorCalculator);
MEDIAPIPE_REGISTER_NODE(ConcatenateDetectionVectorCalculator);
} // namespace mediapipe


@ -16,6 +16,7 @@
#define MEDIAPIPE_CALCULATORS_CORE_CONCATENATE_NORMALIZED_LIST_CALCULATOR_H_ // NOLINT
#include "mediapipe/calculators/core/concatenate_vector_calculator.pb.h"
#include "mediapipe/framework/api2/node.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/port/canonical_errors.h"
@ -23,61 +24,55 @@
#include "mediapipe/framework/port/status.h"
namespace mediapipe {
namespace api2 {
// Concatenates several NormalizedLandmarkList protos following stream index
// order. This class assumes that every input stream contains a
// NormalizedLandmarkList proto object.
class ConcatenateNormalizedLandmarkListCalculator : public CalculatorBase {
class ConcatenateNormalizedLandmarkListCalculator : public Node {
public:
static ::mediapipe::Status GetContract(CalculatorContract* cc) {
RET_CHECK(cc->Inputs().NumEntries() != 0);
RET_CHECK(cc->Outputs().NumEntries() == 1);
static constexpr Input<NormalizedLandmarkList>::Multiple kIn{""};
static constexpr Output<NormalizedLandmarkList> kOut{""};
for (int i = 0; i < cc->Inputs().NumEntries(); ++i) {
cc->Inputs().Index(i).Set<NormalizedLandmarkList>();
MEDIAPIPE_NODE_CONTRACT(kIn, kOut);
static absl::Status UpdateContract(CalculatorContract* cc) {
RET_CHECK_GE(kIn(cc).Count(), 1);
return absl::OkStatus();
}
cc->Outputs().Index(0).Set<NormalizedLandmarkList>();
return ::mediapipe::OkStatus();
}
::mediapipe::Status Open(CalculatorContext* cc) override {
cc->SetOffset(TimestampDiff(0));
absl::Status Open(CalculatorContext* cc) override {
only_emit_if_all_present_ =
cc->Options<::mediapipe::ConcatenateVectorCalculatorOptions>()
.only_emit_if_all_present();
return ::mediapipe::OkStatus();
return absl::OkStatus();
}
::mediapipe::Status Process(CalculatorContext* cc) override {
absl::Status Process(CalculatorContext* cc) override {
if (only_emit_if_all_present_) {
for (int i = 0; i < cc->Inputs().NumEntries(); ++i) {
if (cc->Inputs().Index(i).IsEmpty()) return ::mediapipe::OkStatus();
for (const auto& input : kIn(cc)) {
if (input.IsEmpty()) return absl::OkStatus();
}
}
NormalizedLandmarkList output;
for (int i = 0; i < cc->Inputs().NumEntries(); ++i) {
if (cc->Inputs().Index(i).IsEmpty()) continue;
const NormalizedLandmarkList& input =
cc->Inputs().Index(i).Get<NormalizedLandmarkList>();
for (int j = 0; j < input.landmark_size(); ++j) {
const NormalizedLandmark& input_landmark = input.landmark(j);
*output.add_landmark() = input_landmark;
for (const auto& input : kIn(cc)) {
if (input.IsEmpty()) continue;
const NormalizedLandmarkList& list = *input;
for (int j = 0; j < list.landmark_size(); ++j) {
*output.add_landmark() = list.landmark(j);
}
}
cc->Outputs().Index(0).AddPacket(
MakePacket<NormalizedLandmarkList>(output).At(cc->InputTimestamp()));
return ::mediapipe::OkStatus();
kOut(cc).Send(std::move(output));
return absl::OkStatus();
}
private:
bool only_emit_if_all_present_;
};
MEDIAPIPE_REGISTER_NODE(ConcatenateNormalizedLandmarkListCalculator);
REGISTER_CALCULATOR(ConcatenateNormalizedLandmarkListCalculator);
} // namespace api2
} // namespace mediapipe
// NOLINTNEXTLINE
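
After the migration, the concatenator is still driven by an ordinary graph config; inputs are concatenated in stream-index order and, when only_emit_if_all_present is set, a result is emitted only if every input stream has a packet at the timestamp. A hedged sketch in the test style used elsewhere in this commit (stream names are placeholders):

#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/parse_text_proto.h"

namespace mediapipe {

CalculatorGraphConfig MakeConcatenateLandmarksExampleConfig() {
  return ParseTextProtoOrDie<CalculatorGraphConfig>(R"pb(
    input_stream: "landmarks_0"
    input_stream: "landmarks_1"
    node {
      calculator: "ConcatenateNormalizedLandmarkListCalculator"
      input_stream: "landmarks_0"
      input_stream: "landmarks_1"
      output_stream: "concatenated_landmarks"
      options {
        [mediapipe.ConcatenateVectorCalculatorOptions.ext] {
          only_emit_if_all_present: true
        }
      }
    }
  )pb");
}

}  // namespace mediapipe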


@ -18,12 +18,14 @@
#include "mediapipe/framework/formats/classification.pb.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/formats/tensor.h"
#include "mediapipe/framework/port/integral_types.h"
#include "mediapipe/util/render_data.pb.h"
#include "tensorflow/lite/interpreter.h"
#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE)
#include "tensorflow/lite/delegates/gpu/gl/gl_buffer.h"
#endif // !MEDIAPIPE_DISABLE_GPU
#endif // !defined(MEDIAPIPE_DISABLE_GL_COMPUTE)
namespace mediapipe {
@ -35,7 +37,7 @@ namespace mediapipe {
// output_stream: "concatenated_float_vector"
// }
typedef ConcatenateVectorCalculator<float> ConcatenateFloatVectorCalculator;
REGISTER_CALCULATOR(ConcatenateFloatVectorCalculator);
MEDIAPIPE_REGISTER_NODE(ConcatenateFloatVectorCalculator);
// Example config:
// node {
@ -45,10 +47,13 @@ REGISTER_CALCULATOR(ConcatenateFloatVectorCalculator);
// output_stream: "concatenated_int32_vector"
// }
typedef ConcatenateVectorCalculator<int32> ConcatenateInt32VectorCalculator;
REGISTER_CALCULATOR(ConcatenateInt32VectorCalculator);
MEDIAPIPE_REGISTER_NODE(ConcatenateInt32VectorCalculator);
typedef ConcatenateVectorCalculator<uint64> ConcatenateUInt64VectorCalculator;
REGISTER_CALCULATOR(ConcatenateUInt64VectorCalculator);
MEDIAPIPE_REGISTER_NODE(ConcatenateUInt64VectorCalculator);
typedef ConcatenateVectorCalculator<bool> ConcatenateBoolVectorCalculator;
MEDIAPIPE_REGISTER_NODE(ConcatenateBoolVectorCalculator);
// Example config:
// node {
@ -59,24 +64,31 @@ REGISTER_CALCULATOR(ConcatenateUInt64VectorCalculator);
// }
typedef ConcatenateVectorCalculator<TfLiteTensor>
ConcatenateTfLiteTensorVectorCalculator;
REGISTER_CALCULATOR(ConcatenateTfLiteTensorVectorCalculator);
MEDIAPIPE_REGISTER_NODE(ConcatenateTfLiteTensorVectorCalculator);
typedef ConcatenateVectorCalculator<Tensor> ConcatenateTensorVectorCalculator;
MEDIAPIPE_REGISTER_NODE(ConcatenateTensorVectorCalculator);
typedef ConcatenateVectorCalculator<::mediapipe::NormalizedLandmark>
ConcatenateLandmarkVectorCalculator;
REGISTER_CALCULATOR(ConcatenateLandmarkVectorCalculator);
MEDIAPIPE_REGISTER_NODE(ConcatenateLandmarkVectorCalculator);
typedef ConcatenateVectorCalculator<::mediapipe::NormalizedLandmarkList>
ConcatenateLandmarListVectorCalculator;
REGISTER_CALCULATOR(ConcatenateLandmarListVectorCalculator);
MEDIAPIPE_REGISTER_NODE(ConcatenateLandmarListVectorCalculator);
typedef ConcatenateVectorCalculator<mediapipe::ClassificationList>
ConcatenateClassificationListVectorCalculator;
REGISTER_CALCULATOR(ConcatenateClassificationListVectorCalculator);
MEDIAPIPE_REGISTER_NODE(ConcatenateClassificationListVectorCalculator);
#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE)
typedef ConcatenateVectorCalculator<::tflite::gpu::gl::GlBuffer>
ConcatenateGlBufferVectorCalculator;
REGISTER_CALCULATOR(ConcatenateGlBufferVectorCalculator);
MEDIAPIPE_REGISTER_NODE(ConcatenateGlBufferVectorCalculator);
#endif
typedef ConcatenateVectorCalculator<mediapipe::RenderData>
ConcatenateRenderDataVectorCalculator;
MEDIAPIPE_REGISTER_NODE(ConcatenateRenderDataVectorCalculator);
} // namespace mediapipe


@ -20,122 +20,96 @@
#include <vector>
#include "mediapipe/calculators/core/concatenate_vector_calculator.pb.h"
#include "mediapipe/framework/api2/node.h"
#include "mediapipe/framework/api2/port.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/canonical_errors.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
namespace mediapipe {
// Note: since this is a calculator template that can be included by other
// source files, we do not place this in namespace api2 directly, but qualify
// the api2 names below, to avoid changing the visible name of the class.
// We cannot simply write "using mediapipe::api2" since it's a header file.
// This distinction will go away once api2 is finalized.
// Concatenates several objects of type T or std::vector<T> following stream
// index order. This class assumes that every input stream contains either T or
// vector<T> type. To use this class for a particular type T, register a
// calculator using ConcatenateVectorCalculator<T>.
template <typename T>
class ConcatenateVectorCalculator : public CalculatorBase {
class ConcatenateVectorCalculator : public api2::Node {
public:
static ::mediapipe::Status GetContract(CalculatorContract* cc) {
RET_CHECK(cc->Inputs().NumEntries() != 0);
RET_CHECK(cc->Outputs().NumEntries() == 1);
static constexpr
typename api2::Input<api2::OneOf<T, std::vector<T>>>::Multiple kIn{""};
static constexpr api2::Output<std::vector<T>> kOut{""};
for (int i = 0; i < cc->Inputs().NumEntries(); ++i) {
// Actual type T or vector<T> will be validated in Process().
cc->Inputs().Index(i).SetAny();
MEDIAPIPE_NODE_CONTRACT(kIn, kOut);
static absl::Status UpdateContract(CalculatorContract* cc) {
RET_CHECK_GE(kIn(cc).Count(), 1);
return absl::OkStatus();
}
cc->Outputs().Index(0).Set<std::vector<T>>();
return ::mediapipe::OkStatus();
}
::mediapipe::Status Open(CalculatorContext* cc) override {
cc->SetOffset(TimestampDiff(0));
absl::Status Open(CalculatorContext* cc) override {
only_emit_if_all_present_ =
cc->Options<::mediapipe::ConcatenateVectorCalculatorOptions>()
.only_emit_if_all_present();
return ::mediapipe::OkStatus();
return absl::OkStatus();
}
::mediapipe::Status Process(CalculatorContext* cc) override {
absl::Status Process(CalculatorContext* cc) override {
if (only_emit_if_all_present_) {
for (int i = 0; i < cc->Inputs().NumEntries(); ++i) {
if (cc->Inputs().Index(i).IsEmpty()) return ::mediapipe::OkStatus();
for (const auto& input : kIn(cc)) {
if (input.IsEmpty()) return ::absl::OkStatus();
}
}
return ConcatenateVectors<T>(std::is_copy_constructible<T>(), cc);
}
template <typename U>
::mediapipe::Status ConcatenateVectors(std::true_type,
CalculatorContext* cc) {
auto output = absl::make_unique<std::vector<U>>();
for (int i = 0; i < cc->Inputs().NumEntries(); ++i) {
auto& input = cc->Inputs().Index(i);
absl::Status ConcatenateVectors(std::true_type, CalculatorContext* cc) {
auto output = std::vector<U>();
for (const auto& input : kIn(cc)) {
if (input.IsEmpty()) continue;
if (input.Value().ValidateAsType<U>().ok()) {
const U& value = input.Get<U>();
output->push_back(value);
} else if (input.Value().ValidateAsType<std::vector<U>>().ok()) {
const std::vector<U>& value = input.Get<std::vector<U>>();
output->insert(output->end(), value.begin(), value.end());
} else {
return ::mediapipe::InvalidArgumentError("Invalid input stream type.");
input.Visit([&output](const U& value) { output.push_back(value); },
[&output](const std::vector<U>& value) {
output.insert(output.end(), value.begin(), value.end());
});
}
}
cc->Outputs().Index(0).Add(output.release(), cc->InputTimestamp());
return ::mediapipe::OkStatus();
kOut(cc).Send(std::move(output));
return absl::OkStatus();
}
template <typename U>
::mediapipe::Status ConcatenateVectors(std::false_type,
CalculatorContext* cc) {
absl::Status ConcatenateVectors(std::false_type, CalculatorContext* cc) {
return ConsumeAndConcatenateVectors<T>(std::is_move_constructible<U>(), cc);
}
template <typename U>
::mediapipe::Status ConsumeAndConcatenateVectors(std::true_type,
absl::Status ConsumeAndConcatenateVectors(std::true_type,
CalculatorContext* cc) {
auto output = absl::make_unique<std::vector<U>>();
for (int i = 0; i < cc->Inputs().NumEntries(); ++i) {
auto& input = cc->Inputs().Index(i);
auto output = std::vector<U>();
for (auto input : kIn(cc)) {
if (input.IsEmpty()) continue;
if (input.Value().ValidateAsType<U>().ok()) {
::mediapipe::StatusOr<std::unique_ptr<U>> value_status =
input.Value().Consume<U>();
if (value_status.ok()) {
std::unique_ptr<U> value = std::move(value_status).ValueOrDie();
output->push_back(std::move(*value));
} else {
return value_status.status();
}
} else if (input.Value().ValidateAsType<std::vector<U>>().ok()) {
::mediapipe::StatusOr<std::unique_ptr<std::vector<U>>> value_status =
input.Value().Consume<std::vector<U>>();
if (value_status.ok()) {
std::unique_ptr<std::vector<U>> value =
std::move(value_status).ValueOrDie();
output->insert(output->end(), std::make_move_iterator(value->begin()),
MP_RETURN_IF_ERROR(input.ConsumeAndVisit(
[&output](std::unique_ptr<U> value) {
output.push_back(std::move(*value));
},
[&output](std::unique_ptr<std::vector<U>> value) {
output.insert(output.end(), std::make_move_iterator(value->begin()),
std::make_move_iterator(value->end()));
} else {
return value_status.status();
}));
}
} else {
return ::mediapipe::InvalidArgumentError("Invalid input stream type.");
}
}
cc->Outputs().Index(0).Add(output.release(), cc->InputTimestamp());
return ::mediapipe::OkStatus();
kOut(cc).Send(std::move(output));
return absl::OkStatus();
}
template <typename U>
::mediapipe::Status ConsumeAndConcatenateVectors(std::false_type,
absl::Status ConsumeAndConcatenateVectors(std::false_type,
CalculatorContext* cc) {
return ::mediapipe::InternalError(
return absl::InternalError(
"Cannot copy or move inputs to concatenate them");
}
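
The copy path above relies on api2 OneOf inputs, which let a single port accept either a bare T or a std::vector<T> and dispatch on the actual payload with Visit (ConsumeAndVisit plays the same role for move-only payloads). A minimal hedged sketch of that dispatch for one input port; the node name, tags, and the int element type are hypothetical:

#include <vector>

#include "mediapipe/framework/api2/node.h"
#include "mediapipe/framework/api2/port.h"
#include "mediapipe/framework/calculator_framework.h"

namespace mediapipe {
namespace api2 {

// Hypothetical example node: sums an input that may carry either a single
// int or a std::vector<int>.
class SumIntOrVectorExampleNode : public Node {
 public:
  static constexpr Input<OneOf<int, std::vector<int>>> kIn{"IN"};
  static constexpr Output<int> kOut{"SUM"};
  MEDIAPIPE_NODE_CONTRACT(kIn, kOut);

  absl::Status Process(CalculatorContext* cc) override {
    if (kIn(cc).IsEmpty()) return absl::OkStatus();
    int sum = 0;
    // Visit invokes the handler matching the payload type in the packet.
    kIn(cc).Visit([&sum](const int& value) { sum += value; },
                  [&sum](const std::vector<int>& values) {
                    for (int v : values) sum += v;
                  });
    kOut(cc).Send(sum);
    return absl::OkStatus();
  }
};
MEDIAPIPE_REGISTER_NODE(SumIntOrVectorExampleNode);

}  // namespace api2
}  // namespace mediapipe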


@ -18,6 +18,8 @@ package mediapipe;
import "mediapipe/framework/calculator.proto";
option objc_class_prefix = "MediaPipe";
message ConcatenateVectorCalculatorOptions {
extend CalculatorOptions {
optional ConcatenateVectorCalculatorOptions ext = 259397839;


@ -28,7 +28,7 @@
namespace mediapipe {
typedef ConcatenateVectorCalculator<int> TestConcatenateIntVectorCalculator;
REGISTER_CALCULATOR(TestConcatenateIntVectorCalculator);
MEDIAPIPE_REGISTER_NODE(TestConcatenateIntVectorCalculator);
void AddInputVector(int index, const std::vector<int>& input, int64 timestamp,
CalculatorRunner* runner) {
@ -384,7 +384,7 @@ TEST(ConcatenateFloatVectorCalculatorTest, OneEmptyStreamNoOutput) {
typedef ConcatenateVectorCalculator<std::unique_ptr<int>>
TestConcatenateUniqueIntPtrCalculator;
REGISTER_CALCULATOR(TestConcatenateUniqueIntPtrCalculator);
MEDIAPIPE_REGISTER_NODE(TestConcatenateUniqueIntPtrCalculator);
TEST(TestConcatenateUniqueIntVectorCalculatorTest, ConsumeOneTimestamp) {
/* Note: We don't use CalculatorRunner for this test because it keeps copies


@ -17,6 +17,7 @@
#include "mediapipe/calculators/core/constant_side_packet_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/collection_item_id.h"
#include "mediapipe/framework/formats/classification.pb.h"
#include "mediapipe/framework/port/canonical_errors.h"
#include "mediapipe/framework/port/integral_types.h"
#include "mediapipe/framework/port/ret_check.h"
@ -24,6 +25,8 @@
namespace mediapipe {
namespace {} // namespace
// Generates an output side packet or multiple output side packets according to
// the specified options.
//
@ -51,7 +54,7 @@ namespace mediapipe {
// }
class ConstantSidePacketCalculator : public CalculatorBase {
public:
static ::mediapipe::Status GetContract(CalculatorContract* cc) {
static absl::Status GetContract(CalculatorContract* cc) {
const auto& options =
cc->Options<::mediapipe::ConstantSidePacketCalculatorOptions>();
RET_CHECK_EQ(cc->OutputSidePackets().NumEntries(kPacketTag),
@ -74,15 +77,17 @@ class ConstantSidePacketCalculator : public CalculatorBase {
packet.Set<std::string>();
} else if (packet_options.has_uint64_value()) {
packet.Set<uint64>();
} else if (packet_options.has_classification_list_value()) {
packet.Set<ClassificationList>();
} else {
return ::mediapipe::InvalidArgumentError(
return absl::InvalidArgumentError(
"None of supported values were specified in options.");
}
}
return ::mediapipe::OkStatus();
return absl::OkStatus();
}
::mediapipe::Status Open(CalculatorContext* cc) override {
absl::Status Open(CalculatorContext* cc) override {
const auto& options =
cc->Options<::mediapipe::ConstantSidePacketCalculatorOptions>();
int index = 0;
@ -100,16 +105,19 @@ class ConstantSidePacketCalculator : public CalculatorBase {
packet.Set(MakePacket<std::string>(packet_options.string_value()));
} else if (packet_options.has_uint64_value()) {
packet.Set(MakePacket<uint64>(packet_options.uint64_value()));
} else if (packet_options.has_classification_list_value()) {
packet.Set(MakePacket<ClassificationList>(
packet_options.classification_list_value()));
} else {
return ::mediapipe::InvalidArgumentError(
return absl::InvalidArgumentError(
"None of supported values were specified in options.");
}
}
return ::mediapipe::OkStatus();
return absl::OkStatus();
}
::mediapipe::Status Process(CalculatorContext* cc) override {
return ::mediapipe::OkStatus();
absl::Status Process(CalculatorContext* cc) override {
return absl::OkStatus();
}
private:


@ -17,6 +17,9 @@ syntax = "proto2";
package mediapipe;
import "mediapipe/framework/calculator.proto";
import "mediapipe/framework/formats/classification.proto";
option objc_class_prefix = "MediaPipe";
message ConstantSidePacketCalculatorOptions {
extend CalculatorOptions {
@ -30,6 +33,7 @@ message ConstantSidePacketCalculatorOptions {
bool bool_value = 3;
string string_value = 4;
uint64 uint64_value = 5;
ClassificationList classification_list_value = 6;
}
}
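
The new classification_list_value field is filled in the same way as the existing int/float/bool/string/uint64 fields; a hedged sketch of a graph config that emits a ClassificationList side packet (the label, score, and side-packet name are placeholders):

#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/parse_text_proto.h"

namespace mediapipe {

CalculatorGraphConfig MakeClassificationSidePacketExampleConfig() {
  return ParseTextProtoOrDie<CalculatorGraphConfig>(R"pb(
    node {
      calculator: "ConstantSidePacketCalculator"
      output_side_packet: "PACKET:classifications"
      options {
        [mediapipe.ConstantSidePacketCalculatorOptions.ext] {
          packet {
            classification_list_value {
              classification { index: 0 score: 1.0 label: "example" }
            }
          }
        }
      }
    }
  )pb");
}

}  // namespace mediapipe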


@ -40,7 +40,7 @@ void DoTestSingleSidePacket(absl::string_view packet_spec,
}
)";
CalculatorGraphConfig graph_config =
::mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(
mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(
absl::Substitute(graph_config_template, packet_spec));
CalculatorGraph graph;
MP_ASSERT_OK(graph.Initialize(graph_config));
@ -49,7 +49,7 @@ void DoTestSingleSidePacket(absl::string_view packet_spec,
MP_ASSERT_OK(graph.GetOutputSidePacket("packet"));
auto actual_value =
graph.GetOutputSidePacket("packet").ValueOrDie().template Get<T>();
graph.GetOutputSidePacket("packet").value().template Get<T>();
EXPECT_EQ(actual_value, expected_value);
}
@ -62,7 +62,7 @@ TEST(ConstantSidePacketCalculatorTest, EveryPossibleType) {
TEST(ConstantSidePacketCalculatorTest, MultiplePackets) {
CalculatorGraphConfig graph_config =
::mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(R"(
mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(R"(
node {
calculator: "ConstantSidePacketCalculator"
output_side_packet: "PACKET:0:int_packet"
@ -89,33 +89,29 @@ TEST(ConstantSidePacketCalculatorTest, MultiplePackets) {
MP_ASSERT_OK(graph.WaitUntilIdle());
MP_ASSERT_OK(graph.GetOutputSidePacket("int_packet"));
EXPECT_EQ(graph.GetOutputSidePacket("int_packet").ValueOrDie().Get<int>(),
256);
EXPECT_EQ(graph.GetOutputSidePacket("int_packet").value().Get<int>(), 256);
MP_ASSERT_OK(graph.GetOutputSidePacket("float_packet"));
EXPECT_EQ(graph.GetOutputSidePacket("float_packet").ValueOrDie().Get<float>(),
EXPECT_EQ(graph.GetOutputSidePacket("float_packet").value().Get<float>(),
0.5f);
MP_ASSERT_OK(graph.GetOutputSidePacket("bool_packet"));
EXPECT_FALSE(
graph.GetOutputSidePacket("bool_packet").ValueOrDie().Get<bool>());
EXPECT_FALSE(graph.GetOutputSidePacket("bool_packet").value().Get<bool>());
MP_ASSERT_OK(graph.GetOutputSidePacket("string_packet"));
EXPECT_EQ(graph.GetOutputSidePacket("string_packet")
.ValueOrDie()
.Get<std::string>(),
EXPECT_EQ(
graph.GetOutputSidePacket("string_packet").value().Get<std::string>(),
"string");
MP_ASSERT_OK(graph.GetOutputSidePacket("another_string_packet"));
EXPECT_EQ(graph.GetOutputSidePacket("another_string_packet")
.ValueOrDie()
.value()
.Get<std::string>(),
"another string");
MP_ASSERT_OK(graph.GetOutputSidePacket("another_int_packet"));
EXPECT_EQ(
graph.GetOutputSidePacket("another_int_packet").ValueOrDie().Get<int>(),
EXPECT_EQ(graph.GetOutputSidePacket("another_int_packet").value().Get<int>(),
128);
}
TEST(ConstantSidePacketCalculatorTest, ProcessingPacketsWithCorrectTagOnly) {
CalculatorGraphConfig graph_config =
::mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(R"(
mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(R"(
node {
calculator: "ConstantSidePacketCalculator"
output_side_packet: "PACKET:0:int_packet"
@ -142,24 +138,21 @@ TEST(ConstantSidePacketCalculatorTest, ProcessingPacketsWithCorrectTagOnly) {
MP_ASSERT_OK(graph.WaitUntilIdle());
MP_ASSERT_OK(graph.GetOutputSidePacket("int_packet"));
EXPECT_EQ(graph.GetOutputSidePacket("int_packet").ValueOrDie().Get<int>(),
256);
EXPECT_EQ(graph.GetOutputSidePacket("int_packet").value().Get<int>(), 256);
MP_ASSERT_OK(graph.GetOutputSidePacket("float_packet"));
EXPECT_EQ(graph.GetOutputSidePacket("float_packet").ValueOrDie().Get<float>(),
EXPECT_EQ(graph.GetOutputSidePacket("float_packet").value().Get<float>(),
0.5f);
MP_ASSERT_OK(graph.GetOutputSidePacket("bool_packet"));
EXPECT_FALSE(
graph.GetOutputSidePacket("bool_packet").ValueOrDie().Get<bool>());
EXPECT_FALSE(graph.GetOutputSidePacket("bool_packet").value().Get<bool>());
MP_ASSERT_OK(graph.GetOutputSidePacket("string_packet"));
EXPECT_EQ(graph.GetOutputSidePacket("string_packet")
.ValueOrDie()
.Get<std::string>(),
EXPECT_EQ(
graph.GetOutputSidePacket("string_packet").value().Get<std::string>(),
"string");
}
TEST(ConstantSidePacketCalculatorTest, IncorrectConfig_MoreOptionsThanPackets) {
CalculatorGraphConfig graph_config =
::mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(R"(
mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(R"(
node {
calculator: "ConstantSidePacketCalculator"
output_side_packet: "PACKET:int_packet"
@ -177,7 +170,7 @@ TEST(ConstantSidePacketCalculatorTest, IncorrectConfig_MoreOptionsThanPackets) {
TEST(ConstantSidePacketCalculatorTest, IncorrectConfig_MorePacketsThanOptions) {
CalculatorGraphConfig graph_config =
::mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(R"(
mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(R"(
node {
calculator: "ConstantSidePacketCalculator"
output_side_packet: "PACKET:0:int_packet"

Some files were not shown because too many files have changed in this diff.