diff --git a/MANIFEST.in b/MANIFEST.in index 9c033288b..33a48428c 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,6 +1,11 @@ global-exclude .git* global-exclude *_test.py -recursive-include mediapipe/models *.tflite *.txt +include CONTRIBUTING.md +include LICENSE +include MANIFEST.in +include README.md +include requirements.txt + recursive-include mediapipe/modules *.tflite *.txt recursive-include mediapipe/graphs *.binarypb diff --git a/README.md b/README.md index cfa0c2e77..d7287dc35 100644 --- a/README.md +++ b/README.md @@ -22,32 +22,33 @@ desktop/cloud, web and IoT devices. ## ML solutions in MediaPipe -Face Detection | Face Mesh | Iris 🆕 | Hands | Pose 🆕 -:----------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------: | :----: -[![face_detection](docs/images/mobile/face_detection_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/face_detection) | [![face_mesh](docs/images/mobile/face_mesh_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/face_mesh) | [![iris](docs/images/mobile/iris_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/iris) | [![hand](docs/images/mobile/hand_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/hands) | [![pose](docs/images/mobile/pose_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/pose) +Face Detection | Face Mesh | Iris | Hands | Pose | Hair Segmentation +:----------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------: | :---------------: +[![face_detection](docs/images/mobile/face_detection_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/face_detection) | [![face_mesh](docs/images/mobile/face_mesh_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/face_mesh) | [![iris](docs/images/mobile/iris_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/iris) | [![hand](docs/images/mobile/hand_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/hands) | [![pose](docs/images/mobile/pose_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/pose) | [![hair_segmentation](docs/images/mobile/hair_segmentation_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/hair_segmentation) -Hair Segmentation | Object Detection | Box Tracking | Objectron | KNIFT -:-------------------------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------: | 
:-------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------: | :---: -[![hair_segmentation](docs/images/mobile/hair_segmentation_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/hair_segmentation) | [![object_detection](docs/images/mobile/object_detection_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/object_detection) | [![box_tracking](docs/images/mobile/object_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/box_tracking) | [![objectron](docs/images/mobile/objectron_chair_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/objectron) | [![knift](docs/images/mobile/template_matching_android_cpu_small.gif)](https://google.github.io/mediapipe/solutions/knift) +Object Detection | Box Tracking | Instant Motion Tracking | Objectron | KNIFT +:----------------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------: | :---: +[![object_detection](docs/images/mobile/object_detection_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/object_detection) | [![box_tracking](docs/images/mobile/object_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/box_tracking) | [![instant_motion_tracking](docs/images/mobile/instant_motion_tracking_android_small.gif)](https://google.github.io/mediapipe/solutions/instant_motion_tracking) | [![objectron](docs/images/mobile/objectron_chair_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/objectron) | [![knift](docs/images/mobile/template_matching_android_cpu_small.gif)](https://google.github.io/mediapipe/solutions/knift) -[]() | Android | iOS | Desktop | Python | Web | Coral -:---------------------------------------------------------------------------- | :-----: | :-: | :-----: | :----: | :-: | :---: -[Face Detection](https://google.github.io/mediapipe/solutions/face_detection) | ✅ | ✅ | ✅ | | ✅ | ✅ -[Face Mesh](https://google.github.io/mediapipe/solutions/face_mesh) | ✅ | ✅ | ✅ | | | -[Iris](https://google.github.io/mediapipe/solutions/iris) 🆕 | ✅ | ✅ | ✅ | | ✅ | -[Hands](https://google.github.io/mediapipe/solutions/hands) | ✅ | ✅ | ✅ | | ✅ | -[Pose](https://google.github.io/mediapipe/solutions/pose) 🆕 | ✅ | ✅ | ✅ | ✅ | ✅ | -[Hair Segmentation](https://google.github.io/mediapipe/solutions/hair_segmentation) | ✅ | | ✅ | | ✅ | -[Object Detection](https://google.github.io/mediapipe/solutions/object_detection) | ✅ | ✅ | ✅ | | | ✅ -[Box Tracking](https://google.github.io/mediapipe/solutions/box_tracking) | ✅ | ✅ | ✅ | | | -[Objectron](https://google.github.io/mediapipe/solutions/objectron) | ✅ | | | | | -[KNIFT](https://google.github.io/mediapipe/solutions/knift) | ✅ | | | | | -[AutoFlip](https://google.github.io/mediapipe/solutions/autoflip) | | | ✅ | | | -[MediaSequence](https://google.github.io/mediapipe/solutions/media_sequence) | | | ✅ | | | -[YouTube 
8M](https://google.github.io/mediapipe/solutions/youtube_8m) | | | ✅ | | | +[]() | Android | iOS | Desktop | Python | Web | Coral +:---------------------------------------------------------------------------------------- | :-----: | :-: | :-----: | :----: | :-: | :---: +[Face Detection](https://google.github.io/mediapipe/solutions/face_detection) | ✅ | ✅ | ✅ | | ✅ | ✅ +[Face Mesh](https://google.github.io/mediapipe/solutions/face_mesh) | ✅ | ✅ | ✅ | | | +[Iris](https://google.github.io/mediapipe/solutions/iris) | ✅ | ✅ | ✅ | | ✅ | +[Hands](https://google.github.io/mediapipe/solutions/hands) | ✅ | ✅ | ✅ | | ✅ | +[Pose](https://google.github.io/mediapipe/solutions/pose) | ✅ | ✅ | ✅ | ✅ | ✅ | +[Hair Segmentation](https://google.github.io/mediapipe/solutions/hair_segmentation) | ✅ | | ✅ | | ✅ | +[Object Detection](https://google.github.io/mediapipe/solutions/object_detection) | ✅ | ✅ | ✅ | | | ✅ +[Box Tracking](https://google.github.io/mediapipe/solutions/box_tracking) | ✅ | ✅ | ✅ | | | +[Instant Motion Tracking](https://google.github.io/mediapipe/solutions/instant_motion_tracking) | ✅ | | | | | +[Objectron](https://google.github.io/mediapipe/solutions/objectron) | ✅ | | | | | +[KNIFT](https://google.github.io/mediapipe/solutions/knift) | ✅ | | | | | +[AutoFlip](https://google.github.io/mediapipe/solutions/autoflip) | | | ✅ | | | +[MediaSequence](https://google.github.io/mediapipe/solutions/media_sequence) | | | ✅ | | | +[YouTube 8M](https://google.github.io/mediapipe/solutions/youtube_8m) | | | ✅ | | | ## MediaPipe on the Web @@ -88,6 +89,8 @@ run code search using ## Publications +* [Instant Motion Tracking With MediaPipe](https://mediapipe.page.link/instant-motion-tracking-blog) + in Google Developers Blog * [BlazePose - On-device Real-time Body Pose Tracking](https://ai.googleblog.com/2020/08/on-device-real-time-body-pose-tracking.html) in Google AI Blog * [MediaPipe Iris: Real-time Eye Tracking and Depth Estimation](https://ai.googleblog.com/2020/08/mediapipe-iris-real-time-iris-tracking.html) diff --git a/WORKSPACE b/WORKSPACE index a049c470c..a25d6c1d3 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -36,6 +36,19 @@ http_archive( urls = ["https://github.com/bazelbuild/rules_cc/archive/master.zip"], ) +http_archive( + name = "rules_foreign_cc", + strip_prefix = "rules_foreign_cc-master", + url = "https://github.com/bazelbuild/rules_foreign_cc/archive/master.zip", +) + +load("@rules_foreign_cc//:workspace_definitions.bzl", "rules_foreign_cc_dependencies") + +rules_foreign_cc_dependencies() + +# This is used to select all contents of the archives for CMake-based packages to give CMake access to them. +all_content = """filegroup(name = "all", srcs = glob(["**"]), visibility = ["//visibility:public"])""" + # GoogleTest/GoogleMock framework. Used by most unit-tests. # Last updated 2020-06-30. http_archive( @@ -68,14 +81,23 @@ http_archive( url = "https://github.com/gflags/gflags/archive/v2.2.2.zip", ) -# glog v0.3.5 -# TODO: Migrate MediaPipe to use com_github_glog_glog on all platforms. 
+# 2020-08-21 http_archive( - name = "com_github_glog_glog_v_0_3_5", - url = "https://github.com/google/glog/archive/v0.3.5.zip", - sha256 = "267103f8a1e9578978aa1dc256001e6529ef593e5aea38193d31c2872ee025e8", - strip_prefix = "glog-0.3.5", - build_file = "@//third_party:glog.BUILD", + name = "com_github_glog_glog", + strip_prefix = "glog-0a2e5931bd5ff22fd3bf8999eb8ce776f159cda6", + sha256 = "58c9b3b6aaa4dd8b836c0fd8f65d0f941441fb95e27212c5eeb9979cfd3592ab", + urls = [ + "https://github.com/google/glog/archive/0a2e5931bd5ff22fd3bf8999eb8ce776f159cda6.zip", + ], +) +http_archive( + name = "com_github_glog_glog_no_gflags", + strip_prefix = "glog-0a2e5931bd5ff22fd3bf8999eb8ce776f159cda6", + sha256 = "58c9b3b6aaa4dd8b836c0fd8f65d0f941441fb95e27212c5eeb9979cfd3592ab", + build_file = "@//third_party:glog_no_gflags.BUILD", + urls = [ + "https://github.com/google/glog/archive/0a2e5931bd5ff22fd3bf8999eb8ce776f159cda6.zip", + ], patches = [ "@//third_party:com_github_glog_glog_9779e5ea6ef59562b030248947f787d1256132ae.diff" ], @@ -84,16 +106,6 @@ http_archive( ], ) -# 2020-02-16 -http_archive( - name = "com_github_glog_glog", - strip_prefix = "glog-3ba8976592274bc1f907c402ce22558011d6fc5e", - sha256 = "feca3c7e29a693cab7887409756d89d342d4a992d54d7c5599bebeae8f7b50be", - urls = [ - "https://github.com/google/glog/archive/3ba8976592274bc1f907c402ce22558011d6fc5e.zip", - ], -) - # easyexif http_archive( name = "easyexif", @@ -169,6 +181,13 @@ http_archive( sha256 = "5ba6d0db4e784621fda44a50c58bb23b0892684692f0c623e2063f9c19f192f1" ) +http_archive( + name = "opencv", + build_file_content = all_content, + strip_prefix = "opencv-3.4.10", + urls = ["https://github.com/opencv/opencv/archive/3.4.10.tar.gz"], +) + new_local_repository( name = "linux_opencv", build_file = "@//third_party:opencv_linux.BUILD", @@ -184,13 +203,13 @@ new_local_repository( new_local_repository( name = "macos_opencv", build_file = "@//third_party:opencv_macos.BUILD", - path = "/usr", + path = "/usr/local/opt/opencv@3", ) new_local_repository( name = "macos_ffmpeg", build_file = "@//third_party:ffmpeg_macos.BUILD", - path = "/usr", + path = "/usr/local/opt/ffmpeg", ) new_local_repository( @@ -301,9 +320,6 @@ load("@rules_jvm_external//:defs.bzl", "maven_install") maven_install( name = "maven", artifacts = [ - "junit:junit:4.12", - "androidx.test.espresso:espresso-core:3.1.1", - "org.hamcrest:hamcrest-library:1.3", "androidx.concurrent:concurrent-futures:1.0.0-alpha03", "androidx.lifecycle:lifecycle-common:2.2.0", "androidx.annotation:annotation:aar:1.1.0", @@ -314,11 +330,15 @@ maven_install( "androidx.core:core:aar:1.1.0-rc03", "androidx.legacy:legacy-support-v4:aar:1.0.0", "androidx.recyclerview:recyclerview:aar:1.1.0-beta02", + "androidx.test.espresso:espresso-core:3.1.1", + "com.github.bumptech.glide:glide:4.11.0", "com.google.android.material:material:aar:1.0.0-rc01", "com.google.code.findbugs:jsr305:3.0.2", "com.google.flogger:flogger-system-backend:0.3.1", "com.google.flogger:flogger:0.3.1", "com.google.guava:guava:27.0.1-android", + "junit:junit:4.12", + "org.hamcrest:hamcrest-library:1.3", ], repositories = [ "https://jcenter.bintray.com", diff --git a/docs/_config.yml b/docs/_config.yml index 4da202e75..a48c21d6d 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -22,8 +22,8 @@ aux_links: # Footer content appears at the bottom of every page's main content footer_content: "© 2020 GOOGLE LLC | PRIVACY POLICY | TERMS OF SERVICE" -# Color scheme currently only supports "dark" or nil (default) -color_scheme: nil +# Color 
scheme currently only supports "dark", "light"/nil (default), or a custom scheme that you define
+color_scheme: mediapipe

 # Google Analytics Tracking (optional)
 ga_tracking: UA-140696581-2
diff --git a/docs/_sass/color_schemes/mediapipe.scss b/docs/_sass/color_schemes/mediapipe.scss
new file mode 100644
index 000000000..6722e212d
--- /dev/null
+++ b/docs/_sass/color_schemes/mediapipe.scss
@@ -0,0 +1 @@
+$link-color: #0097A7;
diff --git a/docs/getting_started/building_examples.md b/docs/getting_started/building_examples.md
index 35da25bef..842f1b155 100644
--- a/docs/getting_started/building_examples.md
+++ b/docs/getting_started/building_examples.md
@@ -425,7 +425,47 @@ Note: This currently works only on Linux, and please first follow

 ## Python

-### Prerequisite
+The MediaPipe Python package is available on
+[PyPI](https://pypi.org/project/mediapipe/) and can be installed on Linux and
+macOS simply with `pip install mediapipe`, as described below in
+[Run in Python interpreter](#run-in-python-interpreter) and in this
+[colab](https://mediapipe.page.link/mp-py-colab).
+
+### Run in Python interpreter
+
+Using [MediaPipe Pose](../solutions/pose.md) as an example:
+
+```bash
+# Activate a Python virtual environment.
+$ python3 -m venv mp_env && source mp_env/bin/activate
+
+# Install MediaPipe Python package
+(mp_env)$ pip install mediapipe
+
+# Run in Python interpreter
+(mp_env)$ python3
+>>> import mediapipe as mp
+>>> pose_tracker = mp.examples.UpperBodyPoseTracker()
+
+# For image input
+>>> pose_landmarks, _ = pose_tracker.run(input_file='/path/to/input/file', output_file='/path/to/output/file')
+>>> pose_landmarks, annotated_image = pose_tracker.run(input_file='/path/to/file')
+
+# For live camera input
+# (Press Esc within the output image window to stop the run or let it self terminate after 30 seconds.)
+>>> pose_tracker.run_live()
+
+# Close the tracker.
+>>> pose_tracker.close()
+```
+
+Tip: Use command `deactivate` to exit the Python virtual environment.
+
+### Building Python package from source
+
+Follow these steps only if you have local changes and need to build the Python
+package from source. Otherwise, we strongly encourage our users to simply run
+`pip install mediapipe`, which is more convenient and much faster.

 1. Make sure that Bazel and OpenCV are correctly installed and configured for
    MediaPipe. Please see [Installation](./install.md) for how to setup Bazel
@@ -445,50 +485,23 @@ Note: This currently works only on Linux, and please first follow
    $ brew install protobuf
    ```

-### Set up Python virtual environment.
-
-1. Activate a Python virtual environment.
+3. Activate a Python virtual environment.

    ```bash
    $ python3 -m venv mp_env && source mp_env/bin/activate
    ```

-2. In the virtual environment, go to the MediaPipe repo directory.
+4. In the virtual environment, go to the MediaPipe repo directory.

-3. Install the required Python packages.
+5. Install the required Python packages.

    ```bash
    (mp_env)mediapipe$ pip3 install -r requirements.txt
    ```

-4. Generate and install MediaPipe package.
+6. Generate and install MediaPipe package.

    ```bash
    (mp_env)mediapipe$ python3 setup.py gen_protos
-   (mp_env)mediapipe$ python3 setup.py install
+   (mp_env)mediapipe$ python3 setup.py install --link-opencv
    ```
-
-### Run in Python interpreter
-
-Make sure you are not in the MediaPipe repo directory.
- -Using [MediaPipe Pose](../solutions/pose.md) as an example: - -```bash -(mp_env)$ python3 ->>> import mediapipe as mp ->>> pose_tracker = mp.examples.UpperBodyPoseTracker() - -# For image input ->>> pose_landmarks, _ = pose_tracker.run(input_file='/path/to/input/file', output_file='/path/to/output/file') ->>> pose_landmarks, annotated_image = pose_tracker.run(input_file='/path/to/file') - -# For live camera input -# (Press Esc within the output image window to stop the run or let it self terminate after 30 seconds.) ->>> pose_tracker.run_live() - -# Close the tracker. ->>> pose_tracker.close() -``` - -Tip: Use command `deactivate` to exit the Python virtual environment. diff --git a/docs/images/mobile/instant_motion_tracking_android_small.gif b/docs/images/mobile/instant_motion_tracking_android_small.gif new file mode 100644 index 000000000..ff6d5537f Binary files /dev/null and b/docs/images/mobile/instant_motion_tracking_android_small.gif differ diff --git a/docs/index.md b/docs/index.md index ca446d0f4..3b67a53fa 100644 --- a/docs/index.md +++ b/docs/index.md @@ -22,32 +22,33 @@ desktop/cloud, web and IoT devices. ## ML solutions in MediaPipe -Face Detection | Face Mesh | Iris 🆕 | Hands | Pose 🆕 -:----------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------: | :----: -[![face_detection](images/mobile/face_detection_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/face_detection) | [![face_mesh](images/mobile/face_mesh_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/face_mesh) | [![iris](images/mobile/iris_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/iris) | [![hand](images/mobile/hand_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/hands) | [![pose](images/mobile/pose_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/pose) +Face Detection | Face Mesh | Iris | Hands | Pose | Hair Segmentation +:----------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------: | :---------------: +[![face_detection](images/mobile/face_detection_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/face_detection) | [![face_mesh](images/mobile/face_mesh_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/face_mesh) | [![iris](images/mobile/iris_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/iris) | [![hand](images/mobile/hand_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/hands) | [![pose](images/mobile/pose_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/pose) | 
[![hair_segmentation](images/mobile/hair_segmentation_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/hair_segmentation) -Hair Segmentation | Object Detection | Box Tracking | Objectron | KNIFT -:-------------------------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------: | :---: -[![hair_segmentation](images/mobile/hair_segmentation_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/hair_segmentation) | [![object_detection](images/mobile/object_detection_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/object_detection) | [![box_tracking](images/mobile/object_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/box_tracking) | [![objectron](images/mobile/objectron_chair_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/objectron) | [![knift](images/mobile/template_matching_android_cpu_small.gif)](https://google.github.io/mediapipe/solutions/knift) +Object Detection | Box Tracking | Instant Motion Tracking | Objectron | KNIFT +:----------------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------: | :---: +[![object_detection](images/mobile/object_detection_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/object_detection) | [![box_tracking](images/mobile/object_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/box_tracking) | [![instant_motion_tracking](images/mobile/instant_motion_tracking_android_small.gif)](https://google.github.io/mediapipe/solutions/instant_motion_tracking) | [![objectron](images/mobile/objectron_chair_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/objectron) | [![knift](images/mobile/template_matching_android_cpu_small.gif)](https://google.github.io/mediapipe/solutions/knift) -[]() | Android | iOS | Desktop | Python | Web | Coral -:---------------------------------------------------------------------------- | :-----: | :-: | :-----: | :----: | :-: | :---: -[Face Detection](https://google.github.io/mediapipe/solutions/face_detection) | ✅ | ✅ | ✅ | | ✅ | ✅ -[Face Mesh](https://google.github.io/mediapipe/solutions/face_mesh) | ✅ | ✅ | ✅ | | | -[Iris](https://google.github.io/mediapipe/solutions/iris) 🆕 | ✅ | ✅ | ✅ | | ✅ | -[Hands](https://google.github.io/mediapipe/solutions/hands) | ✅ | ✅ | ✅ | | ✅ | -[Pose](https://google.github.io/mediapipe/solutions/pose) 🆕 | ✅ | ✅ | ✅ | ✅ | ✅ | -[Hair Segmentation](https://google.github.io/mediapipe/solutions/hair_segmentation) | ✅ | | ✅ | | ✅ | -[Object Detection](https://google.github.io/mediapipe/solutions/object_detection) | ✅ | ✅ | ✅ | | | ✅ -[Box 
Tracking](https://google.github.io/mediapipe/solutions/box_tracking) | ✅ | ✅ | ✅ | | | -[Objectron](https://google.github.io/mediapipe/solutions/objectron) | ✅ | | | | | -[KNIFT](https://google.github.io/mediapipe/solutions/knift) | ✅ | | | | | -[AutoFlip](https://google.github.io/mediapipe/solutions/autoflip) | | | ✅ | | | -[MediaSequence](https://google.github.io/mediapipe/solutions/media_sequence) | | | ✅ | | | -[YouTube 8M](https://google.github.io/mediapipe/solutions/youtube_8m) | | | ✅ | | | +[]() | Android | iOS | Desktop | Python | Web | Coral +:---------------------------------------------------------------------------------------- | :-----: | :-: | :-----: | :----: | :-: | :---: +[Face Detection](https://google.github.io/mediapipe/solutions/face_detection) | ✅ | ✅ | ✅ | | ✅ | ✅ +[Face Mesh](https://google.github.io/mediapipe/solutions/face_mesh) | ✅ | ✅ | ✅ | | | +[Iris](https://google.github.io/mediapipe/solutions/iris) | ✅ | ✅ | ✅ | | ✅ | +[Hands](https://google.github.io/mediapipe/solutions/hands) | ✅ | ✅ | ✅ | | ✅ | +[Pose](https://google.github.io/mediapipe/solutions/pose) | ✅ | ✅ | ✅ | ✅ | ✅ | +[Hair Segmentation](https://google.github.io/mediapipe/solutions/hair_segmentation) | ✅ | | ✅ | | ✅ | +[Object Detection](https://google.github.io/mediapipe/solutions/object_detection) | ✅ | ✅ | ✅ | | | ✅ +[Box Tracking](https://google.github.io/mediapipe/solutions/box_tracking) | ✅ | ✅ | ✅ | | | +[Instant Motion Tracking](https://google.github.io/mediapipe/solutions/instant_motion_tracking) | ✅ | | | | | +[Objectron](https://google.github.io/mediapipe/solutions/objectron) | ✅ | | | | | +[KNIFT](https://google.github.io/mediapipe/solutions/knift) | ✅ | | | | | +[AutoFlip](https://google.github.io/mediapipe/solutions/autoflip) | | | ✅ | | | +[MediaSequence](https://google.github.io/mediapipe/solutions/media_sequence) | | | ✅ | | | +[YouTube 8M](https://google.github.io/mediapipe/solutions/youtube_8m) | | | ✅ | | | ## MediaPipe on the Web @@ -88,6 +89,8 @@ run code search using ## Publications +* [Instant Motion Tracking With MediaPipe](https://mediapipe.page.link/instant-motion-tracking-blog) + in Google Developers Blog * [BlazePose - On-device Real-time Body Pose Tracking](https://ai.googleblog.com/2020/08/on-device-real-time-body-pose-tracking.html) in Google AI Blog * [MediaPipe Iris: Real-time Eye Tracking and Depth Estimation](https://ai.googleblog.com/2020/08/mediapipe-iris-real-time-iris-tracking.html) diff --git a/docs/solutions/autoflip.md b/docs/solutions/autoflip.md index 48204d5b6..3dec7719b 100644 --- a/docs/solutions/autoflip.md +++ b/docs/solutions/autoflip.md @@ -2,7 +2,7 @@ layout: default title: AutoFlip (Saliency-aware Video Cropping) parent: Solutions -nav_order: 11 +nav_order: 12 --- # AutoFlip: Saliency-aware Video Cropping diff --git a/docs/solutions/instant_motion_tracking.md b/docs/solutions/instant_motion_tracking.md new file mode 100644 index 000000000..cf23a7b8c --- /dev/null +++ b/docs/solutions/instant_motion_tracking.md @@ -0,0 +1,122 @@ +--- +layout: default +title: Instant Motion Tracking +parent: Solutions +nav_order: 9 +--- + +# MediaPipe Instant Motion Tracking +{: .no_toc } + +1. TOC +{:toc} +--- + +## Overview + +Augmented Reality (AR) technology creates fun, engaging, and immersive user +experiences. The ability to perform AR tracking across devices and platforms, +without initialization, remains important to power AR applications at scale. 
+
+MediaPipe Instant Motion Tracking provides AR tracking across devices and
+platforms without initialization or calibration. It is built upon the
+[MediaPipe Box Tracking](./box_tracking.md) solution. With Instant Motion
+Tracking, you can easily place virtual 2D and 3D content on static or moving
+surfaces, allowing them to seamlessly interact with the real-world environment.
+
+![instant_motion_tracking_android_small](../images/mobile/instant_motion_tracking_android_small.gif) |
+:-----------------------------------------------------------------------: |
+*Fig 1. Instant Motion Tracking is used to augment the world with a 3D sticker.* |
+
+## Pipeline
+
+The Instant Motion Tracking pipeline is implemented as a MediaPipe
+[graph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/instant_motion_tracking/instant_motion_tracking.pbtxt),
+which internally utilizes a
+[RegionTrackingSubgraph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/instant_motion_tracking/subgraphs/region_tracking.pbtxt)
+to perform anchor tracking for each individual 3D sticker.
+
+We first use a
+[StickerManagerCalculator](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/instant_motion_tracking/calculators/sticker_manager_calculator.cc)
+to prepare the individual sticker data for the rest of the application. This
+information is then sent to the
+[RegionTrackingSubgraph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/instant_motion_tracking/subgraphs/region_tracking.pbtxt)
+that performs 3D region tracking for sticker placement and rendering. Once
+acquired, our tracked sticker regions are sent with user transformations (i.e.,
+user gestures to rotate and zoom the sticker) and IMU data to the
+[MatricesManagerCalculator](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/instant_motion_tracking/calculators/matrices_manager_calculator.cc),
+which turns all our sticker transformation data into a set of model matrices.
+These model matrices are consumed directly as an input stream by the
+[GlAnimationOverlayCalculator](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/object_detection_3d/calculators/gl_animation_overlay_calculator.cc),
+which renders the provided texture and object file according to our matrix
+specifications. The output of
+[GlAnimationOverlayCalculator](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/object_detection_3d/calculators/gl_animation_overlay_calculator.cc)
+is a video stream depicting the virtual 3D content rendered on top of the real
+world, creating immersive AR experiences for users.
+
+## Using Instant Motion Tracking
+
+With the Instant Motion Tracking MediaPipe [graph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/instant_motion_tracking/instant_motion_tracking.pbtxt),
+an application can create an interactive and realistic AR experience by
+specifying the required input streams, side packets, and output streams.
+The input streams are the following:
+
+* Input Video (GpuBuffer): Video frames to render augmented stickers onto.
+* Rotation Matrix (9-element Float Array): The 3x3 row-major rotation
+matrix from the device IMU, used to determine the proper orientation of the
+device.
+* Sticker Proto String (String): A string representing the
+serialized [sticker buffer protobuf message](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/instant_motion_tracking/calculators/sticker_buffer.proto),
+containing a list of all stickers and their attributes.
+  * Each sticker in the protobuf message has a unique ID used to find its
+  associated anchors and transforms, an initial anchor placement in a
+  normalized [0.0, 1.0] 3D space, a user rotation and user scaling transform
+  on the sticker, and an integer indicating which type of object to render
+  for the sticker (e.g. 3D asset or GIF).
+* Sticker Sentinel (Integer): When an anchor must be initially placed or
+repositioned, this value must be set to the ID of the anchor to reset, taken
+from the sticker buffer protobuf message. If no valid ID is provided, the
+system will simply maintain tracking.
+
+Side packets are also an integral part of the Instant Motion Tracking solution
+to provide device-specific information for the rendering system:
+
+* Field of View (Float): The field of view of the camera in radians.
+* Aspect Ratio (Float): The aspect ratio (width / height) of the camera frames
+  (this ratio corresponds to the image frames themselves, not necessarily the
+  screen bounds).
+* Object Asset (String): The
+  [GlAnimationOverlayCalculator](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/object_detection_3d/calculators/gl_animation_overlay_calculator.cc)
+  must be provided with an associated asset file name pointing to the 3D model
+  to render in the viewfinder.
+* (Optional) Texture (ImageFrame on Android, GpuBuffer on iOS): Textures for
+  the
+  [GlAnimationOverlayCalculator](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/object_detection_3d/calculators/gl_animation_overlay_calculator.cc)
+  can be provided either via an input stream (dynamic texturing) or as a side
+  packet (unchanging texture).
+
+The rendering system for Instant Motion Tracking is powered by OpenGL. For
+more information regarding the structure of model matrices and OpenGL
+rendering, please visit the [OpenGL Wiki](https://www.khronos.org/opengl/wiki/).
+With the specifications above, the Instant Motion Tracking capabilities can be
+adapted to any device that can run the MediaPipe framework and has a working
+IMU and a connected camera.
+
+## Example Apps
+
+Please first see general instructions for
+[Android](../getting_started/building_examples.md#android) on how to build
+MediaPipe examples.
+
+* Graph: [mediapipe/graphs/instant_motion_tracking/instant_motion_tracking.pbtxt](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/instant_motion_tracking/instant_motion_tracking.pbtxt)
+
+* Android target (or download prebuilt [ARM64 APK](https://drive.google.com/file/d/1KnaBBoKpCHR73nOBJ4fL_YdWVTAcwe6L/view?usp=sharing)):
+[`mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking:instantmotiontracking`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/BUILD)
+
+## Resources
+
+* Google Developers Blog:
+  [Instant Motion Tracking With MediaPipe](https://mediapipe.page.link/instant-motion-tracking-blog)
+* Google AI Blog:
+  [The Instant Motion Tracking Behind Motion Stills AR](https://ai.googleblog.com/2018/02/the-instant-motion-tracking-behind.html)
+* Paper:
+  [Instant Motion Tracking and Its Applications to Augmented Reality](https://arxiv.org/abs/1907.06796)
diff --git a/docs/solutions/iris.md b/docs/solutions/iris.md
index 6aae833e3..8bf207402 100644
--- a/docs/solutions/iris.md
+++ b/docs/solutions/iris.md
@@ -55,7 +55,7 @@ that uses a
 from the
 [face landmark module](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_landmark),
 an
-[iris landmark subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/modules/iris_tracking/iris_landmark_left_and_right_gpu.pbtxt)
+[iris landmark subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/modules/iris_landmark/iris_landmark_left_and_right_gpu.pbtxt)
 from the
 [iris landmark module](https://github.com/google/mediapipe/tree/master/mediapipe/modules/iris_landmark),
 and renders using a dedicated
@@ -72,6 +72,11 @@ Note: To visualize a graph, copy the graph and paste it into
 to visualize its associated subgraphs, please see
 [visualizer documentation](../tools/visualizer.md).

+The output of the pipeline is a set of 478 3D landmarks, including 468 face
+landmarks from [MediaPipe Face Mesh](./face_mesh.md), with those around the eyes
+further refined (see Fig 2), and 10 additional iris landmarks appended at the
+end (5 for each eye, also shown in Fig 2).
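+
+A minimal sketch of the resulting index layout is shown below (illustrative
+only, not part of the solution's API; in particular, which eye's 5-landmark
+block comes first is an assumption here, not stated above):
+
+```python
+NUM_FACE_LANDMARKS = 468  # face mesh landmarks, with the eye regions refined
+NUM_IRIS_LANDMARKS = 5    # iris landmarks per eye, appended at the end
+
+def split_iris_output(landmarks):
+    # 478 = 468 face landmarks + 2 * 5 iris landmarks.
+    assert len(landmarks) == NUM_FACE_LANDMARKS + 2 * NUM_IRIS_LANDMARKS
+    face = landmarks[:NUM_FACE_LANDMARKS]
+    iris_first_eye = landmarks[468:473]   # first appended block of 5
+    iris_second_eye = landmarks[473:478]  # second appended block of 5
+    return face, iris_first_eye, iris_second_eye
+```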
+
 ## Models

 ### Face Detection Model
diff --git a/docs/solutions/knift.md b/docs/solutions/knift.md
index 82e84fd28..8e4ed98b0 100644
--- a/docs/solutions/knift.md
+++ b/docs/solutions/knift.md
@@ -2,7 +2,7 @@
 layout: default
 title: KNIFT (Template-based Feature Matching)
 parent: Solutions
-nav_order: 10
+nav_order: 11
 ---

 # MediaPipe KNIFT
diff --git a/docs/solutions/media_sequence.md b/docs/solutions/media_sequence.md
index 7085f090b..16a2278cd 100644
--- a/docs/solutions/media_sequence.md
+++ b/docs/solutions/media_sequence.md
@@ -2,7 +2,7 @@
 layout: default
 title: Dataset Preparation with MediaSequence
 parent: Solutions
-nav_order: 12
+nav_order: 13
 ---

 # Dataset Preparation with MediaSequence
diff --git a/docs/solutions/objectron.md b/docs/solutions/objectron.md
index 5029917dd..4c18f9f0f 100644
--- a/docs/solutions/objectron.md
+++ b/docs/solutions/objectron.md
@@ -2,7 +2,7 @@
 layout: default
 title: Objectron (3D Object Detection)
 parent: Solutions
-nav_order: 9
+nav_order: 10
 ---

 # MediaPipe Objectron
@@ -161,7 +161,7 @@ to visualize its associated subgraphs, please see
 ### Objectron for Shoes

 * Graph:
-  [`mediapipe/graphs/hair_segmentation/hair_segmentation_mobile_gpu.pbtxt`](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/object_detection_3d/shoe_classic_occlusion_tracking.pbtxt)
+  [`mediapipe/graphs/object_detection_3d/shoe_classic_occlusion_tracking.pbtxt`](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/object_detection_3d/shoe_classic_occlusion_tracking.pbtxt)
 * Android target:
   [(or download prebuilt ARM64 APK)](https://drive.google.com/open?id=1S0K4hbWt3o31FfQ4QU3Rz7IHrvOUMx1d)
   [`mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d:objectdetection3d`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/BUILD)
diff --git a/docs/solutions/pose.md b/docs/solutions/pose.md
index 7e8216662..6b3fa3868 100644
--- a/docs/solutions/pose.md
+++ b/docs/solutions/pose.md
@@ -142,10 +142,21 @@ MediaPipe examples.

 ### Python

-Please first see general instructions for
-[Python](../getting_started/building_examples.md#python) examples.
+The MediaPipe Python package is available on
+[PyPI](https://pypi.org/project/mediapipe/) and can be installed on Linux and
+macOS simply with `pip install mediapipe`, as described below and in this
+[colab](https://mediapipe.page.link/mp-py-colab). If you do need to build the
+Python package from source, see
+[additional instructions](../getting_started/building_examples.md#python).

 ```bash
+# Activate a Python virtual environment.
+$ python3 -m venv mp_env && source mp_env/bin/activate
+
+# Install MediaPipe Python package
+(mp_env)$ pip install mediapipe
+
+# Run in Python interpreter
 (mp_env)$ python3
 >>> import mediapipe as mp
 >>> pose_tracker = mp.examples.UpperBodyPoseTracker()
@@ -153,6 +164,9 @@ Please first see general instructions for
 # For image input
 >>> pose_landmarks, _ = pose_tracker.run(input_file='/path/to/input/file', output_file='/path/to/output/file')
 >>> pose_landmarks, annotated_image = pose_tracker.run(input_file='/path/to/file')
+# To print out all the pose landmarks at once, simply do "print(pose_landmarks)".
+# However, the individual data points can be accessed as shown below.
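+# Each entry of pose_landmarks.landmark is a single landmark with x, y, z and
+# visibility fields. For example, inspect the first entry (illustrative only;
+# index 0 is simply the first element of the list, not a named body part):
+>>> print(pose_landmarks.landmark[0])
+# Or iterate over all of them: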
+>>> [print('x is', data_point.x, 'y is', data_point.y, 'z is', data_point.z, 'visibility is', data_point.visibility) for data_point in pose_landmarks.landmark] # For live camera input # (Press Esc within the output image window to stop the run or let it self terminate after 30 seconds.) @@ -162,6 +176,8 @@ Please first see general instructions for >>> pose_tracker.close() ``` +Tip: Use command `deactivate` to exit the Python virtual environment. + ### Web Please refer to [these instructions](../index.md#mediapipe-on-the-web). diff --git a/docs/solutions/solutions.md b/docs/solutions/solutions.md index e1822e4c0..6a852b751 100644 --- a/docs/solutions/solutions.md +++ b/docs/solutions/solutions.md @@ -16,18 +16,19 @@ has_toc: false -[]() | Android | iOS | Desktop | Python | Web | Coral -:---------------------------------------------------------------------------- | :-----: | :-: | :-----: | :----: | :-: | :---: -[Face Detection](https://google.github.io/mediapipe/solutions/face_detection) | ✅ | ✅ | ✅ | | ✅ | ✅ -[Face Mesh](https://google.github.io/mediapipe/solutions/face_mesh) | ✅ | ✅ | ✅ | | | -[Iris](https://google.github.io/mediapipe/solutions/iris) 🆕 | ✅ | ✅ | ✅ | | ✅ | -[Hands](https://google.github.io/mediapipe/solutions/hands) | ✅ | ✅ | ✅ | | ✅ | -[Pose](https://google.github.io/mediapipe/solutions/pose) 🆕 | ✅ | ✅ | ✅ | ✅ | ✅ | -[Hair Segmentation](https://google.github.io/mediapipe/solutions/hair_segmentation) | ✅ | | ✅ | | ✅ | -[Object Detection](https://google.github.io/mediapipe/solutions/object_detection) | ✅ | ✅ | ✅ | | | ✅ -[Box Tracking](https://google.github.io/mediapipe/solutions/box_tracking) | ✅ | ✅ | ✅ | | | -[Objectron](https://google.github.io/mediapipe/solutions/objectron) | ✅ | | | | | -[KNIFT](https://google.github.io/mediapipe/solutions/knift) | ✅ | | | | | -[AutoFlip](https://google.github.io/mediapipe/solutions/autoflip) | | | ✅ | | | -[MediaSequence](https://google.github.io/mediapipe/solutions/media_sequence) | | | ✅ | | | -[YouTube 8M](https://google.github.io/mediapipe/solutions/youtube_8m) | | | ✅ | | | +[]() | Android | iOS | Desktop | Python | Web | Coral +:---------------------------------------------------------------------------------------- | :-----: | :-: | :-----: | :----: | :-: | :---: +[Face Detection](https://google.github.io/mediapipe/solutions/face_detection) | ✅ | ✅ | ✅ | | ✅ | ✅ +[Face Mesh](https://google.github.io/mediapipe/solutions/face_mesh) | ✅ | ✅ | ✅ | | | +[Iris](https://google.github.io/mediapipe/solutions/iris) | ✅ | ✅ | ✅ | | ✅ | +[Hands](https://google.github.io/mediapipe/solutions/hands) | ✅ | ✅ | ✅ | | ✅ | +[Pose](https://google.github.io/mediapipe/solutions/pose) | ✅ | ✅ | ✅ | ✅ | ✅ | +[Hair Segmentation](https://google.github.io/mediapipe/solutions/hair_segmentation) | ✅ | | ✅ | | ✅ | +[Object Detection](https://google.github.io/mediapipe/solutions/object_detection) | ✅ | ✅ | ✅ | | | ✅ +[Box Tracking](https://google.github.io/mediapipe/solutions/box_tracking) | ✅ | ✅ | ✅ | | | +[Instant Motion Tracking](https://google.github.io/mediapipe/solutions/instant_motion_tracking) | ✅ | | | | | +[Objectron](https://google.github.io/mediapipe/solutions/objectron) | ✅ | | | | | +[KNIFT](https://google.github.io/mediapipe/solutions/knift) | ✅ | | | | | +[AutoFlip](https://google.github.io/mediapipe/solutions/autoflip) | | | ✅ | | | +[MediaSequence](https://google.github.io/mediapipe/solutions/media_sequence) | | | ✅ | | | +[YouTube 8M](https://google.github.io/mediapipe/solutions/youtube_8m) | | | ✅ | | | diff --git 
a/docs/solutions/youtube_8m.md b/docs/solutions/youtube_8m.md
index 1c1637bff..f6d05bbca 100644
--- a/docs/solutions/youtube_8m.md
+++ b/docs/solutions/youtube_8m.md
@@ -2,7 +2,7 @@
 layout: default
 title: YouTube-8M Feature Extraction and Model Inference
 parent: Solutions
-nav_order: 13
+nav_order: 14
 ---

 # YouTube-8M Feature Extraction and Model Inference
diff --git a/docs/tools/tracing_and_profiling.md b/docs/tools/tracing_and_profiling.md
index 2c05abfe4..055993349 100644
--- a/docs/tools/tracing_and_profiling.md
+++ b/docs/tools/tracing_and_profiling.md
@@ -144,10 +144,13 @@ we record ten intervals of half a second each. This can be overridden by adding

   ```bash
   profiler_config {
     trace_enabled: true
-    trace_log_path: "/sdcard/profiles"
+    trace_log_path: "/sdcard/profiles/"
   }
   ```

+  Note: The forward slash at the end of the `trace_log_path` is necessary to
+  indicate that `profiles` is a directory (which *should* already exist).
+
 * Download the trace files from the device.

   ```bash
diff --git a/mediapipe/__init__.py b/mediapipe/__init__.py
index c0a275823..69d7dfc6f 100644
--- a/mediapipe/__init__.py
+++ b/mediapipe/__init__.py
@@ -12,7 +12,3 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 """
-
-import mediapipe.examples.python as examples
-from mediapipe.python import *
-import mediapipe.util as util
diff --git a/mediapipe/calculators/audio/BUILD b/mediapipe/calculators/audio/BUILD
index 0408be420..b32529b79 100644
--- a/mediapipe/calculators/audio/BUILD
+++ b/mediapipe/calculators/audio/BUILD
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-licenses(["notice"])  # Apache 2.0
+licenses(["notice"])

 package(default_visibility = ["//visibility:private"])

diff --git a/mediapipe/calculators/audio/testdata/BUILD b/mediapipe/calculators/audio/testdata/BUILD
index 64f6ccf63..ae679d029 100644
--- a/mediapipe/calculators/audio/testdata/BUILD
+++ b/mediapipe/calculators/audio/testdata/BUILD
@@ -13,7 +13,7 @@
 # limitations under the License.
 #

-licenses(["notice"])  # Apache 2.0
+licenses(["notice"])

 filegroup(
     name = "test_audios",
diff --git a/mediapipe/calculators/core/BUILD b/mediapipe/calculators/core/BUILD
index 1dddd7527..7f9ffd7f8 100644
--- a/mediapipe/calculators/core/BUILD
+++ b/mediapipe/calculators/core/BUILD
@@ -15,7 +15,7 @@

 load("//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library")

-licenses(["notice"])  # Apache 2.0
+licenses(["notice"])

 package(default_visibility = ["//visibility:private"])

@@ -290,7 +290,9 @@ cc_library(
     deps = [
         ":concatenate_vector_calculator_cc_proto",
         "//mediapipe/framework:calculator_framework",
+        "//mediapipe/framework/formats:classification_cc_proto",
         "//mediapipe/framework/formats:landmark_cc_proto",
+        "//mediapipe/framework/port:integral_types",
         "//mediapipe/framework/port:ret_check",
         "//mediapipe/framework/port:status",
         "@org_tensorflow//tensorflow/lite:framework",
@@ -1119,6 +1121,7 @@ cc_library(
         ":constant_side_packet_calculator_cc_proto",
         "//mediapipe/framework:calculator_framework",
         "//mediapipe/framework:collection_item_id",
+        "//mediapipe/framework/port:integral_types",
         "//mediapipe/framework/port:ret_check",
         "//mediapipe/framework/port:status",
     ],
diff --git a/mediapipe/calculators/core/concatenate_vector_calculator.cc b/mediapipe/calculators/core/concatenate_vector_calculator.cc
index ca73e341e..c57f84f1e 100644
--- a/mediapipe/calculators/core/concatenate_vector_calculator.cc
+++ b/mediapipe/calculators/core/concatenate_vector_calculator.cc
@@ -16,7 +16,9 @@

 #include <vector>

+#include "mediapipe/framework/formats/classification.pb.h"
 #include "mediapipe/framework/formats/landmark.pb.h"
+#include "mediapipe/framework/port/integral_types.h"
 #include "tensorflow/lite/interpreter.h"

 #if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE)
@@ -45,6 +47,9 @@ REGISTER_CALCULATOR(ConcatenateFloatVectorCalculator);
 typedef ConcatenateVectorCalculator<int32> ConcatenateInt32VectorCalculator;
 REGISTER_CALCULATOR(ConcatenateInt32VectorCalculator);

+typedef ConcatenateVectorCalculator<uint64> ConcatenateUInt64VectorCalculator;
+REGISTER_CALCULATOR(ConcatenateUInt64VectorCalculator);
+
 // Example config:
 // node {
 //   calculator: "ConcatenateTfLiteTensorVectorCalculator"
@@ -60,6 +65,14 @@ typedef ConcatenateVectorCalculator<::mediapipe::NormalizedLandmark>
     ConcatenateLandmarkVectorCalculator;
 REGISTER_CALCULATOR(ConcatenateLandmarkVectorCalculator);

+typedef ConcatenateVectorCalculator<::mediapipe::NormalizedLandmarkList>
+    ConcatenateLandmarListVectorCalculator;
+REGISTER_CALCULATOR(ConcatenateLandmarListVectorCalculator);
+
+typedef ConcatenateVectorCalculator<mediapipe::ClassificationList>
+    ConcatenateClassificationListVectorCalculator;
+REGISTER_CALCULATOR(ConcatenateClassificationListVectorCalculator);
+
 #if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE)
 typedef ConcatenateVectorCalculator<::tflite::gpu::gl::GlBuffer>
     ConcatenateGlBufferVectorCalculator;
diff --git a/mediapipe/calculators/core/concatenate_vector_calculator.h b/mediapipe/calculators/core/concatenate_vector_calculator.h
index 08e8e954f..ef72cb0dc 100644
--- a/mediapipe/calculators/core/concatenate_vector_calculator.h
+++ b/mediapipe/calculators/core/concatenate_vector_calculator.h
@@ -15,6 +15,7 @@
 #ifndef MEDIAPIPE_CALCULATORS_CORE_CONCATENATE_VECTOR_CALCULATOR_H_
 #define MEDIAPIPE_CALCULATORS_CORE_CONCATENATE_VECTOR_CALCULATOR_H_

+#include <memory>
 #include <type_traits>
 #include <vector>

@@ -26,10 +27,10 @@

 namespace mediapipe {

-// Concatenates several std::vector<T> following stream index order. This class
-// assumes that every input stream contains the vector<T> type. To use this
-// class for a particular type T, regisiter a calculator using
-// ConcatenateVectorCalculator<T>.
+// Concatenates several objects of type T or std::vector<T> following stream
+// index order. This class assumes that every input stream contains either T or
+// vector<T> type. To use this class for a particular type T, register a
+// calculator using ConcatenateVectorCalculator<T>.
 template <typename T>
 class ConcatenateVectorCalculator : public CalculatorBase {
  public:
@@ -38,7 +39,8 @@ class ConcatenateVectorCalculator : public CalculatorBase {
     RET_CHECK(cc->Outputs().NumEntries() == 1);

     for (int i = 0; i < cc->Inputs().NumEntries(); ++i) {
-      cc->Inputs().Index(i).Set<std::vector<T>>();
+      // Actual type T or vector<T> will be validated in Process().
+      cc->Inputs().Index(i).SetAny();
     }

     cc->Outputs().Index(0).Set<std::vector<T>>();
@@ -69,9 +71,19 @@ class ConcatenateVectorCalculator : public CalculatorBase {
                                          CalculatorContext* cc) {
     auto output = absl::make_unique<std::vector<U>>();
     for (int i = 0; i < cc->Inputs().NumEntries(); ++i) {
-      if (cc->Inputs().Index(i).IsEmpty()) continue;
-      const std::vector<U>& input = cc->Inputs().Index(i).Get<std::vector<U>>();
-      output->insert(output->end(), input.begin(), input.end());
+      auto& input = cc->Inputs().Index(i);
+
+      if (input.IsEmpty()) continue;
+
+      if (input.Value().ValidateAsType<U>().ok()) {
+        const U& value = input.Get<U>();
+        output->push_back(value);
+      } else if (input.Value().ValidateAsType<std::vector<U>>().ok()) {
+        const std::vector<U>& value = input.Get<std::vector<U>>();
+        output->insert(output->end(), value.begin(), value.end());
+      } else {
+        return ::mediapipe::InvalidArgumentError("Invalid input stream type.");
+      }
     }
     cc->Outputs().Index(0).Add(output.release(), cc->InputTimestamp());
     return ::mediapipe::OkStatus();
@@ -88,17 +100,32 @@ class ConcatenateVectorCalculator : public CalculatorBase {
                                                    CalculatorContext* cc) {
     auto output = absl::make_unique<std::vector<U>>();
     for (int i = 0; i < cc->Inputs().NumEntries(); ++i) {
-      if (cc->Inputs().Index(i).IsEmpty()) continue;
-      ::mediapipe::StatusOr<std::unique_ptr<std::vector<U>>> input_status =
-          cc->Inputs().Index(i).Value().Consume<std::vector<U>>();
-      if (input_status.ok()) {
-        std::unique_ptr<std::vector<U>> input_vector =
-            std::move(input_status).ValueOrDie();
-        output->insert(output->end(),
-                       std::make_move_iterator(input_vector->begin()),
-                       std::make_move_iterator(input_vector->end()));
+      auto& input = cc->Inputs().Index(i);
+
+      if (input.IsEmpty()) continue;
+
+      if (input.Value().ValidateAsType<U>().ok()) {
+        ::mediapipe::StatusOr<std::unique_ptr<U>> value_status =
+            input.Value().Consume<U>();
+        if (value_status.ok()) {
+          std::unique_ptr<U> value = std::move(value_status).ValueOrDie();
+          output->push_back(std::move(*value));
+        } else {
+          return value_status.status();
+        }
+      } else if (input.Value().ValidateAsType<std::vector<U>>().ok()) {
+        ::mediapipe::StatusOr<std::unique_ptr<std::vector<U>>> value_status =
+            input.Value().Consume<std::vector<U>>();
+        if (value_status.ok()) {
+          std::unique_ptr<std::vector<U>> value =
+              std::move(value_status).ValueOrDie();
+          output->insert(output->end(), std::make_move_iterator(value->begin()),
+                         std::make_move_iterator(value->end()));
+        } else {
+          return value_status.status();
+        }
       } else {
-        return input_status.status();
+        return ::mediapipe::InvalidArgumentError("Invalid input stream type.");
       }
     }
     cc->Outputs().Index(0).Add(output.release(), cc->InputTimestamp());
@@ -109,7 +136,7 @@ class ConcatenateVectorCalculator : public CalculatorBase {
   ::mediapipe::Status ConsumeAndConcatenateVectors(std::false_type,
                                                    CalculatorContext* cc) {
     return ::mediapipe::InternalError(
-        "Cannot copy or move input vectors to concatenate them");
+        "Cannot copy or move inputs to concatenate them");
   }

  private:
diff --git a/mediapipe/calculators/core/concatenate_vector_calculator_test.cc b/mediapipe/calculators/core/concatenate_vector_calculator_test.cc
index 4b27c2030..eaf23700c 100644
--- a/mediapipe/calculators/core/concatenate_vector_calculator_test.cc
+++ b/mediapipe/calculators/core/concatenate_vector_calculator_test.cc
@@ -30,11 +30,29 @@ namespace mediapipe {

 typedef ConcatenateVectorCalculator<int> TestConcatenateIntVectorCalculator;
 REGISTER_CALCULATOR(TestConcatenateIntVectorCalculator);

+void AddInputVector(int index, const std::vector<int>& input, int64 timestamp,
+                    CalculatorRunner* runner) {
+  runner->MutableInputs()->Index(index).packets.push_back(
+      MakePacket<std::vector<int>>(input).At(Timestamp(timestamp)));
+}
+
 void AddInputVectors(const std::vector<std::vector<int>>& inputs,
                      int64 timestamp, CalculatorRunner* runner) {
   for (int i = 0; i < inputs.size(); ++i) {
-    runner->MutableInputs()->Index(i).packets.push_back(
-        MakePacket<std::vector<int>>(inputs[i]).At(Timestamp(timestamp)));
+    AddInputVector(i, inputs[i], timestamp, runner);
+  }
+}
+
+void AddInputItem(int index, int input, int64 timestamp,
+                  CalculatorRunner* runner) {
+  runner->MutableInputs()->Index(index).packets.push_back(
+      MakePacket<int>(input).At(Timestamp(timestamp)));
+}
+
+void AddInputItems(const std::vector<int>& inputs, int64 timestamp,
+                   CalculatorRunner* runner) {
+  for (int i = 0; i < inputs.size(); ++i) {
+    AddInputItem(i, inputs[i], timestamp, runner);
   }
 }

@@ -131,6 +149,135 @@ TEST(TestConcatenateIntVectorCalculatorTest, OneEmptyStreamNoOutput) {
   EXPECT_EQ(0, outputs.size());
 }

+TEST(TestConcatenateIntVectorCalculatorTest, ItemsOneTimestamp) {
+  CalculatorRunner runner("TestConcatenateIntVectorCalculator",
+                          /*options_string=*/"", /*num_inputs=*/3,
+                          /*num_outputs=*/1, /*num_side_packets=*/0);
+
+  std::vector<int> inputs = {1, 2, 3};
+  AddInputItems(inputs, /*timestamp=*/1, &runner);
+  MP_ASSERT_OK(runner.Run());
+
+  const std::vector<Packet>& outputs = runner.Outputs().Index(0).packets;
+  EXPECT_EQ(1, outputs.size());
+  EXPECT_EQ(Timestamp(1), outputs[0].Timestamp());
+  std::vector<int> expected_vector = {1, 2, 3};
+  EXPECT_EQ(expected_vector, outputs[0].Get<std::vector<int>>());
+}
+
+TEST(TestConcatenateIntVectorCalculatorTest, ItemsTwoInputsAtTwoTimestamps) {
+  CalculatorRunner runner("TestConcatenateIntVectorCalculator",
+                          /*options_string=*/"", /*num_inputs=*/3,
+                          /*num_outputs=*/1, /*num_side_packets=*/0);
+
+  {
+    std::vector<int> inputs = {1, 2, 3};
+    AddInputItems(inputs, /*timestamp=*/1, &runner);
+  }
+  {
+    std::vector<int> inputs = {4, 5, 6};
+    AddInputItems(inputs, /*timestamp=*/2, &runner);
+  }
+  MP_ASSERT_OK(runner.Run());
+
+  const std::vector<Packet>& outputs = runner.Outputs().Index(0).packets;
+  EXPECT_EQ(2, outputs.size());
+  {
+    EXPECT_EQ(3, outputs[0].Get<std::vector<int>>().size());
+    EXPECT_EQ(Timestamp(1), outputs[0].Timestamp());
+    std::vector<int> expected_vector = {1, 2, 3};
+    EXPECT_EQ(expected_vector, outputs[0].Get<std::vector<int>>());
+  }
+  {
+    EXPECT_EQ(3, outputs[1].Get<std::vector<int>>().size());
+    EXPECT_EQ(Timestamp(2), outputs[1].Timestamp());
+    std::vector<int> expected_vector = {4, 5, 6};
+    EXPECT_EQ(expected_vector, outputs[1].Get<std::vector<int>>());
+  }
+}
+
+TEST(TestConcatenateIntVectorCalculatorTest, ItemsOneEmptyStreamStillOutput) {
+  CalculatorRunner runner("TestConcatenateIntVectorCalculator",
+                          /*options_string=*/"", /*num_inputs=*/3,
+                          /*num_outputs=*/1, /*num_side_packets=*/0);
+
+  // No third input item.
+  std::vector<int> inputs = {1, 2};
+  AddInputItems(inputs, /*timestamp=*/1, &runner);
+  MP_ASSERT_OK(runner.Run());
+
+  const std::vector<Packet>& outputs = runner.Outputs().Index(0).packets;
+  EXPECT_EQ(1, outputs.size());
+  EXPECT_EQ(Timestamp(1), outputs[0].Timestamp());
+  std::vector<int> expected_vector = {1, 2};
+  EXPECT_EQ(expected_vector, outputs[0].Get<std::vector<int>>());
+}
+
+TEST(TestConcatenateIntVectorCalculatorTest, ItemsOneEmptyStreamNoOutput) {
+  CalculatorRunner runner("TestConcatenateIntVectorCalculator",
+                          /*options_string=*/
+                          "[mediapipe.ConcatenateVectorCalculatorOptions.ext]: "
+                          "{only_emit_if_all_present: true}",
+                          /*num_inputs=*/3,
+                          /*num_outputs=*/1, /*num_side_packets=*/0);
+
+  // No third input item.
+  std::vector<int> inputs = {1, 2};
+  AddInputItems(inputs, /*timestamp=*/1, &runner);
+  MP_ASSERT_OK(runner.Run());
+
+  const std::vector<Packet>& outputs = runner.Outputs().Index(0).packets;
+  EXPECT_EQ(0, outputs.size());
+}
+
+TEST(TestConcatenateIntVectorCalculatorTest, MixedVectorsAndItems) {
+  CalculatorRunner runner("TestConcatenateIntVectorCalculator",
+                          /*options_string=*/"", /*num_inputs=*/4,
+                          /*num_outputs=*/1, /*num_side_packets=*/0);
+
+  std::vector<int> vector_0 = {1, 2};
+  std::vector<int> vector_1 = {3, 4, 5};
+  int item_0 = 6;
+  int item_1 = 7;
+
+  AddInputVector(/*index*/ 0, vector_0, /*timestamp=*/1, &runner);
+  AddInputVector(/*index*/ 1, vector_1, /*timestamp=*/1, &runner);
+  AddInputItem(/*index*/ 2, item_0, /*timestamp=*/1, &runner);
+  AddInputItem(/*index*/ 3, item_1, /*timestamp=*/1, &runner);
+
+  MP_ASSERT_OK(runner.Run());
+
+  const std::vector<Packet>& outputs = runner.Outputs().Index(0).packets;
+  EXPECT_EQ(1, outputs.size());
+  EXPECT_EQ(Timestamp(1), outputs[0].Timestamp());
+  std::vector<int> expected_vector = {1, 2, 3, 4, 5, 6, 7};
+  EXPECT_EQ(expected_vector, outputs[0].Get<std::vector<int>>());
+}
+
+TEST(TestConcatenateIntVectorCalculatorTest, MixedVectorsAndItemsAnother) {
+  CalculatorRunner runner("TestConcatenateIntVectorCalculator",
+                          /*options_string=*/"", /*num_inputs=*/4,
+                          /*num_outputs=*/1, /*num_side_packets=*/0);
+
+  int item_0 = 1;
+  std::vector<int> vector_0 = {2, 3};
+  std::vector<int> vector_1 = {4, 5, 6};
+  int item_1 = 7;
+
+  AddInputItem(/*index*/ 0, item_0, /*timestamp=*/1, &runner);
+  AddInputVector(/*index*/ 1, vector_0, /*timestamp=*/1, &runner);
+  AddInputVector(/*index*/ 2, vector_1, /*timestamp=*/1, &runner);
+  AddInputItem(/*index*/ 3, item_1, /*timestamp=*/1, &runner);
+
+  MP_ASSERT_OK(runner.Run());
+
+  const std::vector<Packet>& outputs = runner.Outputs().Index(0).packets;
+  EXPECT_EQ(1, outputs.size());
+  EXPECT_EQ(Timestamp(1), outputs[0].Timestamp());
+  std::vector<int> expected_vector = {1, 2, 3, 4, 5, 6, 7};
+  EXPECT_EQ(expected_vector, outputs[0].Get<std::vector<int>>());
+}
+
 void AddInputVectors(const std::vector<std::vector<float>>& inputs,
                      int64 timestamp, CalculatorRunner* runner) {
   for (int i = 0; i < inputs.size(); ++i) {
diff --git a/mediapipe/calculators/core/constant_side_packet_calculator.cc b/mediapipe/calculators/core/constant_side_packet_calculator.cc
index 2a60a2fd1..7541ccd66 100644
--- a/mediapipe/calculators/core/constant_side_packet_calculator.cc
+++ b/mediapipe/calculators/core/constant_side_packet_calculator.cc
@@ -18,6 +18,7 @@
 #include "mediapipe/framework/calculator_framework.h"
 #include "mediapipe/framework/collection_item_id.h"
 #include "mediapipe/framework/port/canonical_errors.h"
+#include "mediapipe/framework/port/integral_types.h"
 #include "mediapipe/framework/port/ret_check.h"
 #include "mediapipe/framework/port/status.h"

@@ -71,6 +72,8 @@ class ConstantSidePacketCalculator : public CalculatorBase {
diff --git a/mediapipe/calculators/image/BUILD b/mediapipe/calculators/image/BUILD
index 3cefe9439..a14bd31d1 100644
--- a/mediapipe/calculators/image/BUILD
+++ b/mediapipe/calculators/image/BUILD
@@ -14,7 +14,7 @@
 load("//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library")
 
-licenses(["notice"])  # Apache 2.0
+licenses(["notice"])
 
 package(default_visibility = ["//visibility:private"])
 
diff --git a/mediapipe/calculators/image/testdata/BUILD b/mediapipe/calculators/image/testdata/BUILD
index a44f28ce0..da192b513 100644
--- a/mediapipe/calculators/image/testdata/BUILD
+++ b/mediapipe/calculators/image/testdata/BUILD
@@ -13,7 +13,7 @@
 #  limitations under the License.
 #
 
-licenses(["notice"])  # Apache 2.0
+licenses(["notice"])
 
 filegroup(
     name = "test_images",
diff --git a/mediapipe/calculators/internal/BUILD b/mediapipe/calculators/internal/BUILD
index eab1678e0..54b6c20f1 100644
--- a/mediapipe/calculators/internal/BUILD
+++ b/mediapipe/calculators/internal/BUILD
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) load("//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library") diff --git a/mediapipe/calculators/tensorflow/BUILD b/mediapipe/calculators/tensorflow/BUILD index f934bd5a4..3daf3827f 100644 --- a/mediapipe/calculators/tensorflow/BUILD +++ b/mediapipe/calculators/tensorflow/BUILD @@ -15,7 +15,7 @@ load("//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library") -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//visibility:private"]) @@ -427,6 +427,10 @@ cc_library( deps = [ ":tensorflow_session", ":tensorflow_inference_calculator_cc_proto", + "//mediapipe/framework:timestamp", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/memory", + "//mediapipe/framework:calculator_context", "//mediapipe/framework:calculator_framework", "//mediapipe/framework/tool:status_util", "@com_google_absl//absl/strings", @@ -434,6 +438,8 @@ cc_library( "//mediapipe/framework/deps:clock", "//mediapipe/framework/port:status", "//mediapipe/framework/port:ret_check", + "//mediapipe/framework/port:map_util", + "//mediapipe/framework:packet", ] + select({ "//conditions:default": [ "@org_tensorflow//tensorflow/core:framework", diff --git a/mediapipe/calculators/tensorflow/lapped_tensor_buffer_calculator.cc b/mediapipe/calculators/tensorflow/lapped_tensor_buffer_calculator.cc index 5ad8e853c..36c3da7e7 100644 --- a/mediapipe/calculators/tensorflow/lapped_tensor_buffer_calculator.cc +++ b/mediapipe/calculators/tensorflow/lapped_tensor_buffer_calculator.cc @@ -93,7 +93,7 @@ REGISTER_CALCULATOR(LappedTensorBufferCalculator); cc->Inputs().Index(0).Set( // tensorflow::Tensor stream. ); - RET_CHECK_EQ(cc->Inputs().NumEntries(), 1) + RET_CHECK_EQ(cc->Outputs().NumEntries(), 1) << "Only one output stream is supported."; if (cc->InputSidePackets().HasTag(kBufferSize)) { diff --git a/mediapipe/calculators/tensorflow/tensorflow_inference_calculator.cc b/mediapipe/calculators/tensorflow/tensorflow_inference_calculator.cc index 7ed835f64..5b9a74a6d 100644 --- a/mediapipe/calculators/tensorflow/tensorflow_inference_calculator.cc +++ b/mediapipe/calculators/tensorflow/tensorflow_inference_calculator.cc @@ -19,16 +19,22 @@ #include #include +#include "absl/base/thread_annotations.h" +#include "absl/memory/memory.h" #include "absl/strings/str_split.h" #include "absl/synchronization/mutex.h" #include "mediapipe/calculators/tensorflow/tensorflow_inference_calculator.pb.h" #include "mediapipe/calculators/tensorflow/tensorflow_session.h" +#include "mediapipe/framework/calculator_context.h" #include "mediapipe/framework/calculator_framework.h" #include "mediapipe/framework/deps/clock.h" #include "mediapipe/framework/deps/monotonic_clock.h" +#include "mediapipe/framework/packet.h" +#include "mediapipe/framework/port/map_util.h" #include "mediapipe/framework/port/ret_check.h" #include "mediapipe/framework/port/status.h" #include "mediapipe/framework/port/status_macros.h" +#include "mediapipe/framework/timestamp.h" #include "mediapipe/framework/tool/status_util.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_shape.h" @@ -77,6 +83,17 @@ class SimpleSemaphore { absl::Mutex mutex_; absl::CondVar cond_; }; + +class InferenceState { + public: + InferenceState() : input_tensor_batches_(), batch_timestamps_() {} + // A mapping between stream tags and the tensors we are collecting as a + // batch. 
+  std::map<std::string, std::vector<tf::Tensor>> input_tensor_batches_;
+  // The timestamps that go into a batch.
+  std::vector<Timestamp> batch_timestamps_;
+};
+
 }  // namespace
 
 // This calculator performs inference on a trained TensorFlow model.
@@ -218,11 +235,16 @@ class TensorFlowInferenceCalculator : public CalculatorBase {
   }
 
   static ::mediapipe::Status GetContract(CalculatorContract* cc) {
+    const auto& options = cc->Options<TensorFlowInferenceCalculatorOptions>();
     RET_CHECK(!cc->Inputs().GetTags().empty());
     for (const std::string& tag : cc->Inputs().GetTags()) {
       // The tensorflow::Tensor with the tag equal to the graph node. May
       // have a TimeSeriesHeader if all present TimeSeriesHeaders match.
-      cc->Inputs().Tag(tag).Set<tf::Tensor>();
+      if (!options.batched_input()) {
+        cc->Inputs().Tag(tag).Set<tf::Tensor>();
+      } else {
+        cc->Inputs().Tag(tag).Set<std::vector<Packet>>();
+      }
     }
     RET_CHECK(!cc->Outputs().GetTags().empty());
     for (const std::string& tag : cc->Outputs().GetTags()) {
@@ -242,6 +264,22 @@
     return ::mediapipe::OkStatus();
   }
 
+  std::unique_ptr<InferenceState> CreateInferenceState(CalculatorContext* cc)
+      ABSL_EXCLUSIVE_LOCKS_REQUIRED(mutex_) {
+    std::unique_ptr<InferenceState> inference_state =
+        absl::make_unique<InferenceState>();
+    if (cc->InputSidePackets().HasTag("RECURRENT_INIT_TENSORS") &&
+        !cc->InputSidePackets().Tag("RECURRENT_INIT_TENSORS").IsEmpty()) {
+      std::map<std::string, tf::Tensor>* init_tensor_map;
+      init_tensor_map = GetFromUniquePtr<std::map<std::string, tf::Tensor>>(
+          cc->InputSidePackets().Tag("RECURRENT_INIT_TENSORS"));
+      for (const auto& p : *init_tensor_map) {
+        inference_state->input_tensor_batches_[p.first].emplace_back(p.second);
+      }
+    }
+    return inference_state;
+  }
+
   ::mediapipe::Status Open(CalculatorContext* cc) override {
     options_ = cc->Options<TensorFlowInferenceCalculatorOptions>();
 
@@ -275,15 +313,6 @@
       recurrent_feed_tags_.insert(tags[0]);
       recurrent_fetch_tags_to_feed_tags_[tags[1]] = tags[0];
     }
-    if (cc->InputSidePackets().HasTag("RECURRENT_INIT_TENSORS") &&
-        !cc->InputSidePackets().Tag("RECURRENT_INIT_TENSORS").IsEmpty()) {
-      std::map<std::string, tf::Tensor>* init_tensor_map;
-      init_tensor_map = GetFromUniquePtr<std::map<std::string, tf::Tensor>>(
-          cc->InputSidePackets().Tag("RECURRENT_INIT_TENSORS"));
-      for (const auto& p : *init_tensor_map) {
-        input_tensor_batches_[p.first].emplace_back(p.second);
-      }
-    }
 
     // Check that all tags are present in this signature bound to tensors.
     for (const std::string& tag : cc->Inputs().GetTags()) {
@@ -297,9 +326,15 @@
           << options_.signature_name();
     }
 
-    if (options_.batch_size() == 1) {
+    {
+      absl::WriterMutexLock l(&mutex_);
+      inference_state_ = std::unique_ptr<InferenceState>();
+    }
+
+    if (options_.batch_size() == 1 || options_.batched_input()) {
       cc->SetOffset(0);
     }
+
     return ::mediapipe::OkStatus();
   }
 
@@ -316,6 +351,24 @@
     return ::mediapipe::OkStatus();
   }
 
+  ::mediapipe::Status AggregateTensorPacket(
+      const std::string& tag_name, const Packet& packet,
+      std::map<Timestamp, std::map<std::string, tf::Tensor>>*
+          input_tensors_by_tag_by_timestamp,
+      InferenceState* inference_state) ABSL_EXCLUSIVE_LOCKS_REQUIRED(mutex_) {
+    tf::Tensor input_tensor(packet.Get<tf::Tensor>());
+    RET_CHECK_OK(AddBatchDimension(&input_tensor));
+    if (::mediapipe::ContainsKey(recurrent_feed_tags_, tag_name)) {
+      // If we receive an input on a recurrent tag, override the state.
+      // It's OK to override the global state because there is just one
+      // input stream allowed for recurrent tensors.
+      inference_state_->input_tensor_batches_[tag_name].clear();
+    }
+    (*input_tensors_by_tag_by_timestamp)[packet.Timestamp()].insert(
+        std::make_pair(tag_name, input_tensor));
+    return ::mediapipe::OkStatus();
+  }
+
   // Removes the batch dimension of the output tensor if specified in the
   // calculator options.
   ::mediapipe::Status RemoveBatchDimension(tf::Tensor* output_tensor) {
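In the rewritten Process() below, incoming tensors are first bucketed into a std::map keyed by Timestamp and only then appended to the batch. Because std::map iterates its keys in sorted order, batch_timestamps_ stays monotonically increasing even when a batched input delivers several packets in a single call. A small stand-alone sketch of the ordering guarantee being relied on (types as aliased in this file):

// Sketch: std::map orders its keys, so insertion order does not matter.
std::map<Timestamp, std::map<std::string, tf::Tensor>> by_timestamp;
by_timestamp[Timestamp(2)]["A"] = tf::Tensor();
by_timestamp[Timestamp(1)]["A"] = tf::Tensor();
for (const auto& entry : by_timestamp) {
  // Visits Timestamp(1) first, then Timestamp(2).
}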
@@ -331,48 +384,85 @@
   }
 
   ::mediapipe::Status Process(CalculatorContext* cc) override {
-    std::map<std::string, tf::Tensor> input_tensors_by_tag;
-    for (const std::string& tag_as_node_name : cc->Inputs().GetTags()) {
-      if (cc->Inputs().Tag(tag_as_node_name).IsEmpty()) {
-        // Recurrent tensors can be empty.
-        if (!::mediapipe::ContainsKey(recurrent_feed_tags_, tag_as_node_name)) {
-          if (options_.skip_on_missing_features()) {
-            return ::mediapipe::OkStatus();
-          } else {
-            return ::mediapipe::InvalidArgumentError(absl::StrCat(
-                "Tag ", tag_as_node_name,
-                " not present at timestamp: ", cc->InputTimestamp().Value()));
+    std::unique_ptr<InferenceState> inference_state_to_process;
+    {
+      absl::WriterMutexLock l(&mutex_);
+      if (inference_state_ == nullptr) {
+        inference_state_ = CreateInferenceState(cc);
+      }
+      std::map<Timestamp, std::map<std::string, tf::Tensor>>
+          input_tensors_by_tag_by_timestamp;
+      for (const std::string& tag_as_node_name : cc->Inputs().GetTags()) {
+        if (cc->Inputs().Tag(tag_as_node_name).IsEmpty()) {
+          // Recurrent tensors can be empty.
+          if (!::mediapipe::ContainsKey(recurrent_feed_tags_,
+                                        tag_as_node_name)) {
+            if (options_.skip_on_missing_features()) {
+              return ::mediapipe::OkStatus();
+            } else {
+              return ::mediapipe::InvalidArgumentError(absl::StrCat(
+                  "Tag ", tag_as_node_name,
+                  " not present at timestamp: ", cc->InputTimestamp().Value()));
+            }
           }
+        } else if (options_.batched_input()) {
+          const auto& tensor_packets =
+              cc->Inputs().Tag(tag_as_node_name).Get<std::vector<Packet>>();
+          if (tensor_packets.size() > options_.batch_size()) {
+            return ::mediapipe::InvalidArgumentError(absl::StrCat(
+                "Batch for tag ", tag_as_node_name,
+                " has more packets than batch capacity. batch_size: ",
+                options_.batch_size(), " packets: ", tensor_packets.size()));
+          }
+          for (const auto& packet : tensor_packets) {
+            RET_CHECK_OK(AggregateTensorPacket(
+                tag_as_node_name, packet, &input_tensors_by_tag_by_timestamp,
+                inference_state_.get()));
+          }
+        } else {
+          RET_CHECK_OK(AggregateTensorPacket(
+              tag_as_node_name, cc->Inputs().Tag(tag_as_node_name).Value(),
+              &input_tensors_by_tag_by_timestamp, inference_state_.get()));
         }
-      } else {
-        tf::Tensor input_tensor(
-            cc->Inputs().Tag(tag_as_node_name).Get<tf::Tensor>());
-        RET_CHECK_OK(AddBatchDimension(&input_tensor));
-        if (::mediapipe::ContainsKey(recurrent_feed_tags_, tag_as_node_name)) {
-          // If we receive an input on a recurrent tag, override the state.
-          // It's OK to override the global state because there is just one
-          // input stream allowed for recurrent tensors.
-          input_tensor_batches_[tag_as_node_name].clear();
+      }
+      for (const auto& timestamp_and_input_tensors_by_tag :
+           input_tensors_by_tag_by_timestamp) {
+        inference_state_->batch_timestamps_.emplace_back(
+            timestamp_and_input_tensors_by_tag.first);
+        for (const auto& input_tensor_and_tag :
+             timestamp_and_input_tensors_by_tag.second) {
+          inference_state_->input_tensor_batches_[input_tensor_and_tag.first]
+              .emplace_back(input_tensor_and_tag.second);
         }
-        input_tensors_by_tag.insert(
-            std::make_pair(tag_as_node_name, input_tensor));
+      }
+      if (inference_state_->batch_timestamps_.size() ==
+              options_.batch_size() ||
+          options_.batched_input()) {
+        inference_state_to_process = std::move(inference_state_);
+        inference_state_ = std::unique_ptr<InferenceState>();
       }
     }
-    batch_timestamps_.emplace_back(cc->InputTimestamp());
-    for (const auto& input_tensor_and_tag : input_tensors_by_tag) {
-      input_tensor_batches_[input_tensor_and_tag.first].emplace_back(
-          input_tensor_and_tag.second);
+
+    if (inference_state_to_process) {
+      MP_RETURN_IF_ERROR(
+          OutputBatch(cc, std::move(inference_state_to_process)));
     }
-    if (batch_timestamps_.size() == options_.batch_size()) {
-      MP_RETURN_IF_ERROR(OutputBatch(cc));
-    }
+
     return ::mediapipe::OkStatus();
   }
 
   ::mediapipe::Status Close(CalculatorContext* cc) override {
-    if (!batch_timestamps_.empty()) {
-      MP_RETURN_IF_ERROR(OutputBatch(cc));
+    std::unique_ptr<InferenceState> inference_state_to_process = nullptr;
+    {
+      absl::WriterMutexLock l(&mutex_);
+      if (cc->GraphStatus().ok() && inference_state_ != nullptr &&
+          !inference_state_->batch_timestamps_.empty()) {
+        inference_state_to_process = std::move(inference_state_);
+        inference_state_ = std::unique_ptr<InferenceState>();
+      }
+    }
+    if (inference_state_to_process) {
+      MP_RETURN_IF_ERROR(
+          OutputBatch(cc, std::move(inference_state_to_process)));
     }
     return ::mediapipe::OkStatus();
   }
@@ -385,10 +475,12 @@
   // memory buffer. Therefore, copies are cheap and should not cause the memory
   // buffer to fall out of scope. In contrast, concat is only used where
   // necessary.
-  ::mediapipe::Status OutputBatch(CalculatorContext* cc) {
+  ::mediapipe::Status OutputBatch(
+      CalculatorContext* cc, std::unique_ptr<InferenceState> inference_state) {
    const int64 start_time = absl::ToUnixMicros(clock_->TimeNow());
    std::vector<std::pair<std::string, tf::Tensor>> input_tensors;
-    for (auto& keyed_tensors : input_tensor_batches_) {
+
+    for (auto& keyed_tensors : inference_state->input_tensor_batches_) {
       if (options_.batch_size() == 1) {
         // Short circuit to avoid the cost of deep copying tensors in concat.
         if (!keyed_tensors.second.empty()) {
@@ -404,7 +496,8 @@
       } else {
         // Pad by replicating the first tensor, then ignore the values.
         keyed_tensors.second.resize(options_.batch_size());
-        std::fill(keyed_tensors.second.begin() + batch_timestamps_.size(),
+        std::fill(keyed_tensors.second.begin() +
+                      inference_state->batch_timestamps_.size(),
                   keyed_tensors.second.end(), keyed_tensors.second[0]);
         tf::Tensor concated;
         const tf::Status concat_status =
@@ -414,7 +507,7 @@
             concated);
       }
     }
-    input_tensor_batches_.clear();
+    inference_state->input_tensor_batches_.clear();
     std::vector<std::string> output_tensor_names;
     std::vector<std::string> output_name_in_signature;
     for (const std::string& tag : cc->Outputs().GetTags()) {
@@ -466,9 +559,11 @@
       int pos = std::find(output_name_in_signature.begin(),
                           output_name_in_signature.end(), tag_pair.first) -
                 output_name_in_signature.begin();
-      input_tensor_batches_[tag_pair.second].emplace_back(outputs[pos]);
+      inference_state->input_tensor_batches_[tag_pair.second].emplace_back(
+          outputs[pos]);
     }
 
+    absl::WriterMutexLock l(&mutex_);
     // Set that we want to split on each index of the 0th dimension.
     std::vector<tf::int64> split_vector(options_.batch_size(), 1);
     for (int i = 0; i < output_tensor_names.size(); ++i) {
@@ -478,7 +573,8 @@
         RET_CHECK_OK(RemoveBatchDimension(&output_tensor));
         cc->Outputs()
             .Tag(output_name_in_signature[i])
-            .Add(new tf::Tensor(output_tensor), batch_timestamps_[0]);
+            .Add(new tf::Tensor(output_tensor),
+                 inference_state->batch_timestamps_[0]);
       }
     } else {
       std::vector<tf::Tensor> split_tensors;
@@ -486,22 +582,30 @@
       tf::tensor::Split(outputs[i], split_vector, &split_tensors);
       CHECK(split_status.ok()) << split_status.ToString();
       // Loop over timestamps so that we don't copy the padding.
-      for (int j = 0; j < batch_timestamps_.size(); ++j) {
+      for (int j = 0; j < inference_state->batch_timestamps_.size(); ++j) {
         tf::Tensor output_tensor(split_tensors[j]);
         RET_CHECK_OK(RemoveBatchDimension(&output_tensor));
         cc->Outputs()
             .Tag(output_name_in_signature[i])
-            .Add(new tf::Tensor(output_tensor), batch_timestamps_[j]);
+            .Add(new tf::Tensor(output_tensor),
+                 inference_state->batch_timestamps_[j]);
       }
     }
   }
+
   // Get end time and report.
   const int64 end_time = absl::ToUnixMicros(clock_->TimeNow());
   cc->GetCounter(kTotalUsecsCounterSuffix)
       ->IncrementBy(end_time - start_time);
   cc->GetCounter(kTotalProcessedTimestampsCounterSuffix)
-      ->IncrementBy(batch_timestamps_.size());
-  batch_timestamps_.clear();
+      ->IncrementBy(inference_state->batch_timestamps_.size());
+
+  // Make sure we hold on to the recurrent state.
+  if (!options_.recurrent_tag_pair().empty()) {
+    inference_state_ = std::move(inference_state);
+    inference_state_->batch_timestamps_.clear();
+  }
+
   return ::mediapipe::OkStatus();
 }
@@ -514,11 +618,8 @@
   // A mapping between stream tags and the tensor names they are bound to.
   std::map<std::string, std::string> tag_to_tensor_map_;
 
-  // A mapping between stream tags and the tensors we are collecting as a batch.
-  std::map<std::string, std::vector<tf::Tensor>> input_tensor_batches_;
-
-  // The timestamps that go into a batch.
-  std::vector<Timestamp> batch_timestamps_;
+  absl::Mutex mutex_;
+  std::unique_ptr<InferenceState> inference_state_ ABSL_GUARDED_BY(mutex_);
 
   // The options for the calculator.
   TensorFlowInferenceCalculatorOptions options_;
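CreateInferenceState() above reads the optional RECURRENT_INIT_TENSORS side packet, which holds a map from feed tag to initial tensor wrapped in a unique_ptr packet. A hedged sketch of supplying it in a test, using this file's existing APIs (the tag name LSTM_STATE and the shape are invented for illustration):

// Sketch only: feed initial recurrent tensors through the side packet.
auto init_tensors = absl::make_unique<std::map<std::string, tf::Tensor>>();
(*init_tensors)["LSTM_STATE"] =
    tf::Tensor(tf::DT_FLOAT, tf::TensorShape({1, 128}));  // invented shape
runner_->MutableSidePackets()->Tag("RECURRENT_INIT_TENSORS") =
    AdoptAsUniquePtr(init_tensors.release());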
diff --git a/mediapipe/calculators/tensorflow/tensorflow_inference_calculator.proto b/mediapipe/calculators/tensorflow/tensorflow_inference_calculator.proto
index a353d2f55..98dbd5b4b 100644
--- a/mediapipe/calculators/tensorflow/tensorflow_inference_calculator.proto
+++ b/mediapipe/calculators/tensorflow/tensorflow_inference_calculator.proto
@@ -76,4 +76,13 @@ message TensorFlowInferenceCalculatorOptions {
   // only works in the local process, not "globally" across multiple processes
   // or replicas (if any). Default to 0, i.e. no limit.
   optional int32 max_concurrent_session_runs = 6 [default = 0];
+
+  // If turned on, the calculator expects a vector of batched packets as
+  // input. This makes it possible to turn on max_in_flight for a batch_size
+  // greater than 1; otherwise it results in non-monotonically increasing
+  // timestamps.
+  // Use BatchSequentialCalculator to create the batches. The batch_size
+  // should agree for both calculators. All the data in a batch is processed
+  // together. The BatchSequentialCalculator can't run with max_in_flight.
+  optional bool batched_input = 7;
 }
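The option is enabled alongside max_in_flight. Mirroring the test style used below, a configuration sketch looks like this (stream and tag names follow the tests):

// Sketch, following the tests below: batched input plus max_in_flight.
CalculatorGraphConfig::Node config;
config.set_calculator("TensorFlowInferenceCalculator");
config.add_input_stream("A:tensor_a");
config.add_output_stream("MULTIPLIED:tensor_o1");
config.add_input_side_packet("SESSION:session");
config.set_max_in_flight(2);
auto* opts = config.mutable_options()->MutableExtension(
    TensorFlowInferenceCalculatorOptions::ext);
opts->set_batch_size(2);
opts->set_batched_input(true);  // inputs now carry std::vector<Packet>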
diff --git a/mediapipe/calculators/tensorflow/tensorflow_inference_calculator_test.cc b/mediapipe/calculators/tensorflow/tensorflow_inference_calculator_test.cc
index 4f07b897c..2ec6cbe3b 100644
--- a/mediapipe/calculators/tensorflow/tensorflow_inference_calculator_test.cc
+++ b/mediapipe/calculators/tensorflow/tensorflow_inference_calculator_test.cc
@@ -89,17 +89,31 @@ class TensorflowInferenceCalculatorTest : public ::testing::Test {
     output_side_packets.Tag("SESSION");
   }
 
-  // Create tensor from Vector and add as a Packet to the provided tag as input.
-  void AddVectorToInputsAsTensor(const std::vector<int32>& input,
-                                 const std::string& tag, int64 time) {
+  Packet CreateTensorPacket(const std::vector<int32>& input, int64 time) {
     tf::TensorShape tensor_shape;
     tensor_shape.AddDim(input.size());
     auto tensor = absl::make_unique<tf::Tensor>(tf::DT_INT32, tensor_shape);
     for (int i = 0; i < input.size(); ++i) {
       tensor->vec<int32>()(i) = input[i];
     }
+    return Adopt(tensor.release()).At(Timestamp(time));
+  }
+
+  // Create tensor from Vector and add as a Packet to the provided tag as input.
+  void AddVectorToInputsAsTensor(const std::vector<int32>& input,
+                                 const std::string& tag, int64 time) {
     runner_->MutableInputs()->Tag(tag).packets.push_back(
-        Adopt(tensor.release()).At(Timestamp(time)));
+        CreateTensorPacket(input, time));
+  }
+
+  // Wrap a vector of tensor packets into a single batched input packet and
+  // add it to the provided tag.
+  void AddVectorToInputsAsPacket(const std::vector<Packet>& packets,
+                                 const std::string& tag) {
+    CHECK(!packets.empty())
+        << "Please specify at least some data in the packet";
+    auto packets_ptr = absl::make_unique<std::vector<Packet>>(packets);
+    runner_->MutableInputs()->Tag(tag).packets.push_back(
+        Adopt(packets_ptr.release()).At(packets.begin()->Timestamp()));
   }
 
   std::unique_ptr<CalculatorRunner> runner_;
@@ -183,6 +197,45 @@ TEST_F(TensorflowInferenceCalculatorTest, GetComputed) {
   EXPECT_THAT(run_status.ToString(), testing::HasSubstr("Tag B"));
 }
 
+TEST_F(TensorflowInferenceCalculatorTest, GetComputed_MaxInFlight) {
+  CalculatorGraphConfig::Node config;
+  config.set_calculator("TensorFlowInferenceCalculator");
+  config.add_input_stream("A:tensor_a");
+  config.add_input_stream("B:tensor_b");
+  config.add_output_stream("MULTIPLIED:tensor_o1");
+  config.add_input_side_packet("SESSION:session");
+  config.set_max_in_flight(2);
+  CalculatorOptions options;
+  options.MutableExtension(TensorFlowInferenceCalculatorOptions::ext)
+      ->set_batch_size(1);
+  options.MutableExtension(TensorFlowInferenceCalculatorOptions::ext)
+      ->set_add_batch_dim_to_tensors(false);
+  *config.mutable_options() = options;
+
+  runner_ = absl::make_unique<CalculatorRunner>(config);
+  AddSessionInputSidePacket();
+  AddVectorToInputsAsTensor({2, 2, 2}, "A", 0);
+  AddVectorToInputsAsTensor({3, 4, 5}, "B", 0);
+  MP_ASSERT_OK(runner_->Run());
+
+  const std::vector<Packet>& output_packets_mult =
+      runner_->Outputs().Tag("MULTIPLIED").packets;
+  ASSERT_EQ(1, output_packets_mult.size());
+  const tf::Tensor& tensor_mult = output_packets_mult[0].Get<tf::Tensor>();
+  tf::TensorShape expected_shape({3});
+  auto expected_tensor = tf::test::AsTensor<int32>({6, 8, 10}, expected_shape);
+  tf::test::ExpectTensorEqual<int32>(expected_tensor, tensor_mult);
+
+  // Add only one of the two expected tensors at the next timestamp, expect
+  // useful failure message.
+ AddVectorToInputsAsTensor({1, 2, 3}, "A", 1); + auto run_status = runner_->Run(); + ASSERT_FALSE(run_status.ok()); + EXPECT_THAT(run_status.ToString(), + testing::HasSubstr("TensorFlowInferenceCalculator")); + EXPECT_THAT(run_status.ToString(), testing::HasSubstr("Tag B")); +} + TEST_F(TensorflowInferenceCalculatorTest, BadTag) { CalculatorGraphConfig::Node config; config.set_calculator("TensorFlowInferenceCalculator"); @@ -235,6 +288,86 @@ TEST_F(TensorflowInferenceCalculatorTest, GetMultiBatchComputed) { ->Get()); } +TEST_F(TensorflowInferenceCalculatorTest, GetMultiBatchComputed_MaxInFlight) { + CalculatorGraphConfig::Node config; + config.set_calculator("TensorFlowInferenceCalculator"); + config.add_input_stream("A:tensor_a"); + config.add_input_stream("B:tensor_b"); + config.add_output_stream("MULTIPLIED:tensor_o1"); + config.add_input_side_packet("SESSION:session"); + config.set_max_in_flight(2); + CalculatorOptions options; + options.MutableExtension(TensorFlowInferenceCalculatorOptions::ext) + ->set_batch_size(1); + *config.mutable_options() = options; + + runner_ = absl::make_unique(config); + AddSessionInputSidePacket(); + AddVectorToInputsAsTensor({2, 2, 2}, "A", 0); + AddVectorToInputsAsTensor({3, 4, 5}, "B", 0); + AddVectorToInputsAsTensor({3, 3, 3}, "A", 1); + AddVectorToInputsAsTensor({3, 4, 5}, "B", 1); + MP_ASSERT_OK(runner_->Run()); + + const std::vector& output_packets_mult = + runner_->Outputs().Tag("MULTIPLIED").packets; + ASSERT_EQ(2, output_packets_mult.size()); + const tf::Tensor& tensor_mult = output_packets_mult[0].Get(); + auto expected_tensor = tf::test::AsTensor({6, 8, 10}); + tf::test::ExpectTensorEqual(tensor_mult, expected_tensor); + const tf::Tensor& tensor_mult1 = output_packets_mult[1].Get(); + auto expected_tensor1 = tf::test::AsTensor({9, 12, 15}); + tf::test::ExpectTensorEqual(tensor_mult1, expected_tensor1); + + EXPECT_EQ(2, runner_ + ->GetCounter( + "TensorFlowInferenceCalculator-TotalProcessedTimestamps") + ->Get()); +} + +TEST_F(TensorflowInferenceCalculatorTest, + GetMultiBatchComputed_MoreThanMaxInFlight) { + CalculatorGraphConfig::Node config; + config.set_calculator("TensorFlowInferenceCalculator"); + config.add_input_stream("A:tensor_a"); + config.add_input_stream("B:tensor_b"); + config.add_output_stream("MULTIPLIED:tensor_o1"); + config.add_input_side_packet("SESSION:session"); + config.set_max_in_flight(2); + CalculatorOptions options; + options.MutableExtension(TensorFlowInferenceCalculatorOptions::ext) + ->set_batch_size(1); + *config.mutable_options() = options; + + runner_ = absl::make_unique(config); + AddSessionInputSidePacket(); + AddVectorToInputsAsTensor({2, 2, 2}, "A", 0); + AddVectorToInputsAsTensor({3, 4, 5}, "B", 0); + AddVectorToInputsAsTensor({3, 3, 3}, "A", 1); + AddVectorToInputsAsTensor({3, 4, 5}, "B", 1); + AddVectorToInputsAsTensor({4, 4, 4}, "A", 2); + AddVectorToInputsAsTensor({3, 4, 5}, "B", 2); + MP_ASSERT_OK(runner_->Run()); + + const std::vector& output_packets_mult = + runner_->Outputs().Tag("MULTIPLIED").packets; + ASSERT_EQ(3, output_packets_mult.size()); + const tf::Tensor& tensor_mult = output_packets_mult[0].Get(); + auto expected_tensor = tf::test::AsTensor({6, 8, 10}); + tf::test::ExpectTensorEqual(tensor_mult, expected_tensor); + const tf::Tensor& tensor_mult1 = output_packets_mult[1].Get(); + auto expected_tensor1 = tf::test::AsTensor({9, 12, 15}); + tf::test::ExpectTensorEqual(tensor_mult1, expected_tensor1); + const tf::Tensor& tensor_mult2 = output_packets_mult[2].Get(); + auto expected_tensor2 = 
tf::test::AsTensor({12, 16, 20}); + tf::test::ExpectTensorEqual(tensor_mult2, expected_tensor2); + + EXPECT_EQ(3, runner_ + ->GetCounter( + "TensorFlowInferenceCalculator-TotalProcessedTimestamps") + ->Get()); +} + TEST_F(TensorflowInferenceCalculatorTest, GetSingleBatchComputed) { CalculatorGraphConfig::Node config; config.set_calculator("TensorFlowInferenceCalculator"); @@ -311,6 +444,66 @@ TEST_F(TensorflowInferenceCalculatorTest, GetCloseBatchComputed) { ->Get()); } +TEST_F(TensorflowInferenceCalculatorTest, GetBatchComputed_MaxInFlight) { + CalculatorGraphConfig::Node config; + config.set_calculator("TensorFlowInferenceCalculator"); + config.add_input_stream("A:tensor_a"); + config.add_input_stream("B:tensor_b"); + config.add_output_stream("MULTIPLIED:tensor_o1"); + config.add_input_side_packet("SESSION:session"); + config.set_max_in_flight(2); + CalculatorOptions options; + options.MutableExtension(TensorFlowInferenceCalculatorOptions::ext) + ->set_batch_size(2); + options.MutableExtension(TensorFlowInferenceCalculatorOptions::ext) + ->set_add_batch_dim_to_tensors(true); + options.MutableExtension(TensorFlowInferenceCalculatorOptions::ext) + ->set_batched_input(true); + *config.mutable_options() = options; + + runner_ = absl::make_unique(config); + AddSessionInputSidePacket(); + AddVectorToInputsAsPacket( + {CreateTensorPacket({2, 2, 2}, 0), CreateTensorPacket({3, 3, 3}, 1)}, + "A"); + AddVectorToInputsAsPacket( + {CreateTensorPacket({3, 4, 5}, 0), CreateTensorPacket({3, 4, 5}, 1)}, + "B"); + AddVectorToInputsAsPacket( + {CreateTensorPacket({4, 4, 4}, 2), CreateTensorPacket({5, 5, 5}, 3)}, + "A"); + AddVectorToInputsAsPacket( + {CreateTensorPacket({3, 4, 5}, 2), CreateTensorPacket({3, 4, 5}, 3)}, + "B"); + AddVectorToInputsAsPacket({CreateTensorPacket({6, 6, 6}, 4)}, "A"); + AddVectorToInputsAsPacket({CreateTensorPacket({3, 4, 5}, 4)}, "B"); + MP_ASSERT_OK(runner_->Run()); + + const std::vector& output_packets_mult = + runner_->Outputs().Tag("MULTIPLIED").packets; + ASSERT_EQ(5, output_packets_mult.size()); + const tf::Tensor& tensor_mult = output_packets_mult[0].Get(); + auto expected_tensor = tf::test::AsTensor({6, 8, 10}); + tf::test::ExpectTensorEqual(tensor_mult, expected_tensor); + const tf::Tensor& tensor_mult1 = output_packets_mult[1].Get(); + auto expected_tensor1 = tf::test::AsTensor({9, 12, 15}); + tf::test::ExpectTensorEqual(tensor_mult1, expected_tensor1); + const tf::Tensor& tensor_mult2 = output_packets_mult[2].Get(); + auto expected_tensor2 = tf::test::AsTensor({12, 16, 20}); + tf::test::ExpectTensorEqual(tensor_mult2, expected_tensor2); + const tf::Tensor& tensor_mult3 = output_packets_mult[3].Get(); + auto expected_tensor3 = tf::test::AsTensor({15, 20, 25}); + tf::test::ExpectTensorEqual(tensor_mult3, expected_tensor3); + const tf::Tensor& tensor_mult4 = output_packets_mult[4].Get(); + auto expected_tensor4 = tf::test::AsTensor({18, 24, 30}); + tf::test::ExpectTensorEqual(tensor_mult4, expected_tensor4); + + EXPECT_EQ(5, runner_ + ->GetCounter( + "TensorFlowInferenceCalculator-TotalProcessedTimestamps") + ->Get()); +} + TEST_F(TensorflowInferenceCalculatorTest, TestRecurrentStates) { CalculatorGraphConfig::Node config; config.set_calculator("TensorFlowInferenceCalculator"); @@ -509,4 +702,40 @@ TEST_F(TensorflowInferenceCalculatorTest, ->Get()); } +TEST_F(TensorflowInferenceCalculatorTest, BatchedInputTooBigBatch) { + CalculatorGraphConfig::Node config; + config.set_calculator("TensorFlowInferenceCalculator"); + config.add_input_stream("A:tensor_a"); + 
config.add_input_stream("B:tensor_b"); + config.add_output_stream("MULTIPLIED:tensor_o1"); + config.add_input_side_packet("SESSION:session"); + config.set_max_in_flight(2); + CalculatorOptions options; + options.MutableExtension(TensorFlowInferenceCalculatorOptions::ext) + ->set_batch_size(2); + options.MutableExtension(TensorFlowInferenceCalculatorOptions::ext) + ->set_add_batch_dim_to_tensors(true); + options.MutableExtension(TensorFlowInferenceCalculatorOptions::ext) + ->set_batched_input(true); + *config.mutable_options() = options; + + runner_ = absl::make_unique(config); + AddSessionInputSidePacket(); + AddVectorToInputsAsPacket( + {CreateTensorPacket({2, 2, 2}, 0), CreateTensorPacket({3, 3, 3}, 1), + CreateTensorPacket({4, 4, 4}, 2)}, + "A"); + AddVectorToInputsAsPacket( + {CreateTensorPacket({3, 4, 5}, 0), CreateTensorPacket({3, 4, 5}, 1), + CreateTensorPacket({3, 4, 5}, 2)}, + "B"); + + auto status = runner_->Run(); + ASSERT_FALSE(status.ok()); + EXPECT_THAT( + status.message(), + ::testing::HasSubstr( + "has more packets than batch capacity. batch_size: 2 packets: 3")); +} + } // namespace mediapipe diff --git a/mediapipe/calculators/tensorflow/unpack_media_sequence_calculator.cc b/mediapipe/calculators/tensorflow/unpack_media_sequence_calculator.cc index a92b48d30..86a2a4afa 100644 --- a/mediapipe/calculators/tensorflow/unpack_media_sequence_calculator.cc +++ b/mediapipe/calculators/tensorflow/unpack_media_sequence_calculator.cc @@ -29,6 +29,7 @@ namespace mediapipe { // Streams: const char kBBoxTag[] = "BBOX"; const char kImageTag[] = "IMAGE"; +const char kKeypointsTag[] = "KEYPOINTS"; const char kFloatFeaturePrefixTag[] = "FLOAT_FEATURE_"; const char kForwardFlowImageTag[] = "FORWARD_FLOW_ENCODED"; @@ -150,7 +151,6 @@ class UnpackMediaSequenceCalculator : public CalculatorBase { << "or" << kAudioDecoderOptions; } - // Optional streams. if (cc->Outputs().HasTag(kForwardFlowImageTag)) { cc->Outputs().Tag(kForwardFlowImageTag).Set(); } @@ -244,6 +244,10 @@ class UnpackMediaSequenceCalculator : public CalculatorBase { const auto& sequence = cc->InputSidePackets() .Tag(kSequenceExampleTag) .Get(); + if (cc->Outputs().HasTag(kKeypointsTag)) { + keypoint_names_ = absl::StrSplit(options.keypoint_names(), ','); + default_keypoint_location_ = options.default_keypoint_location(); + } if (cc->OutputSidePackets().HasTag(kDataPath)) { std::string root_directory = ""; if (cc->InputSidePackets().HasTag(kDatasetRootDirTag)) { @@ -357,7 +361,6 @@ class UnpackMediaSequenceCalculator : public CalculatorBase { end_timestamp = timestamps_[last_timestamp_key_][current_timestamp_index_ + 1]; } - for (const auto& map_kv : timestamps_) { for (int i = 0; i < map_kv.second.size(); ++i) { if (map_kv.second[i] >= start_timestamp && @@ -454,6 +457,10 @@ class UnpackMediaSequenceCalculator : public CalculatorBase { int current_timestamp_index_; // Store the very first timestamp, so we output everything on the first frame. int64 first_timestamp_seen_; + // List of keypoint names. + std::vector keypoint_names_; + // Default keypoint location when missing. 
+ float default_keypoint_location_; }; REGISTER_CALCULATOR(UnpackMediaSequenceCalculator); } // namespace mediapipe diff --git a/mediapipe/calculators/tflite/BUILD b/mediapipe/calculators/tflite/BUILD index 2c4bb637b..ae56ae763 100644 --- a/mediapipe/calculators/tflite/BUILD +++ b/mediapipe/calculators/tflite/BUILD @@ -16,7 +16,7 @@ load("//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library") load("@bazel_skylib//lib:selects.bzl", "selects") -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//visibility:private"]) @@ -257,6 +257,7 @@ cc_library( }) + select({ "//conditions:default": [], "//mediapipe:android": [ + "//mediapipe/util/android/file/base", "@org_tensorflow//tensorflow/lite/delegates/nnapi:nnapi_delegate", ], }) + select({ diff --git a/mediapipe/calculators/tflite/tflite_inference_calculator.cc b/mediapipe/calculators/tflite/tflite_inference_calculator.cc index a48cb2796..3dae64f9c 100644 --- a/mediapipe/calculators/tflite/tflite_inference_calculator.cc +++ b/mediapipe/calculators/tflite/tflite_inference_calculator.cc @@ -33,6 +33,12 @@ #include "tensorflow/lite/kernels/register.h" #include "tensorflow/lite/model.h" +#if defined(MEDIAPIPE_ANDROID) +#include "mediapipe/util/android/file/base/file.h" +#include "mediapipe/util/android/file/base/filesystem.h" +#include "mediapipe/util/android/file/base/helpers.h" +#endif // ANDROID + #if MEDIAPIPE_TFLITE_GL_INFERENCE #include "mediapipe/gpu/gl_calculator_helper.h" #include "mediapipe/gpu/gpu_buffer.h" @@ -219,6 +225,8 @@ class TfLiteInferenceCalculator : public CalculatorBase { ::mediapipe::Status Close(CalculatorContext* cc) override; private: + ::mediapipe::Status ReadKernelsFromFile(); + ::mediapipe::Status WriteKernelsToFile(); ::mediapipe::Status LoadModel(CalculatorContext* cc); ::mediapipe::StatusOr GetModelAsPacket(const CalculatorContext& cc); ::mediapipe::Status LoadDelegate(CalculatorContext* cc); @@ -273,6 +281,9 @@ class TfLiteInferenceCalculator : public CalculatorBase { bool use_quantized_tensors_ = false; bool use_advanced_gpu_api_ = false; + + bool use_kernel_caching_ = false; + std::string cached_kernel_filename_; }; REGISTER_CALCULATOR(TfLiteInferenceCalculator); @@ -354,6 +365,17 @@ bool ShouldUseGpu(CC* cc) { options.has_delegate() && options.delegate().has_gpu() && options.delegate().gpu().use_advanced_gpu_api(); + + use_kernel_caching_ = + use_advanced_gpu_api_ && options.delegate().gpu().use_kernel_caching(); + + if (use_kernel_caching_) { +#if MEDIAPIPE_TFLITE_GL_INFERENCE && defined(MEDIAPIPE_ANDROID) + cached_kernel_filename_ = + "/sdcard/" + mediapipe::File::Basename(options.model_path()) + ".ker"; +#endif // MEDIAPIPE_TFLITE_GL_INFERENCE && MEDIAPIPE_ANDROID + } + if (use_advanced_gpu_api_ && !gpu_input_) { LOG(WARNING) << "Cannot use advanced GPU APIs, input must be GPU buffers." "Falling back to the default TFLite API."; @@ -423,7 +445,23 @@ bool ShouldUseGpu(CC* cc) { }); } +::mediapipe::Status TfLiteInferenceCalculator::WriteKernelsToFile() { +#if MEDIAPIPE_TFLITE_GL_INFERENCE && defined(MEDIAPIPE_ANDROID) + if (use_kernel_caching_) { + // Save kernel file. 
+ auto kernel_cache = absl::make_unique>( + tflite_gpu_runner_->GetSerializedBinaryCache()); + std::string cache_str(kernel_cache->begin(), kernel_cache->end()); + MP_RETURN_IF_ERROR( + mediapipe::file::SetContents(cached_kernel_filename_, cache_str)); + } +#endif // MEDIAPIPE_TFLITE_GL_INFERENCE && MEDIAPIPE_ANDROID + return ::mediapipe::OkStatus(); +} + ::mediapipe::Status TfLiteInferenceCalculator::Close(CalculatorContext* cc) { + MP_RETURN_IF_ERROR(WriteKernelsToFile()); + return RunInContextIfNeeded([this]() -> ::mediapipe::Status { if (delegate_) { interpreter_ = nullptr; @@ -635,6 +673,22 @@ bool ShouldUseGpu(CC* cc) { return ::mediapipe::OkStatus(); } +::mediapipe::Status TfLiteInferenceCalculator::ReadKernelsFromFile() { +#if MEDIAPIPE_TFLITE_GL_INFERENCE && defined(MEDIAPIPE_ANDROID) + if (use_kernel_caching_) { + // Load pre-compiled kernel file. + if (mediapipe::File::Exists(cached_kernel_filename_)) { + std::string cache_str; + MP_RETURN_IF_ERROR( + mediapipe::file::GetContents(cached_kernel_filename_, &cache_str)); + std::vector cache_vec(cache_str.begin(), cache_str.end()); + tflite_gpu_runner_->SetSerializedBinaryCache(std::move(cache_vec)); + } + } +#endif // MEDIAPIPE_TFLITE_GL_INFERENCE && MEDIAPIPE_ANDROID + return ::mediapipe::OkStatus(); +} + ::mediapipe::Status TfLiteInferenceCalculator::InitTFLiteGPURunner( CalculatorContext* cc) { #if MEDIAPIPE_TFLITE_GL_INFERENCE @@ -692,6 +746,9 @@ bool ShouldUseGpu(CC* cc) { ::tflite::gpu::gl::CreateReadWriteShaderStorageBuffer( gpu_data_out_[i]->elements, &gpu_data_out_[i]->buffer)); } + + MP_RETURN_IF_ERROR(ReadKernelsFromFile()); + MP_RETURN_IF_ERROR(tflite_gpu_runner_->Build()); #endif // MEDIAPIPE_TFLITE_GL_INFERENCE diff --git a/mediapipe/calculators/tflite/tflite_inference_calculator.proto b/mediapipe/calculators/tflite/tflite_inference_calculator.proto index 4fc0af932..bd83fea45 100644 --- a/mediapipe/calculators/tflite/tflite_inference_calculator.proto +++ b/mediapipe/calculators/tflite/tflite_inference_calculator.proto @@ -48,6 +48,10 @@ message TfLiteInferenceCalculatorOptions { // example: // delegate: { gpu { use_advanced_gpu_api: true } } optional bool use_advanced_gpu_api = 1 [default = false]; + + // Load pre-compiled serialized binary cache to accelerate init process. + // Only available for OpenCL delegate on Android. + optional bool use_kernel_caching = 2 [default = false]; } // Android only. message Nnapi {} diff --git a/mediapipe/calculators/util/BUILD b/mediapipe/calculators/util/BUILD index c4f0f8283..9e327511f 100644 --- a/mediapipe/calculators/util/BUILD +++ b/mediapipe/calculators/util/BUILD @@ -14,7 +14,7 @@ load("//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library") -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//visibility:public"]) @@ -783,6 +783,7 @@ mediapipe_cc_proto_library( cc_library( name = "landmarks_to_render_data_calculator", srcs = ["landmarks_to_render_data_calculator.cc"], + hdrs = ["landmarks_to_render_data_calculator.h"], visibility = ["//visibility:public"], deps = [ ":landmarks_to_render_data_calculator_cc_proto", diff --git a/mediapipe/calculators/util/annotation_overlay_calculator.cc b/mediapipe/calculators/util/annotation_overlay_calculator.cc index e66bc1095..13dcabc7e 100644 --- a/mediapipe/calculators/util/annotation_overlay_calculator.cc +++ b/mediapipe/calculators/util/annotation_overlay_calculator.cc @@ -389,8 +389,6 @@ REGISTER_CALCULATOR(AnnotationOverlayCalculator); // Upload render target to GPU. 
{ - glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); - glBindTexture(GL_TEXTURE_2D, image_mat_tex_); glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, width_canvas_, height_canvas_, GL_RGB, GL_UNSIGNED_BYTE, overlay_image); diff --git a/mediapipe/calculators/util/landmarks_to_render_data_calculator.cc b/mediapipe/calculators/util/landmarks_to_render_data_calculator.cc index 3d43a64bb..6d8ee3fed 100644 --- a/mediapipe/calculators/util/landmarks_to_render_data_calculator.cc +++ b/mediapipe/calculators/util/landmarks_to_render_data_calculator.cc @@ -11,6 +11,7 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. +#include "mediapipe/calculators/util/landmarks_to_render_data_calculator.h" #include "absl/memory/memory.h" #include "absl/strings/str_cat.h" @@ -34,8 +35,6 @@ constexpr char kRenderDataTag[] = "RENDER_DATA"; constexpr char kLandmarkLabel[] = "KEYPOINT"; constexpr int kMaxLandmarkThickness = 18; -using ::mediapipe::RenderAnnotation_Point; - inline void SetColor(RenderAnnotation* annotation, const Color& color) { annotation->mutable_color()->set_r(color.r()); annotation->mutable_color()->set_g(color.g()); @@ -162,45 +161,6 @@ RenderAnnotation* AddPointRenderData(const Color& landmark_color, } // namespace -// A calculator that converts Landmark proto to RenderData proto for -// visualization. The input should be LandmarkList proto. It is also possible -// to specify the connections between landmarks. -// -// Example config: -// node { -// calculator: "LandmarksToRenderDataCalculator" -// input_stream: "NORM_LANDMARKS:landmarks" -// output_stream: "RENDER_DATA:render_data" -// options { -// [LandmarksToRenderDataCalculatorOptions.ext] { -// landmark_connections: [0, 1, 1, 2] -// landmark_color { r: 0 g: 255 b: 0 } -// connection_color { r: 0 g: 255 b: 0 } -// thickness: 4.0 -// } -// } -// } -class LandmarksToRenderDataCalculator : public CalculatorBase { - public: - LandmarksToRenderDataCalculator() {} - ~LandmarksToRenderDataCalculator() override {} - LandmarksToRenderDataCalculator(const LandmarksToRenderDataCalculator&) = - delete; - LandmarksToRenderDataCalculator& operator=( - const LandmarksToRenderDataCalculator&) = delete; - - static ::mediapipe::Status GetContract(CalculatorContract* cc); - - ::mediapipe::Status Open(CalculatorContext* cc) override; - - ::mediapipe::Status Process(CalculatorContext* cc) override; - - private: - LandmarksToRenderDataCalculatorOptions options_; - std::vector landmark_connections_; -}; -REGISTER_CALCULATOR(LandmarksToRenderDataCalculator); - ::mediapipe::Status LandmarksToRenderDataCalculator::GetContract( CalculatorContract* cc) { RET_CHECK(cc->Inputs().HasTag(kLandmarksTag) || @@ -354,4 +314,5 @@ REGISTER_CALCULATOR(LandmarksToRenderDataCalculator); return ::mediapipe::OkStatus(); } +REGISTER_CALCULATOR(LandmarksToRenderDataCalculator); } // namespace mediapipe diff --git a/mediapipe/calculators/util/landmarks_to_render_data_calculator.h b/mediapipe/calculators/util/landmarks_to_render_data_calculator.h new file mode 100644 index 000000000..8f45955f4 --- /dev/null +++ b/mediapipe/calculators/util/landmarks_to_render_data_calculator.h @@ -0,0 +1,69 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#ifndef MEDIAPIPE_CALCULATORS_UTIL_LANDMARKS_TO_RENDER_DATA_CALCULATOR_H_ +#define MEDIAPIPE_CALCULATORS_UTIL_LANDMARKS_TO_RENDER_DATA_CALCULATOR_H_ + +#include "absl/memory/memory.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_join.h" +#include "mediapipe/calculators/util/landmarks_to_render_data_calculator.pb.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/calculator_options.pb.h" +#include "mediapipe/framework/formats/landmark.pb.h" +#include "mediapipe/framework/formats/location_data.pb.h" +#include "mediapipe/framework/port/ret_check.h" +#include "mediapipe/util/color.pb.h" +#include "mediapipe/util/render_data.pb.h" +namespace mediapipe { + +// A calculator that converts Landmark proto to RenderData proto for +// visualization. The input should be LandmarkList proto. It is also possible +// to specify the connections between landmarks. +// +// Example config: +// node { +// calculator: "LandmarksToRenderDataCalculator" +// input_stream: "NORM_LANDMARKS:landmarks" +// output_stream: "RENDER_DATA:render_data" +// options { +// [LandmarksToRenderDataCalculatorOptions.ext] { +// landmark_connections: [0, 1, 1, 2] +// landmark_color { r: 0 g: 255 b: 0 } +// connection_color { r: 0 g: 255 b: 0 } +// thickness: 4.0 +// } +// } +// } +class LandmarksToRenderDataCalculator : public CalculatorBase { + public: + LandmarksToRenderDataCalculator() {} + ~LandmarksToRenderDataCalculator() override {} + LandmarksToRenderDataCalculator(const LandmarksToRenderDataCalculator&) = + delete; + LandmarksToRenderDataCalculator& operator=( + const LandmarksToRenderDataCalculator&) = delete; + + static ::mediapipe::Status GetContract(CalculatorContract* cc); + + ::mediapipe::Status Open(CalculatorContext* cc) override; + + ::mediapipe::Status Process(CalculatorContext* cc) override; + + protected: + ::mediapipe::LandmarksToRenderDataCalculatorOptions options_; + std::vector landmark_connections_; +}; + +} // namespace mediapipe +#endif // MEDIAPIPE_CALCULATORS_UTIL_LANDMARKS_TO_RENDER_DATA_CALCULATOR_H_ diff --git a/mediapipe/calculators/video/BUILD b/mediapipe/calculators/video/BUILD index 57a500cc5..2930c488a 100644 --- a/mediapipe/calculators/video/BUILD +++ b/mediapipe/calculators/video/BUILD @@ -19,7 +19,7 @@ load( "mediapipe_binary_graph", ) -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//visibility:private"]) diff --git a/mediapipe/calculators/video/tool/BUILD b/mediapipe/calculators/video/tool/BUILD index 96bc35669..3d3ed2f86 100644 --- a/mediapipe/calculators/video/tool/BUILD +++ b/mediapipe/calculators/video/tool/BUILD @@ -15,7 +15,7 @@ load("//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library") -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//mediapipe/calculators/video:__subpackages__"]) diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic/BUILD b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic/BUILD index 04e660a7d..d0ff4e8cb 100644 --- 
a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic/BUILD +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic/BUILD @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) # Basic library common across example apps. android_library( diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic/MainActivity.java b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic/MainActivity.java index aa2ca0783..8a4924756 100644 --- a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic/MainActivity.java +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic/MainActivity.java @@ -80,7 +80,7 @@ public class MainActivity extends AppCompatActivity { @Override protected void onCreate(Bundle savedInstanceState) { super.onCreate(savedInstanceState); - setContentView(R.layout.activity_main); + setContentView(getContentViewLayoutResId()); try { applicationInfo = @@ -112,6 +112,12 @@ public class MainActivity extends AppCompatActivity { PermissionHelper.checkAndRequestCameraPermissions(this); } + // Used to obtain the content view for this application. If you are extending this class, and + // have a custom layout, override this method and return the custom layout. + protected int getContentViewLayoutResId() { + return R.layout.activity_main; + } + @Override protected void onResume() { super.onResume(); diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facedetectioncpu/BUILD b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facedetectioncpu/BUILD index ba12b8133..7536be08b 100644 --- a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facedetectioncpu/BUILD +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facedetectioncpu/BUILD @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//visibility:private"]) diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facedetectiongpu/BUILD b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facedetectiongpu/BUILD index 56f6cd040..46a758ab6 100644 --- a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facedetectiongpu/BUILD +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facedetectiongpu/BUILD @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//visibility:private"]) diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu/BUILD b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu/BUILD index 0146466dd..2de32b36f 100644 --- a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu/BUILD +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu/BUILD @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//visibility:private"]) diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/hairsegmentationgpu/BUILD b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/hairsegmentationgpu/BUILD index 901576bcf..284dcd9a0 100644 --- a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/hairsegmentationgpu/BUILD +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/hairsegmentationgpu/BUILD @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//visibility:private"]) diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/handdetectiongpu/BUILD b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/handdetectiongpu/BUILD index 6f0b52369..d7841b6fa 100644 --- a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/handdetectiongpu/BUILD +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/handdetectiongpu/BUILD @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//visibility:private"]) diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/handtrackinggpu/BUILD b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/handtrackinggpu/BUILD index e9d6d5155..546ce9aa0 100644 --- a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/handtrackinggpu/BUILD +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/handtrackinggpu/BUILD @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//visibility:private"]) diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/BUILD b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/BUILD new file mode 100644 index 000000000..784221084 --- /dev/null +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/BUILD @@ -0,0 +1,99 @@ +# Copyright 2020 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +licenses(["notice"]) + +package(default_visibility = ["//visibility:private"]) + +java_lite_proto_library( + name = "sticker_buffer_java_proto_lite", + deps = ["//mediapipe/graphs/instant_motion_tracking/calculators:sticker_buffer_proto"], +) + +android_library( + name = "instantmotiontracking_lib", + srcs = glob(["*.java"]), + manifest = "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:AndroidManifest.xml", + resource_files = glob([ + "res/layout/**", + "res/drawable/**", + ]), + visibility = ["//visibility:public"], + deps = [ + ":sticker_buffer_java_proto_lite", + "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:basic_lib", + "//mediapipe/java/com/google/mediapipe/components:android_components", + "//mediapipe/java/com/google/mediapipe/framework:android_framework", + "//third_party:androidx_appcompat", + "//third_party:androidx_core", + "//third_party:opencv", + "@maven//:androidx_concurrent_concurrent_futures", + "@maven//:com_github_bumptech_glide_glide", + "@maven//:com_google_guava_guava", + ], +) + +# Include all calculators specific to this project defined by BUILD in graphs +cc_binary( + name = "libmediapipe_jni.so", + linkshared = 1, + linkstatic = 1, + deps = [ + "//mediapipe/graphs/instant_motion_tracking:instant_motion_tracking_deps", + "//mediapipe/java/com/google/mediapipe/framework/jni:mediapipe_framework_jni", + ], +) + +# Converts the .so cc_binary into a cc_library, to be consumed in an android_binary. +cc_library( + name = "mediapipe_jni_lib", + srcs = [":libmediapipe_jni.so"], + alwayslink = 1, +) + +genrule( + name = "asset3d", + srcs = ["//mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/assets:robot/robot.obj.uuu.zip"], + outs = ["robot/robot.obj.uuu"], + cmd = "unzip -p $< > $@", +) + +android_binary( + name = "instantmotiontracking", + assets = [ + ":asset3d", + "//mediapipe/graphs/instant_motion_tracking:instant_motion_tracking.binarypb", + "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/assets:gif/gif.obj.uuu", + "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/assets:gif/default_gif_texture.jpg", + "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/assets:robot/robot_texture.jpg", + ], + assets_dir = "", + manifest = "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:AndroidManifest.xml", + manifest_values = { + "applicationId": "com.google.mediapipe.apps.instantmotiontracking", + "appName": "Instant Motion Tracking", + "mainActivity": ".MainActivity", + "cameraFacingFront": "False", + "binaryGraphName": "instant_motion_tracking.binarypb", + "inputVideoStreamName": "input_video", + "outputVideoStreamName": "output_video", + "flipFramesVertically": "True", + }, + multidex = "native", + deps = [ + ":instantmotiontracking_lib", + ":mediapipe_jni_lib", + "//mediapipe/java/com/google/mediapipe/framework:android_framework", + ], +) diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/GIFEditText.java b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/GIFEditText.java new file mode 100644 index 000000000..1b733ed82 --- /dev/null +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/GIFEditText.java @@ -0,0 +1,103 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this 
file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package com.google.mediapipe.apps.instantmotiontracking; + +import android.content.ClipDescription; +import android.content.Context; +import android.net.Uri; +import android.os.Bundle; +import androidx.appcompat.widget.AppCompatEditText; +import android.util.AttributeSet; +import android.util.Log; +import android.view.inputmethod.EditorInfo; +import android.view.inputmethod.InputConnection; +import androidx.core.view.inputmethod.EditorInfoCompat; +import androidx.core.view.inputmethod.InputConnectionCompat; +import androidx.core.view.inputmethod.InputContentInfoCompat; + +// import android.support.v13.view.inputmethod.EditorInfoCompat; +// import android.support.v13.view.inputmethod.InputConnectionCompat; +// import android.support.v13.view.inputmethod.InputContentInfoCompat; + +/** + * This custom EditText implementation uses the existing EditText framework in + * order to develop a GIFEditText input box which is capable of accepting GIF + * animations from the Android system keyboard and return the GIF location with + * a content URI. + */ +public class GIFEditText extends AppCompatEditText { + + private GIFCommitListener gifCommitListener; + + public GIFEditText(Context context) { + super(context); + } + + public GIFEditText(Context context, AttributeSet attrs) { + super(context, attrs); + } + + /** + * onGIFCommit is called once content is pushed to the EditText via the + * Android keyboard. + */ + public interface GIFCommitListener { + void onGIFCommit(Uri contentUri, ClipDescription description); + } + + /** + * Used to set the gifCommitListener for this GIFEditText. 
+ * + * @param gifCommitListener handles response to new content pushed to EditText + */ + public void setGIFCommitListener(GIFCommitListener gifCommitListener) { + this.gifCommitListener = gifCommitListener; + } + + @Override + public InputConnection onCreateInputConnection(EditorInfo editorInfo) { + final InputConnection inputConnection = super.onCreateInputConnection(editorInfo); + EditorInfoCompat.setContentMimeTypes(editorInfo, new String[] {"image/gif"}); + return InputConnectionCompat.createWrapper( + inputConnection, + editorInfo, + new InputConnectionCompat.OnCommitContentListener() { + @Override + public boolean onCommitContent( + final InputContentInfoCompat inputContentInfo, int flags, Bundle opts) { + try { + if (gifCommitListener != null) { + Runnable runnable = + new Runnable() { + @Override + public void run() { + inputContentInfo.requestPermission(); + gifCommitListener.onGIFCommit( + inputContentInfo.getContentUri(), inputContentInfo.getDescription()); + inputContentInfo.releasePermission(); + } + }; + new Thread(runnable).start(); + } + } catch (RuntimeException e) { + Log.e("GIFEditText", "Input connection to GIF selection failed"); + e.printStackTrace(); + return false; + } + return true; + } + }); + } +} diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/MainActivity.java b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/MainActivity.java new file mode 100644 index 000000000..b4c3f46dc --- /dev/null +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/MainActivity.java @@ -0,0 +1,633 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package com.google.mediapipe.apps.instantmotiontracking; + +import static java.lang.Math.max; + +import android.content.ClipDescription; +import android.content.Context; +import android.content.Intent; +import android.graphics.Bitmap; +import android.graphics.BitmapFactory; +import android.graphics.Color; +import android.graphics.Matrix; +import android.graphics.drawable.Drawable; +import android.hardware.Sensor; +import android.hardware.SensorEvent; +import android.hardware.SensorEventListener; +import android.hardware.SensorManager; +import android.net.Uri; +import android.os.Bundle; +import android.util.Log; +import android.util.Size; +import android.view.MotionEvent; +import android.view.SurfaceHolder; +import android.view.View; +import android.view.ViewGroup; +import android.view.inputmethod.InputMethodManager; +import android.widget.ImageButton; +import android.widget.ImageView; +import android.widget.LinearLayout; +import com.bumptech.glide.Glide; +import com.bumptech.glide.load.resource.gif.GifDrawable; +import com.bumptech.glide.request.target.CustomTarget; +import com.bumptech.glide.request.transition.Transition; +import com.google.mediapipe.components.FrameProcessor; +import com.google.mediapipe.framework.AndroidPacketCreator; +import com.google.mediapipe.framework.Packet; +import java.io.InputStream; +import java.lang.reflect.Field; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * This is the MainActivity that handles camera input, IMU sensor data acquisition + * and sticker management for the InstantMotionTracking MediaPipe project. + */ +public class MainActivity extends com.google.mediapipe.apps.basic.MainActivity { + private static final String TAG = "InstantMotionTrackingMainActivity"; + + // Allows for automated packet transmission to graph + private MediaPipePacketManager mediaPipePacketManager; + + private static final int TARGET_CAMERA_WIDTH = 960; + private static final int TARGET_CAMERA_HEIGHT = 1280; + private static final float TARGET_CAMERA_ASPECT_RATIO = + (float) TARGET_CAMERA_WIDTH / (float) TARGET_CAMERA_HEIGHT; + + // Bounds for a single click (sticker anchor reset) + private static final long CLICK_DURATION = 300; // ms + private long clickStartMillis = 0; + private ViewGroup viewGroup; + // Contains dynamic layout of sticker data controller + private LinearLayout buttonLayout; + + private ArrayList stickerArrayList; + // Current sticker being edited by user + private StickerManager currentSticker; + // Trip value used to determine sticker re-anchoring + private static final String STICKER_SENTINEL_TAG = "sticker_sentinel"; + private int stickerSentinel = -1; + + // Define parameters for 'reactivity' of object + private static final float ROTATION_SPEED = 5.0f; + private static final float SCALING_FACTOR = 0.025f; + + // Parameters of device visual field for rendering system + // (68 degrees, 4:3 for Pixel 4) + // TODO : Make acquisition of this information automated + private static final float VERTICAL_FOV_RADIANS = (float) Math.toRadians(68.0); + private static final String FOV_SIDE_PACKET_TAG = "vertical_fov_radians"; + private static final String ASPECT_RATIO_SIDE_PACKET_TAG = "aspect_ratio"; + + private static final String IMU_MATRIX_TAG = "imu_rotation_matrix"; + private static final int SENSOR_SAMPLE_DELAY = SensorManager.SENSOR_DELAY_FASTEST; + private final float[] rotationMatrix = new float[9]; + + private static final String STICKER_PROTO_TAG = "sticker_proto_string"; + // Assets 
for object rendering + // All animation assets and tags for the first asset (1) + private Bitmap asset3dTexture = null; + private static final String ASSET_3D_TEXTURE = "robot/robot_texture.jpg"; + private static final String ASSET_3D_FILE = "robot/robot.obj.uuu"; + private static final String ASSET_3D_TEXTURE_TAG = "texture_3d"; + private static final String ASSET_3D_TAG = "asset_3d"; + // All GIF animation assets and tags + private GIFEditText editText; + private ArrayList gifBitmaps = new ArrayList<>(); + private int gifCurrentIndex = 0; + private Bitmap defaultGIFTexture = null; // Texture sent if no gif available + // last time the GIF was updated + private long gifLastFrameUpdateMS = System.currentTimeMillis(); + private static final int GIF_FRAME_RATE = 20; // 20 FPS + private static final String GIF_ASPECT_RATIO_TAG = "gif_aspect_ratio"; + private static final String DEFAULT_GIF_TEXTURE = "gif/default_gif_texture.jpg"; + private static final String GIF_FILE = "gif/gif.obj.uuu"; + private static final String GIF_TEXTURE_TAG = "gif_texture"; + private static final String GIF_ASSET_TAG = "gif_asset_name"; + + private int cameraWidth = TARGET_CAMERA_WIDTH; + private int cameraHeight = TARGET_CAMERA_HEIGHT; + + @Override + protected Size cameraTargetResolution() { + // Camera size is in landscape, so here we have (height, width) + return new Size(TARGET_CAMERA_HEIGHT, TARGET_CAMERA_WIDTH); + } + + @Override + protected Size computeViewSize(int width, int height) { + // Try to force aspect ratio of view size to match our target aspect ratio + return new Size(height, (int) (height * TARGET_CAMERA_ASPECT_RATIO)); + } + + @Override + protected void onPreviewDisplaySurfaceChanged( + SurfaceHolder holder, int format, int width, int height) { + super.onPreviewDisplaySurfaceChanged(holder, format, width, height); + boolean isCameraRotated = cameraHelper.isCameraRotated(); + + // cameraImageSize computation logic duplicated from base MainActivity + Size viewSize = computeViewSize(width, height); + Size cameraImageSize = cameraHelper.computeDisplaySizeFromViewSize(viewSize); + cameraWidth = + isCameraRotated ? cameraImageSize.getHeight() : cameraImageSize.getWidth(); + cameraHeight = + isCameraRotated ? 
cameraImageSize.getWidth() : cameraImageSize.getHeight(); + } + + @Override + protected void onCreate(Bundle savedInstanceState) { + + super.onCreate(savedInstanceState); + + editText = findViewById(R.id.gif_edit_text); + editText.setGIFCommitListener( + new GIFEditText.GIFCommitListener() { + @Override + public void onGIFCommit(Uri contentUri, ClipDescription description) { + // The application must have permission to access the GIF content + grantUriPermission( + "com.google.mediapipe.apps.instantmotiontracking", + contentUri, + Intent.FLAG_GRANT_READ_URI_PERMISSION); + // Set GIF frames from content URI + setGIFBitmaps(contentUri.toString()); + // Close the keyboard upon GIF acquisition + closeKeyboard(); + } + }); + + // Send loaded 3d render assets as side packets to graph + prepareDemoAssets(); + AndroidPacketCreator packetCreator = processor.getPacketCreator(); + + Map inputSidePackets = new HashMap<>(); + inputSidePackets.put(ASSET_3D_TEXTURE_TAG, + packetCreator.createRgbaImageFrame(asset3dTexture)); + inputSidePackets.put(ASSET_3D_TAG, + packetCreator.createString(ASSET_3D_FILE)); + inputSidePackets.put(GIF_ASSET_TAG, + packetCreator.createString(GIF_FILE)); + processor.setInputSidePackets(inputSidePackets); + + // Add frame listener to PacketManagement system + mediaPipePacketManager = new MediaPipePacketManager(); + processor.setOnWillAddFrameListener(mediaPipePacketManager); + + // Send device properties to render objects via OpenGL + Map devicePropertiesSidePackets = new HashMap<>(); + // TODO: Note that if our actual camera stream resolution does not match the + // requested aspect ratio, then we will need to update the value used for + // this packet, or else tracking results will be off. + devicePropertiesSidePackets.put( + ASPECT_RATIO_SIDE_PACKET_TAG, packetCreator.createFloat32(TARGET_CAMERA_ASPECT_RATIO)); + devicePropertiesSidePackets.put( + FOV_SIDE_PACKET_TAG, packetCreator.createFloat32(VERTICAL_FOV_RADIANS)); + processor.setInputSidePackets(devicePropertiesSidePackets); + + // Begin with 0 stickers in dataset + stickerArrayList = new ArrayList<>(); + currentSticker = null; + + SensorManager sensorManager = (SensorManager) getSystemService(SENSOR_SERVICE); + List sensorList = sensorManager.getSensorList(Sensor.TYPE_ROTATION_VECTOR); + sensorManager.registerListener( + new SensorEventListener() { + private final float[] rotMatFromVec = new float[9]; + + @Override + public void onAccuracyChanged(Sensor sensor, int accuracy) {} + // Update procedure on sensor adjustment (phone changes orientation) + + @Override + public void onSensorChanged(SensorEvent event) { + // Get the Rotation Matrix from the Rotation Vector + SensorManager.getRotationMatrixFromVector(rotMatFromVec, event.values); + // AXIS_MINUS_X is used to remap the rotation matrix for left hand + // rules in the MediaPipe graph + SensorManager.remapCoordinateSystem( + rotMatFromVec, SensorManager.AXIS_MINUS_X, SensorManager.AXIS_Y, rotationMatrix); + } + }, + (Sensor) sensorList.get(0), + SENSOR_SAMPLE_DELAY); + + // Mechanisms for zoom, pinch, rotation, tap gestures + buttonLayout = (LinearLayout) findViewById(R.id.button_layout); + viewGroup = findViewById(R.id.preview_display_layout); + viewGroup.setOnTouchListener( + new View.OnTouchListener() { + @Override + public boolean onTouch(View v, MotionEvent event) { + return manageUiTouch(event); + } + }); + refreshUi(); + } + + // Obtain our custom activity_main layout for InstantMotionTracking + @Override + protected int getContentViewLayoutResId() { + 
return R.layout.instant_motion_tracking_activity_main;
+  }
+
+  // Manages a touch event in order to perform placement/rotation/scaling gestures
+  // on virtual sticker objects.
+  private boolean manageUiTouch(MotionEvent event) {
+    if (currentSticker != null) {
+      switch (event.getAction()) {
+          // Detecting a single click for object re-anchoring
+        case (MotionEvent.ACTION_DOWN):
+          clickStartMillis = System.currentTimeMillis();
+          break;
+        case (MotionEvent.ACTION_UP):
+          if (System.currentTimeMillis() - clickStartMillis <= CLICK_DURATION) {
+            recordClick(event);
+          }
+          break;
+        case (MotionEvent.ACTION_MOVE):
+          // Rotation and scaling are independent events and can occur simultaneously
+          if (event.getPointerCount() == 2) {
+            if (event.getHistorySize() > 1) {
+              // Calculate user scaling of sticker
+              float newScaleFactor = getNewScaleFactor(event, currentSticker.getScaleFactor());
+              currentSticker.setScaleFactor(newScaleFactor);
+              // Calculate rotation (radians) for dynamic y-axis rotations
+              float rotationIncrement = calculateRotationRadians(event);
+              currentSticker.setRotation(currentSticker.getRotation() + rotationIncrement);
+            }
+          }
+          break;
+        default:
+          // fall out
+      }
+    }
+    return true;
+  }
+
+  // Returns the rotation in radians generated by a two-finger MotionEvent
+  // recorded by the OnTouchListener.
+  private static float calculateRotationRadians(MotionEvent event) {
+    float tangentA =
+        (float) Math.atan2(event.getY(1) - event.getY(0), event.getX(1) - event.getX(0));
+    float tangentB =
+        (float)
+            Math.atan2(
+                event.getHistoricalY(1, 0) - event.getHistoricalY(0, 0),
+                event.getHistoricalX(1, 0) - event.getHistoricalX(0, 0));
+    float angle = ((float) Math.toDegrees(tangentA - tangentB)) % 360f;
+    angle += ((angle < -180f) ? +360f : ((angle > 180f) ? -360f : 0.0f));
+    float rotationIncrement = (float) (Math.PI * ((angle * ROTATION_SPEED) / 180));
+    return rotationIncrement;
+  }
+
+  // Returns an updated scale factor for the sticker, based on whether the two
+  // fingers are pinching together or spreading apart.
+  private static float getNewScaleFactor(MotionEvent event, float currentScaleFactor) {
+    double newDistance = getDistance(event.getX(0), event.getY(0), event.getX(1), event.getY(1));
+    double oldDistance =
+        getDistance(
+            event.getHistoricalX(0, 0),
+            event.getHistoricalY(0, 0),
+            event.getHistoricalX(1, 0),
+            event.getHistoricalY(1, 0));
+    float signFloat =
+        (newDistance < oldDistance)
+            ? -SCALING_FACTOR
+            : SCALING_FACTOR; // Are they moving towards each other?
+    currentScaleFactor *= (1f + signFloat);
+    return currentScaleFactor;
+  }
+
+  // Called if a single touch event is recorded on the screen and used to set the
+  // new anchor position for the current sticker in focus.
+  private void recordClick(MotionEvent event) {
+    // First normalize our click position w.r.t. the view display
+    float x = (event.getX() / viewGroup.getWidth());
+    float y = (event.getY() / viewGroup.getHeight());
+
+    // MediaPipe can automatically crop our camera stream when displaying it to
+    // our surface, which can throw off our touch point calculations. So we need
+    // to replicate that logic here. See FrameScaleMode::kFillAndCrop usage in
+    // gl_quad_renderer.cc for more details.
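+    // For example, with a 1080x1920 view over a 960x1280 camera frame,
+    // widthRatio = 1.125 and heightRatio = 1.5; dividing both by the max
+    // (1.5) leaves widthRatio = 0.75 and heightRatio = 1.0, i.e. the frame
+    // is cropped horizontally, and a click at the far left of the view maps
+    // to normalized x = 0.125 in camera space.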
+ float widthRatio = (float) viewGroup.getWidth() / (float) cameraWidth; + float heightRatio = (float) viewGroup.getHeight() / (float) cameraHeight; + + float maxRatio = max(widthRatio, heightRatio); + widthRatio /= maxRatio; + heightRatio /= maxRatio; + + // Now we scale by the scale factors, and then reposition (since cropping + // is always centered) + x *= widthRatio; + x += 0.5f * (1.0f - widthRatio); + y *= heightRatio; + y += 0.5f * (1.0f - heightRatio); + + // Finally, we can pass our adjusted x and y points to the StickerManager + currentSticker.setAnchorCoordinate(x, y); + stickerSentinel = currentSticker.getstickerId(); + } + + // Provided the X and Y coordinates of two points, the distance between them + // will be returned. + private static double getDistance(double x1, double y1, double x2, double y2) { + return Math.hypot((y2 - y1), (x2 - x1)); + } + + // Called upon each button click, and used to populate the buttonLayout with the + // current sticker data in addition to sticker controls (delete, remove, back). + private void refreshUi() { + if (currentSticker != null) { // No sticker in view + buttonLayout.removeAllViews(); + ImageButton deleteSticker = new ImageButton(this); + setControlButtonDesign(deleteSticker, R.drawable.baseline_clear_24); + deleteSticker.setOnClickListener( + new View.OnClickListener() { + @Override + public void onClick(View v) { + if (currentSticker != null) { + stickerArrayList.remove(currentSticker); + currentSticker = null; + refreshUi(); + } + } + }); + // Go to home sticker menu + ImageButton goBack = new ImageButton(this); + setControlButtonDesign(goBack, R.drawable.baseline_arrow_back_24); + goBack.setOnClickListener( + new View.OnClickListener() { + @Override + public void onClick(View v) { + currentSticker = null; + refreshUi(); + } + }); + // Change sticker to next possible render + ImageButton loopRender = new ImageButton(this); + setControlButtonDesign(loopRender, R.drawable.baseline_loop_24); + loopRender.setOnClickListener( + new View.OnClickListener() { + @Override + public void onClick(View v) { + currentSticker.setRender(currentSticker.getRender().iterate()); + refreshUi(); + } + }); + buttonLayout.addView(deleteSticker); + buttonLayout.addView(goBack); + buttonLayout.addView(loopRender); + + // Add the GIF search option if current sticker is GIF + if (currentSticker.getRender() == StickerManager.Render.GIF) { + ImageButton gifSearch = new ImageButton(this); + setControlButtonDesign(gifSearch, R.drawable.baseline_search_24); + gifSearch.setOnClickListener( + new View.OnClickListener() { + @Override + public void onClick(View v) { + // Clear the text field to prevent text artifacts in GIF selection + editText.setText(""); + // Open the Keyboard to allow user input + openKeyboard(); + } + }); + buttonLayout.addView(gifSearch); + } + } else { + buttonLayout.removeAllViews(); + // Display stickers + for (final StickerManager sticker : stickerArrayList) { + final ImageButton stickerButton = new ImageButton(this); + stickerButton.setOnClickListener( + new View.OnClickListener() { + @Override + public void onClick(View v) { + currentSticker = sticker; + refreshUi(); + } + }); + if (sticker.getRender() == StickerManager.Render.GIF) { + setControlButtonDesign(stickerButton, R.drawable.asset_gif_preview); + } else if (sticker.getRender() == StickerManager.Render.ASSET_3D) { + setStickerButtonDesign(stickerButton, R.drawable.asset_3d_preview); + } + + buttonLayout.addView(stickerButton); + } + ImageButton addSticker = new ImageButton(this); 
+ setControlButtonDesign(addSticker, R.drawable.baseline_add_24); + addSticker.setOnClickListener( + new View.OnClickListener() { + @Override + public void onClick(View v) { + StickerManager newSticker = new StickerManager(); + stickerArrayList.add(newSticker); + currentSticker = newSticker; + refreshUi(); + } + }); + ImageButton clearStickers = new ImageButton(this); + setControlButtonDesign(clearStickers, R.drawable.baseline_clear_all_24); + clearStickers.setOnClickListener( + new View.OnClickListener() { + @Override + public void onClick(View v) { + stickerArrayList.clear(); + refreshUi(); + } + }); + + buttonLayout.addView(addSticker); + buttonLayout.addView(clearStickers); + } + } + + // Sets ImageButton UI for Control Buttons. + private void setControlButtonDesign(ImageButton btn, int imageDrawable) { + // btn.setImageDrawable(getResources().getDrawable(imageDrawable)); + btn.setImageDrawable(getDrawable(imageDrawable)); + btn.setBackgroundColor(Color.parseColor("#00ffffff")); + btn.setColorFilter(Color.parseColor("#0494a4")); + btn.setLayoutParams(new LinearLayout.LayoutParams(200, 200)); + btn.setPadding(25, 25, 25, 25); + btn.setScaleType(ImageView.ScaleType.FIT_XY); + } + + // Sets ImageButton UI for Sticker Buttons. + private void setStickerButtonDesign(ImageButton btn, int imageDrawable) { + btn.setImageDrawable(getDrawable(imageDrawable)); + btn.setBackground(getDrawable(R.drawable.circle_button)); + btn.setLayoutParams(new LinearLayout.LayoutParams(250, 250)); + btn.setPadding(25, 25, 25, 25); + btn.setScaleType(ImageView.ScaleType.CENTER_INSIDE); + } + + // Used to set ArrayList of Bitmap frames + private void setGIFBitmaps(String gifUrl) { + gifBitmaps = new ArrayList<>(); // Empty the bitmap array + Glide.with(this) + .asGif() + .load(gifUrl) + .into( + new CustomTarget() { + @Override + public void onLoadCleared(Drawable placeholder) {} + + @Override + public void onResourceReady( + GifDrawable resource, Transition transition) { + try { + Object startConstant = resource.getConstantState(); + Field frameManager = startConstant.getClass().getDeclaredField("frameLoader"); + frameManager.setAccessible(true); + Object frameLoader = frameManager.get(startConstant); + Field decoder = frameLoader.getClass().getDeclaredField("gifDecoder"); + decoder.setAccessible(true); + + Object frameObject = (decoder.get(frameLoader)); + for (int i = 0; i < resource.getFrameCount(); i++) { + frameObject.getClass().getMethod("advance").invoke(frameObject); + Bitmap bmp = + (Bitmap) + frameObject.getClass().getMethod("getNextFrame").invoke(frameObject); + gifBitmaps.add(flipHorizontal(bmp)); + } + } catch (Exception e) { + Log.e(TAG, "", e); + } + } + }); + } + + // Bitmaps must be flipped due to native acquisition of frames from Android OS + private static Bitmap flipHorizontal(Bitmap bmp) { + Matrix matrix = new Matrix(); + // Flip Bitmap frames horizontally + matrix.preScale(-1.0f, 1.0f); + return Bitmap.createBitmap(bmp, 0, 0, bmp.getWidth(), bmp.getHeight(), matrix, true); + } + + // Function that is continuously called in order to time GIF frame updates + private void updateGIFFrame() { + long millisPerFrame = 1000 / GIF_FRAME_RATE; + if (System.currentTimeMillis() - gifLastFrameUpdateMS >= millisPerFrame) { + // Update GIF timestamp + gifLastFrameUpdateMS = System.currentTimeMillis(); + // Cycle through every possible frame and avoid a divide by 0 + gifCurrentIndex = gifBitmaps.isEmpty() ? 
1 : (gifCurrentIndex + 1) % gifBitmaps.size();
+    }
+  }
+
+  // Called once to pop up the keyboard via Android OS with focus set to editText
+  private void openKeyboard() {
+    editText.requestFocus();
+    InputMethodManager imm = (InputMethodManager) getSystemService(Context.INPUT_METHOD_SERVICE);
+    imm.showSoftInput(editText, InputMethodManager.SHOW_IMPLICIT);
+  }
+
+  // Called once to close the keyboard via Android OS
+  private void closeKeyboard() {
+    View view = this.getCurrentFocus();
+    if (view != null) {
+      InputMethodManager imm = (InputMethodManager) getSystemService(Context.INPUT_METHOD_SERVICE);
+      imm.hideSoftInputFromWindow(view.getWindowToken(), 0);
+    }
+  }
+
+  private void prepareDemoAssets() {
+    // We render from raw data with OpenGL, so disable decoding preprocessing
+    BitmapFactory.Options decodeOptions = new BitmapFactory.Options();
+    decodeOptions.inScaled = false;
+    decodeOptions.inDither = false;
+    decodeOptions.inPremultiplied = false;
+
+    try {
+      InputStream inputStream = getAssets().open(DEFAULT_GIF_TEXTURE);
+      defaultGIFTexture =
+          flipHorizontal(
+              BitmapFactory.decodeStream(inputStream, null /*outPadding*/, decodeOptions));
+      inputStream.close();
+    } catch (Exception e) {
+      Log.e(TAG, "Error parsing default GIF texture; error: ", e);
+      throw new IllegalStateException(e);
+    }
+
+    try {
+      InputStream inputStream = getAssets().open(ASSET_3D_TEXTURE);
+      asset3dTexture = BitmapFactory.decodeStream(inputStream, null /*outPadding*/, decodeOptions);
+      inputStream.close();
+    } catch (Exception e) {
+      Log.e(TAG, "Error parsing object texture; error: ", e);
+      throw new IllegalStateException(e);
+    }
+  }
+
+  private class MediaPipePacketManager implements FrameProcessor.OnWillAddFrameListener {
+    @Override
+    public void onWillAddFrame(long timestamp) {
+      // Set current GIF bitmap as default texture
+      Bitmap currentGIFBitmap = defaultGIFTexture;
+      // If current index is in bounds, display current frame
+      if (gifCurrentIndex <= gifBitmaps.size() - 1) {
+        currentGIFBitmap = gifBitmaps.get(gifCurrentIndex);
+      }
+      // Update to next GIF frame based on timing and frame rate
+      updateGIFFrame();
+
+      // Calculate and set the aspect ratio of the GIF
+      float gifAspectRatio =
+          (float) currentGIFBitmap.getWidth() / (float) currentGIFBitmap.getHeight();
+
+      Packet stickerSentinelPacket = processor.getPacketCreator().createInt32(stickerSentinel);
+      // Sticker sentinel value must be reset for next graph iteration
+      stickerSentinel = -1;
+      // Initialize the sticker data protobuf packet
+      Packet stickerProtoDataPacket =
+          processor
+              .getPacketCreator()
+              .createSerializedProto(StickerManager.getMessageLiteData(stickerArrayList));
+      // Define and set the IMU sensory information float array
+      Packet imuDataPacket = processor.getPacketCreator().createFloat32Array(rotationMatrix);
+      // Communicate GIF textures (dynamic texturing) to graph
+      Packet gifTexturePacket = processor.getPacketCreator().createRgbaImageFrame(currentGIFBitmap);
+      Packet gifAspectRatioPacket = processor.getPacketCreator().createFloat32(gifAspectRatio);
+      processor
+          .getGraph()
+          .addConsumablePacketToInputStream(STICKER_SENTINEL_TAG, stickerSentinelPacket, timestamp);
+      processor
+          .getGraph()
+          .addConsumablePacketToInputStream(STICKER_PROTO_TAG, stickerProtoDataPacket, timestamp);
+      processor
+          .getGraph()
+          .addConsumablePacketToInputStream(IMU_MATRIX_TAG, imuDataPacket, timestamp);
+      processor
+          .getGraph()
+          .addConsumablePacketToInputStream(GIF_TEXTURE_TAG, gifTexturePacket, timestamp);
+      processor
+          .getGraph()
.addConsumablePacketToInputStream(GIF_ASPECT_RATIO_TAG, gifAspectRatioPacket, timestamp); + stickerSentinelPacket.release(); + stickerProtoDataPacket.release(); + imuDataPacket.release(); + gifTexturePacket.release(); + gifAspectRatioPacket.release(); + } + } +} diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/StickerManager.java b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/StickerManager.java new file mode 100644 index 000000000..e6da53624 --- /dev/null +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/StickerManager.java @@ -0,0 +1,191 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package com.google.mediapipe.apps.instantmotiontracking; + +import com.google.mediapipe.graphs.instantmotiontracking.StickerBufferProto.Sticker; +import com.google.mediapipe.graphs.instantmotiontracking.StickerBufferProto.StickerRoll; +import java.util.ArrayList; + +/** + * This class represents a single sticker object placed in the + * instantmotiontracking system. StickerManagers represent a unique object to render + * and manipulate in an AR scene. + *

A sticker has a sticker_id (a unique integer identifying a sticker object + * to render), x and y normalized anchor coordinates [0.0-1.0], user inputs for + * rotation in radians, scaling, and a renderID (another unique integer which + * determines what object model to render for this unique sticker). + */ +public class StickerManager { + + /** All types of possible objects to render for our application. */ + public enum Render { + // Every possible render for a sticker object + GIF, + ASSET_3D; + + /** + * Once called, will set the value of the current render to the next + * possible Render available. If all possible Renders have been iterated + * through, the function will loop and set to the first available Render. + */ + public Render iterate() { + int newEnumIdx = (this.ordinal() + 1) % Render.values().length; + return Render.values()[newEnumIdx]; + } + } + + // Current render of the sticker object + private Render currentRender; + + // Normalized X and Y coordinates of anchor + // (0,0) lies at top-left corner of screen + // (1.0,1.0) lies at bottom-right corner of screen + private float anchorX; + private float anchorY; + + // Rotation in radians from user + private float userRotation = 0f; + // Scaling factor as defined by user (defaults to 1.0) + private float userScalingFactor = 1f; + + // Unique sticker integer ID + private final int stickerId; + + // Used to determine next stickerId + private static int globalIDLimit = 1; + + /** + * Used to create a StickerManager object with a newly generated stickerId and a + * default Render of the first possible render in our Render enum. + */ + public StickerManager() { + // Every sticker will have a default render of the first 3D asset + this.currentRender = Render.values()[1]; + // StickerManager will render out of view by default + this.setAnchorCoordinate(2.0f, 2.0f); + // Set the global sticker ID limit for the next sticker + stickerId = StickerManager.globalIDLimit++; + } + + /** + * Used to create a StickerManager object with a newly generated stickerId. + * + * @param render initial Render of the new StickerManager object + */ + public StickerManager(Render render) { + this.currentRender = render; + // StickerManager will render out of view by default + this.setAnchorCoordinate(2.0f, 2.0f); + // Set the global sticker ID limit for the next sticker + stickerId = StickerManager.globalIDLimit++; + } + + /** + * Used to get the sticker ID of the object. + * + * @return integer of the unique sticker ID + */ + public int getstickerId() { + return this.stickerId; + } + + /** + * Used to update or reset the anchor positions in normalized [0.0-1.0] + * coordinate space for the sticker object. + * + * @param normalizedX normalized X coordinate for the new anchor position + * @param normalizedY normalized Y coordinate for the new anchor position + */ + public void setAnchorCoordinate(float normalizedX, float normalizedY) { + this.anchorX = normalizedX; + this.anchorY = normalizedY; + } + + /** Returns the normalized X anchor coordinate of the sticker object. */ + public float getAnchorX() { + return anchorX; + } + + /** Returns the normalized Y anchor coordinate of the sticker object. */ + public float getAnchorY() { + return anchorY; + } + + /** Returns current asset to be rendered for this sticker object. */ + public Render getRender() { + return currentRender; + } + + /** Set render for this sticker object */ + public void setRender(Render render) { + this.currentRender = render; + } + + /** + * Sets new user value of rotation radians. 
This rotation is not cumulative, + * and must be set to an absolute value of rotation applied to the object. + * + * @param radians specified radians to rotate the sticker object by + */ + public void setRotation(float radians) { + this.userRotation = radians; + } + + /** Returns current user radian rotation setting. */ + public float getRotation() { + return this.userRotation; + } + + /** + * Sets new user scale factor. This factor will be proportional to the scale + * of the sticker object. + * + * @param scaling scale factor to be applied + */ + public void setScaleFactor(float scaling) { + this.userScalingFactor = scaling; + } + + /** Returns current user scale factor setting. */ + public float getScaleFactor() { + return this.userScalingFactor; + } + + /** + * This method converts an ArrayList of stickers to a MessageLite object + * which can be passed directly to the MediaPipe graph. + * + * @param stickerArrayList ArrayList of StickerManager objects to convert to data string + * @return MessageLite protobuffer of all sticker data + */ + public static StickerRoll getMessageLiteData( + ArrayList stickerArrayList) { + StickerRoll.Builder stickerRollBuilder + = StickerRoll.newBuilder(); + for (final StickerManager sticker : stickerArrayList) { + Sticker protoSticker = + Sticker.newBuilder() + .setId(sticker.getstickerId()) + .setX(sticker.getAnchorX()) + .setY(sticker.getAnchorY()) + .setRotation(sticker.getRotation()) + .setScale(sticker.getScaleFactor()) + .setRenderId(sticker.getRender().ordinal()) + .build(); + stickerRollBuilder.addSticker(protoSticker); + } + return stickerRollBuilder.build(); + } +} diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/assets/BUILD b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/assets/BUILD new file mode 100644 index 000000000..e60b04c30 --- /dev/null +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/assets/BUILD @@ -0,0 +1,21 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +licenses(["notice"]) + +package(default_visibility = ["//visibility:public"]) + +exports_files( + srcs = glob(["**"]), +) diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/assets/gif/default_gif_texture.jpg b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/assets/gif/default_gif_texture.jpg new file mode 100644 index 000000000..27e86f96e Binary files /dev/null and b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/assets/gif/default_gif_texture.jpg differ diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/assets/gif/gif.obj.uuu b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/assets/gif/gif.obj.uuu new file mode 100644 index 000000000..6e63ae6a5 Binary files /dev/null and b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/assets/gif/gif.obj.uuu differ diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/assets/robot/robot.obj.uuu.zip b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/assets/robot/robot.obj.uuu.zip new file mode 100644 index 000000000..00a753dd1 Binary files /dev/null and b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/assets/robot/robot.obj.uuu.zip differ diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/assets/robot/robot_texture.jpg b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/assets/robot/robot_texture.jpg new file mode 100644 index 000000000..f41e41a6f Binary files /dev/null and b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/assets/robot/robot_texture.jpg differ diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/res/drawable/asset_3d_preview.png b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/res/drawable/asset_3d_preview.png new file mode 100644 index 000000000..a1242817a Binary files /dev/null and b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/res/drawable/asset_3d_preview.png differ diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/res/drawable/asset_gif_preview.xml b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/res/drawable/asset_gif_preview.xml new file mode 100644 index 000000000..fe7758dd9 --- /dev/null +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/res/drawable/asset_gif_preview.xml @@ -0,0 +1,16 @@ + + + + + diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/res/drawable/baseline_add_24.xml b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/res/drawable/baseline_add_24.xml new file mode 100644 index 000000000..eb232541d --- /dev/null +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/res/drawable/baseline_add_24.xml @@ -0,0 +1,10 @@ + + + diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/res/drawable/baseline_arrow_back_24.xml b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/res/drawable/baseline_arrow_back_24.xml new file mode 100644 
index 000000000..bab545a70 --- /dev/null +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/res/drawable/baseline_arrow_back_24.xml @@ -0,0 +1,10 @@ + + + diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/res/drawable/baseline_clear_24.xml b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/res/drawable/baseline_clear_24.xml new file mode 100644 index 000000000..16d6d37dd --- /dev/null +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/res/drawable/baseline_clear_24.xml @@ -0,0 +1,10 @@ + + + diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/res/drawable/baseline_clear_all_24.xml b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/res/drawable/baseline_clear_all_24.xml new file mode 100644 index 000000000..dc649f3e2 --- /dev/null +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/res/drawable/baseline_clear_all_24.xml @@ -0,0 +1,10 @@ + + + diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/res/drawable/baseline_loop_24.xml b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/res/drawable/baseline_loop_24.xml new file mode 100644 index 000000000..c2f773a17 --- /dev/null +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/res/drawable/baseline_loop_24.xml @@ -0,0 +1,10 @@ + + + diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/res/drawable/baseline_search_24.xml b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/res/drawable/baseline_search_24.xml new file mode 100644 index 000000000..07b76d627 --- /dev/null +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/res/drawable/baseline_search_24.xml @@ -0,0 +1,10 @@ + + + diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/res/drawable/circle_button.xml b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/res/drawable/circle_button.xml new file mode 100644 index 000000000..067ae1f8b --- /dev/null +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/res/drawable/circle_button.xml @@ -0,0 +1,7 @@ + + + + + + + diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/res/layout/instant_motion_tracking_activity_main.xml b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/res/layout/instant_motion_tracking_activity_main.xml new file mode 100644 index 000000000..c99a3c4a4 --- /dev/null +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/instantmotiontracking/res/layout/instant_motion_tracking_activity_main.xml @@ -0,0 +1,62 @@ + + + + + + + + + + + + + + + + + + diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/iristrackinggpu/BUILD b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/iristrackinggpu/BUILD index 202cee82d..473404fdd 100644 --- a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/iristrackinggpu/BUILD +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/iristrackinggpu/BUILD @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # 
limitations under the License.

-licenses(["notice"])  # Apache 2.0
+licenses(["notice"])

package(default_visibility = ["//visibility:private"])

@@ -57,6 +57,7 @@ android_binary(
     deps = [
         ":mediapipe_jni_lib",
         "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:basic_lib",
+        "//mediapipe/framework/formats:landmark_java_proto_lite",
         "//mediapipe/java/com/google/mediapipe/framework:android_framework",
     ],
 )
diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/iristrackinggpu/MainActivity.java b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/iristrackinggpu/MainActivity.java
index a979e698f..fc4c67755 100644
--- a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/iristrackinggpu/MainActivity.java
+++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/iristrackinggpu/MainActivity.java
@@ -15,7 +15,13 @@
 package com.google.mediapipe.apps.iristrackinggpu;

 import android.graphics.SurfaceTexture;
+import android.os.Bundle;
+import android.util.Log;
+import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark;
+import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmarkList;
 import com.google.mediapipe.framework.Packet;
+import com.google.mediapipe.framework.PacketGetter;
+import com.google.protobuf.InvalidProtocolBufferException;
 import java.util.HashMap;
 import java.util.Map;

@@ -24,6 +30,7 @@ public class MainActivity extends com.google.mediapipe.apps.basic.MainActivity {
   private static final String TAG = "MainActivity";

   private static final String FOCAL_LENGTH_STREAM_NAME = "focal_length_pixel";
+  private static final String OUTPUT_LANDMARKS_STREAM_NAME = "face_landmarks_with_iris";

   @Override
   protected void onCameraStarted(SurfaceTexture surfaceTexture) {
@@ -37,4 +44,55 @@ public class MainActivity extends com.google.mediapipe.apps.basic.MainActivity {
       processor.setInputSidePackets(inputSidePackets);
     }
   }
+
+  @Override
+  protected void onCreate(Bundle savedInstanceState) {
+    super.onCreate(savedInstanceState);
+
+    // To show verbose logging, run:
+    // adb shell setprop log.tag.MainActivity VERBOSE
+    if (Log.isLoggable(TAG, Log.VERBOSE)) {
+      processor.addPacketCallback(
+          OUTPUT_LANDMARKS_STREAM_NAME,
+          (packet) -> {
+            byte[] landmarksRaw = PacketGetter.getProtoBytes(packet);
+            try {
+              NormalizedLandmarkList landmarks = NormalizedLandmarkList.parseFrom(landmarksRaw);
+              if (landmarks == null) {
+                Log.v(TAG, "[TS:" + packet.getTimestamp() + "] No landmarks.");
+                return;
+              }
+              Log.v(
+                  TAG,
+                  "[TS:"
+                      + packet.getTimestamp()
+                      + "] #Landmarks for face (including iris): "
+                      + landmarks.getLandmarkCount());
+              Log.v(TAG, getLandmarksDebugString(landmarks));
+            } catch (InvalidProtocolBufferException e) {
+              Log.e(TAG, "Couldn't parse landmarks packet: " + e);
+              return;
+            }
+          });
+    }
+  }
+
+  private static String getLandmarksDebugString(NormalizedLandmarkList landmarks) {
+    int landmarkIndex = 0;
+    String landmarksString = "";
+    for (NormalizedLandmark landmark : landmarks.getLandmarkList()) {
+      landmarksString +=
+          "\t\tLandmark["
+              + landmarkIndex
+              + "]: ("
+              + landmark.getX()
+              + ", "
+              + landmark.getY()
+              + ", "
+              + landmark.getZ()
+              + ")\n";
+      ++landmarkIndex;
+    }
+    return landmarksString;
+  }
 }
diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/multihandtrackinggpu/BUILD b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/multihandtrackinggpu/BUILD
index be6152554..7d4d7418c 100644
---
a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/multihandtrackinggpu/BUILD +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/multihandtrackinggpu/BUILD @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//visibility:private"]) diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/BUILD b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/BUILD index fb0e6835f..f07bc8ebc 100644 --- a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/BUILD +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/BUILD @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//visibility:private"]) @@ -72,11 +72,11 @@ android_binary( ] + select({ "//conditions:default": [ "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/assets/sneaker:model.obj.uuu", - "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/assets/sneaker:texture.bmp", + "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/assets/sneaker:texture.jpg", ], ":use_chair_model": [ "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/assets/chair:model.obj.uuu", - "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/assets/chair:texture.bmp", + "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/assets/chair:texture.jpg", ], }), assets_dir = "", diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/MainActivity.java b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/MainActivity.java index 9f33e4eb6..92f9f55bb 100644 --- a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/MainActivity.java +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/MainActivity.java @@ -31,7 +31,7 @@ import java.util.Map; public class MainActivity extends com.google.mediapipe.apps.basic.MainActivity { private static final String TAG = "MainActivity"; - private static final String OBJ_TEXTURE = "texture.bmp"; + private static final String OBJ_TEXTURE = "texture.jpg"; private static final String OBJ_FILE = "model.obj.uuu"; private static final String BOX_TEXTURE = "classic_colors.png"; private static final String BOX_FILE = "box.obj.uuu"; diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/assets/BUILD b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/assets/BUILD index ef6f88d65..46d164040 100644 --- a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/assets/BUILD +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/assets/BUILD @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//visibility:public"]) diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/assets/chair/BUILD b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/assets/chair/BUILD index ef6f88d65..46d164040 100644 --- a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/assets/chair/BUILD +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/assets/chair/BUILD @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//visibility:public"]) diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/assets/chair/texture.bmp b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/assets/chair/texture.bmp deleted file mode 100644 index 0a4d1187d..000000000 Binary files a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/assets/chair/texture.bmp and /dev/null differ diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/assets/chair/texture.jpg b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/assets/chair/texture.jpg new file mode 100644 index 000000000..759172f5c Binary files /dev/null and b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/assets/chair/texture.jpg differ diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/assets/sneaker/BUILD b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/assets/sneaker/BUILD index ef6f88d65..46d164040 100644 --- a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/assets/sneaker/BUILD +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/assets/sneaker/BUILD @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//visibility:public"]) diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/assets/sneaker/texture.bmp b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/assets/sneaker/texture.bmp deleted file mode 100644 index fa6c85a37..000000000 Binary files a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/assets/sneaker/texture.bmp and /dev/null differ diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/assets/sneaker/texture.jpg b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/assets/sneaker/texture.jpg new file mode 100644 index 000000000..58f641bfe Binary files /dev/null and b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/assets/sneaker/texture.jpg differ diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetectioncpu/BUILD b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetectioncpu/BUILD index 0f14a48a2..080fe4ced 100644 --- a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetectioncpu/BUILD +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetectioncpu/BUILD @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//visibility:private"]) diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetectiongpu/BUILD b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetectiongpu/BUILD index 84d5364b9..56e70c2b6 100644 --- a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetectiongpu/BUILD +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetectiongpu/BUILD @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//visibility:private"]) diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objecttrackinggpu/BUILD b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objecttrackinggpu/BUILD index 75f9c075e..220d48067 100644 --- a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objecttrackinggpu/BUILD +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objecttrackinggpu/BUILD @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//visibility:private"]) diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/templatematchingcpu/BUILD b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/templatematchingcpu/BUILD index 4ca58d99b..0ceeeee1b 100644 --- a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/templatematchingcpu/BUILD +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/templatematchingcpu/BUILD @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//visibility:private"]) diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/upperbodyposetrackinggpu/BUILD b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/upperbodyposetrackinggpu/BUILD index 660382c5c..fe2da982c 100644 --- a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/upperbodyposetrackinggpu/BUILD +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/upperbodyposetrackinggpu/BUILD @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//visibility:private"]) diff --git a/mediapipe/examples/coral/BUILD b/mediapipe/examples/coral/BUILD index 338e38d4a..03d4027e7 100644 --- a/mediapipe/examples/coral/BUILD +++ b/mediapipe/examples/coral/BUILD @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = [ "//visibility:public", diff --git a/mediapipe/examples/desktop/BUILD b/mediapipe/examples/desktop/BUILD index 0e0335157..7772e21da 100644 --- a/mediapipe/examples/desktop/BUILD +++ b/mediapipe/examples/desktop/BUILD @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = [ "//visibility:public", diff --git a/mediapipe/examples/desktop/autoflip/BUILD b/mediapipe/examples/desktop/autoflip/BUILD index db4625401..9d84e2bdb 100644 --- a/mediapipe/examples/desktop/autoflip/BUILD +++ b/mediapipe/examples/desktop/autoflip/BUILD @@ -14,7 +14,7 @@ load("//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library" # See the License for the specific language governing permissions and # limitations under the License. -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//mediapipe/examples:__subpackages__"]) diff --git a/mediapipe/examples/desktop/autoflip/calculators/BUILD b/mediapipe/examples/desktop/autoflip/calculators/BUILD index b645dc69f..688084062 100644 --- a/mediapipe/examples/desktop/autoflip/calculators/BUILD +++ b/mediapipe/examples/desktop/autoflip/calculators/BUILD @@ -14,7 +14,7 @@ load("//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library" # See the License for the specific language governing permissions and # limitations under the License. 
-licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = [ "//mediapipe/examples:__subpackages__", diff --git a/mediapipe/examples/desktop/autoflip/calculators/content_zooming_calculator_test.cc b/mediapipe/examples/desktop/autoflip/calculators/content_zooming_calculator_test.cc index e20ebba12..818e6b4a1 100644 --- a/mediapipe/examples/desktop/autoflip/calculators/content_zooming_calculator_test.cc +++ b/mediapipe/examples/desktop/autoflip/calculators/content_zooming_calculator_test.cc @@ -173,6 +173,7 @@ TEST(ContentZoomingCalculatorTest, PanConfig) { auto* options = config.mutable_options()->MutableExtension( ContentZoomingCalculatorOptions::ext); options->mutable_kinematic_options_pan()->set_min_motion_to_reframe(0.0); + options->mutable_kinematic_options_pan()->set_update_rate_seconds(2); options->mutable_kinematic_options_tilt()->set_min_motion_to_reframe(5.0); options->mutable_kinematic_options_zoom()->set_min_motion_to_reframe(5.0); auto runner = ::absl::make_unique(config); @@ -191,6 +192,7 @@ TEST(ContentZoomingCalculatorTest, TiltConfig) { ContentZoomingCalculatorOptions::ext); options->mutable_kinematic_options_pan()->set_min_motion_to_reframe(5.0); options->mutable_kinematic_options_tilt()->set_min_motion_to_reframe(0.0); + options->mutable_kinematic_options_tilt()->set_update_rate_seconds(2); options->mutable_kinematic_options_zoom()->set_min_motion_to_reframe(5.0); auto runner = ::absl::make_unique(config); AddDetection(cv::Rect_(.4, .5, .1, .1), 0, runner.get()); @@ -209,6 +211,7 @@ TEST(ContentZoomingCalculatorTest, ZoomConfig) { options->mutable_kinematic_options_pan()->set_min_motion_to_reframe(5.0); options->mutable_kinematic_options_tilt()->set_min_motion_to_reframe(5.0); options->mutable_kinematic_options_zoom()->set_min_motion_to_reframe(0.0); + options->mutable_kinematic_options_zoom()->set_update_rate_seconds(2); auto runner = ::absl::make_unique(config); AddDetection(cv::Rect_(.4, .5, .1, .1), 0, runner.get()); AddDetection(cv::Rect_(.45, .55, .15, .15), 1000000, runner.get()); @@ -345,8 +348,13 @@ TEST(ContentZoomingCalculatorTest, ZoomTestPairSize) { } TEST(ContentZoomingCalculatorTest, ZoomTestNearOutsideBorder) { - auto runner = ::absl::make_unique( - ParseTextProtoOrDie(kConfigD)); + auto config = ParseTextProtoOrDie(kConfigD); + auto* options = config.mutable_options()->MutableExtension( + ContentZoomingCalculatorOptions::ext); + options->mutable_kinematic_options_pan()->set_update_rate_seconds(2); + options->mutable_kinematic_options_tilt()->set_update_rate_seconds(2); + options->mutable_kinematic_options_zoom()->set_update_rate_seconds(2); + auto runner = ::absl::make_unique(config); AddDetection(cv::Rect_(.95, .95, .05, .05), 0, runner.get()); AddDetection(cv::Rect_(.9, .9, .1, .1), 1000000, runner.get()); MP_ASSERT_OK(runner->Run()); @@ -357,8 +365,13 @@ TEST(ContentZoomingCalculatorTest, ZoomTestNearOutsideBorder) { } TEST(ContentZoomingCalculatorTest, ZoomTestNearInsideBorder) { - auto runner = ::absl::make_unique( - ParseTextProtoOrDie(kConfigD)); + auto config = ParseTextProtoOrDie(kConfigD); + auto* options = config.mutable_options()->MutableExtension( + ContentZoomingCalculatorOptions::ext); + options->mutable_kinematic_options_pan()->set_update_rate_seconds(2); + options->mutable_kinematic_options_tilt()->set_update_rate_seconds(2); + options->mutable_kinematic_options_zoom()->set_update_rate_seconds(2); + auto runner = ::absl::make_unique(config); AddDetection(cv::Rect_(0, 0, .05, .05), 0, runner.get()); 
AddDetection(cv::Rect_<float>(0, 0, .1, .1), 1000000, runner.get()); MP_ASSERT_OK(runner->Run()); diff --git a/mediapipe/examples/desktop/autoflip/calculators/testdata/BUILD b/mediapipe/examples/desktop/autoflip/calculators/testdata/BUILD index 361d77d3c..cd99c8244 100644 --- a/mediapipe/examples/desktop/autoflip/calculators/testdata/BUILD +++ b/mediapipe/examples/desktop/autoflip/calculators/testdata/BUILD @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) filegroup( name = "test_images", diff --git a/mediapipe/examples/desktop/autoflip/quality/BUILD b/mediapipe/examples/desktop/autoflip/quality/BUILD index 4d79b92de..a6e79c3a3 100644 --- a/mediapipe/examples/desktop/autoflip/quality/BUILD +++ b/mediapipe/examples/desktop/autoflip/quality/BUILD @@ -14,7 +14,7 @@ load("//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library" # See the License for the specific language governing permissions and # limitations under the License. -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//mediapipe/examples:__subpackages__"]) diff --git a/mediapipe/examples/desktop/autoflip/quality/kinematic_path_solver.cc b/mediapipe/examples/desktop/autoflip/quality/kinematic_path_solver.cc index 3d37541cf..573c990d7 100644 --- a/mediapipe/examples/desktop/autoflip/quality/kinematic_path_solver.cc +++ b/mediapipe/examples/desktop/autoflip/quality/kinematic_path_solver.cc @@ -12,6 +12,8 @@ namespace autoflip { current_velocity_deg_per_s_ = 0; RET_CHECK_GT(pixels_per_degree_, 0) << "pixels_per_degree must be larger than 0."; + RET_CHECK_GT(options_.update_rate_seconds(), 0) + << "update_rate_seconds must be greater than 0."; RET_CHECK_GE(options_.min_motion_to_reframe(), options_.reframe_window()) << "Reframe window cannot exceed min_motion_to_reframe."; return ::mediapipe::OkStatus(); @@ -41,9 +43,10 @@ namespace autoflip { // Observed velocity and then weighted update of this velocity. double observed_velocity = delta_degs / delta_t; - double updated_velocity = - current_velocity_deg_per_s_ * (1 - options_.update_rate()) + - observed_velocity * options_.update_rate(); + double update_rate = std::min(delta_t / options_.update_rate_seconds(), + options_.max_update_rate()); + double updated_velocity = current_velocity_deg_per_s_ * (1 - update_rate) + + observed_velocity * update_rate; // Limited current velocity. current_velocity_deg_per_s_ = updated_velocity > 0 ? fmin(updated_velocity, options_.max_velocity()) diff --git a/mediapipe/examples/desktop/autoflip/quality/kinematic_path_solver.proto b/mediapipe/examples/desktop/autoflip/quality/kinematic_path_solver.proto index 552ead0d9..ac2595328 100644 --- a/mediapipe/examples/desktop/autoflip/quality/kinematic_path_solver.proto +++ b/mediapipe/examples/desktop/autoflip/quality/kinematic_path_solver.proto @@ -5,7 +5,7 @@ package mediapipe.autoflip; message KinematicOptions { // Weighted update of new camera velocity (measurement) vs current state // (prediction). - optional double update_rate = 1 [default = 0.5]; + optional double update_rate = 1 [default = 0.5, deprecated = true]; // Max velocity (degrees per second) that the camera can move. optional double max_velocity = 2 [default = 18]; // Min motion (in degrees) to react in pixels. @@ -15,4 +15,9 @@ message KinematicOptions { // total reframe distance on average. Value cannot exceed // min_motion_to_reframe value.
optional float reframe_window = 4 [default = 0]; + // Weight of each new velocity observation in the internal velocity state, + // computed as: min((delta_time_s / update_rate_seconds), max_update_rate), + // where delta_time_s is the time since the last frame. + optional double update_rate_seconds = 5 [default = 0.20]; + optional double max_update_rate = 6 [default = 0.8]; } diff --git a/mediapipe/examples/desktop/autoflip/quality/kinematic_path_solver_test.cc b/mediapipe/examples/desktop/autoflip/quality/kinematic_path_solver_test.cc index d751bd1e3..0bdfb50d2 100644 --- a/mediapipe/examples/desktop/autoflip/quality/kinematic_path_solver_test.cc +++ b/mediapipe/examples/desktop/autoflip/quality/kinematic_path_solver_test.cc @@ -85,7 +85,8 @@ TEST(KinematicPathSolverTest, PassEnoughMotionLargeImg) { KinematicOptions options; // Set min motion to 1deg options.set_min_motion_to_reframe(1.0); - options.set_update_rate(1); + options.set_update_rate_seconds(.0000001); + options.set_max_update_rate(1.0); options.set_max_velocity(1000); // Set pixels / degree to 16.6 KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView); @@ -102,7 +103,8 @@ TEST(KinematicPathSolverTest, PassEnoughMotionSmallImg) { KinematicOptions options; // Set min motion to 1deg options.set_min_motion_to_reframe(1.0); - options.set_update_rate(1); + options.set_update_rate_seconds(.0000001); + options.set_max_update_rate(1.0); options.set_max_velocity(18); // Set pixels / degree to 8.3 KinematicPathSolver solver(options, 0, 500, 500.0 / kWidthFieldOfView); @@ -132,7 +134,8 @@ TEST(KinematicPathSolverTest, PassReframeWindow) { KinematicOptions options; // Set min motion to 1deg options.set_min_motion_to_reframe(1.0); - options.set_update_rate(1); + options.set_update_rate_seconds(.0000001); + options.set_max_update_rate(1.0); options.set_max_velocity(1000); // Set reframe window size to .75 for test.
options.set_reframe_window(0.75); @@ -147,10 +150,41 @@ TEST(KinematicPathSolverTest, PassReframeWindow) { EXPECT_EQ(state, 507); } +TEST(KinematicPathSolverTest, PassUpdateRate30FPS) { + KinematicOptions options; + options.set_min_motion_to_reframe(1.0); + options.set_update_rate_seconds(.25); + options.set_max_update_rate(0.8); + options.set_max_velocity(18); + KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView); + int state; + MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 0)); + MP_ASSERT_OK(solver.AddObservation(520, kMicroSecInSec * 1 / 30)); + MP_ASSERT_OK(solver.GetState(&state)); + // (0.033 / .25) * 20 = 2.7 pixels, so the state advances ~3px to 503. + EXPECT_EQ(state, 503); +} + +TEST(KinematicPathSolverTest, PassUpdateRate10FPS) { + KinematicOptions options; + options.set_min_motion_to_reframe(1.0); + options.set_update_rate_seconds(.25); + options.set_max_update_rate(0.8); + options.set_max_velocity(18); + KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView); + int state; + MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 0)); + MP_ASSERT_OK(solver.AddObservation(520, kMicroSecInSec * 1 / 10)); + MP_ASSERT_OK(solver.GetState(&state)); + // (0.1 / .25) * 20 = 8 pixels, so the state advances 8px to 508. + EXPECT_EQ(state, 508); +} + TEST(KinematicPathSolverTest, PassUpdateRate) { KinematicOptions options; options.set_min_motion_to_reframe(1.0); - options.set_update_rate(0.25); + options.set_update_rate_seconds(4); + options.set_max_update_rate(1.0); options.set_max_velocity(18); KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView); int state; diff --git a/mediapipe/examples/desktop/autoflip/quality/testdata/BUILD b/mediapipe/examples/desktop/autoflip/quality/testdata/BUILD index 2aff7d6a7..bbfc6340d 100644 --- a/mediapipe/examples/desktop/autoflip/quality/testdata/BUILD +++ b/mediapipe/examples/desktop/autoflip/quality/testdata/BUILD @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//visibility:public"]) diff --git a/mediapipe/examples/desktop/autoflip/subgraph/BUILD b/mediapipe/examples/desktop/autoflip/subgraph/BUILD index 4fea2fb92..9af7e447b 100644 --- a/mediapipe/examples/desktop/autoflip/subgraph/BUILD +++ b/mediapipe/examples/desktop/autoflip/subgraph/BUILD @@ -14,7 +14,7 @@ load("//mediapipe/framework/tool:mediapipe_graph.bzl", "mediapipe_simple_subgrap # See the License for the specific language governing permissions and # limitations under the License. -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//mediapipe/examples:__subpackages__"]) diff --git a/mediapipe/examples/desktop/face_detection/BUILD b/mediapipe/examples/desktop/face_detection/BUILD index 3d1dbcec8..55c9eb741 100644 --- a/mediapipe/examples/desktop/face_detection/BUILD +++ b/mediapipe/examples/desktop/face_detection/BUILD @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//mediapipe/examples:__subpackages__"]) diff --git a/mediapipe/examples/desktop/face_mesh/BUILD b/mediapipe/examples/desktop/face_mesh/BUILD index 268d590ef..c63814804 100644 --- a/mediapipe/examples/desktop/face_mesh/BUILD +++ b/mediapipe/examples/desktop/face_mesh/BUILD @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License.
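The kinematic_path_solver change above replaces the fixed update_rate blend weight with a time-based weight, so camera smoothing behaves the same at 10 fps as at 30 fps. A minimal standalone sketch of the new update rule, reusing the KinematicOptions field names (the function name is illustrative, not the library implementation):

#include <algorithm>

// Blends an observed camera velocity into the running estimate. The blend
// weight grows with the time elapsed since the last observation and is capped
// by max_update_rate, so a single observation can never fully overwrite the
// current state.
double BlendVelocity(double current_velocity, double observed_velocity,
                     double delta_t_s, double update_rate_seconds,
                     double max_update_rate) {
  const double update_rate =
      std::min(delta_t_s / update_rate_seconds, max_update_rate);
  return current_velocity * (1 - update_rate) +
         observed_velocity * update_rate;
}

At 30 fps with update_rate_seconds = 0.25, the weight is (1/30) / 0.25 ≈ 0.13, which is why PassUpdateRate30FPS above expects only ~3 of the 20 observed pixels of motion to be applied in a single frame.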
-licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//mediapipe/examples:__subpackages__"]) diff --git a/mediapipe/examples/desktop/hair_segmentation/BUILD b/mediapipe/examples/desktop/hair_segmentation/BUILD index 69948e437..9b799f347 100644 --- a/mediapipe/examples/desktop/hair_segmentation/BUILD +++ b/mediapipe/examples/desktop/hair_segmentation/BUILD @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//mediapipe/examples:__subpackages__"]) diff --git a/mediapipe/examples/desktop/hand_tracking/BUILD b/mediapipe/examples/desktop/hand_tracking/BUILD index 1c99b00f6..da6eef456 100644 --- a/mediapipe/examples/desktop/hand_tracking/BUILD +++ b/mediapipe/examples/desktop/hand_tracking/BUILD @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//mediapipe/examples:__subpackages__"]) diff --git a/mediapipe/examples/desktop/hello_world/BUILD b/mediapipe/examples/desktop/hello_world/BUILD index ff36a24f0..edf98bf13 100644 --- a/mediapipe/examples/desktop/hello_world/BUILD +++ b/mediapipe/examples/desktop/hello_world/BUILD @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//mediapipe/examples:__subpackages__"]) diff --git a/mediapipe/examples/desktop/iris_tracking/BUILD b/mediapipe/examples/desktop/iris_tracking/BUILD index 430922115..29812d21c 100644 --- a/mediapipe/examples/desktop/iris_tracking/BUILD +++ b/mediapipe/examples/desktop/iris_tracking/BUILD @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//mediapipe/examples:__subpackages__"]) diff --git a/mediapipe/examples/desktop/media_sequence/BUILD b/mediapipe/examples/desktop/media_sequence/BUILD index 30b37d82a..4e94ebe53 100644 --- a/mediapipe/examples/desktop/media_sequence/BUILD +++ b/mediapipe/examples/desktop/media_sequence/BUILD @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//mediapipe/examples:__subpackages__"]) diff --git a/mediapipe/examples/desktop/multi_hand_tracking/BUILD b/mediapipe/examples/desktop/multi_hand_tracking/BUILD index f83133545..a7bd112ff 100644 --- a/mediapipe/examples/desktop/multi_hand_tracking/BUILD +++ b/mediapipe/examples/desktop/multi_hand_tracking/BUILD @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//mediapipe/examples:__subpackages__"]) diff --git a/mediapipe/examples/desktop/object_detection/BUILD b/mediapipe/examples/desktop/object_detection/BUILD index 66b6d5698..c7860f09a 100644 --- a/mediapipe/examples/desktop/object_detection/BUILD +++ b/mediapipe/examples/desktop/object_detection/BUILD @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//mediapipe/examples:__subpackages__"]) diff --git a/mediapipe/examples/desktop/object_tracking/BUILD b/mediapipe/examples/desktop/object_tracking/BUILD index c4ac24ea0..8a87c5bbc 100644 --- a/mediapipe/examples/desktop/object_tracking/BUILD +++ b/mediapipe/examples/desktop/object_tracking/BUILD @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//mediapipe/examples:__subpackages__"]) diff --git a/mediapipe/examples/desktop/template_matching/BUILD b/mediapipe/examples/desktop/template_matching/BUILD index 4525a71d3..6ee07f71b 100644 --- a/mediapipe/examples/desktop/template_matching/BUILD +++ b/mediapipe/examples/desktop/template_matching/BUILD @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//mediapipe/examples:__subpackages__"]) diff --git a/mediapipe/examples/desktop/upper_body_pose_tracking/BUILD b/mediapipe/examples/desktop/upper_body_pose_tracking/BUILD index 9e32c5681..6240864a3 100644 --- a/mediapipe/examples/desktop/upper_body_pose_tracking/BUILD +++ b/mediapipe/examples/desktop/upper_body_pose_tracking/BUILD @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//mediapipe/examples:__subpackages__"]) diff --git a/mediapipe/examples/desktop/youtube8m/BUILD b/mediapipe/examples/desktop/youtube8m/BUILD index 16b868bdc..af85e3113 100644 --- a/mediapipe/examples/desktop/youtube8m/BUILD +++ b/mediapipe/examples/desktop/youtube8m/BUILD @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) cc_binary( name = "extract_yt8m_features", diff --git a/mediapipe/examples/ios/BUILD b/mediapipe/examples/ios/BUILD index 3cf7f234b..fd611a615 100644 --- a/mediapipe/examples/ios/BUILD +++ b/mediapipe/examples/ios/BUILD @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//visibility:public"]) diff --git a/mediapipe/examples/ios/facedetectioncpu/BUILD b/mediapipe/examples/ios/facedetectioncpu/BUILD index 69d54d72b..a4ae2cfca 100644 --- a/mediapipe/examples/ios/facedetectioncpu/BUILD +++ b/mediapipe/examples/ios/facedetectioncpu/BUILD @@ -22,7 +22,7 @@ load( "example_provisioning", ) -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) MIN_IOS_VERSION = "10.0" diff --git a/mediapipe/examples/ios/facedetectiongpu/BUILD b/mediapipe/examples/ios/facedetectiongpu/BUILD index b06a0a077..507ac45d8 100644 --- a/mediapipe/examples/ios/facedetectiongpu/BUILD +++ b/mediapipe/examples/ios/facedetectiongpu/BUILD @@ -22,7 +22,7 @@ load( "example_provisioning", ) -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) MIN_IOS_VERSION = "10.0" diff --git a/mediapipe/examples/ios/facemeshgpu/BUILD b/mediapipe/examples/ios/facemeshgpu/BUILD index dbe842285..11bd649bf 100644 --- a/mediapipe/examples/ios/facemeshgpu/BUILD +++ b/mediapipe/examples/ios/facemeshgpu/BUILD @@ -22,7 +22,7 @@ load( "example_provisioning", ) -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) MIN_IOS_VERSION = "10.0" diff --git a/mediapipe/examples/ios/handdetectiongpu/BUILD b/mediapipe/examples/ios/handdetectiongpu/BUILD index 953f80719..e1fbb8bd6 100644 --- a/mediapipe/examples/ios/handdetectiongpu/BUILD +++ b/mediapipe/examples/ios/handdetectiongpu/BUILD @@ -22,7 +22,7 @@ load( "example_provisioning", ) -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) MIN_IOS_VERSION = "10.0" diff --git a/mediapipe/examples/ios/handtrackinggpu/BUILD b/mediapipe/examples/ios/handtrackinggpu/BUILD index 162fa2e7d..b3ac999b6 100644 --- a/mediapipe/examples/ios/handtrackinggpu/BUILD +++ b/mediapipe/examples/ios/handtrackinggpu/BUILD @@ -22,7 +22,7 @@ load( "example_provisioning", ) -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) MIN_IOS_VERSION = "10.0" diff --git a/mediapipe/examples/ios/helloworld/BUILD b/mediapipe/examples/ios/helloworld/BUILD index b1916117d..192996bf3 100644 --- a/mediapipe/examples/ios/helloworld/BUILD +++ b/mediapipe/examples/ios/helloworld/BUILD @@ -22,7 +22,7 @@ load( "example_provisioning", ) -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) MIN_IOS_VERSION = "10.0" diff --git a/mediapipe/examples/ios/iristrackinggpu/BUILD b/mediapipe/examples/ios/iristrackinggpu/BUILD index c5d039236..3cf8d14f7 100644 --- a/mediapipe/examples/ios/iristrackinggpu/BUILD +++ b/mediapipe/examples/ios/iristrackinggpu/BUILD @@ -22,7 +22,7 @@ load( "example_provisioning", ) -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) MIN_IOS_VERSION = "10.0" diff --git a/mediapipe/examples/ios/multihandtrackinggpu/BUILD b/mediapipe/examples/ios/multihandtrackinggpu/BUILD index 404a50d9f..5616f12b6 100644 --- a/mediapipe/examples/ios/multihandtrackinggpu/BUILD +++ b/mediapipe/examples/ios/multihandtrackinggpu/BUILD @@ -22,7 +22,7 @@ load( "example_provisioning", ) -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) MIN_IOS_VERSION = "10.0" diff --git a/mediapipe/examples/ios/objectdetectioncpu/BUILD b/mediapipe/examples/ios/objectdetectioncpu/BUILD index 3a18ef89a..5ddd12df6 100644 --- a/mediapipe/examples/ios/objectdetectioncpu/BUILD +++ b/mediapipe/examples/ios/objectdetectioncpu/BUILD @@ -22,7 +22,7 @@ load( "example_provisioning", ) -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) MIN_IOS_VERSION = "10.0" diff --git a/mediapipe/examples/ios/upperbodyposetrackinggpu/BUILD 
b/mediapipe/examples/ios/upperbodyposetrackinggpu/BUILD index 5b9a08be2..0a2402857 100644 --- a/mediapipe/examples/ios/upperbodyposetrackinggpu/BUILD +++ b/mediapipe/examples/ios/upperbodyposetrackinggpu/BUILD @@ -22,7 +22,7 @@ load( "example_provisioning", ) -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) MIN_IOS_VERSION = "10.0" diff --git a/mediapipe/examples/python/upper_body_pose_tracker.py b/mediapipe/examples/python/upper_body_pose_tracker.py index b7a195994..edb1dfe73 100644 --- a/mediapipe/examples/python/upper_body_pose_tracker.py +++ b/mediapipe/examples/python/upper_body_pose_tracker.py @@ -38,8 +38,8 @@ import time from typing import Tuple, Union import cv2 -import numpy as np import mediapipe.python as mp +import numpy as np # resources dependency from mediapipe.framework.formats import landmark_pb2 @@ -107,7 +107,7 @@ class UpperBodyPoseTracker: output_file='/tmp/output.png') # Read an image and convert the BGR image to RGB. - input_image = cv2.imread('/tmp/input.png')[:, :, ::-1] + input_image = cv2.cvtColor(cv2.imread('/tmp/input.png'), cv2.COLOR_BGR2RGB) pose_landmarks, annotated_image = pose_tracker.run(input_image) pose_tracker.close() """ @@ -150,8 +150,11 @@ class UpperBodyPoseTracker: success, input_frame = cap.read() if not success: break - _, output_frame = self._run_graph(input_frame[:, :, ::-1]) - cv2.imshow('MediaPipe upper body pose tracker', output_frame[:, :, ::-1]) + input_frame = cv2.cvtColor(input_frame, cv2.COLOR_BGR2RGB) + input_frame.flags.writeable = False + _, output_frame = self._run_graph(input_frame) + cv2.imshow('MediaPipe upper body pose tracker', + cv2.cvtColor(output_frame, cv2.COLOR_RGB2BGR)) if cv2.waitKey(5) & 0xFF == 27: break cap.release() diff --git a/mediapipe/framework/BUILD b/mediapipe/framework/BUILD index 2140144cd..a61ee12df 100644 --- a/mediapipe/framework/BUILD +++ b/mediapipe/framework/BUILD @@ -16,7 +16,7 @@ load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library") -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//visibility:private"]) diff --git a/mediapipe/framework/calculator_graph_bounds_test.cc b/mediapipe/framework/calculator_graph_bounds_test.cc index f44931b32..ebc9ee6c8 100644 --- a/mediapipe/framework/calculator_graph_bounds_test.cc +++ b/mediapipe/framework/calculator_graph_bounds_test.cc @@ -92,11 +92,11 @@ class IntAdderCalculator : public CalculatorBase { cc->Inputs().Index(i).Set<int>(); } cc->Outputs().Index(0).Set<int>(); + cc->SetTimestampOffset(TimestampDiff(0)); return ::mediapipe::OkStatus(); } ::mediapipe::Status Open(CalculatorContext* cc) final { - cc->SetOffset(TimestampDiff(0)); return ::mediapipe::OkStatus(); } @@ -269,11 +269,11 @@ class Delay20Calculator : public CalculatorBase { static ::mediapipe::Status GetContract(CalculatorContract* cc) { cc->Inputs().Index(0).Set<int>(); cc->Outputs().Index(0).Set<int>(); + cc->SetTimestampOffset(TimestampDiff(20)); return ::mediapipe::OkStatus(); } ::mediapipe::Status Open(CalculatorContext* cc) final { - cc->SetOffset(TimestampDiff(20)); cc->Outputs().Index(0).AddPacket(MakePacket<int>(0).At(Timestamp(0))); return ::mediapipe::OkStatus(); } @@ -641,11 +641,11 @@ class OffsetBoundCalculator : public CalculatorBase { static ::mediapipe::Status GetContract(CalculatorContract* cc) { cc->Inputs().Index(0).Set<int>(); cc->Outputs().Index(0).Set<int>(); + cc->SetTimestampOffset(TimestampDiff(0)); return ::mediapipe::OkStatus(); } ::mediapipe::Status Open(CalculatorContext* cc) final { - cc->SetOffset(0); return ::mediapipe::OkStatus();
} @@ -1446,10 +1446,10 @@ class OffsetAndBoundCalculator : public CalculatorBase { static ::mediapipe::Status GetContract(CalculatorContract* cc) { cc->Inputs().Index(0).Set<int>(); cc->Outputs().Index(0).SetSameAs(&cc->Inputs().Index(0)); + cc->SetTimestampOffset(TimestampDiff(0)); return ::mediapipe::OkStatus(); } ::mediapipe::Status Open(CalculatorContext* cc) final { - cc->SetOffset(0); return ::mediapipe::OkStatus(); } ::mediapipe::Status Process(CalculatorContext* cc) final { diff --git a/mediapipe/framework/calculator_graph_test.cc b/mediapipe/framework/calculator_graph_test.cc index a70ee02e1..2b63490a0 100644 --- a/mediapipe/framework/calculator_graph_test.cc +++ b/mediapipe/framework/calculator_graph_test.cc @@ -100,11 +100,11 @@ class SquareIntCalculator : public CalculatorBase { static ::mediapipe::Status GetContract(CalculatorContract* cc) { cc->Inputs().Index(0).Set<int>(); cc->Outputs().Index(0).SetSameAs(&cc->Inputs().Index(0)); + cc->SetTimestampOffset(TimestampDiff(0)); return ::mediapipe::OkStatus(); } ::mediapipe::Status Open(CalculatorContext* cc) final { - cc->SetOffset(TimestampDiff(0)); return ::mediapipe::OkStatus(); } @@ -134,6 +134,7 @@ class DemuxTimedCalculator : public CalculatorBase { id < cc->Outputs().EndId("OUTPUT"); ++id) { cc->Outputs().Get(id).SetSameAs(data_input); } + cc->SetTimestampOffset(TimestampDiff(0)); return ::mediapipe::OkStatus(); } @@ -142,7 +143,6 @@ ... data_input_ = cc->Inputs().GetId("INPUT", 0); output_base_ = cc->Outputs().GetId("OUTPUT", 0); num_outputs_ = cc->Outputs().NumEntries("OUTPUT"); - cc->SetOffset(TimestampDiff(0)); return ::mediapipe::OkStatus(); } @@ -194,6 +194,7 @@ class MuxTimedCalculator : public CalculatorBase { } RET_CHECK_EQ(cc->Outputs().NumEntries(), 1); cc->Outputs().Tag("OUTPUT").SetSameAs(data_input0); + cc->SetTimestampOffset(TimestampDiff(0)); return ::mediapipe::OkStatus(); } @@ -202,7 +203,6 @@ ... data_input_base_ = cc->Inputs().GetId("INPUT", 0); num_data_inputs_ = cc->Inputs().NumEntries("INPUT"); output_ = cc->Outputs().GetId("OUTPUT", 0); - cc->SetOffset(TimestampDiff(0)); return ::mediapipe::OkStatus(); } @@ -232,11 +232,11 @@ class IntAdderCalculator : public CalculatorBase { cc->Inputs().Index(i).Set<int>(); } cc->Outputs().Index(0).Set<int>(); + cc->SetTimestampOffset(TimestampDiff(0)); return ::mediapipe::OkStatus(); } ::mediapipe::Status Open(CalculatorContext* cc) final { - cc->SetOffset(TimestampDiff(0)); return ::mediapipe::OkStatus(); } @@ -260,11 +260,11 @@ class FloatAdderCalculator : public CalculatorBase { cc->Inputs().Index(i).Set<float>(); } cc->Outputs().Index(0).Set<float>(); + cc->SetTimestampOffset(TimestampDiff(0)); return ::mediapipe::OkStatus(); } ::mediapipe::Status Open(CalculatorContext* cc) final { - cc->SetOffset(TimestampDiff(0)); return ::mediapipe::OkStatus(); } @@ -288,11 +288,11 @@ class IntMultiplierCalculator : public CalculatorBase { cc->Inputs().Index(i).Set<int>(); } cc->Outputs().Index(0).Set<int>(); + cc->SetTimestampOffset(TimestampDiff(0)); return ::mediapipe::OkStatus(); } ::mediapipe::Status Open(CalculatorContext* cc) final { - cc->SetOffset(TimestampDiff(0)); return ::mediapipe::OkStatus(); } @@ -316,12 +316,12 @@ class FloatScalarMultiplierCalculator : public CalculatorBase { cc->Inputs().Index(0).Set<float>(); cc->Outputs().Index(0).Set<float>(); cc->InputSidePackets().Index(0).Set<float>(); + cc->SetTimestampOffset(TimestampDiff(0)); return ::mediapipe::OkStatus(); } ::mediapipe::Status Open(CalculatorContext* cc) final {
scalar_ = cc->InputSidePackets().Index(0).Get<float>(); - cc->SetOffset(TimestampDiff(0)); return ::mediapipe::OkStatus(); } @@ -343,11 +343,11 @@ class IntToFloatCalculator : public CalculatorBase { static ::mediapipe::Status GetContract(CalculatorContract* cc) { cc->Inputs().Index(0).Set<int>(); cc->Outputs().Index(0).Set<float>(); + cc->SetTimestampOffset(TimestampDiff(0)); return ::mediapipe::OkStatus(); } ::mediapipe::Status Open(CalculatorContext* cc) final { - cc->SetOffset(TimestampDiff(0)); return ::mediapipe::OkStatus(); } @@ -937,11 +937,11 @@ class SemaphoreCalculator : public CalculatorBase { cc->Outputs().Index(0).SetSameAs(&cc->Inputs().Index(0)); cc->InputSidePackets().Tag("POST_SEM").Set(); cc->InputSidePackets().Tag("WAIT_SEM").Set(); + cc->SetTimestampOffset(TimestampDiff(0)); return ::mediapipe::OkStatus(); } ::mediapipe::Status Open(CalculatorContext* cc) override { - cc->SetOffset(TimestampDiff(0)); return ::mediapipe::OkStatus(); } diff --git a/mediapipe/framework/deps/BUILD b/mediapipe/framework/deps/BUILD index 1cf51ddea..cf2bf46bd 100644 --- a/mediapipe/framework/deps/BUILD +++ b/mediapipe/framework/deps/BUILD @@ -17,7 +17,7 @@ load("//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library") -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//visibility:private"]) diff --git a/mediapipe/framework/formats/BUILD b/mediapipe/framework/formats/BUILD index b7ea42e48..fb10550b3 100644 --- a/mediapipe/framework/formats/BUILD +++ b/mediapipe/framework/formats/BUILD @@ -20,7 +20,7 @@ package( features = ["-layering_check"], ) -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) exports_files(["LICENSE"]) diff --git a/mediapipe/framework/formats/annotation/BUILD b/mediapipe/framework/formats/annotation/BUILD index a754b0250..a74b488e8 100644 --- a/mediapipe/framework/formats/annotation/BUILD +++ b/mediapipe/framework/formats/annotation/BUILD @@ -18,7 +18,7 @@ load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library") package(default_visibility = ["//visibility:private"]) -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) exports_files(["LICENSE"]) diff --git a/mediapipe/framework/formats/matrix_data.proto b/mediapipe/framework/formats/matrix_data.proto index 216d01288..d4aa457a5 100644 --- a/mediapipe/framework/formats/matrix_data.proto +++ b/mediapipe/framework/formats/matrix_data.proto @@ -21,6 +21,9 @@ syntax = "proto2"; package mediapipe; +option java_package = "com.google.mediapipe.formats.proto"; +option java_outer_classname = "MatrixDataProto"; + // Proto for serializing Matrix data. // Data are stored in column-major order by default. message MatrixData { diff --git a/mediapipe/framework/formats/motion/BUILD b/mediapipe/framework/formats/motion/BUILD index a77034c50..f91d2cade 100644 --- a/mediapipe/framework/formats/motion/BUILD +++ b/mediapipe/framework/formats/motion/BUILD @@ -16,7 +16,7 @@ # Description: # Working with dense optical flow in mediapipe.
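A pattern worth calling out in the two test files above: each calculator's constant timestamp offset moves from Open() (per-instance state) into GetContract() (static contract), using the CalculatorContract::SetTimestampOffset() call shown in the hunks, and matching the OutputStreamManager change below that initializes offset_enabled once at Initialize() time. A minimal sketch of the migrated shape (PassThroughIntCalculator is a hypothetical name):

class PassThroughIntCalculator : public CalculatorBase {
 public:
  static ::mediapipe::Status GetContract(CalculatorContract* cc) {
    cc->Inputs().Index(0).Set<int>();
    cc->Outputs().Index(0).Set<int>();
    // Declared here as part of the contract; previously this was
    // cc->SetOffset(TimestampDiff(0)) inside Open().
    cc->SetTimestampOffset(TimestampDiff(0));
    return ::mediapipe::OkStatus();
  }
  ::mediapipe::Status Open(CalculatorContext* cc) final {
    return ::mediapipe::OkStatus();  // No per-instance offset setup needed.
  }
  ::mediapipe::Status Process(CalculatorContext* cc) final {
    // Forward the input packet; the framework applies the declared offset
    // when propagating timestamp bounds downstream.
    cc->Outputs().Index(0).AddPacket(cc->Inputs().Index(0).Value());
    return ::mediapipe::OkStatus();
  }
};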
-licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) exports_files(["LICENSE"]) diff --git a/mediapipe/framework/formats/object_detection/BUILD b/mediapipe/framework/formats/object_detection/BUILD index 4a9f0ca50..39940acdc 100644 --- a/mediapipe/framework/formats/object_detection/BUILD +++ b/mediapipe/framework/formats/object_detection/BUILD @@ -17,7 +17,7 @@ load("//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library") -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//visibility:private"]) diff --git a/mediapipe/framework/output_stream_manager.cc b/mediapipe/framework/output_stream_manager.cc index d3d9c7a44..fba5ee5a4 100644 --- a/mediapipe/framework/output_stream_manager.cc +++ b/mediapipe/framework/output_stream_manager.cc @@ -24,6 +24,7 @@ namespace mediapipe { const std::string& name, const PacketType* packet_type) { output_stream_spec_.name = name; output_stream_spec_.packet_type = packet_type; + output_stream_spec_.offset_enabled = false; PrepareForRun(nullptr); return ::mediapipe::OkStatus(); } @@ -33,7 +34,6 @@ void OutputStreamManager::PrepareForRun( output_stream_spec_.error_callback = std::move(error_callback); output_stream_spec_.locked_intro_data = false; - output_stream_spec_.offset_enabled = false; output_stream_spec_.header = Packet(); { absl::MutexLock lock(&stream_mutex_); diff --git a/mediapipe/framework/port/BUILD b/mediapipe/framework/port/BUILD index 45cd101ba..78fa44739 100644 --- a/mediapipe/framework/port/BUILD +++ b/mediapipe/framework/port/BUILD @@ -15,7 +15,7 @@ # Description: # Port of various libraries and utilities. -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package( default_visibility = ["//visibility:private"], @@ -44,7 +44,10 @@ cc_library( name = "aligned_malloc_and_free", hdrs = ["aligned_malloc_and_free.h"], visibility = ["//visibility:public"], - deps = ["//mediapipe/framework/deps:aligned_malloc_and_free"], + deps = [ + "//mediapipe/framework/deps:aligned_malloc_and_free", + "@com_google_absl//absl/base:core_headers", + ], ) # This proto lib is used by any code that needs to do proto I/O and proto template parsing. diff --git a/mediapipe/framework/port/opencv_imgproc_inc.h b/mediapipe/framework/port/opencv_imgproc_inc.h index 5f5de8ff3..d59c9dc39 100644 --- a/mediapipe/framework/port/opencv_imgproc_inc.h +++ b/mediapipe/framework/port/opencv_imgproc_inc.h @@ -23,6 +23,9 @@ #include #else #include +#if CV_VERSION_MAJOR == 4 +#include +#endif #endif #endif // MEDIAPIPE_PORT_OPENCV_IMGPROC_INC_H_ diff --git a/mediapipe/framework/profiler/BUILD b/mediapipe/framework/profiler/BUILD index 86007b016..432746ea9 100644 --- a/mediapipe/framework/profiler/BUILD +++ b/mediapipe/framework/profiler/BUILD @@ -13,7 +13,7 @@ # limitations under the License. # -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//mediapipe/framework:__subpackages__"]) diff --git a/mediapipe/framework/profiler/reporter/BUILD b/mediapipe/framework/profiler/reporter/BUILD index 83f3dfe9e..6e97a5f3f 100644 --- a/mediapipe/framework/profiler/reporter/BUILD +++ b/mediapipe/framework/profiler/reporter/BUILD @@ -13,7 +13,7 @@ # limitations under the License. 
# -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) cc_library( name = "reporter_lib", diff --git a/mediapipe/framework/profiler/testing/BUILD b/mediapipe/framework/profiler/testing/BUILD index 9064473e3..0b0d256e5 100644 --- a/mediapipe/framework/profiler/testing/BUILD +++ b/mediapipe/framework/profiler/testing/BUILD @@ -13,7 +13,7 @@ # limitations under the License. # -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package( default_visibility = ["//mediapipe/framework:__subpackages__"], diff --git a/mediapipe/framework/stream_handler/BUILD b/mediapipe/framework/stream_handler/BUILD index 9dacd4d60..a805ed262 100644 --- a/mediapipe/framework/stream_handler/BUILD +++ b/mediapipe/framework/stream_handler/BUILD @@ -13,7 +13,7 @@ # limitations under the License. # -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package( default_visibility = ["//visibility:private"], diff --git a/mediapipe/framework/testdata/BUILD b/mediapipe/framework/testdata/BUILD index 0b96502cf..3863baebb 100644 --- a/mediapipe/framework/testdata/BUILD +++ b/mediapipe/framework/testdata/BUILD @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//visibility:private"]) diff --git a/mediapipe/framework/tool/BUILD b/mediapipe/framework/tool/BUILD index 694e2e3a1..24f083dc4 100644 --- a/mediapipe/framework/tool/BUILD +++ b/mediapipe/framework/tool/BUILD @@ -23,7 +23,7 @@ load( "mediapipe_binary_graph", ) -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//visibility:private"]) diff --git a/mediapipe/framework/tool/testdata/BUILD b/mediapipe/framework/tool/testdata/BUILD index 4227b11f9..906688520 100644 --- a/mediapipe/framework/tool/testdata/BUILD +++ b/mediapipe/framework/tool/testdata/BUILD @@ -18,7 +18,7 @@ load( "mediapipe_simple_subgraph", ) -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//mediapipe:__subpackages__"]) diff --git a/mediapipe/gpu/BUILD b/mediapipe/gpu/BUILD index b92a75e63..4992a3d51 100644 --- a/mediapipe/gpu/BUILD +++ b/mediapipe/gpu/BUILD @@ -16,7 +16,7 @@ load("//mediapipe/gpu:metal.bzl", "metal_library") load("@build_bazel_rules_apple//apple:ios.bzl", "ios_unit_test") load("//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library") -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//visibility:public"]) diff --git a/mediapipe/gpu/gl_context.cc b/mediapipe/gpu/gl_context.cc index 54b538273..58b6aab76 100644 --- a/mediapipe/gpu/gl_context.cc +++ b/mediapipe/gpu/gl_context.cc @@ -343,6 +343,7 @@ GlContext::~GlContext() { #error This file must be built with ARC. 
#endif #endif // __OBJC__ + if (thread_) { auto status = thread_->Run([this] { if (profiling_helper_) { @@ -350,9 +351,8 @@ } return ExitContext(nullptr); }); - if (!status.ok()) { - LOG(ERROR) << "Failed to deactivate context on thread: " << status; - } + LOG_IF(ERROR, !status.ok()) + << "Failed to deactivate context on thread: " << status; if (thread_->IsCurrentThread()) { thread_.release()->SelfDestruct(); } @@ -368,40 +368,38 @@ void GlContext::SetProfilingContext( } } +::mediapipe::Status GlContext::SwitchContextAndRun(GlStatusFunction gl_func) { + ContextBinding saved_context; + MP_RETURN_IF_ERROR(EnterContext(&saved_context)) << " (entering GL context)"; + auto status = gl_func(); + LogUncheckedGlErrors(CheckForGlErrors()); + MP_RETURN_IF_ERROR(ExitContext(&saved_context)) << " (exiting GL context)"; + return status; +} + ::mediapipe::Status GlContext::Run(GlStatusFunction gl_func, int node_id, Timestamp input_timestamp) { ::mediapipe::Status status; - if (thread_) { - bool had_gl_errors = false; - status = thread_->Run( - [this, gl_func, node_id, &input_timestamp, &had_gl_errors] { - if (profiling_helper_) { - profiling_helper_->MarkTimestamp(node_id, input_timestamp, - /*is_finish=*/false); - } - auto status = gl_func(); - if (profiling_helper_) { - profiling_helper_->MarkTimestamp(node_id, input_timestamp, - /*is_finish=*/true); - } - had_gl_errors = CheckForGlErrors(); - return status; - }); - LogUncheckedGlErrors(had_gl_errors); - } else { - ContextBinding saved_context; - MP_RETURN_IF_ERROR(EnterContext(&saved_context)); - if (profiling_helper_) { + if (profiling_helper_) { + gl_func = [=] { profiling_helper_->MarkTimestamp(node_id, input_timestamp, /*is_finish=*/false); - } - status = gl_func(); - if (profiling_helper_) { + auto status = gl_func(); profiling_helper_->MarkTimestamp(node_id, input_timestamp, /*is_finish=*/true); - } - LogUncheckedGlErrors(CheckForGlErrors()); - MP_RETURN_IF_ERROR(ExitContext(&saved_context)); + return status; + }; + } + if (thread_) { + bool had_gl_errors = false; + status = thread_->Run([this, gl_func, &had_gl_errors] { + auto status = gl_func(); + had_gl_errors = CheckForGlErrors(); + return status; + }); + LogUncheckedGlErrors(had_gl_errors); + } else { + status = SwitchContextAndRun(gl_func); } return status; } @@ -416,17 +414,12 @@ void GlContext::RunWithoutWaiting(GlVoidFunction gl_func) { }); } else { // TODO: queue up task instead. - ContextBinding saved_context; - auto status = EnterContext(&saved_context); + auto status = SwitchContextAndRun([gl_func] { + gl_func(); + return ::mediapipe::OkStatus(); + }); if (!status.ok()) { - LOG(ERROR) << "Failed to enter context: " << status; - return; - } - gl_func(); - LogUncheckedGlErrors(CheckForGlErrors()); - status = ExitContext(&saved_context); - if (!status.ok()) { - LOG(ERROR) << "Failed to exit context: " << status; + LOG(ERROR) << "Error in RunWithoutWaiting: " << status; } } } @@ -589,7 +582,7 @@ class GlFenceSyncPoint : public GlSyncPoint { void GlMultiSyncPoint::Add(std::shared_ptr<GlSyncPoint> new_sync) { for (auto& sync : syncs_) { - if (&sync->GetContext() == &new_sync->GetContext()) { + if (sync->GetContext() == new_sync->GetContext()) { sync = std::move(new_sync); return; } diff --git a/mediapipe/gpu/gl_context.h b/mediapipe/gpu/gl_context.h index b9ca6889c..0e8990470 100644 --- a/mediapipe/gpu/gl_context.h +++ b/mediapipe/gpu/gl_context.h @@ -91,7 +91,7 @@ class GlSyncPoint { // Returns whether the sync point has been reached. Does not block.
virtual bool IsReady() = 0; - const GlContext& GetContext() { return *gl_context_; } + const std::shared_ptr<GlContext>& GetContext() { return gl_context_; } protected: std::shared_ptr<GlContext> gl_context_; @@ -366,6 +366,11 @@ class GlContext : public std::enable_shared_from_this<GlContext> { ::mediapipe::Status GetGlExtensions(); ::mediapipe::Status GetGlExtensionsCompat(); + // Make the context current, run gl_func, and restore the previous context. + // Internal helper only; callers should use Run or RunWithoutWaiting instead, + // which delegates to the dedicated thread if required. + ::mediapipe::Status SwitchContextAndRun(GlStatusFunction gl_func); + // The following ContextBinding functions have platform-specific // implementations. diff --git a/mediapipe/gpu/gl_context_egl.cc b/mediapipe/gpu/gl_context_egl.cc index 79f0c30eb..58ab00784 100644 --- a/mediapipe/gpu/gl_context_egl.cc +++ b/mediapipe/gpu/gl_context_egl.cc @@ -191,6 +191,18 @@ void GlContext::DestroyContext() { .IgnoreError(); } +#ifdef __ANDROID__ + if (HasContext()) { + // Detach the current program to work around b/166322604. + if (eglMakeCurrent(display_, surface_, surface_, context_)) { + glUseProgram(0); + } else { + LOG(ERROR) << "eglMakeCurrent() returned error " << std::showbase + << std::hex << eglGetError(); + } + } +#endif // __ANDROID__ + // Destroy the context and surface. if (IsCurrent()) { if (!eglMakeCurrent(display_, EGL_NO_SURFACE, EGL_NO_SURFACE, diff --git a/mediapipe/gpu/gl_context_webgl.cc b/mediapipe/gpu/gl_context_webgl.cc index 19c0ea506..435f16e59 100644 --- a/mediapipe/gpu/gl_context_webgl.cc +++ b/mediapipe/gpu/gl_context_webgl.cc @@ -72,23 +72,23 @@ GlContext::StatusOrGlContext GlContext::Create( // multithreading options, like the special-case combination of USE_PTHREADS // and OFFSCREEN_FRAMEBUFFER) EM_ASM(let init_once = true; if (init_once) { - const __cachedFindCanvasEventTarget = __findCanvasEventTarget; + const cachedFindCanvasEventTarget = findCanvasEventTarget; - if (typeof __cachedFindCanvasEventTarget !== 'function') { + if (typeof cachedFindCanvasEventTarget !== 'function') { if (typeof console !== 'undefined') { console.error( 'Expected Emscripten global function ' + - '"__findCanvasEventTarget" not found. WebGL context creation ' + + '"findCanvasEventTarget" not found. WebGL context creation ' + 'may fail.'); } return; } - __findCanvasEventTarget = function(target) { + findCanvasEventTarget = function(target) { if (Module && Module.canvas) { return Module.canvas; } else if (Module && Module.canvasCssSelector) { - return __cachedFindCanvasEventTarget(Module.canvasCssSelector); + return cachedFindCanvasEventTarget(Module.canvasCssSelector); } else { if (typeof console !== 'undefined') { console.warn('Module properties canvas and canvasCssSelector not ' + @@ -97,7 +97,7 @@ GlContext::StatusOrGlContext GlContext::Create( // We still go through with the find attempt, although for most use // cases it will not succeed, just in case the user does want to fall- // back. - return __cachedFindCanvasEventTarget(target); + return cachedFindCanvasEventTarget(target); } }; // NOLINT: Necessary semicolon.
init_once = false; diff --git a/mediapipe/gpu/gl_texture_buffer.cc b/mediapipe/gpu/gl_texture_buffer.cc index cccb8c0fa..d9ec94f0e 100644 --- a/mediapipe/gpu/gl_texture_buffer.cc +++ b/mediapipe/gpu/gl_texture_buffer.cc @@ -23,6 +23,13 @@ std::unique_ptr<GlTextureBuffer> GlTextureBuffer::Wrap( deletion_callback); } +std::unique_ptr<GlTextureBuffer> GlTextureBuffer::Wrap( + GLenum target, GLuint name, int width, int height, GpuBufferFormat format, + std::shared_ptr<GlContext> context, DeletionCallback deletion_callback) { + return absl::make_unique<GlTextureBuffer>(target, name, width, height, format, + deletion_callback, context); +} + std::unique_ptr<GlTextureBuffer> GlTextureBuffer::Create(int width, int height, GpuBufferFormat format, const void* data) { @@ -36,18 +43,22 @@ std::unique_ptr<GlTextureBuffer> GlTextureBuffer::Create(int width, int height, GlTextureBuffer::GlTextureBuffer(GLenum target, GLuint name, int width, int height, GpuBufferFormat format, - DeletionCallback deletion_callback) + DeletionCallback deletion_callback, + std::shared_ptr<GlContext> producer_context) : name_(name), width_(width), height_(height), format_(format), target_(target), - deletion_callback_(deletion_callback) {} + deletion_callback_(deletion_callback), + producer_context_(producer_context) {} bool GlTextureBuffer::CreateInternal(const void* data) { auto context = GlContext::GetCurrent(); if (!context) return false; + producer_context_ = context; // Save creation GL context. + glGenTextures(1, &name_); if (!name_) return false; @@ -106,6 +117,7 @@ void GlTextureBuffer::Updated(std::shared_ptr<GlSyncPoint> prod_token) { CHECK(!producer_sync_) << "Updated existing texture which had not been marked for reuse!"; producer_sync_ = std::move(prod_token); + producer_context_ = producer_sync_->GetContext(); } void GlTextureBuffer::DidRead(std::shared_ptr<GlSyncPoint> cons_token) { diff --git a/mediapipe/gpu/gl_texture_buffer.h b/mediapipe/gpu/gl_texture_buffer.h index 9c21571d2..e6c44b6ff 100644 --- a/mediapipe/gpu/gl_texture_buffer.h +++ b/mediapipe/gpu/gl_texture_buffer.h @@ -50,6 +50,11 @@ class GlTextureBuffer { GLenum target, GLuint name, int width, int height, GpuBufferFormat format, DeletionCallback deletion_callback); + // Same as Wrap above, but saves the given context for future use. + static std::unique_ptr<GlTextureBuffer> Wrap( + GLenum target, GLuint name, int width, int height, GpuBufferFormat format, + std::shared_ptr<GlContext> context, DeletionCallback deletion_callback); + // Creates a texture of dimensions width x height and allocates space for it. // If data is provided, it is uploaded to the texture; otherwise, it can be // provided later via glTexSubImage2D. @@ -63,7 +68,8 @@ class GlTextureBuffer { // The commands producing the texture are assumed to be completed at the // time of this call. If not, call Updated on the result. GlTextureBuffer(GLenum target, GLuint name, int width, int height, - GpuBufferFormat format, DeletionCallback deletion_callback); + GpuBufferFormat format, DeletionCallback deletion_callback, + std::shared_ptr<GlContext> producer_context = nullptr); ~GlTextureBuffer(); // Included to support nativeGetGpuBuffer* in Java. @@ -111,6 +117,11 @@ class GlTextureBuffer { void WaitForConsumers(); void WaitForConsumersOnGpu(); + // Returns the GL context this buffer was created with. + const std::shared_ptr<GlContext>& GetProducerContext() { + return producer_context_; + } + private: // Creates a texture of dimensions width x height and allocates space for it.
// If data is provided, it is uploaded to the texture; otherwise, it can be @@ -132,6 +143,7 @@ class GlTextureBuffer { std::unique_ptr<GlMultiSyncPoint> consumer_multi_sync_ ABSL_GUARDED_BY( consumer_sync_mutex_) = absl::make_unique<GlMultiSyncPoint>(); DeletionCallback deletion_callback_; + std::shared_ptr<GlContext> producer_context_; }; using GlTextureBufferSharedPtr = std::shared_ptr<GlTextureBuffer>; diff --git a/mediapipe/gpu/gpu_buffer_multi_pool.cc b/mediapipe/gpu/gpu_buffer_multi_pool.cc index c98a3de75..b334f62a8 100644 --- a/mediapipe/gpu/gpu_buffer_multi_pool.cc +++ b/mediapipe/gpu/gpu_buffer_multi_pool.cc @@ -206,10 +206,11 @@ void GpuBufferMultiPool::EntryList::InsertAfter(Entry* entry, Entry* after) { Prepend(entry); } -void GpuBufferMultiPool::Evict() { +void GpuBufferMultiPool::Evict(std::vector<SimplePool>* evicted) { // Remove excess entries. while (entry_list_.size() > kMaxPoolCount) { Entry* victim = entry_list_.tail(); + evicted->emplace_back(std::move(victim->pool)); entry_list_.Remove(victim); pools_.erase(victim->spec); } @@ -230,6 +231,7 @@ void GpuBufferMultiPool::Evict() { entry->request_count /= 2; Entry* next = entry->next; if (entry->request_count == 0) { + evicted->emplace_back(std::move(entry->pool)); entry_list_.Remove(entry); pools_.erase(entry->spec); } @@ -240,36 +242,43 @@ GpuBufferMultiPool::SimplePool GpuBufferMultiPool::RequestPool( const BufferSpec& key) { - absl::MutexLock lock(&mutex_); - auto pool_it = pools_.find(key); - Entry* entry; - if (pool_it == pools_.end()) { - std::tie(pool_it, std::ignore) = - pools_.emplace(std::piecewise_construct, std::forward_as_tuple(key), - std::forward_as_tuple(key)); - entry = &pool_it->second; - CHECK_EQ(entry->request_count, 0); - entry->request_count = 1; - entry_list_.Append(entry); - if (entry->prev != nullptr) CHECK_GE(entry->prev->request_count, 1); - } else { - entry = &pool_it->second; - ++entry->request_count; - Entry* larger = entry->prev; - while (larger != nullptr && larger->request_count < entry->request_count) { - larger = larger->prev; + SimplePool pool; + std::vector<SimplePool> evicted; + { + absl::MutexLock lock(&mutex_); + auto pool_it = pools_.find(key); + Entry* entry; + if (pool_it == pools_.end()) { + std::tie(pool_it, std::ignore) = + pools_.emplace(std::piecewise_construct, std::forward_as_tuple(key), + std::forward_as_tuple(key)); + entry = &pool_it->second; + CHECK_EQ(entry->request_count, 0); + entry->request_count = 1; + entry_list_.Append(entry); + if (entry->prev != nullptr) CHECK_GE(entry->prev->request_count, 1); + } else { + entry = &pool_it->second; + ++entry->request_count; + Entry* larger = entry->prev; + while (larger != nullptr && + larger->request_count < entry->request_count) { + larger = larger->prev; + } + if (larger != entry->prev) { + entry_list_.Remove(entry); + entry_list_.InsertAfter(entry, larger); + } } - if (larger != entry->prev) { - entry_list_.Remove(entry); - entry_list_.InsertAfter(entry, larger); + if (!entry->pool && entry->request_count >= kMinRequestsBeforePool) { + entry->pool = MakeSimplePool(key); } + pool = entry->pool; + ++total_request_count_; + Evict(&evicted); } - if (!entry->pool && entry->request_count >= kMinRequestsBeforePool) { - entry->pool = MakeSimplePool(key); - } - SimplePool pool = entry->pool; - ++total_request_count_; - Evict(); + // Evicted pools, and their buffers, will be released without holding the + // lock.
return pool; } diff --git a/mediapipe/gpu/gpu_buffer_multi_pool.h b/mediapipe/gpu/gpu_buffer_multi_pool.h index 8039d18a2..a8ee5a197 100644 --- a/mediapipe/gpu/gpu_buffer_multi_pool.h +++ b/mediapipe/gpu/gpu_buffer_multi_pool.h @@ -160,7 +160,8 @@ class GpuBufferMultiPool { SimplePool RequestPool(const BufferSpec& key); GpuBuffer GetBufferFromSimplePool(BufferSpec spec, const SimplePool& pool); GpuBuffer GetBufferWithoutPool(const BufferSpec& spec); - void Evict() ABSL_EXCLUSIVE_LOCKS_REQUIRED(mutex_); + void Evict(std::vector<SimplePool>* evicted) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(mutex_); absl::Mutex mutex_; std::unordered_map pools_ diff --git a/mediapipe/graphs/edge_detection/BUILD b/mediapipe/graphs/edge_detection/BUILD index 2f47a3dde..fac241106 100644 --- a/mediapipe/graphs/edge_detection/BUILD +++ b/mediapipe/graphs/edge_detection/BUILD @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//visibility:public"]) diff --git a/mediapipe/graphs/face_detection/BUILD b/mediapipe/graphs/face_detection/BUILD index ccc9995d6..ac54089b2 100644 --- a/mediapipe/graphs/face_detection/BUILD +++ b/mediapipe/graphs/face_detection/BUILD @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//visibility:public"]) diff --git a/mediapipe/graphs/face_mesh/BUILD b/mediapipe/graphs/face_mesh/BUILD index 961f2907b..6926fda72 100644 --- a/mediapipe/graphs/face_mesh/BUILD +++ b/mediapipe/graphs/face_mesh/BUILD @@ -17,7 +17,7 @@ load( "mediapipe_binary_graph", ) -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//visibility:public"]) diff --git a/mediapipe/graphs/face_mesh/calculators/BUILD b/mediapipe/graphs/face_mesh/calculators/BUILD new file mode 100644 index 000000000..3bebfc9c8 --- /dev/null +++ b/mediapipe/graphs/face_mesh/calculators/BUILD @@ -0,0 +1,37 @@ +# Copyright 2020 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
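The GpuBufferMultiPool edit above is a standard lock-scope pattern: eviction victims are collected into a local vector while the mutex is held, and destroyed only after the lock is released, so buffer destructors (which may touch GL state) cannot stall other threads waiting on the pool. A condensed sketch of the pattern, with illustrative names rather than MediaPipe code:

#include <cstddef>
#include <memory>
#include <mutex>
#include <vector>

template <typename Pool>
class PoolCache {
 public:
  void Trim(std::size_t max_entries) {
    std::vector<std::shared_ptr<Pool>> evicted;
    {
      std::lock_guard<std::mutex> lock(mutex_);
      while (entries_.size() > max_entries) {
        evicted.push_back(std::move(entries_.back()));
        entries_.pop_back();
      }
    }  // mutex_ is released here.
    // `evicted` goes out of scope below, so any expensive cleanup in the
    // pool destructors runs without holding mutex_.
  }

 private:
  std::mutex mutex_;
  std::vector<std::shared_ptr<Pool>> entries_;
};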
+ +licenses(["notice"]) + +package(default_visibility = ["//visibility:public"]) + +cc_library( + name = "face_landmarks_to_render_data_calculator", + srcs = ["face_landmarks_to_render_data_calculator.cc"], + visibility = ["//visibility:public"], + deps = [ + "//mediapipe/calculators/util:landmarks_to_render_data_calculator", + "//mediapipe/calculators/util:landmarks_to_render_data_calculator_cc_proto", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework:calculator_options_cc_proto", + "//mediapipe/framework/formats:landmark_cc_proto", + "//mediapipe/framework/formats:location_data_cc_proto", + "//mediapipe/framework/port:ret_check", + "//mediapipe/util:color_cc_proto", + "//mediapipe/util:render_data_cc_proto", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", + ], + alwayslink = 1, +) diff --git a/mediapipe/graphs/face_mesh/calculators/face_landmarks_to_render_data_calculator.cc b/mediapipe/graphs/face_mesh/calculators/face_landmarks_to_render_data_calculator.cc new file mode 100644 index 000000000..7bbea8574 --- /dev/null +++ b/mediapipe/graphs/face_mesh/calculators/face_landmarks_to_render_data_calculator.cc @@ -0,0 +1,101 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/memory/memory.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_join.h" +#include "mediapipe/calculators/util/landmarks_to_render_data_calculator.h" +#include "mediapipe/calculators/util/landmarks_to_render_data_calculator.pb.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/calculator_options.pb.h" +#include "mediapipe/framework/formats/landmark.pb.h" +#include "mediapipe/framework/formats/location_data.pb.h" +#include "mediapipe/framework/port/ret_check.h" +#include "mediapipe/util/color.pb.h" +#include "mediapipe/util/render_data.pb.h" +namespace mediapipe { + +namespace { + +constexpr int kNumFaceLandmarkConnections = 124; +// Pairs of landmark indices to be rendered with connections. +constexpr int kFaceLandmarkConnections[] = { + // Lips. + 61, 146, 146, 91, 91, 181, 181, 84, 84, 17, 17, 314, 314, 405, 405, 321, + 321, 375, 375, 291, 61, 185, 185, 40, 40, 39, 39, 37, 37, 0, 0, 267, 267, + 269, 269, 270, 270, 409, 409, 291, 78, 95, 95, 88, 88, 178, 178, 87, 87, 14, + 14, 317, 317, 402, 402, 318, 318, 324, 324, 308, 78, 191, 191, 80, 80, 81, + 81, 82, 82, 13, 13, 312, 312, 311, 311, 310, 310, 415, 415, 308, + // Left eye. + 33, 7, 7, 163, 163, 144, 144, 145, 145, 153, 153, 154, 154, 155, 155, 133, + 33, 246, 246, 161, 161, 160, 160, 159, 159, 158, 158, 157, 157, 173, 173, + 133, + // Left eyebrow. + 46, 53, 53, 52, 52, 65, 65, 55, 70, 63, 63, 105, 105, 66, 66, 107, + // Right eye. + 263, 249, 249, 390, 390, 373, 373, 374, 374, 380, 380, 381, 381, 382, 382, + 362, 263, 466, 466, 388, 388, 387, 387, 386, 386, 385, 385, 384, 384, 398, + 398, 362, + // Right eyebrow. 
+ 276, 283, 283, 282, 282, 295, 295, 285, 300, 293, 293, 334, 334, 296, 296, + 336, + // Face oval. + 10, 338, 338, 297, 297, 332, 332, 284, 284, 251, 251, 389, 389, 356, 356, + 454, 454, 323, 323, 361, 361, 288, 288, 397, 397, 365, 365, 379, 379, 378, + 378, 400, 400, 377, 377, 152, 152, 148, 148, 176, 176, 149, 149, 150, 150, + 136, 136, 172, 172, 58, 58, 132, 132, 93, 93, 234, 234, 127, 127, 162, 162, + 21, 21, 54, 54, 103, 103, 67, 67, 109, 109, 10}; + +} // namespace + +// A calculator that converts face landmarks to RenderData proto for +// visualization. Ignores landmark_connections specified in +// LandmarksToRenderDataCalculatorOptions, if any, and always uses a fixed set +// of landmark connections specific to face landmarks (defined in +// kFaceLandmarkConnections[] above). +// +// Example config: +// node { +// calculator: "FaceLandmarksToRenderDataCalculator" +// input_stream: "NORM_LANDMARKS:landmarks" +// output_stream: "RENDER_DATA:render_data" +// options { +// [LandmarksToRenderDataCalculatorOptions.ext] { +// landmark_color { r: 0 g: 255 b: 0 } +// connection_color { r: 0 g: 255 b: 0 } +// thickness: 4.0 +// } +// } +// } +class FaceLandmarksToRenderDataCalculator + : public LandmarksToRenderDataCalculator { + public: + ::mediapipe::Status Open(CalculatorContext* cc) override; +}; +REGISTER_CALCULATOR(FaceLandmarksToRenderDataCalculator); + +::mediapipe::Status FaceLandmarksToRenderDataCalculator::Open( + CalculatorContext* cc) { + cc->SetOffset(TimestampDiff(0)); + options_ = cc->Options<LandmarksToRenderDataCalculatorOptions>(); + + for (int i = 0; i < kNumFaceLandmarkConnections; ++i) { + landmark_connections_.push_back(kFaceLandmarkConnections[i * 2]); + landmark_connections_.push_back(kFaceLandmarkConnections[i * 2 + 1]); + } + + return ::mediapipe::OkStatus(); +} + +} // namespace mediapipe diff --git a/mediapipe/graphs/face_mesh/subgraphs/BUILD b/mediapipe/graphs/face_mesh/subgraphs/BUILD index 7de55bd0b..a94b47a83 100644 --- a/mediapipe/graphs/face_mesh/subgraphs/BUILD +++ b/mediapipe/graphs/face_mesh/subgraphs/BUILD @@ -17,7 +17,7 @@ load( "mediapipe_simple_subgraph", ) -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//visibility:public"]) @@ -29,6 +29,7 @@ cc_library( "//mediapipe/calculators/util:detections_to_render_data_calculator", "//mediapipe/calculators/util:landmarks_to_render_data_calculator", "//mediapipe/calculators/util:rect_to_render_data_calculator", + "//mediapipe/graphs/face_mesh/calculators:face_landmarks_to_render_data_calculator", ], ) diff --git a/mediapipe/graphs/face_mesh/subgraphs/face_renderer_cpu.pbtxt b/mediapipe/graphs/face_mesh/subgraphs/face_renderer_cpu.pbtxt index eee7496a1..f5793f3eb 100644 --- a/mediapipe/graphs/face_mesh/subgraphs/face_renderer_cpu.pbtxt +++ b/mediapipe/graphs/face_mesh/subgraphs/face_renderer_cpu.pbtxt @@ -48,268 +48,14 @@ node { # Converts landmarks to drawing primitives for annotation overlay. node { - calculator: "LandmarksToRenderDataCalculator" + calculator: "FaceLandmarksToRenderDataCalculator" input_stream: "NORM_LANDMARKS:face_landmarks" - output_stream: "RENDER_DATA:landmark_render_data" + output_stream: "RENDER_DATA:landmarks_render_data" node_options: { [type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] { - # Lips.
- landmark_connections: 61 - landmark_connections: 146 - landmark_connections: 146 - landmark_connections: 91 - landmark_connections: 91 - landmark_connections: 181 - landmark_connections: 181 - landmark_connections: 84 - landmark_connections: 84 - landmark_connections: 17 - landmark_connections: 17 - landmark_connections: 314 - landmark_connections: 314 - landmark_connections: 405 - landmark_connections: 405 - landmark_connections: 321 - landmark_connections: 321 - landmark_connections: 375 - landmark_connections: 375 - landmark_connections: 291 - landmark_connections: 61 - landmark_connections: 185 - landmark_connections: 185 - landmark_connections: 40 - landmark_connections: 40 - landmark_connections: 39 - landmark_connections: 39 - landmark_connections: 37 - landmark_connections: 37 - landmark_connections: 0 - landmark_connections: 0 - landmark_connections: 267 - landmark_connections: 267 - landmark_connections: 269 - landmark_connections: 269 - landmark_connections: 270 - landmark_connections: 270 - landmark_connections: 409 - landmark_connections: 409 - landmark_connections: 291 - landmark_connections: 78 - landmark_connections: 95 - landmark_connections: 95 - landmark_connections: 88 - landmark_connections: 88 - landmark_connections: 178 - landmark_connections: 178 - landmark_connections: 87 - landmark_connections: 87 - landmark_connections: 14 - landmark_connections: 14 - landmark_connections: 317 - landmark_connections: 317 - landmark_connections: 402 - landmark_connections: 402 - landmark_connections: 318 - landmark_connections: 318 - landmark_connections: 324 - landmark_connections: 324 - landmark_connections: 308 - landmark_connections: 78 - landmark_connections: 191 - landmark_connections: 191 - landmark_connections: 80 - landmark_connections: 80 - landmark_connections: 81 - landmark_connections: 81 - landmark_connections: 82 - landmark_connections: 82 - landmark_connections: 13 - landmark_connections: 13 - landmark_connections: 312 - landmark_connections: 312 - landmark_connections: 311 - landmark_connections: 311 - landmark_connections: 310 - landmark_connections: 310 - landmark_connections: 415 - landmark_connections: 415 - landmark_connections: 308 - # Left eye. - landmark_connections: 33 - landmark_connections: 7 - landmark_connections: 7 - landmark_connections: 163 - landmark_connections: 163 - landmark_connections: 144 - landmark_connections: 144 - landmark_connections: 145 - landmark_connections: 145 - landmark_connections: 153 - landmark_connections: 153 - landmark_connections: 154 - landmark_connections: 154 - landmark_connections: 155 - landmark_connections: 155 - landmark_connections: 133 - landmark_connections: 33 - landmark_connections: 246 - landmark_connections: 246 - landmark_connections: 161 - landmark_connections: 161 - landmark_connections: 160 - landmark_connections: 160 - landmark_connections: 159 - landmark_connections: 159 - landmark_connections: 158 - landmark_connections: 158 - landmark_connections: 157 - landmark_connections: 157 - landmark_connections: 173 - landmark_connections: 173 - landmark_connections: 133 - # Left eyebrow. 
- landmark_connections: 46 - landmark_connections: 53 - landmark_connections: 53 - landmark_connections: 52 - landmark_connections: 52 - landmark_connections: 65 - landmark_connections: 65 - landmark_connections: 55 - landmark_connections: 70 - landmark_connections: 63 - landmark_connections: 63 - landmark_connections: 105 - landmark_connections: 105 - landmark_connections: 66 - landmark_connections: 66 - landmark_connections: 107 - # Right eye. - landmark_connections: 263 - landmark_connections: 249 - landmark_connections: 249 - landmark_connections: 390 - landmark_connections: 390 - landmark_connections: 373 - landmark_connections: 373 - landmark_connections: 374 - landmark_connections: 374 - landmark_connections: 380 - landmark_connections: 380 - landmark_connections: 381 - landmark_connections: 381 - landmark_connections: 382 - landmark_connections: 382 - landmark_connections: 362 - landmark_connections: 263 - landmark_connections: 466 - landmark_connections: 466 - landmark_connections: 388 - landmark_connections: 388 - landmark_connections: 387 - landmark_connections: 387 - landmark_connections: 386 - landmark_connections: 386 - landmark_connections: 385 - landmark_connections: 385 - landmark_connections: 384 - landmark_connections: 384 - landmark_connections: 398 - landmark_connections: 398 - landmark_connections: 362 - # Right eyebrow. - landmark_connections: 276 - landmark_connections: 283 - landmark_connections: 283 - landmark_connections: 282 - landmark_connections: 282 - landmark_connections: 295 - landmark_connections: 295 - landmark_connections: 285 - landmark_connections: 300 - landmark_connections: 293 - landmark_connections: 293 - landmark_connections: 334 - landmark_connections: 334 - landmark_connections: 296 - landmark_connections: 296 - landmark_connections: 336 - # Face oval. 
- landmark_connections: 10 - landmark_connections: 338 - landmark_connections: 338 - landmark_connections: 297 - landmark_connections: 297 - landmark_connections: 332 - landmark_connections: 332 - landmark_connections: 284 - landmark_connections: 284 - landmark_connections: 251 - landmark_connections: 251 - landmark_connections: 389 - landmark_connections: 389 - landmark_connections: 356 - landmark_connections: 356 - landmark_connections: 454 - landmark_connections: 454 - landmark_connections: 323 - landmark_connections: 323 - landmark_connections: 361 - landmark_connections: 361 - landmark_connections: 288 - landmark_connections: 288 - landmark_connections: 397 - landmark_connections: 397 - landmark_connections: 365 - landmark_connections: 365 - landmark_connections: 379 - landmark_connections: 379 - landmark_connections: 378 - landmark_connections: 378 - landmark_connections: 400 - landmark_connections: 400 - landmark_connections: 377 - landmark_connections: 377 - landmark_connections: 152 - landmark_connections: 152 - landmark_connections: 148 - landmark_connections: 148 - landmark_connections: 176 - landmark_connections: 176 - landmark_connections: 149 - landmark_connections: 149 - landmark_connections: 150 - landmark_connections: 150 - landmark_connections: 136 - landmark_connections: 136 - landmark_connections: 172 - landmark_connections: 172 - landmark_connections: 58 - landmark_connections: 58 - landmark_connections: 132 - landmark_connections: 132 - landmark_connections: 93 - landmark_connections: 93 - landmark_connections: 234 - landmark_connections: 234 - landmark_connections: 127 - landmark_connections: 127 - landmark_connections: 162 - landmark_connections: 162 - landmark_connections: 21 - landmark_connections: 21 - landmark_connections: 54 - landmark_connections: 54 - landmark_connections: 103 - landmark_connections: 103 - landmark_connections: 67 - landmark_connections: 67 - landmark_connections: 109 - landmark_connections: 109 - landmark_connections: 10 landmark_color { r: 255 g: 0 b: 0 } connection_color { r: 0 g: 255 b: 0 } - thickness: 1.5 + thickness: 2 visualize_landmark_depth: false } } @@ -320,7 +66,7 @@ node { # timestamp. node { calculator: "EndLoopRenderDataCalculator" - input_stream: "ITEM:landmark_render_data" + input_stream: "ITEM:landmarks_render_data" input_stream: "BATCH_END:landmark_timestamp" output_stream: "ITERABLE:multi_face_landmarks_render_data" } diff --git a/mediapipe/graphs/face_mesh/subgraphs/face_renderer_gpu.pbtxt b/mediapipe/graphs/face_mesh/subgraphs/face_renderer_gpu.pbtxt index 6de89656b..4e2b3f244 100644 --- a/mediapipe/graphs/face_mesh/subgraphs/face_renderer_gpu.pbtxt +++ b/mediapipe/graphs/face_mesh/subgraphs/face_renderer_gpu.pbtxt @@ -48,265 +48,11 @@ node { # Converts landmarks to drawing primitives for annotation overlay. node { - calculator: "LandmarksToRenderDataCalculator" + calculator: "FaceLandmarksToRenderDataCalculator" input_stream: "NORM_LANDMARKS:face_landmarks" output_stream: "RENDER_DATA:landmarks_render_data" node_options: { [type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] { - # Lips. 
- landmark_connections: 61 - landmark_connections: 146 - landmark_connections: 146 - landmark_connections: 91 - landmark_connections: 91 - landmark_connections: 181 - landmark_connections: 181 - landmark_connections: 84 - landmark_connections: 84 - landmark_connections: 17 - landmark_connections: 17 - landmark_connections: 314 - landmark_connections: 314 - landmark_connections: 405 - landmark_connections: 405 - landmark_connections: 321 - landmark_connections: 321 - landmark_connections: 375 - landmark_connections: 375 - landmark_connections: 291 - landmark_connections: 61 - landmark_connections: 185 - landmark_connections: 185 - landmark_connections: 40 - landmark_connections: 40 - landmark_connections: 39 - landmark_connections: 39 - landmark_connections: 37 - landmark_connections: 37 - landmark_connections: 0 - landmark_connections: 0 - landmark_connections: 267 - landmark_connections: 267 - landmark_connections: 269 - landmark_connections: 269 - landmark_connections: 270 - landmark_connections: 270 - landmark_connections: 409 - landmark_connections: 409 - landmark_connections: 291 - landmark_connections: 78 - landmark_connections: 95 - landmark_connections: 95 - landmark_connections: 88 - landmark_connections: 88 - landmark_connections: 178 - landmark_connections: 178 - landmark_connections: 87 - landmark_connections: 87 - landmark_connections: 14 - landmark_connections: 14 - landmark_connections: 317 - landmark_connections: 317 - landmark_connections: 402 - landmark_connections: 402 - landmark_connections: 318 - landmark_connections: 318 - landmark_connections: 324 - landmark_connections: 324 - landmark_connections: 308 - landmark_connections: 78 - landmark_connections: 191 - landmark_connections: 191 - landmark_connections: 80 - landmark_connections: 80 - landmark_connections: 81 - landmark_connections: 81 - landmark_connections: 82 - landmark_connections: 82 - landmark_connections: 13 - landmark_connections: 13 - landmark_connections: 312 - landmark_connections: 312 - landmark_connections: 311 - landmark_connections: 311 - landmark_connections: 310 - landmark_connections: 310 - landmark_connections: 415 - landmark_connections: 415 - landmark_connections: 308 - # Left eye. - landmark_connections: 33 - landmark_connections: 7 - landmark_connections: 7 - landmark_connections: 163 - landmark_connections: 163 - landmark_connections: 144 - landmark_connections: 144 - landmark_connections: 145 - landmark_connections: 145 - landmark_connections: 153 - landmark_connections: 153 - landmark_connections: 154 - landmark_connections: 154 - landmark_connections: 155 - landmark_connections: 155 - landmark_connections: 133 - landmark_connections: 33 - landmark_connections: 246 - landmark_connections: 246 - landmark_connections: 161 - landmark_connections: 161 - landmark_connections: 160 - landmark_connections: 160 - landmark_connections: 159 - landmark_connections: 159 - landmark_connections: 158 - landmark_connections: 158 - landmark_connections: 157 - landmark_connections: 157 - landmark_connections: 173 - landmark_connections: 173 - landmark_connections: 133 - # Left eyebrow. 
- landmark_connections: 46 - landmark_connections: 53 - landmark_connections: 53 - landmark_connections: 52 - landmark_connections: 52 - landmark_connections: 65 - landmark_connections: 65 - landmark_connections: 55 - landmark_connections: 70 - landmark_connections: 63 - landmark_connections: 63 - landmark_connections: 105 - landmark_connections: 105 - landmark_connections: 66 - landmark_connections: 66 - landmark_connections: 107 - # Right eye. - landmark_connections: 263 - landmark_connections: 249 - landmark_connections: 249 - landmark_connections: 390 - landmark_connections: 390 - landmark_connections: 373 - landmark_connections: 373 - landmark_connections: 374 - landmark_connections: 374 - landmark_connections: 380 - landmark_connections: 380 - landmark_connections: 381 - landmark_connections: 381 - landmark_connections: 382 - landmark_connections: 382 - landmark_connections: 362 - landmark_connections: 263 - landmark_connections: 466 - landmark_connections: 466 - landmark_connections: 388 - landmark_connections: 388 - landmark_connections: 387 - landmark_connections: 387 - landmark_connections: 386 - landmark_connections: 386 - landmark_connections: 385 - landmark_connections: 385 - landmark_connections: 384 - landmark_connections: 384 - landmark_connections: 398 - landmark_connections: 398 - landmark_connections: 362 - # Right eyebrow. - landmark_connections: 276 - landmark_connections: 283 - landmark_connections: 283 - landmark_connections: 282 - landmark_connections: 282 - landmark_connections: 295 - landmark_connections: 295 - landmark_connections: 285 - landmark_connections: 300 - landmark_connections: 293 - landmark_connections: 293 - landmark_connections: 334 - landmark_connections: 334 - landmark_connections: 296 - landmark_connections: 296 - landmark_connections: 336 - # Face oval. 
- landmark_connections: 10 - landmark_connections: 338 - landmark_connections: 338 - landmark_connections: 297 - landmark_connections: 297 - landmark_connections: 332 - landmark_connections: 332 - landmark_connections: 284 - landmark_connections: 284 - landmark_connections: 251 - landmark_connections: 251 - landmark_connections: 389 - landmark_connections: 389 - landmark_connections: 356 - landmark_connections: 356 - landmark_connections: 454 - landmark_connections: 454 - landmark_connections: 323 - landmark_connections: 323 - landmark_connections: 361 - landmark_connections: 361 - landmark_connections: 288 - landmark_connections: 288 - landmark_connections: 397 - landmark_connections: 397 - landmark_connections: 365 - landmark_connections: 365 - landmark_connections: 379 - landmark_connections: 379 - landmark_connections: 378 - landmark_connections: 378 - landmark_connections: 400 - landmark_connections: 400 - landmark_connections: 377 - landmark_connections: 377 - landmark_connections: 152 - landmark_connections: 152 - landmark_connections: 148 - landmark_connections: 148 - landmark_connections: 176 - landmark_connections: 176 - landmark_connections: 149 - landmark_connections: 149 - landmark_connections: 150 - landmark_connections: 150 - landmark_connections: 136 - landmark_connections: 136 - landmark_connections: 172 - landmark_connections: 172 - landmark_connections: 58 - landmark_connections: 58 - landmark_connections: 132 - landmark_connections: 132 - landmark_connections: 93 - landmark_connections: 93 - landmark_connections: 234 - landmark_connections: 234 - landmark_connections: 127 - landmark_connections: 127 - landmark_connections: 162 - landmark_connections: 162 - landmark_connections: 21 - landmark_connections: 21 - landmark_connections: 54 - landmark_connections: 54 - landmark_connections: 103 - landmark_connections: 103 - landmark_connections: 67 - landmark_connections: 67 - landmark_connections: 109 - landmark_connections: 109 - landmark_connections: 10 landmark_color { r: 255 g: 0 b: 0 } connection_color { r: 0 g: 255 b: 0 } thickness: 2 diff --git a/mediapipe/graphs/hair_segmentation/BUILD b/mediapipe/graphs/hair_segmentation/BUILD index 52598175f..b177726bf 100644 --- a/mediapipe/graphs/hair_segmentation/BUILD +++ b/mediapipe/graphs/hair_segmentation/BUILD @@ -17,7 +17,7 @@ load( "mediapipe_binary_graph", ) -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//visibility:public"]) diff --git a/mediapipe/graphs/hand_tracking/BUILD b/mediapipe/graphs/hand_tracking/BUILD index a84f5d941..2b19d9a54 100644 --- a/mediapipe/graphs/hand_tracking/BUILD +++ b/mediapipe/graphs/hand_tracking/BUILD @@ -17,7 +17,7 @@ load( "mediapipe_binary_graph", ) -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//visibility:public"]) diff --git a/mediapipe/graphs/hand_tracking/calculators/BUILD b/mediapipe/graphs/hand_tracking/calculators/BUILD index 4ebe1a61d..b2a8efe37 100644 --- a/mediapipe/graphs/hand_tracking/calculators/BUILD +++ b/mediapipe/graphs/hand_tracking/calculators/BUILD @@ -12,20 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-licenses(["notice"])  # Apache 2.0
+licenses(["notice"])
 
 package(default_visibility = ["//visibility:public"])
 
 cc_library(
-    name = "hand_detections_to_rects_calculator",
-    srcs = ["hand_detections_to_rects_calculator.cc"],
+    name = "hand_landmarks_to_rect_calculator",
+    srcs = ["hand_landmarks_to_rect_calculator.cc"],
     visibility = ["//visibility:public"],
     deps = [
-        "//mediapipe/calculators/util:detections_to_rects_calculator",
-        "//mediapipe/calculators/util:detections_to_rects_calculator_cc_proto",
         "//mediapipe/framework:calculator_framework",
         "//mediapipe/framework:calculator_options_cc_proto",
-        "//mediapipe/framework/formats:detection_cc_proto",
+        "//mediapipe/framework/formats:landmark_cc_proto",
         "//mediapipe/framework/formats:location_data_cc_proto",
         "//mediapipe/framework/formats:rect_cc_proto",
        "//mediapipe/framework/port:ret_check",
diff --git a/mediapipe/graphs/hand_tracking/calculators/hand_detections_to_rects_calculator.cc b/mediapipe/graphs/hand_tracking/calculators/hand_detections_to_rects_calculator.cc
deleted file mode 100644
index a54a17bb4..000000000
--- a/mediapipe/graphs/hand_tracking/calculators/hand_detections_to_rects_calculator.cc
+++ /dev/null
@@ -1,98 +0,0 @@
-// Copyright 2020 The MediaPipe Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include <cmath>
-
-#include "mediapipe/calculators/util/detections_to_rects_calculator.h"
-#include "mediapipe/calculators/util/detections_to_rects_calculator.pb.h"
-#include "mediapipe/framework/calculator_framework.h"
-#include "mediapipe/framework/calculator_options.pb.h"
-#include "mediapipe/framework/formats/detection.pb.h"
-#include "mediapipe/framework/formats/location_data.pb.h"
-#include "mediapipe/framework/formats/rect.pb.h"
-#include "mediapipe/framework/port/ret_check.h"
-#include "mediapipe/framework/port/status.h"
-
-namespace mediapipe {
-
-namespace {
-
-// Indices of joints used for computing the rotation of the output rectangle
-// from detection box with keypoints.
-constexpr int kWristJoint = 0;
-constexpr int kMiddleFingerPIPJoint = 6;
-constexpr int kIndexFingerPIPJoint = 4;
-constexpr int kRingFingerPIPJoint = 8;
-constexpr char kImageSizeTag[] = "IMAGE_SIZE";
-
-}  // namespace
-
-// A calculator that converts Hand detection to a bounding box NormalizedRect.
-// The calculator overwrites the default logic of DetectionsToRectsCalculator
-// for rotating the detection bounding box to a rectangle. The rotation angle
-// is computed based on 1) the wrist joint and 2) the average of PIP joints of
-// index finger, middle finger and ring finger. After rotation, the vector from
-// the wrist to the mean of PIP joints is expected to be vertical with wrist at
-// the bottom and the mean of PIP joints at the top.
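The rotation rule described in the comment above boils down to one atan2 over the wrist-to-PIP-mean vector. A minimal standalone sketch of that idea, using hypothetical pixel-space coordinates (the function name and parameters are illustrative, not part of the calculator's API):

    #include <cmath>

    // Angle that, once applied, makes the wrist->PIP-mean vector point
    // straight up on screen (screen y grows downward, hence the negated dy).
    float HandRollAngle(float wrist_x, float wrist_y, float pip_mean_x,
                        float pip_mean_y) {
      const float target_angle = M_PI * 0.5f;  // "up" in screen space
      return target_angle -
             std::atan2(-(pip_mean_y - wrist_y), pip_mean_x - wrist_x);
    }

The real calculator additionally normalizes the result into a canonical radian range, as seen in NormalizeRadians below.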
-class HandDetectionsToRectsCalculator : public DetectionsToRectsCalculator {
- public:
-  ::mediapipe::Status Open(CalculatorContext* cc) override {
-    RET_CHECK(cc->Inputs().HasTag(kImageSizeTag))
-        << "Image size is required to calculate rotated rect";
-    cc->SetOffset(TimestampDiff(0));
-    target_angle_ = M_PI * 0.5f;
-    rotate_ = true;
-    options_ = cc->Options<DetectionsToRectsCalculatorOptions>();
-    output_zero_rect_for_empty_detections_ =
-        options_.output_zero_rect_for_empty_detections();
-
-    return ::mediapipe::OkStatus();
-  }
-
- private:
-  ::mediapipe::Status ComputeRotation(const ::mediapipe::Detection& detection,
-                                      const DetectionSpec& detection_spec,
-                                      float* rotation) override;
-};
-REGISTER_CALCULATOR(HandDetectionsToRectsCalculator);
-
-::mediapipe::Status HandDetectionsToRectsCalculator::ComputeRotation(
-    const Detection& detection, const DetectionSpec& detection_spec,
-    float* rotation) {
-  const auto& location_data = detection.location_data();
-  const auto& image_size = detection_spec.image_size;
-  RET_CHECK(image_size) << "Image size is required to calculate rotation";
-
-  const float x0 =
-      location_data.relative_keypoints(kWristJoint).x() * image_size->first;
-  const float y0 =
-      location_data.relative_keypoints(kWristJoint).y() * image_size->second;
-
-  float x1 = (location_data.relative_keypoints(kIndexFingerPIPJoint).x() +
-              location_data.relative_keypoints(kRingFingerPIPJoint).x()) /
-             2.f;
-  float y1 = (location_data.relative_keypoints(kIndexFingerPIPJoint).y() +
-              location_data.relative_keypoints(kRingFingerPIPJoint).y()) /
-             2.f;
-  x1 = (x1 + location_data.relative_keypoints(kMiddleFingerPIPJoint).x()) /
-       2.f * image_size->first;
-  y1 = (y1 + location_data.relative_keypoints(kMiddleFingerPIPJoint).y()) /
-       2.f * image_size->second;
-
-  *rotation = NormalizeRadians(target_angle_ - std::atan2(-(y1 - y0), x1 - x0));
-
-  return ::mediapipe::OkStatus();
-}
-
-}  // namespace mediapipe
diff --git a/mediapipe/graphs/hand_tracking/calculators/hand_landmarks_to_rect_calculator.cc b/mediapipe/graphs/hand_tracking/calculators/hand_landmarks_to_rect_calculator.cc
new file mode 100644
index 000000000..156425bed
--- /dev/null
+++ b/mediapipe/graphs/hand_tracking/calculators/hand_landmarks_to_rect_calculator.cc
@@ -0,0 +1,167 @@
+// Copyright 2020 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <cmath>
+
+#include "mediapipe/framework/calculator_framework.h"
+#include "mediapipe/framework/calculator_options.pb.h"
+#include "mediapipe/framework/formats/landmark.pb.h"
+#include "mediapipe/framework/formats/rect.pb.h"
+#include "mediapipe/framework/port/ret_check.h"
+#include "mediapipe/framework/port/status.h"
+
+namespace mediapipe {
+
+namespace {
+
+constexpr char kNormalizedLandmarksTag[] = "NORM_LANDMARKS";
+constexpr char kNormRectTag[] = "NORM_RECT";
+constexpr char kImageSizeTag[] = "IMAGE_SIZE";
+constexpr int kWristJoint = 0;
+constexpr int kMiddleFingerPIPJoint = 6;
+constexpr int kIndexFingerPIPJoint = 4;
+constexpr int kRingFingerPIPJoint = 8;
+constexpr float kTargetAngle = M_PI * 0.5f;
+
+inline float NormalizeRadians(float angle) {
+  return angle - 2 * M_PI * std::floor((angle - (-M_PI)) / (2 * M_PI));
+}
+
+float ComputeRotation(const NormalizedLandmarkList& landmarks,
+                      const std::pair<int, int>& image_size) {
+  const float x0 = landmarks.landmark(kWristJoint).x() * image_size.first;
+  const float y0 = landmarks.landmark(kWristJoint).y() * image_size.second;
+
+  float x1 = (landmarks.landmark(kIndexFingerPIPJoint).x() +
+              landmarks.landmark(kRingFingerPIPJoint).x()) /
+             2.f;
+  float y1 = (landmarks.landmark(kIndexFingerPIPJoint).y() +
+              landmarks.landmark(kRingFingerPIPJoint).y()) /
+             2.f;
+  x1 = (x1 + landmarks.landmark(kMiddleFingerPIPJoint).x()) / 2.f *
+       image_size.first;
+  y1 = (y1 + landmarks.landmark(kMiddleFingerPIPJoint).y()) / 2.f *
+       image_size.second;
+
+  const float rotation =
+      NormalizeRadians(kTargetAngle - std::atan2(-(y1 - y0), x1 - x0));
+  return rotation;
+}
+
+::mediapipe::Status NormalizedLandmarkListToRect(
+    const NormalizedLandmarkList& landmarks,
+    const std::pair<int, int>& image_size, NormalizedRect* rect) {
+  const float rotation = ComputeRotation(landmarks, image_size);
+  const float reverse_angle = NormalizeRadians(-rotation);
+
+  // Find boundaries of landmarks.
+  float max_x = std::numeric_limits<float>::min();
+  float max_y = std::numeric_limits<float>::min();
+  float min_x = std::numeric_limits<float>::max();
+  float min_y = std::numeric_limits<float>::max();
+  for (int i = 0; i < landmarks.landmark_size(); ++i) {
+    max_x = std::max(max_x, landmarks.landmark(i).x());
+    max_y = std::max(max_y, landmarks.landmark(i).y());
+    min_x = std::min(min_x, landmarks.landmark(i).x());
+    min_y = std::min(min_y, landmarks.landmark(i).y());
+  }
+  const float axis_aligned_center_x = (max_x + min_x) / 2.f;
+  const float axis_aligned_center_y = (max_y + min_y) / 2.f;
+
+  // Find boundaries of rotated landmarks.
+  max_x = std::numeric_limits<float>::min();
+  max_y = std::numeric_limits<float>::min();
+  min_x = std::numeric_limits<float>::max();
+  min_y = std::numeric_limits<float>::max();
+  for (int i = 0; i < landmarks.landmark_size(); ++i) {
+    const float original_x =
+        (landmarks.landmark(i).x() - axis_aligned_center_x) * image_size.first;
+    const float original_y =
+        (landmarks.landmark(i).y() - axis_aligned_center_y) * image_size.second;
+
+    const float projected_x = original_x * std::cos(reverse_angle) -
+                              original_y * std::sin(reverse_angle);
+    const float projected_y = original_x * std::sin(reverse_angle) +
+                              original_y * std::cos(reverse_angle);
+
+    max_x = std::max(max_x, projected_x);
+    max_y = std::max(max_y, projected_y);
+    min_x = std::min(min_x, projected_x);
+    min_y = std::min(min_y, projected_y);
+  }
+  const float projected_center_x = (max_x + min_x) / 2.f;
+  const float projected_center_y = (max_y + min_y) / 2.f;
+
+  const float center_x = projected_center_x * std::cos(rotation) -
+                         projected_center_y * std::sin(rotation) +
+                         image_size.first * axis_aligned_center_x;
+  const float center_y = projected_center_x * std::sin(rotation) +
+                         projected_center_y * std::cos(rotation) +
+                         image_size.second * axis_aligned_center_y;
+  const float width = (max_x - min_x) / image_size.first;
+  const float height = (max_y - min_y) / image_size.second;
+
+  rect->set_x_center(center_x / image_size.first);
+  rect->set_y_center(center_y / image_size.second);
+  rect->set_width(width);
+  rect->set_height(height);
+  rect->set_rotation(rotation);
+
+  return ::mediapipe::OkStatus();
+}
+
+}  // namespace
+
+// A calculator that converts a subset of hand landmarks to a bounding box
+// NormalizedRect. The rotation angle of the bounding box is computed based on
+// 1) the wrist joint and 2) the average of PIP joints of index finger, middle
+// finger and ring finger. After rotation, the vector from the wrist to the
+// mean of PIP joints is expected to be vertical with wrist at the bottom and
+// the mean of PIP joints at the top.
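The de-rotate, measure, re-rotate trick in NormalizedLandmarkListToRect above can be seen in isolation on plain 2D points. A small self-contained sketch (a hypothetical helper, not part of this file):

    #include <algorithm>
    #include <cmath>
    #include <utility>
    #include <vector>

    // Width/height of the tightest box around `pts` (assumed non-empty) once
    // `rotation` radians have been undone, mirroring the projection step above.
    std::pair<float, float> RotatedBoxSize(
        const std::vector<std::pair<float, float>>& pts, float rotation) {
      const float c = std::cos(-rotation), s = std::sin(-rotation);
      float min_x = pts[0].first * c - pts[0].second * s, max_x = min_x;
      float min_y = pts[0].first * s + pts[0].second * c, max_y = min_y;
      for (const auto& p : pts) {
        const float x = p.first * c - p.second * s;
        const float y = p.first * s + p.second * c;
        min_x = std::min(min_x, x);
        max_x = std::max(max_x, x);
        min_y = std::min(min_y, y);
        max_y = std::max(max_y, y);
      }
      return {max_x - min_x, max_y - min_y};
    }

The calculator then rotates the measured box center back into image space so the output NormalizedRect stays aligned with the original landmarks.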
+class HandLandmarksToRectCalculator : public CalculatorBase {
+ public:
+  static ::mediapipe::Status GetContract(CalculatorContract* cc) {
+    cc->Inputs().Tag(kNormalizedLandmarksTag).Set<NormalizedLandmarkList>();
+    cc->Inputs().Tag(kImageSizeTag).Set<std::pair<int, int>>();
+    cc->Outputs().Tag(kNormRectTag).Set<NormalizedRect>();
+    return ::mediapipe::OkStatus();
+  }
+
+  ::mediapipe::Status Open(CalculatorContext* cc) override {
+    cc->SetOffset(TimestampDiff(0));
+    return ::mediapipe::OkStatus();
+  }
+
+  ::mediapipe::Status Process(CalculatorContext* cc) override {
+    if (cc->Inputs().Tag(kNormalizedLandmarksTag).IsEmpty()) {
+      return ::mediapipe::OkStatus();
+    }
+    RET_CHECK(!cc->Inputs().Tag(kImageSizeTag).IsEmpty());
+
+    std::pair<int, int> image_size =
+        cc->Inputs().Tag(kImageSizeTag).Get<std::pair<int, int>>();
+    const auto& landmarks =
+        cc->Inputs().Tag(kNormalizedLandmarksTag).Get<NormalizedLandmarkList>();
+    auto output_rect = absl::make_unique<NormalizedRect>();
+    MP_RETURN_IF_ERROR(
+        NormalizedLandmarkListToRect(landmarks, image_size, output_rect.get()));
+    cc->Outputs()
+        .Tag(kNormRectTag)
+        .Add(output_rect.release(), cc->InputTimestamp());
+
+    return ::mediapipe::OkStatus();
+  }
+};
+REGISTER_CALCULATOR(HandLandmarksToRectCalculator);
+
+}  // namespace mediapipe
diff --git a/mediapipe/graphs/hand_tracking/subgraphs/BUILD b/mediapipe/graphs/hand_tracking/subgraphs/BUILD
index 85f59bb18..da57d1447 100644
--- a/mediapipe/graphs/hand_tracking/subgraphs/BUILD
+++ b/mediapipe/graphs/hand_tracking/subgraphs/BUILD
@@ -17,7 +17,7 @@ load(
     "mediapipe_simple_subgraph",
 )
 
-licenses(["notice"])  # Apache 2.0
+licenses(["notice"])
 
 package(default_visibility = ["//visibility:public"])
 
@@ -88,7 +88,7 @@ mediapipe_simple_subgraph(
         "//mediapipe/calculators/util:landmarks_to_render_data_calculator",
         "//mediapipe/calculators/util:rect_transformation_calculator",
         "//mediapipe/calculators/util:thresholding_calculator",
-        "//mediapipe/graphs/hand_tracking/calculators:hand_detections_to_rects_calculator",
+        "//mediapipe/graphs/hand_tracking/calculators:hand_landmarks_to_rect_calculator",
     ],
 )
 
@@ -196,7 +196,7 @@ mediapipe_simple_subgraph(
         "//mediapipe/calculators/util:landmarks_to_detection_calculator",
         "//mediapipe/calculators/util:rect_transformation_calculator",
         "//mediapipe/calculators/util:thresholding_calculator",
-        "//mediapipe/graphs/hand_tracking/calculators:hand_detections_to_rects_calculator",
+        "//mediapipe/graphs/hand_tracking/calculators:hand_landmarks_to_rect_calculator",
     ],
 )
 
diff --git a/mediapipe/graphs/hand_tracking/subgraphs/hand_landmark_cpu.pbtxt b/mediapipe/graphs/hand_tracking/subgraphs/hand_landmark_cpu.pbtxt
index 50881e8a7..9d42ddfbf 100644
--- a/mediapipe/graphs/hand_tracking/subgraphs/hand_landmark_cpu.pbtxt
+++ b/mediapipe/graphs/hand_tracking/subgraphs/hand_landmark_cpu.pbtxt
@@ -124,7 +124,7 @@ node {
   output_stream: "FLAG:hand_presence"
   node_options: {
     [type.googleapis.com/mediapipe.ThresholdingCalculatorOptions] {
-      threshold: 0.1
+      threshold: 0.5
     }
   }
 }
@@ -195,21 +195,14 @@ node {
   }
 }
 
-# Converts hand landmarks to a detection that tightly encloses all landmarks.
-node {
-  calculator: "LandmarksToDetectionCalculator"
-  input_stream: "NORM_LANDMARKS:partial_landmarks"
-  output_stream: "DETECTION:hand_detection"
-}
-
-# Converts the hand detection into a rectangle (normalized by image size)
+# Converts the hand landmarks into a rectangle (normalized by image size)
 # that encloses the hand. The calculator uses a subset of all hand landmarks
 # extracted from SplitNormalizedLandmarkListCalculator above to
 # calculate the bounding box and the rotation of the output rectangle.
Please # see the comments in the calculator for more detail. node { - calculator: "HandDetectionsToRectsCalculator" - input_stream: "DETECTION:hand_detection" + calculator: "HandLandmarksToRectCalculator" + input_stream: "NORM_LANDMARKS:partial_landmarks" input_stream: "IMAGE_SIZE:image_size" output_stream: "NORM_RECT:hand_rect_from_landmarks" } @@ -224,8 +217,8 @@ node { output_stream: "hand_rect_for_next_frame" node_options: { [type.googleapis.com/mediapipe.RectTransformationCalculatorOptions] { - scale_x: 2.1 - scale_y: 2.1 + scale_x: 2.0 + scale_y: 2.0 shift_y: -0.1 square_long: true } diff --git a/mediapipe/graphs/hand_tracking/subgraphs/hand_landmark_gpu.pbtxt b/mediapipe/graphs/hand_tracking/subgraphs/hand_landmark_gpu.pbtxt index fa0a00f2c..b3f316a41 100644 --- a/mediapipe/graphs/hand_tracking/subgraphs/hand_landmark_gpu.pbtxt +++ b/mediapipe/graphs/hand_tracking/subgraphs/hand_landmark_gpu.pbtxt @@ -128,7 +128,7 @@ node { output_stream: "FLAG:hand_presence" node_options: { [type.googleapis.com/mediapipe.ThresholdingCalculatorOptions] { - threshold: 0.1 + threshold: 0.5 } } } @@ -199,18 +199,14 @@ node { } } -# Converts hand landmarks to a detection that tightly encloses all landmarks. +# Converts the hand landmarks into a rectangle (normalized by image size) +# that encloses the hand. The calculator uses a subset of all hand landmarks +# extracted from SplitNormalizedLandmarkListCalculator above to +# calculate the bounding box and the rotation of the output rectangle. Please +# see the comments in the calculator for more detail. node { - calculator: "LandmarksToDetectionCalculator" + calculator: "HandLandmarksToRectCalculator" input_stream: "NORM_LANDMARKS:partial_landmarks" - output_stream: "DETECTION:hand_detection" -} - -# Converts the hand detection into a rectangle (normalized by image size) -# that encloses the hand. -node { - calculator: "HandDetectionsToRectsCalculator" - input_stream: "DETECTION:hand_detection" input_stream: "IMAGE_SIZE:image_size" output_stream: "NORM_RECT:hand_rect_from_landmarks" } @@ -225,8 +221,8 @@ node { output_stream: "hand_rect_for_next_frame" node_options: { [type.googleapis.com/mediapipe.RectTransformationCalculatorOptions] { - scale_x: 2.1 - scale_y: 2.1 + scale_x: 2.0 + scale_y: 2.0 shift_y: -0.1 square_long: true } diff --git a/mediapipe/graphs/instant_motion_tracking/BUILD b/mediapipe/graphs/instant_motion_tracking/BUILD new file mode 100644 index 000000000..e9be58734 --- /dev/null +++ b/mediapipe/graphs/instant_motion_tracking/BUILD @@ -0,0 +1,39 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +load( + "//mediapipe/framework/tool:mediapipe_graph.bzl", + "mediapipe_binary_graph", +) + +licenses(["notice"]) + +package(default_visibility = ["//visibility:public"]) + +cc_library( + name = "instant_motion_tracking_deps", + deps = [ + "//mediapipe/graphs/instant_motion_tracking/calculators:matrices_manager_calculator", + "//mediapipe/graphs/instant_motion_tracking/calculators:sticker_manager_calculator", + "//mediapipe/graphs/instant_motion_tracking/subgraphs:region_tracking", + "//mediapipe/graphs/object_detection_3d/calculators:gl_animation_overlay_calculator", + ], +) + +mediapipe_binary_graph( + name = "instant_motion_tracking_binary_graph", + graph = "instant_motion_tracking.pbtxt", + output_name = "instant_motion_tracking.binarypb", + deps = [":instant_motion_tracking_deps"], +) diff --git a/mediapipe/graphs/instant_motion_tracking/calculators/BUILD b/mediapipe/graphs/instant_motion_tracking/calculators/BUILD new file mode 100644 index 000000000..844ed0332 --- /dev/null +++ b/mediapipe/graphs/instant_motion_tracking/calculators/BUILD @@ -0,0 +1,86 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +load("//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library") + +licenses(["notice"]) + +package(default_visibility = ["//visibility:public"]) + +exports_files(["LICENSE"]) + +proto_library( + name = "sticker_buffer_proto", + srcs = [ + "sticker_buffer.proto", + ], +) + +mediapipe_cc_proto_library( + name = "sticker_buffer_cc_proto", + srcs = [ + "sticker_buffer.proto", + ], + visibility = ["//visibility:public"], + deps = [ + ":sticker_buffer_proto", + ], +) + +cc_library( + name = "sticker_manager_calculator", + srcs = ["sticker_manager_calculator.cc"], + hdrs = ["transformations.h"], + deps = [ + ":sticker_buffer_cc_proto", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework:timestamp", + "//mediapipe/framework/port:ret_check", + "//mediapipe/framework/port:status", + ], + alwayslink = 1, +) + +cc_library( + name = "matrices_manager_calculator", + srcs = ["matrices_manager_calculator.cc"], + hdrs = ["transformations.h"], + deps = [ + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework:timestamp", + "//mediapipe/framework/formats:image_frame", + "//mediapipe/framework/port:opencv_imgproc", + "//mediapipe/framework/port:ret_check", + "//mediapipe/framework/port:status", + "//mediapipe/graphs/object_detection_3d/calculators:box", + "//mediapipe/graphs/object_detection_3d/calculators:model_matrix_cc_proto", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", + "@eigen_archive//:eigen", + ], + alwayslink = 1, +) + +cc_library( + name = "tracked_anchor_manager_calculator", + srcs = ["tracked_anchor_manager_calculator.cc"], + hdrs = ["transformations.h"], + deps = [ + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework/port:ret_check", + "//mediapipe/framework/port:status", + "//mediapipe/util/tracking:box_tracker_cc_proto", + ], + alwayslink = 1, +) diff --git 
a/mediapipe/graphs/instant_motion_tracking/calculators/matrices_manager_calculator.cc b/mediapipe/graphs/instant_motion_tracking/calculators/matrices_manager_calculator.cc
new file mode 100644
index 000000000..29b942d8e
--- /dev/null
+++ b/mediapipe/graphs/instant_motion_tracking/calculators/matrices_manager_calculator.cc
@@ -0,0 +1,394 @@
+// Copyright 2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <memory>
+#include <vector>
+
+#include "Eigen/Dense"
+#include "Eigen/src/Core/util/Constants.h"
+#include "Eigen/src/Geometry/Quaternion.h"
+#include "absl/memory/memory.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/str_join.h"
+#include "mediapipe/framework/calculator_framework.h"
+#include "mediapipe/framework/port/ret_check.h"
+#include "mediapipe/framework/port/status.h"
+#include "mediapipe/graphs/instant_motion_tracking/calculators/transformations.h"
+#include "mediapipe/graphs/object_detection_3d/calculators/box.h"
+#include "mediapipe/graphs/object_detection_3d/calculators/model_matrix.pb.h"
+
+namespace mediapipe {
+
+namespace {
+using Matrix4fCM = Eigen::Matrix<float, 4, 4, Eigen::ColMajor>;
+using Vector3f = Eigen::Vector3f;
+using Matrix3f = Eigen::Matrix3f;
+using DiagonalMatrix3f = Eigen::DiagonalMatrix<float, 3>;
+constexpr char kAnchorsTag[] = "ANCHORS";
+constexpr char kIMUMatrixTag[] = "IMU_ROTATION";
+constexpr char kUserRotationsTag[] = "USER_ROTATIONS";
+constexpr char kUserScalingsTag[] = "USER_SCALINGS";
+constexpr char kRendersTag[] = "RENDER_DATA";
+constexpr char kGifAspectRatioTag[] = "GIF_ASPECT_RATIO";
+constexpr char kFOVSidePacketTag[] = "FOV";
+constexpr char kAspectRatioSidePacketTag[] = "ASPECT_RATIO";
+// Initial Z value (-10 is center point in visual range for OpenGL render)
+constexpr float kInitialZ = -10.0f;
+}  // namespace
+
+// Intermediary for rotation and translation data to model matrix usable by
+// gl_animation_overlay_calculator.
For information on the construction of
+// OpenGL objects and transformations (including a breakdown of model
+// matrices), please visit: https://open.gl/transformations
+//
+// Input Side Packets:
+//   FOV - Vertical field of view for device [REQUIRED - Defines perspective
+//     matrix]
+//   ASPECT_RATIO - Aspect ratio of device [REQUIRED - Defines perspective
+//     matrix]
+//
+// Input streams:
+//   ANCHORS - Anchor data with x,y,z coordinates (x,y are in [0.0-1.0] range
+//     for position on the device screen, while z is the scaling factor that
+//     changes in proportion to the distance from the tracked region) [REQUIRED]
+//   IMU_ROTATION - float[9] of row-major device rotation matrix [REQUIRED]
+//   USER_ROTATIONS - UserRotations with corresponding radians of rotation
+//     [REQUIRED]
+//   USER_SCALINGS - UserScalings with corresponding scale factor [REQUIRED]
+//   GIF_ASPECT_RATIO - Aspect ratio of GIF image used to dynamically scale
+//     GIF asset, defined as width / height [OPTIONAL]
+// Output:
+//   MATRICES - TimedModelMatrixProtoList of each object type to render
+//     [REQUIRED]
+//
+// Example config:
+// node {
+//   calculator: "MatricesManagerCalculator"
+//   input_stream: "ANCHORS:tracked_scaled_anchor_data"
+//   input_stream: "IMU_ROTATION:imu_rotation_matrix"
+//   input_stream: "USER_ROTATIONS:user_rotation_data"
+//   input_stream: "USER_SCALINGS:user_scaling_data"
+//   input_stream: "GIF_ASPECT_RATIO:gif_aspect_ratio"
+//   output_stream: "MATRICES:0:first_render_matrices"
+//   output_stream: "MATRICES:1:second_render_matrices" [unbounded input size]
+//   input_side_packet: "FOV:vertical_fov_radians"
+//   input_side_packet: "ASPECT_RATIO:aspect_ratio"
+// }
+
+class MatricesManagerCalculator : public CalculatorBase {
+ public:
+  static ::mediapipe::Status GetContract(CalculatorContract* cc);
+  ::mediapipe::Status Open(CalculatorContext* cc) override;
+  ::mediapipe::Status Process(CalculatorContext* cc) override;
+
+ private:
+  // Device properties that will be preset by side packets
+  float vertical_fov_radians_ = 0.0f;
+  float aspect_ratio_ = 0.0f;
+  float gif_aspect_ratio_ = 1.0f;
+
+  const Matrix3f GenerateUserRotationMatrix(const float rotation_radians) const;
+  const Matrix4fCM GenerateEigenModelMatrix(
+      const Vector3f& translation_vector,
+      const Matrix3f& rotation_submatrix) const;
+  const Vector3f GenerateAnchorVector(const Anchor& tracked_anchor) const;
+  DiagonalMatrix3f GetDefaultRenderScaleDiagonal(
+      const int render_id, const float user_scale_factor,
+      const float gif_aspect_ratio) const;
+
+  // Returns a user scaling increment associated with the sticker_id
+  // TODO: Adjust lookup function if total number of stickers is uncapped to
+  // improve performance
+  const float GetUserScaler(const std::vector<UserScaling>& scalings,
+                            const int sticker_id) const {
+    for (const UserScaling& user_scaling : scalings) {
+      if (user_scaling.sticker_id == sticker_id) {
+        return user_scaling.scale_factor;
+      }
+    }
+    LOG(WARNING) << "Cannot find sticker_id: " << sticker_id
+                 << ", returning 1.0f scaling";
+    return 1.0f;
+  }
+
+  // Returns a user rotation in radians associated with the sticker_id
+  const float GetUserRotation(const std::vector<UserRotation>& rotations,
+                              const int sticker_id) {
+    for (const UserRotation& rotation : rotations) {
+      if (rotation.sticker_id == sticker_id) {
+        return rotation.rotation_radians;
+      }
+    }
+    LOG(WARNING) << "Cannot find sticker_id: " << sticker_id
+                 << ", returning 0.0f rotation";
+    return 0.0f;
+  }
+};
+
+REGISTER_CALCULATOR(MatricesManagerCalculator);
+
+::mediapipe::Status
MatricesManagerCalculator::GetContract(
+    CalculatorContract* cc) {
+  RET_CHECK(cc->Inputs().HasTag(kAnchorsTag) &&
+            cc->Inputs().HasTag(kIMUMatrixTag) &&
+            cc->Inputs().HasTag(kUserRotationsTag) &&
+            cc->Inputs().HasTag(kUserScalingsTag) &&
+            cc->InputSidePackets().HasTag(kFOVSidePacketTag) &&
+            cc->InputSidePackets().HasTag(kAspectRatioSidePacketTag));
+
+  cc->Inputs().Tag(kAnchorsTag).Set<std::vector<Anchor>>();
+  cc->Inputs().Tag(kIMUMatrixTag).Set<float[]>();
+  cc->Inputs().Tag(kUserScalingsTag).Set<std::vector<UserScaling>>();
+  cc->Inputs().Tag(kUserRotationsTag).Set<std::vector<UserRotation>>();
+  cc->Inputs().Tag(kRendersTag).Set<std::vector<int>>();
+  if (cc->Inputs().HasTag(kGifAspectRatioTag)) {
+    cc->Inputs().Tag(kGifAspectRatioTag).Set<float>();
+  }
+
+  for (CollectionItemId id = cc->Outputs().BeginId("MATRICES");
+       id < cc->Outputs().EndId("MATRICES"); ++id) {
+    cc->Outputs().Get(id).Set<mediapipe::TimedModelMatrixProtoList>();
+  }
+  cc->InputSidePackets().Tag(kFOVSidePacketTag).Set<float>();
+  cc->InputSidePackets().Tag(kAspectRatioSidePacketTag).Set<float>();
+
+  return ::mediapipe::OkStatus();
+}
+
+::mediapipe::Status MatricesManagerCalculator::Open(CalculatorContext* cc) {
+  cc->SetOffset(TimestampDiff(0));
+  // Set device properties from side packets
+  vertical_fov_radians_ =
+      cc->InputSidePackets().Tag(kFOVSidePacketTag).Get<float>();
+  aspect_ratio_ =
+      cc->InputSidePackets().Tag(kAspectRatioSidePacketTag).Get<float>();
+  return ::mediapipe::OkStatus();
+}
+
+::mediapipe::Status MatricesManagerCalculator::Process(CalculatorContext* cc) {
+  // Define each object's model matrices
+  auto asset_matrices_gif =
+      std::make_unique<mediapipe::TimedModelMatrixProtoList>();
+  auto asset_matrices_1 =
+      std::make_unique<mediapipe::TimedModelMatrixProtoList>();
+  // Clear all model matrices
+  asset_matrices_gif->clear_model_matrix();
+  asset_matrices_1->clear_model_matrix();
+
+  const std::vector<UserRotation> user_rotation_data =
+      cc->Inputs().Tag(kUserRotationsTag).Get<std::vector<UserRotation>>();
+
+  const std::vector<UserScaling> user_scaling_data =
+      cc->Inputs().Tag(kUserScalingsTag).Get<std::vector<UserScaling>>();
+
+  const std::vector<int> render_data =
+      cc->Inputs().Tag(kRendersTag).Get<std::vector<int>>();
+
+  const std::vector<Anchor> anchor_data =
+      cc->Inputs().Tag(kAnchorsTag).Get<std::vector<Anchor>>();
+  if (cc->Inputs().HasTag(kGifAspectRatioTag) &&
+      !cc->Inputs().Tag(kGifAspectRatioTag).IsEmpty()) {
+    gif_aspect_ratio_ = cc->Inputs().Tag(kGifAspectRatioTag).Get<float>();
+  }
+
+  // Device IMU rotation submatrix
+  const auto imu_matrix = cc->Inputs().Tag(kIMUMatrixTag).Get<float[]>();
+  Matrix3f imu_rotation_submatrix;
+  int idx = 0;
+  for (int x = 0; x < 3; ++x) {
+    for (int y = 0; y < 3; ++y) {
+      // Input matrix is a row-major matrix; it must be reformatted to
+      // column-major via a transpose procedure
+      imu_rotation_submatrix(y, x) = imu_matrix[idx++];
+    }
+  }
+
+  int render_idx = 0;
+  for (const Anchor& anchor : anchor_data) {
+    const int id = anchor.sticker_id;
+    mediapipe::TimedModelMatrixProto* model_matrix;
+    // Add model matrix to matrices list for defined object render ID
+    if (render_data[render_idx] == 0) {  // GIF
+      model_matrix = asset_matrices_gif->add_model_matrix();
+    } else {  // Asset 3D
+      if (render_data[render_idx] != 1) {
+        LOG(ERROR) << "render id: " << render_data[render_idx]
+                   << " is not supported.
Fall back to using render_id = 1."; + } + model_matrix = asset_matrices_1->add_model_matrix(); + } + + model_matrix->set_id(id); + + // The user transformation data associated with this sticker must be defined + const float user_rotation_radians = GetUserRotation(user_rotation_data, id); + const float user_scale_factor = GetUserScaler(user_scaling_data, id); + + // A vector representative of a user's sticker rotation transformation can + // be created + const Matrix3f user_rotation_submatrix = + GenerateUserRotationMatrix(user_rotation_radians); + // Next, the diagonal representative of the combined scaling data + const DiagonalMatrix3f scaling_diagonal = GetDefaultRenderScaleDiagonal( + render_data[render_idx], user_scale_factor, gif_aspect_ratio_); + // Increment to next render id from vector + render_idx++; + + // The user transformation data can be concatenated into a final rotation + // submatrix with the device IMU rotational data + const Matrix3f user_transformed_rotation_submatrix = + imu_rotation_submatrix * user_rotation_submatrix * scaling_diagonal; + + // A vector representative of the translation of the object in OpenGL + // coordinate space must be generated + const Vector3f translation_vector = GenerateAnchorVector(anchor); + + // Concatenate all model matrix data + const Matrix4fCM final_model_matrix = GenerateEigenModelMatrix( + translation_vector, user_transformed_rotation_submatrix); + + // The generated model matrix must be mapped to TimedModelMatrixProto + // (col-wise) + for (int x = 0; x < final_model_matrix.rows(); ++x) { + for (int y = 0; y < final_model_matrix.cols(); ++y) { + model_matrix->add_matrix_entries(final_model_matrix(x, y)); + } + } + } + + // Output all individual render matrices + // TODO: Perform depth ordering with gl_animation_overlay_calculator to render + // objects in order by depth to allow occlusion. 
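For clarity, the nested loop above emits the sixteen matrix entries in row-by-row visiting order, independent of the column-major storage of Matrix4fCM. A sketch of how a consumer could rebuild the Eigen matrix from that order (a hypothetical helper, not part of this change):

    #include "Eigen/Dense"

    // Rebuilds the 4x4 matrix from entries emitted row by row above.
    Eigen::Matrix4f FromEntries(const float* entries) {
      Eigen::Matrix4f m;
      for (int r = 0; r < 4; ++r) {
        for (int c = 0; c < 4; ++c) {
          m(r, c) = entries[r * 4 + c];
        }
      }
      return m;
    }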
+ cc->Outputs() + .Get(cc->Outputs().GetId("MATRICES", 0)) + .Add(asset_matrices_gif.release(), cc->InputTimestamp()); + cc->Outputs() + .Get(cc->Outputs().GetId("MATRICES", 1)) + .Add(asset_matrices_1.release(), cc->InputTimestamp()); + + return ::mediapipe::OkStatus(); +} + +// Using a specified rotation value in radians, generate a rotation matrix for +// use with base rotation submatrix +const Matrix3f MatricesManagerCalculator::GenerateUserRotationMatrix( + const float rotation_radians) const { + Eigen::Matrix3f user_rotation_submatrix; + user_rotation_submatrix = + // The rotation in radians must be inverted to rotate the object + // with the direction of finger movement from the user (system dependent) + Eigen::AngleAxisf(-rotation_radians, Eigen::Vector3f::UnitY()) * + Eigen::AngleAxisf(0.0f, Eigen::Vector3f::UnitZ()) * + // Model orientations all assume z-axis is up, but we need y-axis upwards, + // therefore, a +(M_PI * 0.5f) transformation must be applied + // TODO: Bring default rotations, translations, and scalings into + // independent sticker configuration + Eigen::AngleAxisf(M_PI * 0.5f, Eigen::Vector3f::UnitX()); + // Matrix must be transposed due to the method of submatrix generation in + // Eigen + return user_rotation_submatrix.transpose(); +} + +// TODO: Investigate possible differences in warping of tracking speed across +// screen Using the sticker anchor data, a translation vector can be generated +// in OpenGL coordinate space +const Vector3f MatricesManagerCalculator::GenerateAnchorVector( + const Anchor& tracked_anchor) const { + // Using an initial z-value in OpenGL space, generate a new base z-axis value + // to mimic scaling by distance. + const float z = kInitialZ * tracked_anchor.z; + + // Using triangle geometry, the minimum for a y-coordinate that will appear in + // the view field for the given z value above can be found. + const float y_half_range = z * (tan(vertical_fov_radians_ * 0.5f)); + + // The aspect ratio of the device and y_minimum calculated above can be used + // to find the minimum value for x that will appear in the view field of the + // device screen. + const float x_half_range = y_half_range * aspect_ratio_; + + // Given the minimum bounds of the screen in OpenGL space, the tracked anchor + // coordinates can be converted to OpenGL coordinate space. 
+ // + // (i.e: X and Y will be converted from [0.0-1.0] space to [x_minimum, + // -x_minimum] space and [y_minimum, -y_minimum] space respectively) + const float x = (-2.0f * tracked_anchor.x * x_half_range) + x_half_range; + const float y = (-2.0f * tracked_anchor.y * y_half_range) + y_half_range; + + // A translation transformation vector can be generated via Eigen + const Vector3f t_vector(x, y, z); + return t_vector; +} + +// Generates a model matrix via Eigen with appropriate transformations +const Matrix4fCM MatricesManagerCalculator::GenerateEigenModelMatrix( + const Vector3f& translation_vector, + const Matrix3f& rotation_submatrix) const { + // Define basic empty model matrix + Matrix4fCM mvp_matrix; + + // Set the translation vector + mvp_matrix.topRightCorner<3, 1>() = translation_vector; + + // Set the rotation submatrix + mvp_matrix.topLeftCorner<3, 3>() = rotation_submatrix; + + // Set trailing 1.0 required by OpenGL to define coordinate space + mvp_matrix(3, 3) = 1.0f; + + return mvp_matrix; +} + +// This returns a scaling matrix to alter the projection matrix for +// the specified render id in order to ensure all objects render at a similar +// size in the view screen upon initial placement +DiagonalMatrix3f MatricesManagerCalculator::GetDefaultRenderScaleDiagonal( + const int render_id, const float user_scale_factor, + const float gif_aspect_ratio) const { + float scale_preset = 1.0f; + float x_scalar = 1.0f; + float y_scalar = 1.0f; + + switch (render_id) { + case 0: { // GIF + // 160 is the scaling preset to make the GIF asset appear relatively + // similar in size to all other assets + scale_preset = 160.0f; + if (gif_aspect_ratio >= 1.0f) { + // GIF is wider horizontally (scale on x-axis) + x_scalar = gif_aspect_ratio; + } else { + // GIF is wider vertically (scale on y-axis) + y_scalar = 1.0f / gif_aspect_ratio; + } + break; + } + case 1: { // 3D asset + // 5 is the scaling preset to make the 3D asset appear relatively + // similar in size to all other assets + scale_preset = 5.0f; + break; + } + default: { + LOG(INFO) << "Unsupported render_id: " << render_id + << ", returning default render_scale"; + break; + } + } + + DiagonalMatrix3f scaling(scale_preset * user_scale_factor * x_scalar, + scale_preset * user_scale_factor * y_scalar, + scale_preset * user_scale_factor); + return scaling; +} +} // namespace mediapipe diff --git a/mediapipe/graphs/instant_motion_tracking/calculators/sticker_buffer.proto b/mediapipe/graphs/instant_motion_tracking/calculators/sticker_buffer.proto new file mode 100644 index 000000000..b73209cf4 --- /dev/null +++ b/mediapipe/graphs/instant_motion_tracking/calculators/sticker_buffer.proto @@ -0,0 +1,33 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
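For reference, a StickerRoll message matching the schema below could look like this in text format (hypothetical values; per MatricesManagerCalculator, render_id 0 selects the GIF asset):

    sticker {
      id: 1
      x: 0.5
      y: 0.5
      rotation: 0.0
      scale: 1.0
      render_id: 0
    }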
+
+syntax = "proto2";
+
+package mediapipe;
+
+option java_package = "com.google.mediapipe.graphs.instantmotiontracking";
+option java_outer_classname = "StickerBufferProto";
+
+message Sticker {
+  optional int32 id = 1;
+  optional float x = 2;
+  optional float y = 3;
+  optional float rotation = 4;
+  optional float scale = 5;
+  optional int32 render_id = 6;
+}
+
+message StickerRoll {
+  repeated Sticker sticker = 1;
+}
diff --git a/mediapipe/graphs/instant_motion_tracking/calculators/sticker_manager_calculator.cc b/mediapipe/graphs/instant_motion_tracking/calculators/sticker_manager_calculator.cc
new file mode 100644
index 000000000..96413afb8
--- /dev/null
+++ b/mediapipe/graphs/instant_motion_tracking/calculators/sticker_manager_calculator.cc
@@ -0,0 +1,150 @@
+// Copyright 2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <vector>
+
+#include "mediapipe/framework/calculator_framework.h"
+#include "mediapipe/framework/port/ret_check.h"
+#include "mediapipe/framework/port/status.h"
+#include "mediapipe/graphs/instant_motion_tracking/calculators/sticker_buffer.pb.h"
+#include "mediapipe/graphs/instant_motion_tracking/calculators/transformations.h"
+
+namespace mediapipe {
+
+constexpr char kProtoDataString[] = "PROTO";
+constexpr char kAnchorsTag[] = "ANCHORS";
+constexpr char kUserRotationsTag[] = "USER_ROTATIONS";
+constexpr char kUserScalingsTag[] = "USER_SCALINGS";
+constexpr char kRenderDescriptorsTag[] = "RENDER_DATA";
+
+// This calculator takes in the sticker protobuffer data and parses each
+// individual sticker object into anchors, user rotations and scalings, in
+// addition to basic render data represented in integer form.
+//
+// Input:
+//   PROTO - String of sticker data in appropriate protobuf format [REQUIRED]
+// Output:
+//   ANCHORS - Anchors with initial normalized X,Y coordinates [REQUIRED]
+//   USER_ROTATIONS - UserRotations with radians of rotation from user
+//     [REQUIRED]
+//   USER_SCALINGS - UserScalings with increment of scaling from user [REQUIRED]
+//   RENDER_DATA - Descriptors of which objects/animations to render for
+//     stickers [REQUIRED]
+//
+// Example config:
+// node {
+//   calculator: "StickerManagerCalculator"
+//   input_stream: "PROTO:sticker_proto_string"
+//   output_stream: "ANCHORS:initial_anchor_data"
+//   output_stream: "USER_ROTATIONS:user_rotation_data"
+//   output_stream: "USER_SCALINGS:user_scaling_data"
+//   output_stream: "RENDER_DATA:sticker_render_data"
+// }
+
+class StickerManagerCalculator : public CalculatorBase {
+ public:
+  static ::mediapipe::Status GetContract(CalculatorContract* cc) {
+    RET_CHECK(cc->Inputs().HasTag(kProtoDataString));
+    RET_CHECK(cc->Outputs().HasTag(kAnchorsTag) &&
+              cc->Outputs().HasTag(kUserRotationsTag) &&
+              cc->Outputs().HasTag(kUserScalingsTag) &&
+              cc->Outputs().HasTag(kRenderDescriptorsTag));
+
+    cc->Inputs().Tag(kProtoDataString).Set<std::string>();
+    cc->Outputs().Tag(kAnchorsTag).Set<std::vector<Anchor>>();
+    cc->Outputs().Tag(kUserRotationsTag).Set<std::vector<UserRotation>>();
+    cc->Outputs().Tag(kUserScalingsTag).Set<std::vector<UserScaling>>();
+    cc->Outputs().Tag(kRenderDescriptorsTag).Set<std::vector<int>>();
+
+    return ::mediapipe::OkStatus();
+  }
+
+  ::mediapipe::Status Open(CalculatorContext* cc) override {
+    cc->SetOffset(TimestampDiff(0));
+    return ::mediapipe::OkStatus();
+  }
+
+  ::mediapipe::Status Process(CalculatorContext* cc) override {
+    std::string sticker_proto_string =
+        cc->Inputs().Tag(kProtoDataString).Get<std::string>();
+
+    std::vector<Anchor> initial_anchor_data;
+    std::vector<UserRotation> user_rotation_data;
+    std::vector<UserScaling> user_scaling_data;
+    std::vector<int> render_data;
+
+    ::mediapipe::StickerRoll sticker_roll;
+    bool parse_success = sticker_roll.ParseFromString(sticker_proto_string);
+
+    // Ensure parsing was a success
+    RET_CHECK(parse_success) << "Error parsing sticker protobuf data";
+
+    for (int i = 0; i < sticker_roll.sticker().size(); ++i) {
+      // Declare empty structures for sticker data
+      Anchor initial_anchor;
+      UserRotation user_rotation;
+      UserScaling user_scaling;
+      // Get individual Sticker object as defined by the protobuf schema
+      ::mediapipe::Sticker sticker = sticker_roll.sticker(i);
+      // Set individual data structure ids to associate with this sticker
+      initial_anchor.sticker_id = sticker.id();
+      user_rotation.sticker_id = sticker.id();
+      user_scaling.sticker_id = sticker.id();
+      initial_anchor.x = sticker.x();
+      initial_anchor.y = sticker.y();
+      initial_anchor.z = 1.0f;  // default to 1.0 in normalized 3d space
+      user_rotation.rotation_radians = sticker.rotation();
+      user_scaling.scale_factor = sticker.scale();
+      const int render_id = sticker.render_id();
+      // Set all vector data with sticker attributes
+      initial_anchor_data.emplace_back(initial_anchor);
+      user_rotation_data.emplace_back(user_rotation);
+      user_scaling_data.emplace_back(user_scaling);
+      render_data.emplace_back(render_id);
+    }
+
+    if (cc->Outputs().HasTag(kAnchorsTag)) {
+      cc->Outputs()
+          .Tag(kAnchorsTag)
+          .AddPacket(MakePacket<std::vector<Anchor>>(initial_anchor_data)
+                         .At(cc->InputTimestamp()));
+    }
+    if (cc->Outputs().HasTag(kUserRotationsTag)) {
+      cc->Outputs()
+          .Tag(kUserRotationsTag)
+          .AddPacket(MakePacket<std::vector<UserRotation>>(user_rotation_data)
+                         .At(cc->InputTimestamp()));
+    }
+    if (cc->Outputs().HasTag(kUserScalingsTag)) {
+      cc->Outputs()
+          .Tag(kUserScalingsTag)
+          .AddPacket(MakePacket<std::vector<UserScaling>>(user_scaling_data)
+                         .At(cc->InputTimestamp()));
+    }
+    if (cc->Outputs().HasTag(kRenderDescriptorsTag)) {
+      cc->Outputs()
+          .Tag(kRenderDescriptorsTag)
+          .AddPacket(MakePacket<std::vector<int>>(render_data)
+                         .At(cc->InputTimestamp()));
+    }
+
+    return ::mediapipe::OkStatus();
+  }
+
+  ::mediapipe::Status Close(CalculatorContext* cc) override {
+    return ::mediapipe::OkStatus();
+  }
+};
+
+REGISTER_CALCULATOR(StickerManagerCalculator);
+}  // namespace mediapipe
diff --git a/mediapipe/graphs/instant_motion_tracking/calculators/tracked_anchor_manager_calculator.cc b/mediapipe/graphs/instant_motion_tracking/calculators/tracked_anchor_manager_calculator.cc
new file mode 100644
index 000000000..832ea9217
--- /dev/null
+++ b/mediapipe/graphs/instant_motion_tracking/calculators/tracked_anchor_manager_calculator.cc
@@ -0,0 +1,213 @@
+// Copyright 2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "mediapipe/framework/calculator_framework.h"
+#include "mediapipe/framework/port/ret_check.h"
+#include "mediapipe/framework/port/status.h"
+#include "mediapipe/graphs/instant_motion_tracking/calculators/transformations.h"
+#include "mediapipe/util/tracking/box_tracker.pb.h"
+
+namespace mediapipe {
+
+constexpr char kSentinelTag[] = "SENTINEL";
+constexpr char kAnchorsTag[] = "ANCHORS";
+constexpr char kBoxesInputTag[] = "BOXES";
+constexpr char kBoxesOutputTag[] = "START_POS";
+constexpr char kCancelTag[] = "CANCEL_ID";
+// TODO: Find optimal Height/Width (0.1-0.3)
+constexpr float kBoxEdgeSize =
+    0.2f;  // Used to establish tracking box dimensions
+constexpr float kUsToMs =
+    1000.0f;  // Used to convert from microseconds to millis
+
+// This calculator manages the regions being tracked for each individual
+// sticker and adjusts the regions being tracked if a change is detected in a
+// sticker's initial anchor placement.
+// Regions being tracked that have no associated sticker will be
+// automatically removed upon the next iteration of the graph to optimize
+// performance and remove all sticker artifacts.
+//
+// Input:
+//  SENTINEL - ID of sticker which has an anchor that must be reset (-1 when
+//  no anchor must be reset) [REQUIRED]
+//  ANCHORS - Initial anchor data (tracks changes and where to re/position)
+//  [REQUIRED]
+//  BOXES - Used in cycle, boxes being tracked meant to update positions
+//  [OPTIONAL - provided by subgraph]
+// Output:
+//  START_POS - Positions of boxes being tracked (can be overwritten with ID)
+//  [REQUIRED]
+//  CANCEL_ID - Single integer ID of tracking box to remove from tracker
+//  subgraph [OPTIONAL]
+//  ANCHORS - Updated set of anchors with tracked and normalized X,Y,Z
+//  [REQUIRED]
+//
+// Example config:
+// node {
+//   calculator: "TrackedAnchorManagerCalculator"
+//   input_stream: "SENTINEL:sticker_sentinel"
+//   input_stream: "ANCHORS:initial_anchor_data"
+//   input_stream: "BOXES:boxes"
+//   input_stream_info: {
+//     tag_index: 'BOXES'
+//     back_edge: true
+//   }
+//   output_stream: "START_POS:start_pos"
+//   output_stream: "CANCEL_ID:cancel_object_id"
+//   output_stream: "ANCHORS:tracked_scaled_anchor_data"
+// }
+
+class TrackedAnchorManagerCalculator : public CalculatorBase {
+ private:
+  // Previous graph iteration anchor data
+  std::vector<Anchor> previous_anchor_data_;
+
+ public:
+  static ::mediapipe::Status GetContract(CalculatorContract* cc) {
+    RET_CHECK(cc->Inputs().HasTag(kAnchorsTag) &&
+              cc->Inputs().HasTag(kSentinelTag));
+    RET_CHECK(cc->Outputs().HasTag(kAnchorsTag) &&
+              cc->Outputs().HasTag(kBoxesOutputTag));
+
+    cc->Inputs().Tag(kAnchorsTag).Set<std::vector<Anchor>>();
+    cc->Inputs().Tag(kSentinelTag).Set<int>();
+
+    if (cc->Inputs().HasTag(kBoxesInputTag)) {
+      cc->Inputs().Tag(kBoxesInputTag).Set<mediapipe::TimedBoxProtoList>();
+    }
+
+    cc->Outputs().Tag(kAnchorsTag).Set<std::vector<Anchor>>();
+    cc->Outputs().Tag(kBoxesOutputTag).Set<mediapipe::TimedBoxProtoList>();
+
+    if (cc->Outputs().HasTag(kCancelTag)) {
+      cc->Outputs().Tag(kCancelTag).Set<int>();
+    }
+
+    return ::mediapipe::OkStatus();
+  }
+
+  ::mediapipe::Status Open(CalculatorContext* cc) override {
+    return ::mediapipe::OkStatus();
+  }
+
+  ::mediapipe::Status Process(CalculatorContext* cc) override;
+};
+REGISTER_CALCULATOR(TrackedAnchorManagerCalculator);
+
+::mediapipe::Status TrackedAnchorManagerCalculator::Process(
+    CalculatorContext* cc) {
+  mediapipe::Timestamp timestamp = cc->InputTimestamp();
+  const int sticker_sentinel = cc->Inputs().Tag(kSentinelTag).Get<int>();
+  std::vector<Anchor> current_anchor_data =
+      cc->Inputs().Tag(kAnchorsTag).Get<std::vector<Anchor>>();
+  auto pos_boxes = absl::make_unique<mediapipe::TimedBoxProtoList>();
+  std::vector<Anchor> tracked_scaled_anchor_data;
+
+  // Delete any boxes being tracked without an associated anchor
+  for (const mediapipe::TimedBoxProto& box :
+       cc->Inputs()
+           .Tag(kBoxesInputTag)
+           .Get<mediapipe::TimedBoxProtoList>()
+           .box()) {
+    bool anchor_exists = false;
+    for (Anchor anchor : current_anchor_data) {
+      if (box.id() == anchor.sticker_id) {
+        anchor_exists = true;
+        break;
+      }
+    }
+    if (!anchor_exists) {
+      cc->Outputs()
+          .Tag(kCancelTag)
+          .AddPacket(MakePacket<int>(box.id()).At(timestamp++));
+    }
+  }
+
+  // Perform tracking or updating for each anchor position
+  for (const Anchor& anchor : current_anchor_data) {
+    Anchor output_anchor = anchor;
+    // Check if anchor position is being reset by user in this graph iteration
+    if (sticker_sentinel == anchor.sticker_id) {
+      // Delete associated tracking box
+      // TODO: BoxTrackingSubgraph should accept vector to avoid breaking
+      // timestamp rules
+      cc->Outputs()
+          .Tag(kCancelTag)
+          .AddPacket(MakePacket<int>(anchor.sticker_id).At(timestamp++));
+      // Add a tracking box
+      mediapipe::TimedBoxProto* box = pos_boxes->add_box();
+      box->set_left(anchor.x - kBoxEdgeSize * 0.5f);
+      box->set_right(anchor.x + kBoxEdgeSize * 0.5f);
+      box->set_top(anchor.y - kBoxEdgeSize * 0.5f);
+      box->set_bottom(anchor.y + kBoxEdgeSize * 0.5f);
+      box->set_id(anchor.sticker_id);
+      box->set_time_msec((timestamp++).Microseconds() / kUsToMs);
+      // Default value for normalized z (scale factor)
+      output_anchor.z = 1.0f;
+    } else {
+      // Anchor position was not reset by the user, so attempt to update the
+      // anchor position from the tracking subgraph (TimedBoxProto)
+      bool updated_from_tracker = false;
+      const mediapipe::TimedBoxProtoList box_list =
+          cc->Inputs().Tag(kBoxesInputTag).Get<mediapipe::TimedBoxProtoList>();
+      for (const auto& box : box_list.box()) {
+        if (box.id() == anchor.sticker_id) {
+          // Get center x normalized coordinate [0.0-1.0]
+          output_anchor.x = (box.left() + box.right()) * 0.5f;
+          // Get center y normalized coordinate [0.0-1.0]
+          output_anchor.y = (box.top() + box.bottom()) * 0.5f;
+          // Get center z coordinate [z starts at normalized 1.0 and scales
+          // inversely with box-width]
+          // TODO: Look into issues with uniform scaling on x-axis and y-axis
+          output_anchor.z = kBoxEdgeSize / (box.right() - box.left());
+          updated_from_tracker = true;
+          break;
+        }
+      }
+      // If the anchor position was not updated from the tracker, create a new
+      // tracking box at the last recorded anchor coordinates. This allows all
+      // current stickers to be tracked at approximately their last location
+      // even if re-acquisition in the BoxTrackingSubgraph encounters errors
+      if (!updated_from_tracker) {
+        for (const Anchor& prev_anchor : previous_anchor_data_) {
+          if (anchor.sticker_id == prev_anchor.sticker_id) {
+            mediapipe::TimedBoxProto* box = pos_boxes->add_box();
+            box->set_left(prev_anchor.x - kBoxEdgeSize * 0.5f);
+            box->set_right(prev_anchor.x + kBoxEdgeSize * 0.5f);
+            box->set_top(prev_anchor.y - kBoxEdgeSize * 0.5f);
+            box->set_bottom(prev_anchor.y + kBoxEdgeSize * 0.5f);
+            box->set_id(prev_anchor.sticker_id);
+            box->set_time_msec(cc->InputTimestamp().Microseconds() / kUsToMs);
+            output_anchor = prev_anchor;
+            // Default value for normalized z (scale factor)
+            output_anchor.z = 1.0f;
+            break;
+          }
+        }
+      }
+    }
+    tracked_scaled_anchor_data.emplace_back(output_anchor);
+  }
+  // Set anchor data for next iteration
+  previous_anchor_data_ = tracked_scaled_anchor_data;
+
+  cc->Outputs()
+      .Tag(kAnchorsTag)
+      .AddPacket(MakePacket<std::vector<Anchor>>(tracked_scaled_anchor_data)
+                     .At(cc->InputTimestamp()));
+  cc->Outputs()
+      .Tag(kBoxesOutputTag)
+      .Add(pos_boxes.release(), cc->InputTimestamp());
+
+  return ::mediapipe::OkStatus();
+}
+}  // namespace mediapipe
diff --git a/mediapipe/graphs/instant_motion_tracking/calculators/transformations.h b/mediapipe/graphs/instant_motion_tracking/calculators/transformations.h
new file mode 100644
index 000000000..cbacdb712
--- /dev/null
+++ b/mediapipe/graphs/instant_motion_tracking/calculators/transformations.h
@@ -0,0 +1,42 @@
+// Copyright 2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MEDIAPIPE_GRAPHS_INSTANT_MOTION_TRACKING_CALCULATORS_TRANSFORMATIONS_H_ +#define MEDIAPIPE_GRAPHS_INSTANT_MOTION_TRACKING_CALCULATORS_TRANSFORMATIONS_H_ + +namespace mediapipe { + +// Radians by which to rotate the object (Provided by UI input) +struct UserRotation { + float rotation_radians; + int sticker_id; +}; + +// Scaling factor provided by the UI application end +struct UserScaling { + float scale_factor; + int sticker_id; +}; + +// The normalized anchor coordinates of a sticker +struct Anchor { + float x; // [0.0-1.0] + float y; // [0.0-1.0] + float z; // Centered around 1.0 [current_scale = z * initial_scale] + int sticker_id; +}; + +} // namespace mediapipe + +#endif // MEDIAPIPE_GRAPHS_INSTANT_MOTION_TRACKING_CALCULATORS_TRANSFORMATIONS_H_ diff --git a/mediapipe/graphs/instant_motion_tracking/instant_motion_tracking.pbtxt b/mediapipe/graphs/instant_motion_tracking/instant_motion_tracking.pbtxt new file mode 100644 index 000000000..468262be2 --- /dev/null +++ b/mediapipe/graphs/instant_motion_tracking/instant_motion_tracking.pbtxt @@ -0,0 +1,80 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# MediaPipe graph that performs region tracking and 3d object (AR sticker) rendering. + +# Images in/out of graph with sticker data and IMU information from device +input_stream: "input_video" +input_stream: "sticker_sentinel" +input_stream: "sticker_proto_string" +input_stream: "imu_rotation_matrix" +input_stream: "gif_texture" +input_stream: "gif_aspect_ratio" +output_stream: "output_video" + +# Converts sticker data into user data (rotations/scalings), render data, and +# initial anchors. +node { + calculator: "StickerManagerCalculator" + input_stream: "PROTO:sticker_proto_string" + output_stream: "ANCHORS:initial_anchor_data" + output_stream: "USER_ROTATIONS:user_rotation_data" + output_stream: "USER_SCALINGS:user_scaling_data" + output_stream: "RENDER_DATA:sticker_render_data" +} + +# Uses box tracking in order to create 'anchors' for associated 3d stickers. +node { + calculator: "RegionTrackingSubgraph" + input_stream: "VIDEO:input_video" + input_stream: "SENTINEL:sticker_sentinel" + input_stream: "ANCHORS:initial_anchor_data" + output_stream: "ANCHORS:tracked_anchor_data" +} + +# Concatenates all transformations to generate model matrices for the OpenGL +# animation overlay calculator. 
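+# (Illustrative note: conceptually, each sticker's model matrix combines the
+# tracked anchor position, the device IMU rotation, and the user-supplied
+# rotation and scaling; the exact composition lives in
+# MatricesManagerCalculator.)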
+node { + calculator: "MatricesManagerCalculator" + input_stream: "ANCHORS:tracked_anchor_data" + input_stream: "IMU_ROTATION:imu_rotation_matrix" + input_stream: "USER_ROTATIONS:user_rotation_data" + input_stream: "USER_SCALINGS:user_scaling_data" + input_stream: "RENDER_DATA:sticker_render_data" + input_stream: "GIF_ASPECT_RATIO:gif_aspect_ratio" + output_stream: "MATRICES:0:gif_matrices" + output_stream: "MATRICES:1:asset_3d_matrices" + input_side_packet: "FOV:vertical_fov_radians" + input_side_packet: "ASPECT_RATIO:aspect_ratio" +} + +# Renders the final 3d stickers and overlays them on input image. +node { + calculator: "GlAnimationOverlayCalculator" + input_stream: "VIDEO:input_video" + input_stream: "MODEL_MATRICES:gif_matrices" + input_stream: "TEXTURE:gif_texture" + input_side_packet: "ANIMATION_ASSET:gif_asset_name" + output_stream: "asset_gif_rendered" +} + +# Renders the final 3d stickers and overlays them on top of the input image. +node { + calculator: "GlAnimationOverlayCalculator" + input_stream: "VIDEO:asset_gif_rendered" + input_stream: "MODEL_MATRICES:asset_3d_matrices" + input_side_packet: "TEXTURE:texture_3d" + input_side_packet: "ANIMATION_ASSET:asset_3d" + output_stream: "output_video" +} diff --git a/mediapipe/graphs/instant_motion_tracking/subgraphs/BUILD b/mediapipe/graphs/instant_motion_tracking/subgraphs/BUILD new file mode 100644 index 000000000..cd1561bb7 --- /dev/null +++ b/mediapipe/graphs/instant_motion_tracking/subgraphs/BUILD @@ -0,0 +1,32 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +load( + "//mediapipe/framework/tool:mediapipe_graph.bzl", + "mediapipe_simple_subgraph", +) + +licenses(["notice"]) + +package(default_visibility = ["//visibility:public"]) + +mediapipe_simple_subgraph( + name = "region_tracking", + graph = "region_tracking.pbtxt", + register_as = "RegionTrackingSubgraph", + deps = [ + "//mediapipe/graphs/instant_motion_tracking/calculators:tracked_anchor_manager_calculator", + "//mediapipe/graphs/tracking/subgraphs:box_tracking_gpu", + ], +) diff --git a/mediapipe/graphs/instant_motion_tracking/subgraphs/region_tracking.pbtxt b/mediapipe/graphs/instant_motion_tracking/subgraphs/region_tracking.pbtxt new file mode 100644 index 000000000..f8ef3ad0f --- /dev/null +++ b/mediapipe/graphs/instant_motion_tracking/subgraphs/region_tracking.pbtxt @@ -0,0 +1,47 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# MediaPipe graph that performs region tracking on initial anchor positions +# provided by the application + +# Images in/out of graph with tracked and scaled normalized anchor data +type: "RegionTrackingSubgraph" +input_stream: "VIDEO:input_video" +input_stream: "SENTINEL:sticker_sentinel" +input_stream: "ANCHORS:initial_anchor_data" +output_stream: "ANCHORS:tracked_scaled_anchor_data" + +# Manages the anchors and tracking if user changes/adds/deletes anchors +node { + calculator: "TrackedAnchorManagerCalculator" + input_stream: "SENTINEL:sticker_sentinel" + input_stream: "ANCHORS:initial_anchor_data" + input_stream: "BOXES:boxes" + input_stream_info: { + tag_index: 'BOXES' + back_edge: true + } + output_stream: "START_POS:start_pos" + output_stream: "CANCEL_ID:cancel_object_id" + output_stream: "ANCHORS:tracked_scaled_anchor_data" +} + +# Subgraph performs anchor placement and tracking +node { + calculator: "BoxTrackingSubgraphGpu" + input_stream: "VIDEO:input_video" + input_stream: "BOXES:start_pos" + input_stream: "CANCEL_ID:cancel_object_id" + output_stream: "BOXES:boxes" +} diff --git a/mediapipe/graphs/iris_tracking/BUILD b/mediapipe/graphs/iris_tracking/BUILD index ec9345564..86e667b80 100644 --- a/mediapipe/graphs/iris_tracking/BUILD +++ b/mediapipe/graphs/iris_tracking/BUILD @@ -17,7 +17,7 @@ load( "mediapipe_binary_graph", ) -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//visibility:public"]) @@ -30,6 +30,7 @@ cc_library( "//mediapipe/calculators/image:image_file_properties_calculator", "//mediapipe/calculators/image:opencv_encoded_image_to_image_frame_calculator", "//mediapipe/calculators/image:opencv_image_encoder_calculator", + "//mediapipe/graphs/iris_tracking/calculators:update_face_landmarks_calculator", "//mediapipe/graphs/iris_tracking/subgraphs:iris_and_depth_renderer_cpu", "//mediapipe/modules/face_landmark:face_landmark_front_cpu", "//mediapipe/modules/iris_landmark:iris_landmark_left_and_right_cpu", @@ -42,6 +43,7 @@ cc_library( "//mediapipe/calculators/core:constant_side_packet_calculator", "//mediapipe/calculators/core:flow_limiter_calculator", "//mediapipe/calculators/core:split_vector_calculator", + "//mediapipe/graphs/iris_tracking/calculators:update_face_landmarks_calculator", "//mediapipe/graphs/iris_tracking/subgraphs:iris_renderer_cpu", "//mediapipe/modules/face_landmark:face_landmark_front_cpu", "//mediapipe/modules/iris_landmark:iris_landmark_left_and_right_cpu", @@ -56,6 +58,7 @@ cc_library( "//mediapipe/calculators/core:split_vector_calculator", "//mediapipe/calculators/video:opencv_video_decoder_calculator", "//mediapipe/calculators/video:opencv_video_encoder_calculator", + "//mediapipe/graphs/iris_tracking/calculators:update_face_landmarks_calculator", "//mediapipe/graphs/iris_tracking/subgraphs:iris_renderer_cpu", "//mediapipe/modules/face_landmark:face_landmark_front_cpu", "//mediapipe/modules/iris_landmark:iris_landmark_left_and_right_cpu", @@ -68,6 +71,7 @@ cc_library( "//mediapipe/calculators/core:constant_side_packet_calculator", "//mediapipe/calculators/core:flow_limiter_calculator", "//mediapipe/calculators/core:split_vector_calculator", + "//mediapipe/graphs/iris_tracking/calculators:update_face_landmarks_calculator", "//mediapipe/graphs/iris_tracking/subgraphs:iris_and_depth_renderer_gpu", "//mediapipe/modules/face_landmark:face_landmark_front_gpu", "//mediapipe/modules/iris_landmark:iris_landmark_left_and_right_gpu", diff --git a/mediapipe/graphs/iris_tracking/calculators/BUILD 
b/mediapipe/graphs/iris_tracking/calculators/BUILD
index 406fc272e..3a3d57a0f 100644
--- a/mediapipe/graphs/iris_tracking/calculators/BUILD
+++ b/mediapipe/graphs/iris_tracking/calculators/BUILD
@@ -14,7 +14,7 @@

 load("//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library")

-licenses(["notice"])  # Apache 2.0
+licenses(["notice"])

 proto_library(
     name = "iris_to_render_data_calculator_proto",
@@ -90,3 +90,18 @@ cc_library(
     ],
     alwayslink = 1,
 )
+
+cc_library(
+    name = "update_face_landmarks_calculator",
+    srcs = ["update_face_landmarks_calculator.cc"],
+    visibility = ["//visibility:public"],
+    deps = [
+        "//mediapipe/framework:calculator_framework",
+        "//mediapipe/framework/formats:image_file_properties_cc_proto",
+        "//mediapipe/framework/formats:landmark_cc_proto",
+        "//mediapipe/framework/port:ret_check",
+        "//mediapipe/framework/port:status",
+        "@com_google_absl//absl/strings",
+    ],
+    alwayslink = 1,
+)
diff --git a/mediapipe/graphs/iris_tracking/calculators/update_face_landmarks_calculator.cc b/mediapipe/graphs/iris_tracking/calculators/update_face_landmarks_calculator.cc
new file mode 100644
index 000000000..d1bb4b850
--- /dev/null
+++ b/mediapipe/graphs/iris_tracking/calculators/update_face_landmarks_calculator.cc
@@ -0,0 +1,269 @@
+// Copyright 2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <memory>
+#include <vector>
+
+#include "absl/strings/str_cat.h"
+#include "mediapipe/framework/calculator_framework.h"
+#include "mediapipe/framework/formats/landmark.pb.h"
+#include "mediapipe/framework/port/ret_check.h"
+#include "mediapipe/framework/port/status.h"
+
+namespace mediapipe {
+
+namespace {
+
+constexpr char kFaceLandmarksTag[] = "FACE_LANDMARKS";
+constexpr char kNewEyeLandmarksTag[] = "NEW_EYE_LANDMARKS";
+constexpr char kUpdatedFaceLandmarksTag[] = "UPDATED_FACE_LANDMARKS";
+
+constexpr int kNumFaceLandmarks = 468;
+// 71 landmarks for the left eye and 71 landmarks for the right eye.
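+// The new eye landmarks are expected with all left eye landmarks first,
+// matching the order of kEyeLandmarkIndicesInFaceLandmarks below.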
+constexpr int kNumEyeLandmarks = 142; + +constexpr int kEyeLandmarkIndicesInFaceLandmarks[] = { + // Left eye + // eye lower contour + 33, + 7, + 163, + 144, + 145, + 153, + 154, + 155, + 133, + // eye upper contour (excluding corners) + 246, + 161, + 160, + 159, + 158, + 157, + 173, + // halo x2 lower contour + 130, + 25, + 110, + 24, + 23, + 22, + 26, + 112, + 243, + // halo x2 upper contour (excluding corners) + 247, + 30, + 29, + 27, + 28, + 56, + 190, + // halo x3 lower contour + 226, + 31, + 228, + 229, + 230, + 231, + 232, + 233, + 244, + // halo x3 upper contour (excluding corners) + 113, + 225, + 224, + 223, + 222, + 221, + 189, + // halo x4 upper contour (no lower because of mesh structure) + // or eyebrow inner contour + 35, + 124, + 46, + 53, + 52, + 65, + // halo x5 lower contour + 143, + 111, + 117, + 118, + 119, + 120, + 121, + 128, + 245, + // halo x5 upper contour (excluding corners) + // or eyebrow outer contour + 156, + 70, + 63, + 105, + 66, + 107, + 55, + 193, + + // Right eye + // eye lower contour + 263, + 249, + 390, + 373, + 374, + 380, + 381, + 382, + 362, + // eye upper contour (excluding corners) + 466, + 388, + 387, + 386, + 385, + 384, + 398, + // halo x2 lower contour + 359, + 255, + 339, + 254, + 253, + 252, + 256, + 341, + 463, + // halo x2 upper contour (excluding corners) + 467, + 260, + 259, + 257, + 258, + 286, + 414, + // halo x3 lower contour + 446, + 261, + 448, + 449, + 450, + 451, + 452, + 453, + 464, + // halo x3 upper contour (excluding corners) + 342, + 445, + 444, + 443, + 442, + 441, + 413, + // halo x4 upper contour (no lower because of mesh structure) + // or eyebrow inner contour + 265, + 353, + 276, + 283, + 282, + 295, + // halo x5 lower contour + 372, + 340, + 346, + 347, + 348, + 349, + 350, + 357, + 465, + // halo x5 upper contour (excluding corners) + // or eyebrow outer contour + 383, + 300, + 293, + 334, + 296, + 336, + 285, + 417, +}; + +} // namespace + +// Update face landmarks with new (e.g., refined) values. Currently only updates +// landmarks around the eyes. 
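+//
+// The i-th landmark in NEW_EYE_LANDMARKS overwrites the face landmark at
+// index kEyeLandmarkIndicesInFaceLandmarks[i]; for example, the first new
+// eye landmark replaces face landmark 33 (the start of the left eye's lower
+// contour).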
+//
+// Usage example:
+// node {
+//   calculator: "UpdateFaceLandmarksCalculator"
+//   input_stream: "NEW_EYE_LANDMARKS:new_eye_landmarks"
+//   input_stream: "FACE_LANDMARKS:face_landmarks"
+//   output_stream: "UPDATED_FACE_LANDMARKS:refine_face_landmarks"
+// }
+//
+class UpdateFaceLandmarksCalculator : public CalculatorBase {
+ public:
+  static ::mediapipe::Status GetContract(CalculatorContract* cc) {
+    cc->Inputs().Tag(kFaceLandmarksTag).Set<NormalizedLandmarkList>();
+    cc->Inputs().Tag(kNewEyeLandmarksTag).Set<NormalizedLandmarkList>();
+
+    cc->Outputs().Tag(kUpdatedFaceLandmarksTag).Set<NormalizedLandmarkList>();
+
+    return ::mediapipe::OkStatus();
+  }
+  ::mediapipe::Status Open(CalculatorContext* cc) override {
+    cc->SetOffset(TimestampDiff(0));
+    return ::mediapipe::OkStatus();
+  }
+
+  ::mediapipe::Status Process(CalculatorContext* cc) override;
+};
+REGISTER_CALCULATOR(UpdateFaceLandmarksCalculator);
+
+::mediapipe::Status UpdateFaceLandmarksCalculator::Process(
+    CalculatorContext* cc) {
+  if (cc->Inputs().Tag(kFaceLandmarksTag).IsEmpty() ||
+      cc->Inputs().Tag(kNewEyeLandmarksTag).IsEmpty()) {
+    return ::mediapipe::OkStatus();
+  }
+  const auto& face_landmarks =
+      cc->Inputs().Tag(kFaceLandmarksTag).Get<NormalizedLandmarkList>();
+  const auto& new_eye_landmarks =
+      cc->Inputs().Tag(kNewEyeLandmarksTag).Get<NormalizedLandmarkList>();
+
+  RET_CHECK_EQ(face_landmarks.landmark_size(), kNumFaceLandmarks)
+      << "Wrong number of face landmarks";
+  RET_CHECK_EQ(new_eye_landmarks.landmark_size(), kNumEyeLandmarks)
+      << "Wrong number of eye landmarks";
+
+  auto refined_face_landmarks =
+      absl::make_unique<NormalizedLandmarkList>(face_landmarks);
+  for (int i = 0; i < kNumEyeLandmarks; ++i) {
+    const auto& refined_ld = new_eye_landmarks.landmark(i);
+    const int id = kEyeLandmarkIndicesInFaceLandmarks[i];
+    refined_face_landmarks->mutable_landmark(id)->set_x(refined_ld.x());
+    refined_face_landmarks->mutable_landmark(id)->set_y(refined_ld.y());
+    refined_face_landmarks->mutable_landmark(id)->set_z(refined_ld.z());
+    refined_face_landmarks->mutable_landmark(id)->set_visibility(
+        refined_ld.visibility());
+  }
+  cc->Outputs()
+      .Tag(kUpdatedFaceLandmarksTag)
+      .Add(refined_face_landmarks.release(), cc->InputTimestamp());
+
+  return ::mediapipe::OkStatus();
+}
+
+}  // namespace mediapipe
diff --git a/mediapipe/graphs/iris_tracking/iris_depth_cpu.pbtxt b/mediapipe/graphs/iris_tracking/iris_depth_cpu.pbtxt
index da5fd2ddf..3597e7f53 100644
--- a/mediapipe/graphs/iris_tracking/iris_depth_cpu.pbtxt
+++ b/mediapipe/graphs/iris_tracking/iris_depth_cpu.pbtxt
@@ -124,11 +124,25 @@ node {
   output_stream: "RIGHT_EYE_ROI:right_eye_rect_from_landmarks"
 }

+node {
+  calculator: "ConcatenateNormalizedLandmarkListCalculator"
+  input_stream: "left_eye_contour_landmarks"
+  input_stream: "right_eye_contour_landmarks"
+  output_stream: "refined_eye_landmarks"
+}
+
+node {
+  calculator: "UpdateFaceLandmarksCalculator"
+  input_stream: "NEW_EYE_LANDMARKS:refined_eye_landmarks"
+  input_stream: "FACE_LANDMARKS:face_landmarks"
+  output_stream: "UPDATED_FACE_LANDMARKS:updated_face_landmarks"
+}
+
 # Renders annotations and overlays them on top of the input images.
node { calculator: "IrisAndDepthRendererCpu" input_stream: "IMAGE:input_image" - input_stream: "FACE_LANDMARKS:face_landmarks" + input_stream: "FACE_LANDMARKS:updated_face_landmarks" input_stream: "EYE_LANDMARKS_LEFT:left_eye_contour_landmarks" input_stream: "EYE_LANDMARKS_RIGHT:right_eye_contour_landmarks" input_stream: "IRIS_LANDMARKS_LEFT:left_iris_landmarks" @@ -138,6 +152,7 @@ node { input_stream: "RIGHT_EYE_RECT:right_eye_rect_from_landmarks" input_stream: "DETECTIONS:face_detections" input_side_packet: "IMAGE_FILE_PROPERTIES:image_file_properties" + output_stream: "IRIS_LANDMARKS:iris_landmarks" output_stream: "IMAGE:output_image" output_stream: "LEFT_IRIS_DEPTH_MM:left_iris_depth_mm" output_stream: "RIGHT_IRIS_DEPTH_MM:right_iris_depth_mm" diff --git a/mediapipe/graphs/iris_tracking/iris_tracking_cpu.pbtxt b/mediapipe/graphs/iris_tracking/iris_tracking_cpu.pbtxt index 2d21913d8..c0a385757 100644 --- a/mediapipe/graphs/iris_tracking/iris_tracking_cpu.pbtxt +++ b/mediapipe/graphs/iris_tracking/iris_tracking_cpu.pbtxt @@ -8,6 +8,8 @@ input_stream: "input_video" # CPU image. (ImageFrame) output_stream: "output_video" +# Face landmarks with iris. (NormalizedLandmarkList) +output_stream: "face_landmarks_with_iris" # Defines how many faces to detect. Iris tracking currently only handles one # face (left and right eye), and therefore this should always be set to 1. @@ -101,11 +103,25 @@ node { output_stream: "RIGHT_EYE_ROI:right_eye_rect_from_landmarks" } +node { + calculator: "ConcatenateNormalizedLandmarkListCalculator" + input_stream: "left_eye_contour_landmarks" + input_stream: "right_eye_contour_landmarks" + output_stream: "refined_eye_landmarks" +} + +node { + calculator: "UpdateFaceLandmarksCalculator" + input_stream: "NEW_EYE_LANDMARKS:refined_eye_landmarks" + input_stream: "FACE_LANDMARKS:face_landmarks" + output_stream: "UPDATED_FACE_LANDMARKS:updated_face_landmarks" +} + # Renders annotations and overlays them on top of the input images. 
node { calculator: "IrisRendererCpu" input_stream: "IMAGE:input_video" - input_stream: "FACE_LANDMARKS:face_landmarks" + input_stream: "FACE_LANDMARKS:updated_face_landmarks" input_stream: "EYE_LANDMARKS_LEFT:left_eye_contour_landmarks" input_stream: "EYE_LANDMARKS_RIGHT:right_eye_contour_landmarks" input_stream: "IRIS_LANDMARKS_LEFT:left_iris_landmarks" @@ -114,5 +130,13 @@ node { input_stream: "LEFT_EYE_RECT:left_eye_rect_from_landmarks" input_stream: "RIGHT_EYE_RECT:right_eye_rect_from_landmarks" input_stream: "DETECTIONS:face_detections" + output_stream: "IRIS_LANDMARKS:iris_landmarks" output_stream: "IMAGE:output_video" } + +node { + calculator: "ConcatenateNormalizedLandmarkListCalculator" + input_stream: "updated_face_landmarks" + input_stream: "iris_landmarks" + output_stream: "face_landmarks_with_iris" +} diff --git a/mediapipe/graphs/iris_tracking/iris_tracking_cpu_video_input.pbtxt b/mediapipe/graphs/iris_tracking/iris_tracking_cpu_video_input.pbtxt index 8566adc3e..82229bdcb 100644 --- a/mediapipe/graphs/iris_tracking/iris_tracking_cpu_video_input.pbtxt +++ b/mediapipe/graphs/iris_tracking/iris_tracking_cpu_video_input.pbtxt @@ -106,11 +106,25 @@ node { output_stream: "RIGHT_EYE_ROI:right_eye_rect_from_landmarks" } +node { + calculator: "ConcatenateNormalizedLandmarkListCalculator" + input_stream: "left_eye_contour_landmarks" + input_stream: "right_eye_contour_landmarks" + output_stream: "refined_eye_landmarks" +} + +node { + calculator: "UpdateFaceLandmarksCalculator" + input_stream: "NEW_EYE_LANDMARKS:refined_eye_landmarks" + input_stream: "FACE_LANDMARKS:face_landmarks" + output_stream: "UPDATED_FACE_LANDMARKS:updated_face_landmarks" +} + # Renders annotations and overlays them on top of the input images. node { calculator: "IrisRendererCpu" input_stream: "IMAGE:input_video" - input_stream: "FACE_LANDMARKS:face_landmarks" + input_stream: "FACE_LANDMARKS:updated_face_landmarks" input_stream: "EYE_LANDMARKS_LEFT:left_eye_contour_landmarks" input_stream: "EYE_LANDMARKS_RIGHT:right_eye_contour_landmarks" input_stream: "IRIS_LANDMARKS_LEFT:left_iris_landmarks" @@ -119,6 +133,7 @@ node { input_stream: "LEFT_EYE_RECT:left_eye_rect_from_landmarks" input_stream: "RIGHT_EYE_RECT:right_eye_rect_from_landmarks" input_stream: "DETECTIONS:face_detections" + output_stream: "IRIS_LANDMARKS:iris_landmarks" output_stream: "IMAGE:output_video" } diff --git a/mediapipe/graphs/iris_tracking/iris_tracking_gpu.pbtxt b/mediapipe/graphs/iris_tracking/iris_tracking_gpu.pbtxt index 27f9666a6..505a9514d 100644 --- a/mediapipe/graphs/iris_tracking/iris_tracking_gpu.pbtxt +++ b/mediapipe/graphs/iris_tracking/iris_tracking_gpu.pbtxt @@ -7,6 +7,8 @@ input_stream: "input_video" # GPU buffer. (GpuBuffer) output_stream: "output_video" +# Face landmarks with iris. (NormalizedLandmarkList) +output_stream: "face_landmarks_with_iris" # Throttles the images flowing downstream for flow control. 
It passes through # the very first incoming image unaltered, and waits for downstream nodes @@ -121,11 +123,25 @@ node { output_stream: "RIGHT_EYE_ROI:right_eye_rect_from_landmarks" } +node { + calculator: "ConcatenateNormalizedLandmarkListCalculator" + input_stream: "left_eye_contour_landmarks" + input_stream: "right_eye_contour_landmarks" + output_stream: "refined_eye_landmarks" +} + +node { + calculator: "UpdateFaceLandmarksCalculator" + input_stream: "NEW_EYE_LANDMARKS:refined_eye_landmarks" + input_stream: "FACE_LANDMARKS:face_landmarks" + output_stream: "UPDATED_FACE_LANDMARKS:updated_face_landmarks" +} + # Renders annotations and overlays them on top of the input images. node { calculator: "IrisAndDepthRendererGpu" input_stream: "IMAGE:throttled_input_video" - input_stream: "FACE_LANDMARKS:face_landmarks" + input_stream: "FACE_LANDMARKS:updated_face_landmarks" input_stream: "EYE_LANDMARKS_LEFT:left_eye_contour_landmarks" input_stream: "EYE_LANDMARKS_RIGHT:right_eye_contour_landmarks" input_stream: "IRIS_LANDMARKS_LEFT:left_iris_landmarks" @@ -138,3 +154,10 @@ node { output_stream: "IRIS_LANDMARKS:iris_landmarks" output_stream: "IMAGE:output_video" } + +node { + calculator: "ConcatenateNormalizedLandmarkListCalculator" + input_stream: "updated_face_landmarks" + input_stream: "iris_landmarks" + output_stream: "face_landmarks_with_iris" +} diff --git a/mediapipe/graphs/iris_tracking/subgraphs/BUILD b/mediapipe/graphs/iris_tracking/subgraphs/BUILD index fef1c59b2..a6dd9e510 100644 --- a/mediapipe/graphs/iris_tracking/subgraphs/BUILD +++ b/mediapipe/graphs/iris_tracking/subgraphs/BUILD @@ -17,7 +17,7 @@ load( "mediapipe_simple_subgraph", ) -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//visibility:public"]) @@ -32,6 +32,8 @@ cc_library( "//mediapipe/calculators/util:detections_to_render_data_calculator", "//mediapipe/calculators/util:landmarks_to_render_data_calculator", "//mediapipe/calculators/util:rect_to_render_data_calculator", + "//mediapipe/graphs/face_mesh/calculators:face_landmarks_to_render_data_calculator", + "//mediapipe/graphs/iris_tracking/calculators:iris_to_render_data_calculator", ], ) @@ -42,7 +44,6 @@ mediapipe_simple_subgraph( deps = [ ":renderer_calculators", "//mediapipe/graphs/iris_tracking/calculators:iris_to_depth_calculator", - "//mediapipe/graphs/iris_tracking/calculators:iris_to_render_data_calculator", ], ) @@ -52,7 +53,6 @@ mediapipe_simple_subgraph( register_as = "IrisRendererCpu", deps = [ ":renderer_calculators", - "//mediapipe/graphs/iris_tracking/calculators:iris_to_render_data_calculator", ], ) @@ -63,6 +63,5 @@ mediapipe_simple_subgraph( deps = [ ":renderer_calculators", "//mediapipe/graphs/iris_tracking/calculators:iris_to_depth_calculator", - "//mediapipe/graphs/iris_tracking/calculators:iris_to_render_data_calculator", ], ) diff --git a/mediapipe/graphs/iris_tracking/subgraphs/iris_and_depth_renderer_cpu.pbtxt b/mediapipe/graphs/iris_tracking/subgraphs/iris_and_depth_renderer_cpu.pbtxt index 2b5ab195f..fad6d4a88 100644 --- a/mediapipe/graphs/iris_tracking/subgraphs/iris_and_depth_renderer_cpu.pbtxt +++ b/mediapipe/graphs/iris_tracking/subgraphs/iris_and_depth_renderer_cpu.pbtxt @@ -13,6 +13,7 @@ input_stream: "NORM_RECT:rect" input_stream: "LEFT_EYE_RECT:left_eye_rect_from_landmarks" input_stream: "RIGHT_EYE_RECT:right_eye_rect_from_landmarks" input_side_packet: "IMAGE_FILE_PROPERTIES:image_file_properties" +output_stream: "IRIS_LANDMARKS:iris_landmarks" output_stream: "IMAGE:output_image" output_stream: 
"LEFT_IRIS_DEPTH_MM:left_iris_depth_mm" output_stream: "RIGHT_IRIS_DEPTH_MM:right_iris_depth_mm" @@ -47,13 +48,19 @@ node { output_stream: "iris_landmarks" } -# Concatenate iris landmarks from both eyes and face landmarks. +# Converts landmarks to drawing primitives for annotation overlay. node { - calculator: "ConcatenateNormalizedLandmarkListCalculator" - input_stream: "left_iris_landmarks" - input_stream: "right_iris_landmarks" - input_stream: "face_landmarks" - output_stream: "face_iris_landmarks" + calculator: "FaceLandmarksToRenderDataCalculator" + input_stream: "NORM_LANDMARKS:face_landmarks" + output_stream: "RENDER_DATA:face_landmarks_render_data" + node_options: { + [type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] { + landmark_color { r: 150 g: 0 b: 0 } + connection_color { r: 0 g: 150 b: 0 } + thickness: 2 + visualize_landmark_depth: false + } + } } node { @@ -252,6 +259,7 @@ node { calculator: "AnnotationOverlayCalculator" input_stream: "IMAGE:input_image" input_stream: "detection_render_data" + input_stream: "face_landmarks_render_data" input_stream: "right_eye_contour_landmarks_render_data" input_stream: "left_eye_contour_landmarks_render_data" input_stream: "iris_render_data" diff --git a/mediapipe/graphs/iris_tracking/subgraphs/iris_and_depth_renderer_gpu.pbtxt b/mediapipe/graphs/iris_tracking/subgraphs/iris_and_depth_renderer_gpu.pbtxt index 7a64be6e0..ba043d30d 100644 --- a/mediapipe/graphs/iris_tracking/subgraphs/iris_and_depth_renderer_gpu.pbtxt +++ b/mediapipe/graphs/iris_tracking/subgraphs/iris_and_depth_renderer_gpu.pbtxt @@ -46,13 +46,19 @@ node { output_stream: "iris_landmarks" } -# Concatenate iris landmarks from both eyes and face landmarks. +# Converts landmarks to drawing primitives for annotation overlay. node { - calculator: "ConcatenateNormalizedLandmarkListCalculator" - input_stream: "left_iris_landmarks" - input_stream: "right_iris_landmarks" - input_stream: "face_landmarks" - output_stream: "face_iris_landmarks" + calculator: "FaceLandmarksToRenderDataCalculator" + input_stream: "NORM_LANDMARKS:face_landmarks" + output_stream: "RENDER_DATA:face_landmarks_render_data" + node_options: { + [type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] { + landmark_color { r: 150 g: 0 b: 0 } + connection_color { r: 0 g: 150 b: 0 } + thickness: 2 + visualize_landmark_depth: false + } + } } node { @@ -251,6 +257,7 @@ node { calculator: "AnnotationOverlayCalculator" input_stream: "IMAGE_GPU:input_image" input_stream: "detection_render_data" + input_stream: "face_landmarks_render_data" input_stream: "right_eye_contour_landmarks_render_data" input_stream: "left_eye_contour_landmarks_render_data" input_stream: "iris_render_data" diff --git a/mediapipe/graphs/iris_tracking/subgraphs/iris_renderer_cpu.pbtxt b/mediapipe/graphs/iris_tracking/subgraphs/iris_renderer_cpu.pbtxt index b88731e33..81a3c90d0 100644 --- a/mediapipe/graphs/iris_tracking/subgraphs/iris_renderer_cpu.pbtxt +++ b/mediapipe/graphs/iris_tracking/subgraphs/iris_renderer_cpu.pbtxt @@ -12,6 +12,7 @@ input_stream: "IRIS_LANDMARKS_RIGHT:right_iris_landmarks" input_stream: "NORM_RECT:rect" input_stream: "LEFT_EYE_RECT:left_eye_rect_from_landmarks" input_stream: "RIGHT_EYE_RECT:right_eye_rect_from_landmarks" +output_stream: "IRIS_LANDMARKS:iris_landmarks" output_stream: "IMAGE:output_image" node { @@ -44,15 +45,22 @@ node { output_stream: "iris_landmarks" } -# Concatenate iris landmarks from both eyes and face landmarks. 
+# Converts landmarks to drawing primitives for annotation overlay. node { - calculator: "ConcatenateNormalizedLandmarkListCalculator" - input_stream: "left_iris_landmarks" - input_stream: "right_iris_landmarks" - input_stream: "face_landmarks" - output_stream: "face_iris_landmarks" + calculator: "FaceLandmarksToRenderDataCalculator" + input_stream: "NORM_LANDMARKS:face_landmarks" + output_stream: "RENDER_DATA:face_landmarks_render_data" + node_options: { + [type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] { + landmark_color { r: 150 g: 0 b: 0 } + connection_color { r: 0 g: 150 b: 0 } + thickness: 2 + visualize_landmark_depth: false + } + } } + node { calculator: "ImagePropertiesCalculator" input_stream: "IMAGE:input_image" @@ -238,6 +246,7 @@ node { calculator: "AnnotationOverlayCalculator" input_stream: "IMAGE:input_image" input_stream: "detection_render_data" + input_stream: "face_landmarks_render_data" input_stream: "right_eye_contour_landmarks_render_data" input_stream: "left_eye_contour_landmarks_render_data" input_stream: "iris_render_data" diff --git a/mediapipe/graphs/media_sequence/BUILD b/mediapipe/graphs/media_sequence/BUILD index 42af89b51..e989147dd 100644 --- a/mediapipe/graphs/media_sequence/BUILD +++ b/mediapipe/graphs/media_sequence/BUILD @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//visibility:public"]) diff --git a/mediapipe/graphs/object_detection/BUILD b/mediapipe/graphs/object_detection/BUILD index 36c0181a9..ef53fd2f2 100644 --- a/mediapipe/graphs/object_detection/BUILD +++ b/mediapipe/graphs/object_detection/BUILD @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//visibility:public"]) diff --git a/mediapipe/graphs/object_detection_3d/BUILD b/mediapipe/graphs/object_detection_3d/BUILD index d8f0a9744..846aa6739 100644 --- a/mediapipe/graphs/object_detection_3d/BUILD +++ b/mediapipe/graphs/object_detection_3d/BUILD @@ -17,7 +17,7 @@ load( "mediapipe_binary_graph", ) -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//visibility:public"]) diff --git a/mediapipe/graphs/object_detection_3d/calculators/BUILD b/mediapipe/graphs/object_detection_3d/calculators/BUILD index be84c3e40..b2ddc9dce 100644 --- a/mediapipe/graphs/object_detection_3d/calculators/BUILD +++ b/mediapipe/graphs/object_detection_3d/calculators/BUILD @@ -14,7 +14,7 @@ load("//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library") -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//visibility:private"]) @@ -319,6 +319,7 @@ cc_library( "model.h", "types.h", ], + visibility = ["//visibility:public"], deps = [ ":annotation_cc_proto", ":object_cc_proto", diff --git a/mediapipe/graphs/object_detection_3d/calculators/gl_animation_overlay_calculator.cc b/mediapipe/graphs/object_detection_3d/calculators/gl_animation_overlay_calculator.cc index be415b3cd..067b8d004 100644 --- a/mediapipe/graphs/object_detection_3d/calculators/gl_animation_overlay_calculator.cc +++ b/mediapipe/graphs/object_detection_3d/calculators/gl_animation_overlay_calculator.cc @@ -1,4 +1,4 @@ -// Copyright 2020 The MediaPipe Authors. 
+// Copyright 2020 Google LLC
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -47,7 +47,7 @@ typedef ImageFrame AssetTextureFormat;
 typedef GpuBuffer AssetTextureFormat;
 #endif

-enum { ATTRIB_VERTEX, ATTRIB_TEXTURE_POSITION, NUM_ATTRIBUTES };
+enum { ATTRIB_VERTEX, ATTRIB_TEXTURE_POSITION, ATTRIB_NORMAL, NUM_ATTRIBUTES };

 static const int kNumMatrixEntries = 16;

 // Hard-coded MVP Matrix for testing.
@@ -68,10 +68,14 @@ static const float kModelMatrix[] = {0.83704215,  -0.36174262, 0.41049102, 0.0,
 //  MODEL_MATRICES (TimedModelMatrixProtoList, optional):
 //    If provided, will set the model matrices for the objects to be rendered
 //    during future rendering calls.
+//  TEXTURE (ImageFrame on Android / GpuBuffer on iOS, semi-optional):
+//    Texture to use with animation file. Texture is REQUIRED to be passed into
+//    the calculator, but can be passed in as a Side Packet OR Input Stream.
 //
 // Input side packets:
-//  TEXTURE (ImageFrame on Android / GpuBuffer on iOS, required):
-//    Texture to use with animation file.
+//  TEXTURE (ImageFrame on Android / GpuBuffer on iOS, semi-optional):
+//    Texture to use with animation file. Texture is REQUIRED to be passed into
+//    the calculator, but can be passed in as a Side Packet OR Input Stream.
 //  ANIMATION_ASSET (String, required):
 //    Path of animation file to load and render. Should be generated by
 //    //java/com/google/android/apps/motionstills/SimpleObjEncryptor with
@@ -98,9 +102,9 @@ static const float kModelMatrix[] = {0.83704215,  -0.36174262, 0.41049102, 0.0,

 // Simple helper-struct for containing the parsed geometry data from a 3D
 // animation frame for rendering.
-
 struct TriangleMesh {
   int index_count = 0;  // Needed for glDrawElements rendering call
+  std::unique_ptr<float[]> normals = nullptr;
   std::unique_ptr<float[]> vertices = nullptr;
   std::unique_ptr<float[]> texture_coords = nullptr;
   std::unique_ptr<int16[]> triangle_indices = nullptr;
@@ -156,6 +160,7 @@ class GlAnimationOverlayCalculator : public CalculatorBase {
   void ComputeAspectRatioAndFovFromCameraParameters(
       const CameraParametersProto &camera_parameters, float *aspect_ratio,
       float *vertical_fov_degrees);
+
   int GetAnimationFrameIndex(Timestamp timestamp);
   ::mediapipe::Status GlSetup();
   ::mediapipe::Status GlBind(const TriangleMesh &triangle_mesh,
@@ -167,6 +172,9 @@ class GlAnimationOverlayCalculator : public CalculatorBase {
                              float z_far);
   void LoadModelMatrices(const TimedModelMatrixProtoList &model_matrices,
                          std::vector<Matrix4fCM> *current_model_matrices);
+  void CalculateTriangleMeshNormals(int normals_len,
+                                    TriangleMesh *triangle_mesh);
+  void Normalize3f(float input[3]);

 #if !defined(__ANDROID__)
   // Asset loading routine for all non-Android platforms.
@@ -198,7 +206,13 @@ REGISTER_CALCULATOR(GlAnimationOverlayCalculator);
     cc->Inputs().Tag("MASK_MODEL_MATRICES").Set<TimedModelMatrixProtoList>();
   }

-  cc->InputSidePackets().Tag("TEXTURE").Set<AssetTextureFormat>();
+  // Must have texture as Input Stream or Side Packet
+  if (cc->InputSidePackets().HasTag("TEXTURE")) {
+    cc->InputSidePackets().Tag("TEXTURE").Set<AssetTextureFormat>();
+  } else {
+    cc->Inputs().Tag("TEXTURE").Set<AssetTextureFormat>();
+  }
+
   cc->InputSidePackets().Tag("ANIMATION_ASSET").Set<std::string>();
   if (cc->InputSidePackets().HasTag("CAMERA_PARAMETERS_PROTO_STRING")) {
     cc->InputSidePackets()
@@ -216,6 +230,83 @@ REGISTER_CALCULATOR(GlAnimationOverlayCalculator);
   return ::mediapipe::OkStatus();
 }

+void GlAnimationOverlayCalculator::CalculateTriangleMeshNormals(
+    int normals_len, TriangleMesh *triangle_mesh) {
+  // Set triangle_mesh normals for shader usage
+  triangle_mesh->normals.reset(new float[normals_len]);
+  // Used for storing the vertex normals prior to averaging
+  std::vector<float> vertex_normals_sum(normals_len, 0.0f);
+  // Compute every triangle surface normal and store them for averaging
+  for (int idx = 0; idx < triangle_mesh->index_count; idx += 3) {
+    int v_idx[3];
+    v_idx[0] = triangle_mesh->triangle_indices.get()[idx];
+    v_idx[1] = triangle_mesh->triangle_indices.get()[idx + 1];
+    v_idx[2] = triangle_mesh->triangle_indices.get()[idx + 2];
+    // (V1) vertex X,Y,Z indices in triangle_mesh.vertices
+    const float v1x = triangle_mesh->vertices[v_idx[0] * 3];
+    const float v1y = triangle_mesh->vertices[v_idx[0] * 3 + 1];
+    const float v1z = triangle_mesh->vertices[v_idx[0] * 3 + 2];
+    // (V2) vertex X,Y,Z indices in triangle_mesh.vertices
+    const float v2x = triangle_mesh->vertices[v_idx[1] * 3];
+    const float v2y = triangle_mesh->vertices[v_idx[1] * 3 + 1];
+    const float v2z = triangle_mesh->vertices[v_idx[1] * 3 + 2];
+    // (V3) vertex X,Y,Z indices in triangle_mesh.vertices
+    const float v3x = triangle_mesh->vertices[v_idx[2] * 3];
+    const float v3y = triangle_mesh->vertices[v_idx[2] * 3 + 1];
+    const float v3z = triangle_mesh->vertices[v_idx[2] * 3 + 2];
+    // Calculate normals from vertices
+    // V2 - V1
+    const float ax = v2x - v1x;
+    const float ay = v2y - v1y;
+    const float az = v2z - v1z;
+    // V3 - V1
+    const float bx = v3x - v1x;
+    const float by = v3y - v1y;
+    const float bz = v3z - v1z;
+    // Calculate cross product
+    const float normal_x = ay * bz - az * by;
+    const float normal_y = az * bx - ax * bz;
+    const float normal_z = ax * by - ay * bx;
+    // The normals calculated above must be normalized if we wish to prevent
+    // triangles with a larger surface area from dominating the normal
+    // calculations; however, none of our current models require this
+    // normalization.
+
+    // Add the triangle surface normal to each of its three vertices, so that
+    // each vertex accumulates the normals of all adjacent triangles
+    for (int i = 0; i < 3; i++) {
+      vertex_normals_sum[v_idx[i] * 3] += normal_x;
+      vertex_normals_sum[v_idx[i] * 3 + 1] += normal_y;
+      vertex_normals_sum[v_idx[i] * 3 + 2] += normal_z;
+    }
+  }
+
+  // Combine all triangle normals connected to each vertex by adding the X,Y,Z
+  // value of each adjacent triangle surface normal to every vertex and then
+  // averaging the combined value.
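+  // Note: re-normalizing each summed normal below makes an explicit division
+  // by the number of adjacent triangles unnecessary, since scaling a vector
+  // does not change its direction.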
+  for (int idx = 0; idx < normals_len; idx += 3) {
+    float normal[3];
+    normal[0] = vertex_normals_sum[idx];
+    normal[1] = vertex_normals_sum[idx + 1];
+    normal[2] = vertex_normals_sum[idx + 2];
+    Normalize3f(normal);
+    triangle_mesh->normals.get()[idx] = normal[0];
+    triangle_mesh->normals.get()[idx + 1] = normal[1];
+    triangle_mesh->normals.get()[idx + 2] = normal[2];
+  }
+}
+
+void GlAnimationOverlayCalculator::Normalize3f(float input[3]) {
+  float product = 0.0;
+  product += input[0] * input[0];
+  product += input[1] * input[1];
+  product += input[2] * input[2];
+  float magnitude = sqrt(product);
+  input[0] /= magnitude;
+  input[1] /= magnitude;
+  input[2] /= magnitude;
+}
+
 // Helper function for initializing our perspective matrix.
 void GlAnimationOverlayCalculator::InitializePerspectiveMatrix(
     float aspect_ratio, float fov_degrees, float z_near, float z_far) {
@@ -319,6 +410,10 @@ bool GlAnimationOverlayCalculator::LoadAnimationAndroid(
       LOG(ERROR) << "Failed to read indices for frame " << frame_count_;
       return false;
     }
+
+    // Set the normals for this triangle_mesh
+    CalculateTriangleMeshNormals(lengths[0], &triangle_mesh);
+
     frame_count_++;
   }
   AAsset_close(asset);
@@ -383,6 +478,10 @@ bool GlAnimationOverlayCalculator::LoadAnimation(const std::string &filename) {
                  << frame_count_;
       return false;
     }
+
+    // Set the normals for this triangle_mesh
+    CalculateTriangleMeshNormals(lengths[0], &triangle_mesh);
+
     frame_count_++;
   }

@@ -473,10 +572,13 @@ void GlAnimationOverlayCalculator::ComputeAspectRatioAndFovFromCameraParameters(
     mask_texture_ = helper_.CreateSourceTexture(mask_texture);
   }

-  // Load in our asset's texture data
-  const auto &input_texture =
-      cc->InputSidePackets().Tag("TEXTURE").Get<AssetTextureFormat>();
-  texture_ = helper_.CreateSourceTexture(input_texture);
+  // Load in all static texture data if it exists
+  if (cc->InputSidePackets().HasTag("TEXTURE")) {
+    const auto &input_texture =
+        cc->InputSidePackets().Tag("TEXTURE").Get<AssetTextureFormat>();
+    texture_ = helper_.CreateSourceTexture(input_texture);
+  }
+
   VLOG(2) << "Input texture size: " << texture_.width() << ", "
           << texture_.height() << std::endl;
@@ -604,6 +706,14 @@ void GlAnimationOverlayCalculator::LoadModelMatrices(
   glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
   int frame_index = GetAnimationFrameIndex(cc->InputTimestamp());
   const TriangleMesh &current_frame = triangle_meshes_[frame_index];
+
+  // Load dynamic texture if it exists
+  if (cc->Inputs().HasTag("TEXTURE")) {
+    const auto &input_texture =
+        cc->Inputs().Tag("TEXTURE").Get<AssetTextureFormat>();
+    texture_ = helper_.CreateSourceTexture(input_texture);
+  }
+
   MP_RETURN_IF_ERROR(GlBind(current_frame, texture_));
   if (has_model_matrix_stream_) {
     // Draw objects using our latest model matrix stream packet.
@@ -616,8 +726,9 @@ void GlAnimationOverlayCalculator::LoadModelMatrices( } // Disable vertex attributes - GLCHECK(glEnableVertexAttribArray(ATTRIB_VERTEX)); - GLCHECK(glEnableVertexAttribArray(ATTRIB_TEXTURE_POSITION)); + GLCHECK(glDisableVertexAttribArray(ATTRIB_VERTEX)); + GLCHECK(glDisableVertexAttribArray(ATTRIB_TEXTURE_POSITION)); + GLCHECK(glDisableVertexAttribArray(ATTRIB_NORMAL)); // Disable depth test GLCHECK(glDisable(GL_DEPTH_TEST)); @@ -645,10 +756,12 @@ void GlAnimationOverlayCalculator::LoadModelMatrices( const GLint attr_location[NUM_ATTRIBUTES] = { ATTRIB_VERTEX, ATTRIB_TEXTURE_POSITION, + ATTRIB_NORMAL, }; const GLchar *attr_name[NUM_ATTRIBUTES] = { "position", "texture_coordinate", + "normal", }; const GLchar *vert_src = R"( @@ -660,28 +773,89 @@ void GlAnimationOverlayCalculator::LoadModelMatrices( // vertex position in threespace attribute vec4 position; + attribute vec3 normal; // texture coordinate for each vertex in normalized texture space (0..1) attribute mediump vec4 texture_coordinate; // texture coordinate for fragment shader (will be interpolated) - varying mediump vec2 sample_coordinate; + varying mediump vec2 sampleCoordinate; + varying mediump vec3 vNormal; void main() { - sample_coordinate = texture_coordinate.xy; + sampleCoordinate = texture_coordinate.xy; mat4 mvpMatrix = perspectiveMatrix * modelMatrix; gl_Position = mvpMatrix * position; + + // TODO: Pass in rotation submatrix with no scaling or transforms to prevent + // breaking vNormal in case of model matrix having non-uniform scaling + vec4 tmpNormal = mvpMatrix * vec4(normal, 1.0); + vec4 transformedZero = mvpMatrix * vec4(0.0, 0.0, 0.0, 1.0); + tmpNormal = tmpNormal - transformedZero; + vNormal = normalize(tmpNormal.xyz); } )"; const GLchar *frag_src = R"( precision mediump float; - varying vec2 sample_coordinate; // texture coordinate (0..1) + varying vec2 sampleCoordinate; // texture coordinate (0..1) + varying vec3 vNormal; uniform sampler2D texture; // texture to shade with + const float kPi = 3.14159265359; + + // Define ambient lighting factor that is applied to our texture in order to + // generate ambient lighting of the scene on the object. Range is [0.0-1.0], + // with the factor being proportional to the brightness of the lighting in the + // scene being applied to the object + const float kAmbientLighting = 0.75; + + // Define RGB values for light source + const vec3 kLightColor = vec3(0.25); + // Exponent for directional lighting that governs diffusion of surface light + const float kExponent = 1.0; + // Define direction of lighting effect source + const vec3 lightDir = vec3(0.0, -1.0, -0.6); + // Hard-coded view direction + const vec3 viewDir = vec3(0.0, 0.0, -1.0); + + // DirectionalLighting procedure imported from Lullaby @ https://github.com/google/lullaby + // Calculate and return the color (diffuse and specular together) reflected by + // a directional light. + vec3 GetDirectionalLight(vec3 pos, vec3 normal, vec3 viewDir, vec3 lightDir, vec3 lightColor, float exponent) { + // Intensity of the diffuse light. Saturate to keep within the 0-1 range. 
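+      // (Illustrative note: dot(-a, -b) == dot(a, b), so this is simply the
+      // Lambertian term dot(normal, lightDir), clamped to [0, 1] below.)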
+ float normal_dot_light_dir = dot(-normal, -lightDir); + float intensity = clamp(normal_dot_light_dir, 0.0, 1.0); + // Calculate the diffuse light + vec3 diffuse = intensity * lightColor; + // http://www.rorydriscoll.com/2009/01/25/energy-conservation-in-games/ + float kEnergyConservation = (2.0 + exponent) / (2.0 * kPi); + vec3 reflect_dir = reflect(lightDir, -normal); + // Intensity of the specular light + float view_dot_reflect = dot(-viewDir, reflect_dir); + // Use an epsilon for pow because pow(x,y) is undefined if x < 0 or x == 0 + // and y <= 0 (GLSL Spec 8.2) + const float kEpsilon = 1e-5; + intensity = kEnergyConservation * pow(clamp(view_dot_reflect, kEpsilon, 1.0), + exponent); + // Specular color: + vec3 specular = intensity * lightColor; + return diffuse + specular; + } void main() { - gl_FragColor = texture2D(texture, sample_coordinate); + // Sample the texture, retrieving an rgba pixel value + vec4 pixel = texture2D(texture, sampleCoordinate); + // If the alpha (background) value is near transparent, then discard the + // pixel, this allows the rendering of transparent background GIFs + // TODO: Adding a toggle to perform pixel alpha discarding for transparent + // GIFs (prevent interference with Objectron system). + if (pixel.a < 0.2) discard; + + // Generate directional lighting effect + vec3 lighting = GetDirectionalLight(gl_FragCoord.xyz, vNormal, viewDir, lightDir, kLightColor, kExponent); + // Apply both ambient and directional lighting to our texture + gl_FragColor = vec4((vec3(kAmbientLighting) + lighting) * pixel.rgb, 1.0); } )"; @@ -718,6 +892,9 @@ void GlAnimationOverlayCalculator::LoadModelMatrices( GLCHECK(glVertexAttribPointer(ATTRIB_TEXTURE_POSITION, 2, GL_FLOAT, 0, 0, triangle_mesh.texture_coords.get())); GLCHECK(glEnableVertexAttribArray(ATTRIB_TEXTURE_POSITION)); + GLCHECK(glVertexAttribPointer(ATTRIB_NORMAL, 3, GL_FLOAT, 0, 0, + triangle_mesh.normals.get())); + GLCHECK(glEnableVertexAttribArray(ATTRIB_NORMAL)); GLCHECK(glActiveTexture(GL_TEXTURE1)); GLCHECK(glBindTexture(texture.target(), texture.name())); diff --git a/mediapipe/graphs/object_detection_3d/subgraphs/BUILD b/mediapipe/graphs/object_detection_3d/subgraphs/BUILD index f471e4189..763e7372a 100644 --- a/mediapipe/graphs/object_detection_3d/subgraphs/BUILD +++ b/mediapipe/graphs/object_detection_3d/subgraphs/BUILD @@ -17,7 +17,7 @@ load( "mediapipe_simple_subgraph", ) -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//visibility:public"]) diff --git a/mediapipe/graphs/pose_tracking/BUILD b/mediapipe/graphs/pose_tracking/BUILD index fcb1217f7..0fb6726ea 100644 --- a/mediapipe/graphs/pose_tracking/BUILD +++ b/mediapipe/graphs/pose_tracking/BUILD @@ -17,7 +17,7 @@ load( "mediapipe_binary_graph", ) -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//visibility:public"]) diff --git a/mediapipe/graphs/pose_tracking/calculators/BUILD b/mediapipe/graphs/pose_tracking/calculators/BUILD index fa8d9f41b..9ac2e9a07 100644 --- a/mediapipe/graphs/pose_tracking/calculators/BUILD +++ b/mediapipe/graphs/pose_tracking/calculators/BUILD @@ -14,7 +14,7 @@ load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library") -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//visibility:public"]) diff --git a/mediapipe/graphs/pose_tracking/subgraphs/BUILD b/mediapipe/graphs/pose_tracking/subgraphs/BUILD index 0d7a5aa5d..43f436d45 100644 --- a/mediapipe/graphs/pose_tracking/subgraphs/BUILD +++ 
b/mediapipe/graphs/pose_tracking/subgraphs/BUILD @@ -17,7 +17,7 @@ load( "mediapipe_simple_subgraph", ) -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//visibility:public"]) diff --git a/mediapipe/graphs/template_matching/BUILD b/mediapipe/graphs/template_matching/BUILD index 5e8e2d348..bc254d28e 100644 --- a/mediapipe/graphs/template_matching/BUILD +++ b/mediapipe/graphs/template_matching/BUILD @@ -17,7 +17,7 @@ load( "mediapipe_binary_graph", ) -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//visibility:public"]) diff --git a/mediapipe/graphs/tracking/BUILD b/mediapipe/graphs/tracking/BUILD index 19c00449e..9e6e75f31 100644 --- a/mediapipe/graphs/tracking/BUILD +++ b/mediapipe/graphs/tracking/BUILD @@ -17,7 +17,7 @@ load( "mediapipe_binary_graph", ) -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//visibility:public"]) diff --git a/mediapipe/graphs/tracking/subgraphs/BUILD b/mediapipe/graphs/tracking/subgraphs/BUILD index 6b247fa12..16f87f3e2 100644 --- a/mediapipe/graphs/tracking/subgraphs/BUILD +++ b/mediapipe/graphs/tracking/subgraphs/BUILD @@ -17,7 +17,7 @@ load( "mediapipe_simple_subgraph", ) -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//visibility:public"]) diff --git a/mediapipe/graphs/youtube8m/BUILD b/mediapipe/graphs/youtube8m/BUILD index c697d16c0..7318a8cdc 100644 --- a/mediapipe/graphs/youtube8m/BUILD +++ b/mediapipe/graphs/youtube8m/BUILD @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//visibility:public"]) diff --git a/mediapipe/java/com/google/mediapipe/BUILD b/mediapipe/java/com/google/mediapipe/BUILD index 82e2f52c2..6995a7636 100644 --- a/mediapipe/java/com/google/mediapipe/BUILD +++ b/mediapipe/java/com/google/mediapipe/BUILD @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) diff --git a/mediapipe/java/com/google/mediapipe/components/BUILD b/mediapipe/java/com/google/mediapipe/components/BUILD index 2e0c32df8..dcbdd3b72 100644 --- a/mediapipe/java/com/google/mediapipe/components/BUILD +++ b/mediapipe/java/com/google/mediapipe/components/BUILD @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) android_library( name = "android_components", diff --git a/mediapipe/java/com/google/mediapipe/framework/BUILD b/mediapipe/java/com/google/mediapipe/framework/BUILD index 0a378ad45..dd5ae2e2a 100644 --- a/mediapipe/java/com/google/mediapipe/framework/BUILD +++ b/mediapipe/java/com/google/mediapipe/framework/BUILD @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) # MediaPipe Android framework. 
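Stepping back from the license-line cleanups: the fragment shader added to GlAnimationOverlayCalculator above combines a fixed ambient term with an energy-conserving Phong directional light. As a sanity check on that math, here is a small numpy sketch (an illustration, not part of the patch) that mirrors GetDirectionalLight for a single fragment, using the constants from the shader. Note the shader leaves lightDir unnormalized, so the sketch does too.

```python
import numpy as np

# Constants copied from the fragment shader above.
K_PI = 3.14159265359
K_AMBIENT_LIGHTING = 0.75
LIGHT_COLOR = np.full(3, 0.25)
LIGHT_DIR = np.array([0.0, -1.0, -0.6])  # left unnormalized, as in the shader
VIEW_DIR = np.array([0.0, 0.0, -1.0])


def reflect(incident, normal):
  # GLSL reflect(I, N) = I - 2 * dot(N, I) * N.
  return incident - 2.0 * np.dot(normal, incident) * normal


def get_directional_light(normal, view_dir, light_dir, light_color, exponent):
  """Mirrors the shader's GetDirectionalLight: diffuse + specular terms."""
  # Diffuse intensity, clamped to [0, 1] like the GLSL clamp().
  intensity = np.clip(np.dot(-normal, -light_dir), 0.0, 1.0)
  diffuse = intensity * light_color
  # Energy-conserving Phong factor: (2 + n) / (2 * pi).
  k_energy_conservation = (2.0 + exponent) / (2.0 * K_PI)
  reflect_dir = reflect(light_dir, -normal)
  view_dot_reflect = np.dot(-view_dir, reflect_dir)
  # The epsilon keeps pow() well-defined for non-positive bases (GLSL spec 8.2).
  intensity = k_energy_conservation * np.clip(view_dot_reflect, 1e-5, 1.0)**exponent
  specular = intensity * light_color
  return diffuse + specular


pixel_rgb = np.array([0.8, 0.6, 0.4])  # a sampled texel
normal = np.array([0.0, 0.0, -1.0])    # fragment normal facing the camera
lighting = get_directional_light(normal, VIEW_DIR, LIGHT_DIR, LIGHT_COLOR, 1.0)
final_rgb = (K_AMBIENT_LIGHTING + lighting) * pixel_rgb
print(final_rgb)
```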
@@ -79,7 +79,7 @@ android_library( "AssetCache.java", "AssetCacheDbHelper.java", "MediaPipeRunner.java", - "PacketUtil.java", + "ProtoUtil.java", "TypeNameRegistry.java", "TypeNameRegistryLite.java", "TypeNameRegistryFull.java", @@ -103,7 +103,7 @@ android_library( android_library( name = "framework_proto_lite", srcs = [ - "PacketUtil.java", + "ProtoUtil.java", "TypeNameRegistry.java", "TypeNameRegistryLite.java", ], diff --git a/mediapipe/java/com/google/mediapipe/framework/PacketCreator.java b/mediapipe/java/com/google/mediapipe/framework/PacketCreator.java index d5cfb1c69..47b1814be 100644 --- a/mediapipe/java/com/google/mediapipe/framework/PacketCreator.java +++ b/mediapipe/java/com/google/mediapipe/framework/PacketCreator.java @@ -14,7 +14,7 @@ package com.google.mediapipe.framework; -import com.google.mediapipe.framework.PacketUtil.SerializedMessage; +import com.google.mediapipe.framework.ProtoUtil.SerializedMessage; import com.google.protobuf.MessageLite; import java.nio.ByteBuffer; import java.nio.FloatBuffer; @@ -208,7 +208,7 @@ public class PacketCreator { } public Packet createFloat32Vector(float[] data) { - throw new UnsupportedOperationException("Not implemented yet"); + return Packet.create(nativeCreateFloat32Vector(mediapipeGraph.getNativeHandle(), data)); } public Packet createFloat64Vector(double[] data) { @@ -266,7 +266,7 @@ public class PacketCreator { /** Creates a {@link Packet} containing a protobuf MessageLite. */ public Packet createProto(MessageLite message) { - SerializedMessage serialized = PacketUtil.pack(message); + SerializedMessage serialized = ProtoUtil.pack(message); return Packet.create( nativeCreateProto(mediapipeGraph.getNativeHandle(), serialized)); } @@ -366,6 +366,9 @@ public class PacketCreator { long context, int name, int width, int height, TextureReleaseCallback releaseCallback); private native long nativeCreateInt32Array(long context, int[] data); private native long nativeCreateFloat32Array(long context, float[] data); + + private native long nativeCreateFloat32Vector(long context, float[] data); + private native long nativeCreateStringFromByteArray(long context, byte[] data); private native long nativeCreateProto(long context, SerializedMessage data); diff --git a/mediapipe/java/com/google/mediapipe/framework/PacketGetter.java b/mediapipe/java/com/google/mediapipe/framework/PacketGetter.java index 67fa955b4..849cf76db 100644 --- a/mediapipe/java/com/google/mediapipe/framework/PacketGetter.java +++ b/mediapipe/java/com/google/mediapipe/framework/PacketGetter.java @@ -16,7 +16,7 @@ package com.google.mediapipe.framework; import com.google.common.base.Preconditions; import com.google.common.flogger.FluentLogger; -import com.google.mediapipe.framework.PacketUtil.SerializedMessage; +import com.google.mediapipe.framework.ProtoUtil.SerializedMessage; import com.google.protobuf.InvalidProtocolBufferException; import com.google.protobuf.MessageLite; import com.google.protobuf.Parser; @@ -123,7 +123,7 @@ public final class PacketGetter { throws InvalidProtocolBufferException { SerializedMessage result = new SerializedMessage(); nativeGetProto(packet.getNativeHandle(), result); - return PacketUtil.unpack(result, clazz); + return ProtoUtil.unpack(result, clazz); } public static short[] getInt16Vector(final Packet packet) { diff --git a/mediapipe/java/com/google/mediapipe/framework/PacketUtil.java b/mediapipe/java/com/google/mediapipe/framework/ProtoUtil.java similarity index 88% rename from mediapipe/java/com/google/mediapipe/framework/PacketUtil.java 
rename to mediapipe/java/com/google/mediapipe/framework/ProtoUtil.java index 25a013e48..524ded5f0 100644 --- a/mediapipe/java/com/google/mediapipe/framework/PacketUtil.java +++ b/mediapipe/java/com/google/mediapipe/framework/ProtoUtil.java @@ -21,7 +21,7 @@ import com.google.protobuf.MessageLite; import java.util.NoSuchElementException; /** Utility functions for translating MediaPipe packet data between languages. */ -final class PacketUtil { +public final class ProtoUtil { /** Records the protobuf type name for a Java Class. */ public static void registerTypeName(Class clazz, String typeName) { typeNameRegistry.registerTypeName(clazz, typeName); @@ -43,7 +43,9 @@ final class PacketUtil { result.typeName = getTypeName(message.getClass()); if (result.typeName == null) { throw new NoSuchElementException( - "Cannot determine the protobuf package name for class: " + message.getClass()); + "Cannot determine the protobuf type name for class: " + + message.getClass() + + ". Have you called ProtoUtil.registerTypeName?"); } result.value = message.toByteArray(); return result; @@ -54,7 +56,7 @@ final class PacketUtil { SerializedMessage serialized, java.lang.Class clazz) throws InvalidProtocolBufferException { T defaultInstance = Internal.getDefaultInstance(clazz); - String expectedType = PacketUtil.getTypeName(defaultInstance.getClass()); + String expectedType = ProtoUtil.getTypeName(defaultInstance.getClass()); if (!serialized.typeName.equals(expectedType)) { throw new InvalidProtocolBufferException( "Message type does not match the expected type. Expected: " @@ -69,14 +71,14 @@ final class PacketUtil { (T) defaultInstance .getParserForType() - .parseFrom(serialized.value, PacketUtil.getExtensionRegistry()); + .parseFrom(serialized.value, ProtoUtil.getExtensionRegistry()); return result; } /** A singleton to find protobuf full type names. */ static TypeNameRegistry typeNameRegistry = new TypeNameRegistryConcrete(); - private PacketUtil() {} + private ProtoUtil() {} static class SerializedMessage { public String typeName; diff --git a/mediapipe/java/com/google/mediapipe/framework/jni/BUILD b/mediapipe/java/com/google/mediapipe/framework/jni/BUILD index 8aa7894bf..3fe9efd1f 100644 --- a/mediapipe/java/com/google/mediapipe/framework/jni/BUILD +++ b/mediapipe/java/com/google/mediapipe/framework/jni/BUILD @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
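The PacketUtil-to-ProtoUtil rename above centers on a small contract: pack() stores the registered full protobuf type name next to the serialized bytes, and unpack() refuses to parse unless the stored name matches the one registered for the expected class. A hedged Python sketch of that contract follows (illustrative only; the real implementation is the Java class in the diff, and these function names are hypothetical):

```python
from dataclasses import dataclass

# Maps a message class to its full protobuf type name, like TypeNameRegistry.
_type_names = {}


def register_type_name(clazz, type_name):
  _type_names[clazz] = type_name


@dataclass
class SerializedMessage:
  type_name: str
  value: bytes


def pack(message):
  type_name = _type_names.get(type(message))
  if type_name is None:
    raise KeyError('Cannot determine the protobuf type name for class: %s. '
                   'Have you called register_type_name?' % type(message))
  return SerializedMessage(type_name, message.SerializeToString())


def unpack(serialized, clazz):
  expected = _type_names.get(clazz)
  if serialized.type_name != expected:
    raise ValueError('Message type does not match the expected type. '
                     'Expected: %s' % expected)
  message = clazz()
  message.ParseFromString(serialized.value)
  return message
```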
-licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package( default_visibility = ["//visibility:public"], @@ -131,8 +131,8 @@ cc_library( cc_library( name = "jni_util", - srcs = (["jni_util.cc"]), - hdrs = (["jni_util.h"]), + srcs = ["jni_util.cc"], + hdrs = ["jni_util.h"], deps = [ ":class_registry", "@com_google_absl//absl/synchronization", @@ -148,8 +148,8 @@ cc_library( cc_library( name = "class_registry", - srcs = (["class_registry.cc"]), - hdrs = (["class_registry.h"]), + srcs = ["class_registry.cc"], + hdrs = ["class_registry.h"], deps = [ "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", @@ -164,8 +164,8 @@ cc_library( cc_library( name = "register_natives", - srcs = (["register_natives.cc"]), - hdrs = (["register_natives.h"]), + srcs = ["register_natives.cc"], + hdrs = ["register_natives.h"], deps = [ ":class_registry", ":mediapipe_framework_jni", diff --git a/mediapipe/java/com/google/mediapipe/framework/jni/jni_util.cc b/mediapipe/java/com/google/mediapipe/framework/jni/jni_util.cc index 1bddc0166..10d6852d9 100644 --- a/mediapipe/java/com/google/mediapipe/framework/jni/jni_util.cc +++ b/mediapipe/java/com/google/mediapipe/framework/jni/jni_util.cc @@ -141,7 +141,7 @@ bool ThrowIfError(JNIEnv* env, mediapipe::Status status) { SerializedMessageIds::SerializedMessageIds(JNIEnv* env, jobject data) { jclass j_class = reinterpret_cast(env->NewGlobalRef(env->FindClass( - "com/google/mediapipe/framework/PacketUtil$SerializedMessage"))); + "com/google/mediapipe/framework/ProtoUtil$SerializedMessage"))); type_name_id = env->GetFieldID(j_class, "typeName", "Ljava/lang/String;"); value_id = env->GetFieldID(j_class, "value", "[B"); } diff --git a/mediapipe/java/com/google/mediapipe/framework/jni/packet_creator_jni.cc b/mediapipe/java/com/google/mediapipe/framework/jni/packet_creator_jni.cc index cb8acf536..f4829c794 100644 --- a/mediapipe/java/com/google/mediapipe/framework/jni/packet_creator_jni.cc +++ b/mediapipe/java/com/google/mediapipe/framework/jni/packet_creator_jni.cc @@ -347,6 +347,7 @@ JNIEXPORT jlong JNICALL PACKET_CREATOR_METHOD(nativeCreateGpuBuffer)( mediapipe::Packet packet = mediapipe::MakePacket( mediapipe::GlTextureBuffer::Wrap(GL_TEXTURE_2D, name, width, height, mediapipe::GpuBufferFormat::kBGRA32, + gpu_resources->gl_context(), cc_callback)); return CreatePacketWithContext(context, packet); } @@ -375,6 +376,23 @@ JNIEXPORT jlong JNICALL PACKET_CREATOR_METHOD(nativeCreateFloat32Array)( return CreatePacketWithContext(context, packet); } +JNIEXPORT jlong JNICALL PACKET_CREATOR_METHOD(nativeCreateFloat32Vector)( + JNIEnv* env, jobject thiz, jlong context, jfloatArray data) { + jsize count = env->GetArrayLength(data); + jfloat* data_ref = env->GetFloatArrayElements(data, nullptr); + // jfloat is a "machine-dependent native type" which represents a 32-bit + // float. C++ makes no guarantees about the size of floating point types, and + // some exotic architectures don't even have 32-bit floats (or even binary + // floats), but on all architectures we care about this is a float. 
+  static_assert(std::is_same<jfloat, float>::value, "jfloat must be float");
+  std::unique_ptr<std::vector<float>> floats =
+      absl::make_unique<std::vector<float>>(data_ref, data_ref + count);
+
+  env->ReleaseFloatArrayElements(data, data_ref, JNI_ABORT);
+  mediapipe::Packet packet = mediapipe::Adopt(floats.release());
+  return CreatePacketWithContext(context, packet);
+}
+
 JNIEXPORT jlong JNICALL PACKET_CREATOR_METHOD(nativeCreateInt32Array)(
     JNIEnv* env, jobject thiz, jlong context, jintArray data) {
   jsize count = env->GetArrayLength(data);
diff --git a/mediapipe/java/com/google/mediapipe/framework/jni/packet_creator_jni.h b/mediapipe/java/com/google/mediapipe/framework/jni/packet_creator_jni.h
index e7866382a..0b448ae79 100644
--- a/mediapipe/java/com/google/mediapipe/framework/jni/packet_creator_jni.h
+++ b/mediapipe/java/com/google/mediapipe/framework/jni/packet_creator_jni.h
@@ -104,6 +104,9 @@ JNIEXPORT jlong JNICALL PACKET_CREATOR_METHOD(nativeCreateGpuBuffer)(
 JNIEXPORT jlong JNICALL PACKET_CREATOR_METHOD(nativeCreateFloat32Array)(
     JNIEnv* env, jobject thiz, jlong context, jfloatArray data);

+JNIEXPORT jlong JNICALL PACKET_CREATOR_METHOD(nativeCreateFloat32Vector)(
+    JNIEnv* env, jobject thiz, jlong context, jfloatArray data);
+
 JNIEXPORT jlong JNICALL PACKET_CREATOR_METHOD(nativeCreateInt32Array)(
     JNIEnv* env, jobject thiz, jlong context, jintArray data);

diff --git a/mediapipe/java/com/google/mediapipe/glutil/BUILD b/mediapipe/java/com/google/mediapipe/glutil/BUILD
index 53ea51046..b1c89240e 100644
--- a/mediapipe/java/com/google/mediapipe/glutil/BUILD
+++ b/mediapipe/java/com/google/mediapipe/glutil/BUILD
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-licenses(["notice"])  # Apache 2.0
+licenses(["notice"])

 # OpenGL utilities.
diff --git a/mediapipe/models/BUILD b/mediapipe/models/BUILD
index ef6f88d65..46d164040 100644
--- a/mediapipe/models/BUILD
+++ b/mediapipe/models/BUILD
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//visibility:public"]) diff --git a/mediapipe/modules/face_detection/BUILD b/mediapipe/modules/face_detection/BUILD index 2c90c3593..bb576a987 100644 --- a/mediapipe/modules/face_detection/BUILD +++ b/mediapipe/modules/face_detection/BUILD @@ -17,7 +17,7 @@ load( "mediapipe_simple_subgraph", ) -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//visibility:public"]) diff --git a/mediapipe/modules/face_landmark/BUILD b/mediapipe/modules/face_landmark/BUILD index 3dd41ecb4..ef31d4fc0 100644 --- a/mediapipe/modules/face_landmark/BUILD +++ b/mediapipe/modules/face_landmark/BUILD @@ -17,7 +17,7 @@ load( "mediapipe_simple_subgraph", ) -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//visibility:public"]) diff --git a/mediapipe/modules/iris_landmark/BUILD b/mediapipe/modules/iris_landmark/BUILD index cf8b8da36..e16a79b87 100644 --- a/mediapipe/modules/iris_landmark/BUILD +++ b/mediapipe/modules/iris_landmark/BUILD @@ -17,7 +17,7 @@ load( "mediapipe_simple_subgraph", ) -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//visibility:public"]) diff --git a/mediapipe/modules/pose_detection/BUILD b/mediapipe/modules/pose_detection/BUILD index 9701bfcb6..60d4f6763 100644 --- a/mediapipe/modules/pose_detection/BUILD +++ b/mediapipe/modules/pose_detection/BUILD @@ -17,7 +17,7 @@ load( "mediapipe_simple_subgraph", ) -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//visibility:public"]) diff --git a/mediapipe/modules/pose_landmark/BUILD b/mediapipe/modules/pose_landmark/BUILD index c75d70b06..70de124ab 100644 --- a/mediapipe/modules/pose_landmark/BUILD +++ b/mediapipe/modules/pose_landmark/BUILD @@ -17,7 +17,7 @@ load( "mediapipe_simple_subgraph", ) -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//visibility:public"]) diff --git a/mediapipe/objc/BUILD b/mediapipe/objc/BUILD index 83add1a9c..e4835b583 100644 --- a/mediapipe/objc/BUILD +++ b/mediapipe/objc/BUILD @@ -1,6 +1,6 @@ package(default_visibility = ["//visibility:private"]) -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) cc_library( name = "CFHolder", diff --git a/mediapipe/objc/MPPLayerRenderer.m b/mediapipe/objc/MPPLayerRenderer.m index 44658e228..7c3027fb6 100644 --- a/mediapipe/objc/MPPLayerRenderer.m +++ b/mediapipe/objc/MPPLayerRenderer.m @@ -54,7 +54,9 @@ glGenRenderbuffers(1, &renderbuffer_); glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer_); glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, renderbuffer_); - [_glRenderer.glContext renderbufferStorage:GL_RENDERBUFFER fromDrawable:_layer]; + BOOL success = [_glRenderer.glContext renderbufferStorage:GL_RENDERBUFFER fromDrawable:_layer]; + NSAssert(success, @"could not create renderbuffer storage for layer with bounds %@", + NSStringFromCGRect(_layer.bounds)); GLenum status = glCheckFramebufferStatus(GL_FRAMEBUFFER); NSAssert(status == GL_FRAMEBUFFER_COMPLETE, @"failed to make complete framebuffer object %x", status); diff --git a/mediapipe/python/BUILD b/mediapipe/python/BUILD index bf8d84317..a48e7911d 100644 --- a/mediapipe/python/BUILD +++ b/mediapipe/python/BUILD @@ -27,15 +27,18 @@ cc_library( pybind_extension( name = "_framework_bindings", srcs = ["framework_bindings.cc"], - linkopts = [ - "-lopencv_core", - "-lopencv_imgproc", - "-lopencv_highgui", - "-lopencv_video", - 
"-lopencv_features2d", - "-lopencv_calib3d", - "-lopencv_imgcodecs", - ], + linkopts = select({ + "//third_party:opencv_source_build": [], + "//conditions:default": [ + "-lopencv_core", + "-lopencv_imgproc", + "-lopencv_highgui", + "-lopencv_video", + "-lopencv_features2d", + "-lopencv_calib3d", + "-lopencv_imgcodecs", + ], + }), deps = [ ":builtin_calculators", "//mediapipe/python/pybind:calculator_graph", diff --git a/mediapipe/python/calculator_graph_test.py b/mediapipe/python/calculator_graph_test.py index 601f89a98..0ee790a89 100644 --- a/mediapipe/python/calculator_graph_test.py +++ b/mediapipe/python/calculator_graph_test.py @@ -18,7 +18,7 @@ # Dependency imports from absl.testing import absltest -import mediapipe.python as mp +import mediapipe as mp from google.protobuf import text_format from mediapipe.framework import calculator_pb2 diff --git a/mediapipe/python/image_frame_test.py b/mediapipe/python/image_frame_test.py index 8e70fd73f..6b9fd1989 100644 --- a/mediapipe/python/image_frame_test.py +++ b/mediapipe/python/image_frame_test.py @@ -17,9 +17,9 @@ import random from absl.testing import absltest import cv2 +import mediapipe as mp import numpy as np import PIL.Image -import mediapipe.python as mp # TODO: Add unit tests specifically for memory management. diff --git a/mediapipe/python/packet_creator.py b/mediapipe/python/packet_creator.py index c904dd780..1040ea688 100644 --- a/mediapipe/python/packet_creator.py +++ b/mediapipe/python/packet_creator.py @@ -16,6 +16,7 @@ """The public facing packet creator APIs.""" from typing import List, Union +import warnings import numpy as np @@ -48,34 +49,60 @@ create_string_to_packet_map = _packet_creator.create_string_to_packet_map create_matrix = _packet_creator.create_matrix -def create_image_frame( - data: Union[image_frame.ImageFrame, np.ndarray], - *, - image_format: image_frame.ImageFormat = None) -> packet.Packet: +def create_image_frame(data: Union[image_frame.ImageFrame, np.ndarray], + *, + image_format: image_frame.ImageFormat = None, + copy: bool = None) -> packet.Packet: """Create a MediaPipe ImageFrame packet. - A MediaPipe ImageFrame packet can be created from either the raw pixel data + A MediaPipe ImageFrame packet can be created from an existing MediaPipe + ImageFrame object and the data will be realigned and copied into a new + ImageFrame object inside of the packet. + + A MediaPipe ImageFrame packet can also be created from the raw pixel data represented as a numpy array with one of the uint8, uint16, and float data - types or an existing MediaPipe ImageFrame object. The data will be realigned - and copied into an ImageFrame object inside of the packet. + types. There are three data ownership modes depending on how the 'copy' arg + is set. + + i) Default mode + If copy is not set, mutable data is always copied while the immutable data + is by reference. + + ii) Copy mode (safe) + If copy is set to True, the data will be realigned and copied into an + ImageFrame object inside of the packet regardless the immutablity of the + original data. + + iii) Reference mode (dangerous) + If copy is set to False, the data will be forced to be shared. If the data is + mutable (data.flags.writeable is True), a warning will be raised. Args: data: A MediaPipe ImageFrame object or the raw pixel data that is represnted as a numpy ndarray. image_format: One of the image_frame.ImageFormat enum types. + copy: Indicate if the packet should copy the data from the numpy nparray. Returns: A MediaPipe ImageFrame Packet. 
  Raises:
     ValueError:
-      i) When "data" is a numpy ndarray, "image_format" is not provided.
+      i) When "data" is a numpy ndarray, "image_format" is not provided or
+        the "data" array is not c_contiguous in the reference mode.
       ii) When "data" is an ImageFrame object, the "image_format" arg doesn't
-        match the image format of the "data" ImageFrame object.
+        match the image format of the "data" ImageFrame object or "copy" is
+        explicitly set to False.
     TypeError: If "image_format" doesn't match "data" array's data type.

   Examples:
     np_array = np.random.randint(255, size=(321, 123, 3), dtype=np.uint8)
+    # Copy mode by default if the data array is writable.
+    image_frame_packet = mp.packet_creator.create_image_frame(
+        image_format=mp.ImageFormat.SRGB, data=np_array)
+
+    # Make the array unwriteable to trigger the reference mode.
+    np_array.flags.writeable = False
     image_frame_packet = mp.packet_creator.create_image_frame(
         image_format=mp.ImageFormat.SRGB, data=np_array)

@@ -87,14 +114,33 @@ def create_image_frame(
     if image_format is not None and data.image_format != image_format:
       raise ValueError(
           'The provided image_format doesn\'t match the one from the data arg.')
+    if copy is not None and not copy:
+      raise ValueError(
+          'Creating an image frame packet by taking a reference to another image frame object is not supported yet.'
+      )
     # pylint:disable=protected-access
-    return _packet_creator._create_image_frame_with_copy(data)
+    return _packet_creator._create_image_frame_from_image_frame(data)
     # pylint:enable=protected-access
   else:
     if image_format is None:
       raise ValueError('Please provide \'image_format\' with \'data\'.')
+    # If the copy arg is not set, copy the data if it's mutable. Otherwise,
+    # take a reference of the immutable data to avoid a data copy.
+    if copy is None:
+      copy = True if data.flags.writeable else False
+    if not copy:
+      # TODO: Investigate why the first 2 bytes of the data have data
+      # corruption when "data" is not c_contiguous.
+      if not data.flags.c_contiguous:
+        raise ValueError(
+            'Reference mode is unavailable if \'data\' is not c_contiguous.')
+      if data.flags.writeable:
+        warnings.warn(
+            '\'data\' is still writeable. Taking a reference of the data to create an ImageFrame packet is dangerous.',
+            RuntimeWarning, 2)
     # pylint:disable=protected-access
-    return _packet_creator._create_image_frame_with_copy(image_format, data)
+    return _packet_creator._create_image_frame_from_pixel_data(
+        image_format, data, copy)
     # pylint:enable=protected-access
diff --git a/mediapipe/python/packet_test.py b/mediapipe/python/packet_test.py
index a987f470d..fe1290ce0 100644
--- a/mediapipe/python/packet_test.py
+++ b/mediapipe/python/packet_test.py
@@ -18,8 +18,8 @@
 import gc
 import random
 import sys
 from absl.testing import absltest
+import mediapipe as mp
 import numpy as np
-import mediapipe.python as mp

 from google.protobuf import text_format
 from mediapipe.framework.formats import detection_pb2
@@ -294,7 +294,51 @@ class PacketTest(absltest.TestCase):
       # copy mode.
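Pulling the docstring above together, here is a short usage sketch of the three ownership modes (assuming a mediapipe Python package built from this patch); the new test below exercises the reference mode end to end:

```python
import numpy as np
import mediapipe as mp

rgb = np.random.randint(255, size=(480, 640, 3), dtype=np.uint8)

# Default mode: the array is writeable, so the pixel data is copied.
pkt_default = mp.packet_creator.create_image_frame(
    image_format=mp.ImageFormat.SRGB, data=rgb)

# Explicit copy mode: always safe, regardless of mutability.
pkt_copy = mp.packet_creator.create_image_frame(
    image_format=mp.ImageFormat.SRGB, data=rgb, copy=True)

# Reference mode: freeze the array first so sharing it is safe; a writeable
# array would trigger a RuntimeWarning instead.
rgb.flags.writeable = False
pkt_ref = mp.packet_creator.create_image_frame(
    image_format=mp.ImageFormat.SRGB, data=rgb)
```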
      self.assertEqual(sys.getrefcount(rgb_data), initial_ref_count)

-  def testImageFramePacketCopyConstuctionWithCropping(self):
+  def testImageFramePacketCreationReferenceMode(self):
+    w, h, channels = random.randrange(3, 100), random.randrange(3, 100), 3
+    rgb_data = np.random.randint(255, size=(h, w, channels), dtype=np.uint8)
+    rgb_data.flags.writeable = False
+    initial_ref_count = sys.getrefcount(rgb_data)
+    image_frame_packet = mp.packet_creator.create_image_frame(
+        image_format=mp.ImageFormat.SRGB, data=rgb_data)
+    # Reference mode increases the ref count of the rgb_data by 1.
+    self.assertEqual(sys.getrefcount(rgb_data), initial_ref_count + 1)
+    del image_frame_packet
+    gc.collect()
+    # Deleting image_frame_packet should decrease the ref count of rgb_data by 1.
+    self.assertEqual(sys.getrefcount(rgb_data), initial_ref_count)
+    rgb_data_copy = np.copy(rgb_data)
+    # rgb_data_copy is a copy of rgb_data and should not increase the ref count.
+    self.assertEqual(sys.getrefcount(rgb_data), initial_ref_count)
+    text_config = """
+      node {
+        calculator: 'PassThroughCalculator'
+        input_side_packet: "in"
+        output_side_packet: "out"
+      }
+    """
+    graph = mp.CalculatorGraph(graph_config=text_config)
+    graph.start_run(
+        input_side_packets={
+            'in':
+                mp.packet_creator.create_image_frame(
+                    image_format=mp.ImageFormat.SRGB, data=rgb_data)
+        })
+    # Reference mode increases the ref count of the rgb_data by 1.
+    self.assertEqual(sys.getrefcount(rgb_data), initial_ref_count + 1)
+    graph.wait_until_done()
+    output_packet = graph.get_output_side_packet('out')
+    del rgb_data
+    del graph
+    gc.collect()
+    # The pixel data of the output image frame packet should still be valid
+    # after the graph and the original rgb_data array are deleted.
+    self.assertTrue(
+        np.array_equal(
+            mp.packet_getter.get_image_frame(output_packet).numpy_view(),
+            rgb_data_copy))
+
+  def testImageFramePacketCopyCreationWithCropping(self):
     w, h, channels = random.randrange(40, 100), random.randrange(40, 100), 3
     channels, offset = 3, 10
     rgb_data = np.random.randint(255, size=(h, w, channels), dtype=np.uint8)
diff --git a/mediapipe/python/pybind/BUILD b/mediapipe/python/pybind/BUILD
index 72ee24c72..e61fd2e94 100644
--- a/mediapipe/python/pybind/BUILD
+++ b/mediapipe/python/pybind/BUILD
@@ -14,7 +14,7 @@

 load("@pybind11_bazel//:build_defs.bzl", "pybind_library")

-licenses(["notice"])  # Apache 2.0
+licenses(["notice"])

 package(default_visibility = ["//mediapipe/python:__subpackages__"])

diff --git a/mediapipe/python/pybind/image_frame_util.h b/mediapipe/python/pybind/image_frame_util.h
index 189b4e93b..9de9cb4ed 100644
--- a/mediapipe/python/pybind/image_frame_util.h
+++ b/mediapipe/python/pybind/image_frame_util.h
@@ -28,31 +28,33 @@ namespace python {

 namespace py = pybind11;

-// TODO: Implement the reference mode of image frame creation, which
-// takes a reference to the external data rather than copying it over.
-// A possible solution is to have a custom PixelDataDeleter:
-// The refcount of the numpy array will be increased when the image frame is
-// created by taking a reference to the external numpy array data. Then, the
-// custom PixelDataDeleter will decrease the refcount when the image frame gets
-// destroyed and let Python GC does its job.
template <typename T>
std::unique_ptr<ImageFrame> CreateImageFrame(
     mediapipe::ImageFormat::Format format,
-    const py::array_t<T>& data) {
+    const py::array_t<T>& data, bool copy = true) {
   int rows = data.shape()[0];
   int cols = data.shape()[1];
   int width_step = ImageFrame::NumberOfChannelsForFormat(format) *
                    ImageFrame::ByteDepthForFormat(format) * cols;
+  if (copy) {
+    auto image_frame = absl::make_unique<ImageFrame>(
+        format, /*width=*/cols, /*height=*/rows, width_step,
+        static_cast<uint8*>(data.request().ptr),
+        ImageFrame::PixelDataDeleter::kNone);
+    auto image_frame_copy = absl::make_unique<ImageFrame>();
+    // Set alignment_boundary to kGlDefaultAlignmentBoundary so that both
+    // GPU and CPU can process it.
+    image_frame_copy->CopyFrom(*image_frame,
+                               ImageFrame::kGlDefaultAlignmentBoundary);
+    return image_frame_copy;
+  }
+  PyObject* data_pyobject = data.ptr();
   auto image_frame = absl::make_unique<ImageFrame>(
       format, /*width=*/cols, /*height=*/rows, width_step,
       static_cast<uint8*>(data.request().ptr),
-      ImageFrame::PixelDataDeleter::kNone);
-  auto image_frame_copy = absl::make_unique<ImageFrame>();
-  // Set alignment_boundary to kGlDefaultAlignmentBoundary so that both
-  // GPU and CPU can process it.
-  image_frame_copy->CopyFrom(*image_frame,
-                             ImageFrame::kGlDefaultAlignmentBoundary);
-  return image_frame_copy;
+      /*deleter=*/[data_pyobject](uint8*) { Py_XDECREF(data_pyobject); });
+  Py_XINCREF(data_pyobject);
+  return image_frame;
 }

 }  // namespace python
diff --git a/mediapipe/python/pybind/packet_creator.cc b/mediapipe/python/pybind/packet_creator.cc
index 90959b3dd..579ecb608 100644
--- a/mediapipe/python/pybind/packet_creator.cc
+++ b/mediapipe/python/pybind/packet_creator.cc
@@ -31,18 +31,18 @@ namespace python {
 namespace {

 Packet CreateImageFramePacket(mediapipe::ImageFormat::Format format,
-                              const py::array& data) {
+                              const py::array& data, bool copy) {
   if (format == mediapipe::ImageFormat::SRGB ||
       format == mediapipe::ImageFormat::SRGBA ||
       format == mediapipe::ImageFormat::GRAY8) {
-    return Adopt(CreateImageFrame<uint8>(format, data).release());
+    return Adopt(CreateImageFrame<uint8>(format, data, copy).release());
   } else if (format == mediapipe::ImageFormat::GRAY16 ||
              format == mediapipe::ImageFormat::SRGB48 ||
             format == mediapipe::ImageFormat::SRGBA64) {
-    return Adopt(CreateImageFrame<uint16>(format, data).release());
+    return Adopt(CreateImageFrame<uint16>(format, data, copy).release());
   } else if (format == mediapipe::ImageFormat::VEC32F1 ||
             format == mediapipe::ImageFormat::VEC32F2) {
-    return Adopt(CreateImageFrame<float>(format, data).release());
+    return Adopt(CreateImageFrame<float>(format, data, copy).release());
   }
   throw RaisePyError(PyExc_RuntimeError,
                      absl::StrCat("Unsupported ImageFormat: ", format).c_str());
@@ -560,26 +560,12 @@ void PublicPacketCreators(pybind11::module* m) {
 }

 void InternalPacketCreators(pybind11::module* m) {
-  m->def(
-      "_create_image_frame_with_copy",
-      [](mediapipe::ImageFormat::Format format, const py::array& data) {
-        return CreateImageFramePacket(format, data);
-      },
-      py::arg("format"), py::arg("data").noconvert(),
-      py::return_value_policy::move);
+  m->def("_create_image_frame_from_pixel_data", &CreateImageFramePacket,
+         py::arg("format"), py::arg("data").noconvert(), py::arg("copy"),
+         py::return_value_policy::move);

   m->def(
-      "_create_image_frame_with_reference",
-      [](mediapipe::ImageFormat::Format format, const py::array& data) {
-        throw RaisePyError(
-            PyExc_NotImplementedError,
-            "Creating image frame packet with reference is not supproted yet.");
-      },
-      py::arg("format"), py::arg("data").noconvert(),
-      py::return_value_policy::move);
-
   m->def(
-
"_create_image_frame_with_copy", + "_create_image_frame_from_image_frame", [](ImageFrame& image_frame) { auto image_frame_copy = absl::make_unique(); // Set alignment_boundary to kGlDefaultAlignmentBoundary so that @@ -590,15 +576,6 @@ void InternalPacketCreators(pybind11::module* m) { }, py::arg("image_frame").noconvert(), py::return_value_policy::move); - m->def( - "_create_image_frame_with_reference", - [](ImageFrame& image_frame) { - throw RaisePyError( - PyExc_NotImplementedError, - "Creating image frame packet with reference is not supproted yet."); - }, - py::arg("image_frame").noconvert(), py::return_value_policy::move); - m->def( "_create_proto", [](const std::string& type_name, const py::bytes& serialized_proto) { @@ -616,7 +593,7 @@ void InternalPacketCreators(pybind11::module* m) { std::move(maybe_holder).ValueOrDie(); auto* copy = const_cast( message_holder->GetProtoMessageLite()); - copy->ParseFromString(serialized_proto); + copy->ParseFromString(std::string(serialized_proto)); return packet_internal::Create(message_holder.release()); }, py::return_value_policy::move); diff --git a/mediapipe/python/timestamp_test.py b/mediapipe/python/timestamp_test.py index fc051d966..bbcd21fa4 100644 --- a/mediapipe/python/timestamp_test.py +++ b/mediapipe/python/timestamp_test.py @@ -17,7 +17,7 @@ import time from absl.testing import absltest -import mediapipe.python as mp +import mediapipe as mp class TimestampTest(absltest.TestCase): diff --git a/mediapipe/util/BUILD b/mediapipe/util/BUILD index 8a64b4499..f662c5ed4 100644 --- a/mediapipe/util/BUILD +++ b/mediapipe/util/BUILD @@ -13,7 +13,7 @@ # limitations under the License. # -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//visibility:private"]) diff --git a/mediapipe/util/android/BUILD b/mediapipe/util/android/BUILD index e54323220..c900edd73 100644 --- a/mediapipe/util/android/BUILD +++ b/mediapipe/util/android/BUILD @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) cc_library( name = "asset_manager_util", diff --git a/mediapipe/util/android/file/base/BUILD b/mediapipe/util/android/file/base/BUILD index b3e3de765..6e5b2390a 100644 --- a/mediapipe/util/android/file/base/BUILD +++ b/mediapipe/util/android/file/base/BUILD @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) cc_library( name = "base", diff --git a/mediapipe/util/sequence/BUILD b/mediapipe/util/sequence/BUILD index 56b68484a..18aedf8e6 100644 --- a/mediapipe/util/sequence/BUILD +++ b/mediapipe/util/sequence/BUILD @@ -13,7 +13,7 @@ # limitations under the License. # -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = ["//visibility:private"]) diff --git a/mediapipe/util/tflite/BUILD b/mediapipe/util/tflite/BUILD index 85a8a6e69..9cd774c08 100644 --- a/mediapipe/util/tflite/BUILD +++ b/mediapipe/util/tflite/BUILD @@ -13,7 +13,7 @@ # limitations under the License. # -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) package(default_visibility = [ "//mediapipe:__subpackages__", diff --git a/mediapipe/util/tflite/operations/BUILD b/mediapipe/util/tflite/operations/BUILD index 3515716ab..a12bbf4a1 100644 --- a/mediapipe/util/tflite/operations/BUILD +++ b/mediapipe/util/tflite/operations/BUILD @@ -13,7 +13,7 @@ # limitations under the License. 
#

-licenses(["notice"])  # Apache 2.0
+licenses(["notice"])

 package(default_visibility = [
     "//mediapipe:__subpackages__",
diff --git a/mediapipe/util/tflite/tflite_gpu_runner.cc b/mediapipe/util/tflite/tflite_gpu_runner.cc
index f0624c76a..3065cf8aa 100644
--- a/mediapipe/util/tflite/tflite_gpu_runner.cc
+++ b/mediapipe/util/tflite/tflite_gpu_runner.cc
@@ -162,6 +162,9 @@ absl::Status TFLiteGPURunner::InitializeOpenCL(
     std::unique_ptr* builder) {
 #ifdef __ANDROID__
   cl::InferenceEnvironmentOptions env_options;
+  if (!serialized_binary_cache_.empty()) {
+    env_options.serialized_binary_cache = serialized_binary_cache_;
+  }
   cl::InferenceEnvironmentProperties properties;
   cl::InferenceOptions cl_options;
   cl_options.priority1 = options_.priority1;
diff --git a/mediapipe/util/tflite/tflite_gpu_runner.h b/mediapipe/util/tflite/tflite_gpu_runner.h
index c842c9dd6..7d12a56f5 100644
--- a/mediapipe/util/tflite/tflite_gpu_runner.h
+++ b/mediapipe/util/tflite/tflite_gpu_runner.h
@@ -71,6 +71,16 @@ class TFLiteGPURunner {
   std::vector GetInputShapes() { return input_shapes_; }
   std::vector GetOutputShapes() { return output_shapes_; }

+#ifdef __ANDROID__
+  void SetSerializedBinaryCache(std::vector<uint8_t>&& cache) {
+    serialized_binary_cache_ = std::move(cache);
+  }
+
+  std::vector<uint8_t> GetSerializedBinaryCache() {
+    return cl_environment_->GetSerializedBinaryCache();
+  }
+#endif
+
  private:
   mediapipe::Status InitializeOpenGL(
       std::unique_ptr* builder);
@@ -82,6 +92,8 @@

 #ifdef __ANDROID__
   std::unique_ptr<cl::InferenceEnvironment> cl_environment_;
+
+  std::vector<uint8_t> serialized_binary_cache_;
 #endif

 // graph_ is maintained temporarily and becomes invalid after runner_ is ready
diff --git a/mediapipe/util/tracking/BUILD b/mediapipe/util/tracking/BUILD
index a7f7b3b6f..0ff5e67eb 100644
--- a/mediapipe/util/tracking/BUILD
+++ b/mediapipe/util/tracking/BUILD
@@ -15,7 +15,7 @@

 load("//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library")

-licenses(["notice"])  # Apache 2.0
+licenses(["notice"])

 package(default_visibility = ["//visibility:public"])

diff --git a/requirements.txt b/requirements.txt
index 31b2a55c3..02b3ac75a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,6 @@
-absl-py==0.9.0
-numpy==1.19.1
-opencv-python==3.4.10.35
+absl-py
+numpy
+opencv-python>=3.4.0,<4.0.0
 protobuf>=3.11.4
-six==1.15.0
-wheel>=0.34.0
+six
+wheel
diff --git a/setup.py b/setup.py
index 17d81be60..c86ef4c39 100644
--- a/setup.py
+++ b/setup.py
@@ -21,6 +21,7 @@ import distutils.command.clean as clean
 import glob
 import os
 import posixpath
+import re
 import shutil
 import subprocess
 import sys
@@ -29,9 +30,15 @@ import setuptools
 import setuptools.command.build_ext as build_ext
 import setuptools.command.install as install

-__version__ = '0.79'
+__version__ = '0.7'
 MP_ROOT_PATH = os.path.dirname(os.path.abspath(__file__))
 ROOT_INIT_PY = os.path.join(MP_ROOT_PATH, '__init__.py')
+MP_DIR_INIT_PY = os.path.join(MP_ROOT_PATH, 'mediapipe/__init__.py')
+MP_DIR_INIT_PY_BACKUP = os.path.join(MP_ROOT_PATH,
+                                     'mediapipe/__init__.py.backup')
+MP_THIRD_PARTY_BUILD = os.path.join(MP_ROOT_PATH, 'third_party/BUILD')
+MP_THIRD_PARTY_BUILD_BACKUP = os.path.join(MP_ROOT_PATH,
+                                           'third_party/BUILD.backup')
 if not os.path.exists(ROOT_INIT_PY):
   open(ROOT_INIT_PY, 'w').close()
@@ -45,6 +52,14 @@ def _parse_requirements(path):
   ]


+def _get_long_description():
+  # fix the image urls.
+ return re.sub( + r'(docs/images/|docs/images/mobile/)([A-Za-z0-9_]*\.(png|gif))', + r'https://github.com/google/mediapipe/blob/master/\g<1>\g<2>?raw=true', + open(os.path.join(MP_ROOT_PATH, 'README.md')).read()) + + def _check_bazel(): """Check Bazel binary as well as its version.""" @@ -73,9 +88,39 @@ def _check_bazel(): ) -class GeneratePyProtos(build.build): +class ModifyInitFiles(setuptools.Command): + """Modify the init files for building MediaPipe Python package.""" + + user_options = [] + + def initialize_options(self): + pass + + def finalize_options(self): + pass + + def run(self): + # Save the original init file. + shutil.copyfile(MP_DIR_INIT_PY, MP_DIR_INIT_PY_BACKUP) + mp_dir_init_file = open(MP_DIR_INIT_PY, 'a') + mp_dir_init_file.writelines([ + '\n', 'import mediapipe.examples.python as examples\n', + 'from mediapipe.python import *\n', '\n' + ]) + mp_dir_init_file.close() + + +class GeneratePyProtos(setuptools.Command): """Generate MediaPipe Python protobuf files by Protocol Compiler.""" + user_options = [] + + def initialize_options(self): + pass + + def finalize_options(self): + pass + def run(self): if 'PROTOC' in os.environ and os.path.exists(os.environ['PROTOC']): self._protoc = os.environ['PROTOC'] @@ -138,6 +183,7 @@ class BuildBinaryGraphs(build.build): 'build', '--compilation_mode=opt', '--define=MEDIAPIPE_DISABLE_GPU=1', + '--action_env=PYTHON_BIN_PATH=' + sys.executable, os.path.join('mediapipe/graphs/', graph_path), ] if subprocess.call(bazel_command) != 0: @@ -165,6 +211,18 @@ class BazelExtension(setuptools.Extension): class BuildBazelExtension(build_ext.build_ext): """A command that runs Bazel to build a C/C++ extension.""" + user_options = build_ext.build_ext.user_options + [ + ('link-opencv', None, 'if true, build opencv from source.'), + ] + boolean_options = build_ext.build_ext.boolean_options + ['link-opencv'] + + def initialize_options(self): + self.link_opencv = False + build_ext.build_ext.initialize_options(self) + + def finalize_options(self): + build_ext.build_ext.finalize_options(self) + def run(self): _check_bazel() for ext in self.extensions: @@ -182,6 +240,18 @@ class BuildBazelExtension(build_ext.build_ext): '--action_env=PYTHON_BIN_PATH=' + sys.executable, str(ext.bazel_target + '.so'), ] + if not self.link_opencv: + # Ask the opencv_cmake rule to build the static opencv libraries for + # mediapipe python package. By doing this, we can avoid copying the opencv + # .so file into the package. 
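As an aside on `_get_long_description` above: the substitution can be sanity-checked in isolation by applying the same pattern and replacement to one of the README image lines (sample string only; not part of the patch):

```python
import re

sample = '[![pose](docs/images/mobile/pose_tracking_android_gpu_small.gif)](https://...)'
rewritten = re.sub(
    r'(docs/images/|docs/images/mobile/)([A-Za-z0-9_]*\.(png|gif))',
    r'https://github.com/google/mediapipe/blob/master/\g<1>\g<2>?raw=true',
    sample)
print(rewritten)
# -> [![pose](https://github.com/google/mediapipe/blob/master/docs/images/mobile/pose_tracking_android_gpu_small.gif?raw=true)](https://...)
```

Note that the first alternative `docs/images/` fails to complete a match for mobile paths (group 2 cannot contain `/`), so the regex engine backtracks to `docs/images/mobile/`, which is why both directory layouts rewrite correctly.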
+ content = open(MP_THIRD_PARTY_BUILD, + 'r').read().replace('OPENCV_SHARED_LIBS = True', + 'OPENCV_SHARED_LIBS = False') + shutil.move(MP_THIRD_PARTY_BUILD, MP_THIRD_PARTY_BUILD_BACKUP) + build_file = open(MP_THIRD_PARTY_BUILD, 'w') + build_file.write(content) + build_file.close() + bazel_argv.append('--define=OPENCV=source') self.spawn(bazel_argv) ext_bazel_bin_path = os.path.join('bazel-bin', ext.relpath, ext.target_name + '.so') @@ -195,9 +265,24 @@ class BuildBazelExtension(build_ext.build_ext): class Build(build.build): """Build command that builds binary graphs and extension and does a cleanup afterwards.""" + user_options = build.build.user_options + [ + ('link-opencv', None, 'if true, use the installed opencv library.'), + ] + boolean_options = build.build.boolean_options + ['link-opencv'] + + def initialize_options(self): + self.link_opencv = False + build.build.initialize_options(self) + + def finalize_options(self): + build.build.finalize_options(self) + def run(self): + build_ext_obj = self.distribution.get_command_obj('build_ext') + build_ext_obj.link_opencv = self.link_opencv self.run_command('build_binary_graphs') self.run_command('build_ext') + self.run_command('modify_inits') build.build.run(self) self.run_command('remove_generated') @@ -205,9 +290,24 @@ class Build(build.build): class Install(install.install): """Install command that builds binary graphs and extension and does a cleanup afterwards.""" + user_options = install.install.user_options + [ + ('link-opencv', None, 'if true, use the installed opencv library.'), + ] + boolean_options = install.install.boolean_options + ['link-opencv'] + + def initialize_options(self): + self.link_opencv = False + install.install.initialize_options(self) + + def finalize_options(self): + install.install.finalize_options(self) + def run(self): + build_ext_obj = self.distribution.get_command_obj('build_ext') + build_ext_obj.link_opencv = self.link_opencv self.run_command('build_binary_graphs') self.run_command('build_ext') + self.run_command('modify_inits') install.install.run(self) self.run_command('remove_generated') @@ -223,6 +323,14 @@ class RemoveGenerated(clean.clean): 'mediapipe/graphs/**/*.binarypb', recursive=True): sys.stderr.write('removing generated binary graphs: %s\n' % binarypb_file) os.remove(binarypb_file) + # Restore the original init file from the backup. + if os.path.exists(MP_DIR_INIT_PY_BACKUP): + os.remove(MP_DIR_INIT_PY) + shutil.move(MP_DIR_INIT_PY_BACKUP, MP_DIR_INIT_PY) + # Restore the original BUILD file from the backup. 
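The OPENCV_SHARED_LIBS rewrite above and the restore logic in RemoveGenerated form a backup-edit-restore pair. Condensed into one hedged helper (a sketch only; setup.py does this inline with shutil.move and the matching restore code shown in this diff):

```python
import os
import shutil


def patch_with_backup(path, old, new):
  """Back up `path`, then replace `old` with `new` in place."""
  shutil.copyfile(path, path + '.backup')
  with open(path) as f:
    content = f.read().replace(old, new)
  with open(path, 'w') as f:
    f.write(content)


def restore_backup(path):
  """Undo patch_with_backup, as RemoveGenerated does during cleanup."""
  if os.path.exists(path + '.backup'):
    os.remove(path)
    shutil.move(path + '.backup', path)


# e.g. switch the OpenCV build to static libraries for the wheel:
# patch_with_backup('third_party/BUILD',
#                   'OPENCV_SHARED_LIBS = True', 'OPENCV_SHARED_LIBS = False')
```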
+ if os.path.exists(MP_THIRD_PARTY_BUILD_BACKUP): + os.remove(MP_THIRD_PARTY_BUILD) + shutil.move(MP_THIRD_PARTY_BUILD_BACKUP, MP_THIRD_PARTY_BUILD) clean.clean.run(self) @@ -231,15 +339,16 @@ setuptools.setup( version=__version__, url='https://github.com/google/mediapipe', description='MediaPipe is the simplest way for researchers and developers to build world-class ML solutions and applications for mobile, edge, cloud and the web.', - author='Mediapipe Authors', + author='MediaPipe Authors', author_email='mediapipe@google.com', - long_description=open(os.path.join(MP_ROOT_PATH, 'README.md')).read(), + long_description=_get_long_description(), long_description_content_type='text/markdown', packages=setuptools.find_packages(exclude=['mediapipe.examples.desktop.*']), install_requires=_parse_requirements('requirements.txt'), cmdclass={ 'build': Build, 'gen_protos': GeneratePyProtos, + 'modify_inits': ModifyInitFiles, 'build_binary_graphs': BuildBinaryGraphs, 'build_ext': BuildBazelExtension, 'install': Install, diff --git a/third_party/BUILD b/third_party/BUILD index 47c05bf38..4d2676751 100644 --- a/third_party/BUILD +++ b/third_party/BUILD @@ -24,22 +24,22 @@ cc_library( visibility = ["//visibility:public"], deps = select({ "//mediapipe:android_x86": [ - "@com_github_glog_glog_v_0_3_5//:glog", + "@com_github_glog_glog_no_gflags//:glog", ], "//mediapipe:android_x86_64": [ - "@com_github_glog_glog_v_0_3_5//:glog", + "@com_github_glog_glog_no_gflags//:glog", ], "//mediapipe:android_armeabi": [ - "@com_github_glog_glog_v_0_3_5//:glog", + "@com_github_glog_glog_no_gflags//:glog", ], "//mediapipe:android_arm": [ - "@com_github_glog_glog_v_0_3_5//:glog", + "@com_github_glog_glog_no_gflags//:glog", ], "//mediapipe:android_arm64": [ - "@com_github_glog_glog_v_0_3_5//:glog", + "@com_github_glog_glog_no_gflags//:glog", ], "//mediapipe:ios": [ - "@com_github_glog_glog_v_0_3_5//:glog", + "@com_github_glog_glog_no_gflags//:glog", ], "//mediapipe:macos": [ "@com_github_glog_glog//:glog", @@ -53,37 +53,140 @@ cc_library( }), ) -cc_library( - name = "opencv", +config_setting( + name = "opencv_source_build", + define_values = { + "OPENCV": "source", + }, visibility = ["//visibility:public"], - deps = select({ - "//mediapipe:android_x86": [ - "@android_opencv//:libopencv_x86", - ], - "//mediapipe:android_x86_64": [ - "@android_opencv//:libopencv_x86_64", - ], - "//mediapipe:android_armeabi": [ - "@android_opencv//:libopencv_armeabi-v7a", - ], - "//mediapipe:android_arm": [ - "@android_opencv//:libopencv_armeabi-v7a", - ], - "//mediapipe:android_arm64": [ - "@android_opencv//:libopencv_arm64-v8a", - ], - "//mediapipe:ios": [ - "@ios_opencv//:opencv", - ], - "//mediapipe:macos": [ - "@macos_opencv//:opencv", - ], - "//mediapipe:windows": [ - "@windows_opencv//:opencv", - ], - "//conditions:default": [ - "@linux_opencv//:opencv", - ], +) + +alias( + name = "opencv", + actual = select({ + ":opencv_source_build": ":opencv_cmake", + "//conditions:default": ":opencv_binary", + }), + visibility = ["//visibility:public"], +) + +load("@rules_foreign_cc//tools/build_defs:cmake.bzl", "cmake_external") + +# Note: this determines the order in which the libraries are passed to the +# linker, so if library A depends on library B, library B must come _after_. +# Hence core is at the bottom. 
+OPENCV_MODULES = [
+    "calib3d",
+    "features2d",
+    "highgui",
+    "video",
+    "videoio",
+    "imgcodecs",
+    "imgproc",
+    "core",
+]
+
+# Note: passing both BUILD_SHARED_LIBS=ON and BUILD_STATIC_LIBS=ON to cmake
+# still only builds the shared libraries, so we have to choose one or the
+# other. We build shared libraries by default, but this variable can be used
+# to switch to static libraries.
+OPENCV_SHARED_LIBS = True
+
+OPENCV_SO_VERSION = "3.4"
+
+cmake_external(
+    name = "opencv_cmake",
+    # Values to be passed as -Dkey=value on the CMake command line;
+    # these serve to provide CMake script configuration options.
+    cache_entries = {
+        "CMAKE_BUILD_TYPE": "Release",
+        # The module list is always sorted alphabetically so that we do not
+        # cause a rebuild when changing the link order.
+        "BUILD_LIST": ",".join(sorted(OPENCV_MODULES)),
+        "BUILD_TESTS": "OFF",
+        "BUILD_PERF_TESTS": "OFF",
+        "BUILD_EXAMPLES": "OFF",
+        "BUILD_SHARED_LIBS": "ON" if OPENCV_SHARED_LIBS else "OFF",
+        "WITH_ITT": "OFF",
+        "WITH_JASPER": "OFF",
+        "WITH_WEBP": "OFF",
+        # When building tests, by default Bazel builds them in dynamic mode.
+        # See https://docs.bazel.build/versions/master/be/c-cpp.html#cc_binary.linkstatic
+        # For example, when building //mediapipe/calculators/video:opencv_video_encoder_calculator_test,
+        # the dependency //mediapipe/framework/formats:image_frame_opencv will
+        # be built as a shared library, which depends on a cv::Mat constructor,
+        # and expects it to be provided by the main executable. The main
+        # executable depends on libimage_frame_opencv.so and links in
+        # libopencv_core.a, which contains cv::Mat. However, if
+        # libopencv_core.a marks its symbols as hidden, then cv::Mat is in
+        # opencv_video_encoder_calculator_test but it is not exported, so
+        # libimage_frame_opencv.so fails to find it.
+        "OPENCV_SKIP_VISIBILITY_HIDDEN": "ON" if not OPENCV_SHARED_LIBS else "OFF",
+        # The COPY actions in modules/python/python_loader.cmake have issues with symlinks.
+        # In any case, we don't use this.
+        "OPENCV_SKIP_PYTHON_LOADER": "ON",
+        # Need to set this too, for the same reason.
+        "BUILD_opencv_python": "OFF",
+        # Ccache causes issues in some of our CI setups. It's not clear that
+        # ccache would be able to work across sandboxed Bazel builds, either.
+        # In any case, Bazel does its own caching of the rule's outputs.
+        "ENABLE_CCACHE": "OFF",
+    },
+    lib_source = "@opencv//:all",
+    linkopts = [] if OPENCV_SHARED_LIBS else [
+        # When using static libraries, the binary that eventually depends on the
+        # libraries also needs to link in their dependencies, which therefore
+        # have to be listed here.
+        # This list depends on which dependencies CMake finds when it configures
+        # the build, and so depends on what is installed on the local system.
+        # After building, the linkopts for the current setup can be extracted
+        # from lib/pkgconfig/opencv.pc in bazel-out
+        "-ljpeg",
+        "-lpng",
+        "-lz",
+        "-ltiff",
+        "-lImath",
+        "-lIlmImf",
+        "-lIex",
+        "-lHalf",
+        "-lIlmThread",
+        "-ldc1394",
+        "-lavcodec",
+        "-lavformat",
+        "-lavutil",
+        "-lswscale",
+        "-lavresample",
+        "-ldl",
+        "-lm",
+        "-lpthread",
+        "-lrt",
+    ],
+    shared_libraries = select({
+        "@bazel_tools//src/conditions:darwin": ["libopencv_%s.%s.dylib" % (module, OPENCV_SO_VERSION) for module in OPENCV_MODULES],
+        # Only the shared objects listed here will be linked in the directory
+        # that Bazel adds to the RUNPATH of dependent executables.
You cannot + # list both the versioned and unversioned name of the .so, and the + # versioned name is the one that the executables actually reference. + "//conditions:default": ["libopencv_%s.so.%s" % (module, OPENCV_SO_VERSION) for module in OPENCV_MODULES], + }) if OPENCV_SHARED_LIBS else None, + static_libraries = [ + "libopencv_%s.a" % module + for module in OPENCV_MODULES + ] if not OPENCV_SHARED_LIBS else None, +) + +alias( + name = "opencv_binary", + actual = select({ + "//mediapipe:android_x86": "@android_opencv//:libopencv_x86", + "//mediapipe:android_x86_64": "@android_opencv//:libopencv_x86_64", + "//mediapipe:android_armeabi": "@android_opencv//:libopencv_armeabi-v7a", + "//mediapipe:android_arm": "@android_opencv//:libopencv_armeabi-v7a", + "//mediapipe:android_arm64": "@android_opencv//:libopencv_arm64-v8a", + "//mediapipe:ios": "@ios_opencv//:opencv", + "//mediapipe:macos": "@macos_opencv//:opencv", + "//mediapipe:windows": "@windows_opencv//:opencv", + "//conditions:default": "@linux_opencv//:opencv", }), ) diff --git a/third_party/ffmpeg_macos.BUILD b/third_party/ffmpeg_macos.BUILD index 6e6f94aa5..d6554ad36 100644 --- a/third_party/ffmpeg_macos.BUILD +++ b/third_party/ffmpeg_macos.BUILD @@ -20,11 +20,11 @@ cc_library( name = "libffmpeg", srcs = glob( [ - "local/opt/ffmpeg/lib/libav*.dylib", + "lib/libav*.dylib", ], ), - hdrs = glob(["local/opt/ffmpeg/include/libav*/*.h"]), - includes = ["local/opt/ffmpeg/include/"], + hdrs = glob(["include/libav*/*.h"]), + includes = ["include/"], linkopts = [ "-lavcodec", "-lavformat", diff --git a/third_party/glog.BUILD b/third_party/glog.BUILD deleted file mode 100644 index 34db6f38d..000000000 --- a/third_party/glog.BUILD +++ /dev/null @@ -1,585 +0,0 @@ -# Copyright 2019 The MediaPipe Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-
-licenses(["notice"])
-
-exports_files(["LICENSE"])
-
-config_setting(
-    name = "android_arm",
-    values = {
-        "cpu": "armeabi-v7a",
-    },
-    visibility = ["//visibility:private"],
-)
-
-config_setting(
-    name = "android_arm64",
-    values = {
-        "cpu": "arm64-v8a",
-    },
-    visibility = ["//visibility:private"],
-)
-
-config_setting(
-    name = "ios_armv7",
-    values = {
-        "cpu": "ios_armv7",
-    },
-    visibility = ["//visibility:private"],
-)
-
-config_setting(
-    name = "ios_arm64",
-    values = {
-        "cpu": "ios_arm64",
-    },
-    visibility = ["//visibility:private"],
-)
-
-config_setting(
-    name = "ios_arm64e",
-    values = {
-        "cpu": "ios_arm64e",
-    },
-    visibility = ["//visibility:private"],
-)
-
-config_setting(
-    name = "libunwind",
-    values = {
-        "define": "libunwind=true",
-        "cpu": "k8",
-    },
-    visibility = ["//visibility:private"],
-)
-
-cc_library(
-    name = "glog",
-    srcs = [
-        "config_h",
-        "src/base/commandlineflags.h",
-        "src/base/googleinit.h",
-        "src/base/mutex.h",
-        "src/demangle.cc",
-        "src/demangle.h",
-        "src/logging.cc",
-        "src/raw_logging.cc",
-        "src/signalhandler.cc",
-        "src/symbolize.cc",
-        "src/symbolize.h",
-        "src/utilities.cc",
-        "src/utilities.h",
-        "src/vlog_is_on.cc",
-    ] + glob(["src/stacktrace*.h"]),
-    hdrs = [
-        "src/glog/log_severity.h",
-        "src/glog/logging.h",
-        "src/glog/raw_logging.h",
-        "src/glog/stl_logging.h",
-        "src/glog/vlog_is_on.h",
-    ],
-    copts = [
-        "-Wno-sign-compare",
-        "-U_XOPEN_SOURCE",
-    ],
-    includes = ["./src"],
-    linkopts = select({
-        ":libunwind": ["-lunwind"],
-        "//conditions:default": [],
-    }) + select({
-        "//conditions:default": ["-lpthread"],
-        ":android_arm": [],
-        ":android_arm64": [],
-        ":ios_armv7": [],
-        ":ios_arm64": [],
-        ":ios_arm64e": [],
-    }),
-    visibility = ["//visibility:public"],
-    deps = select({
-        "//conditions:default": ["@com_github_gflags_gflags//:gflags"],
-        ":android_arm": [],
-        ":android_arm64": [],
-        ":ios_armv7": [],
-        ":ios_arm64": [],
-        ":ios_arm64e": [],
-    }),
-)
-
-genrule(
-    name = "run_configure",
-    srcs = [
-        "README",
-        "Makefile.in",
-        "config.guess",
-        "config.sub",
-        "install-sh",
-        "ltmain.sh",
-        "missing",
-        "libglog.pc.in",
-        "src/config.h.in",
-        "src/glog/logging.h.in",
-        "src/glog/raw_logging.h.in",
-        "src/glog/stl_logging.h.in",
-        "src/glog/vlog_is_on.h.in",
-    ],
-    outs = [
-        "config.h.tmp",
-        "src/glog/logging.h.tmp",
-        "src/glog/raw_logging.h",
-        "src/glog/stl_logging.h",
-        "src/glog/vlog_is_on.h",
-    ],
-    cmd = "$(location :configure)" +
-          "&& cp -v src/config.h $(location config.h.tmp) " +
-          "&& cp -v src/glog/logging.h $(location src/glog/logging.h.tmp) " +
-          "&& cp -v src/glog/raw_logging.h $(location src/glog/raw_logging.h) " +
-          "&& cp -v src/glog/stl_logging.h $(location src/glog/stl_logging.h) " +
-          "&& cp -v src/glog/vlog_is_on.h $(location src/glog/vlog_is_on.h) ",
-    tools = [
-        "configure",
-    ],
-)
-
-genrule(
-    name = "config_h",
-    srcs = select({
-        "//conditions:default": ["config.h.tmp"],
-        ":android_arm": ["config.h.android_arm"],
-        ":android_arm64": ["config.h.android_arm"],
-        ":ios_armv7": ["config.h.ios_arm"],
-        ":ios_arm64": ["config.h.ios_arm"],
-        ":ios_arm64e": ["config.h.ios_arm"],
-    }),
-    outs = ["config.h"],
-    cmd = "echo select $< to be the glog config file. && cp $< $@",
-)
-
-genrule(
-    name = "logging_h",
-    srcs = select({
-        "//conditions:default": ["src/glog/logging.h.tmp"],
-        ":android_arm": ["src/glog/logging.h.arm"],
-        ":android_arm64": ["src/glog/logging.h.arm"],
-        ":ios_armv7": ["src/glog/logging.h.arm"],
-        ":ios_arm64": ["src/glog/logging.h.arm"],
-        ":ios_arm64e": ["src/glog/logging.h.arm"],
-    }),
-    outs = ["src/glog/logging.h"],
-    cmd = "echo select $< to be the glog logging.h file. && cp $< $@",
-)
-
-# Hardcoded android arm config header for glog library.
-# TODO: This is a temporary workaround. We should generate the config
-# header by running the configure script with the right target toolchain.
-ANDROID_ARM_CONFIG = """
-/* Define if glog does not use RTTI */
-/* #undef DISABLE_RTTI */
-
-/* Namespace for Google classes */
-#define GOOGLE_NAMESPACE google
-
-/* Define if you have the 'dladdr' function */
-#define HAVE_DLADDR
-
-/* Define if you have the 'snprintf' function */
-#define HAVE_SNPRINTF
-
-/* Define to 1 if you have the <dlfcn.h> header file. */
-#define HAVE_DLFCN_H
-
-/* Define to 1 if you have the <execinfo.h> header file. */
-/* #undef HAVE_EXECINFO_H */
-
-/* Define if you have the 'fcntl' function */
-#define HAVE_FCNTL
-
-/* Define to 1 if you have the <glob.h> header file. */
-#define HAVE_GLOB_H
-
-/* Define to 1 if you have the <inttypes.h> header file. */
-#define HAVE_INTTYPES_H 1
-
-/* Define to 1 if you have the 'pthread' library (-lpthread). */
-/* #undef HAVE_LIBPTHREAD */
-
-/* Define to 1 if you have the <libunwind.h> header file. */
-/* #undef HAVE_LIBUNWIND_H */
-
-/* Define if you have google gflags library */
-/* #undef HAVE_LIB_GFLAGS */
-
-/* Define if you have google gmock library */
-/* #undef HAVE_LIB_GMOCK */
-
-/* Define if you have google gtest library */
-/* #undef HAVE_LIB_GTEST */
-
-/* Define if you have libunwind */
-/* #undef HAVE_LIB_UNWIND */
-
-/* Define to 1 if you have the <memory.h> header file. */
-#define HAVE_MEMORY_H
-
-/* Define to disable multithreading support. */
-/* #undef NO_THREADS */
-
-/* Define if the compiler implements namespaces */
-#define HAVE_NAMESPACES
-
-/* Define if you have the 'pread' function */
-#define HAVE_PREAD
-
-/* Define if you have POSIX threads libraries and header files. */
-#define HAVE_PTHREAD
-
-/* Define to 1 if you have the <pwd.h> header file. */
-#define HAVE_PWD_H
-
-/* Define if you have the 'pwrite' function */
-#define HAVE_PWRITE
-
-/* Define if the compiler implements pthread_rwlock_* */
-#define HAVE_RWLOCK
-
-/* Define if you have the 'sigaction' function */
-#define HAVE_SIGACTION
-
-/* Define if you have the 'sigaltstack' function */
-/* #undef HAVE_SIGALTSTACK */
-
-/* Define to 1 if you have the <stdint.h> header file. */
-#define HAVE_STDINT_H 1
-
-/* Define to 1 if you have the <stdlib.h> header file. */
-#define HAVE_STDLIB_H
-
-/* Define to 1 if you have the <strings.h> header file. */
-#define HAVE_STRINGS_H
-
-/* Define to 1 if you have the <string.h> header file. */
-#define HAVE_STRING_H
-
-/* Define to 1 if you have the <syscall.h> header file. */
-#define HAVE_SYSCALL_H
-
-/* Define to 1 if you have the <syslog.h> header file. */
-#define HAVE_SYSLOG_H
-
-/* Define to 1 if you have the <sys/stat.h> header file. */
-#define HAVE_SYS_STAT_H
-
-/* Define to 1 if you have the <sys/syscall.h> header file. */
-#define HAVE_SYS_SYSCALL_H
-
-/* Define to 1 if you have the <sys/time.h> header file. */
-#define HAVE_SYS_TIME_H
-
-/* Define to 1 if you have the <sys/types.h> header file. */
-#define HAVE_SYS_TYPES_H 1
-
-/* Define to 1 if you have the <sys/ucontext.h> header file. */
-/* #undef HAVE_SYS_UCONTEXT_H */
-
-/* Define to 1 if you have the <sys/utsname.h> header file. */
-#define HAVE_SYS_UTSNAME_H
-
-/* Define to 1 if you have the <ucontext.h> header file. */
-#define HAVE_UCONTEXT_H
-
-/* Define to 1 if you have the <unistd.h> header file. */
-#define HAVE_UNISTD_H 1
-
-/* Define to 1 if you have the <unwind.h> header file. */
-#define HAVE_UNWIND_H 1
-
-/* Define if the compiler supports using expression for operator */
-#define HAVE_USING_OPERATOR
-
-/* Define if your compiler has __attribute__ */
-#define HAVE___ATTRIBUTE__
-
-/* Define if your compiler has __builtin_expect */
-#define HAVE___BUILTIN_EXPECT 1
-
-/* Define if your compiler has __sync_val_compare_and_swap */
-#define HAVE___SYNC_VAL_COMPARE_AND_SWAP
-
-/* Define to the sub-directory in which libtool stores uninstalled libraries. */
-
-/* #undef LT_OBJDIR */
-
-/* Name of package */
-/* #undef PACKAGE */
-
-/* Define to the address where bug reports for this package should be sent. */
-/* #undef PACKAGE_BUGREPORT */
-
-/* Define to the full name of this package. */
-/* #undef PACKAGE_NAME */
-
-/* Define to the full name and version of this package. */
-/* #undef PACKAGE_STRING */
-
-/* Define to the one symbol short name of this package. */
-/* #undef PACKAGE_TARNAME */
-
-/* Define to the home page for this package. */
-/* #undef PACKAGE_URL */
-
-/* Define to the version of this package. */
-/* #undef PACKAGE_VERSION */
-
-/* How to access the PC from a struct ucontext */
-/* #undef PC_FROM_UCONTEXT */
-
-/* Define to necessary symbol if this constant uses a non-standard name on
-your system. */
-/* #undef PTHREAD_CREATE_JOINABLE */
-
-/* The size of <void *>, as computed by sizeof. */
-#define SIZEOF_VOID_P 8
-
-/* Define to 1 if you have the ANSI C header files. */
-/* #undef STDC_HEADERS */
-
-/* the namespace where STL code like vector<> is defined */
-#define STL_NAMESPACE std
-
-/* Version number of package */
-/* #undef VERSION */
-
-/* Stops putting the code inside the Google namespace */
-#define _END_GOOGLE_NAMESPACE_ }
-
-/* Puts following code inside the Google namespace */
-#define _START_GOOGLE_NAMESPACE_ namespace google {
-"""
-
-genrule(
-    name = "gen_android_arm_config",
-    outs = ["config.h.android_arm"],
-    cmd = ("echo '%s' > $(location config.h.android_arm)" % ANDROID_ARM_CONFIG),
-)
-
-genrule(
-    name = "generate_arm_glog_logging_h",
-    srcs = ["src/glog/logging.h.in"],
-    outs = ["src/glog/logging.h.arm"],
-    cmd = ("sed -e 's/@ac_cv___attribute___noinline@/__attribute__((__noinline__))/g'" +
-           " -e 's/@ac_cv___attribute___noreturn@/__attribute__((__noreturn__))/g'" +
-           " -e 's/@ac_cv_have___builtin_expect@/1/g'" +
-           " -e 's/@ac_cv_have___uint16@/0/g'" +
-           " -e 's/@ac_cv_have_inttypes_h@/1/g'" +
-           " -e 's/@ac_cv_have_libgflags@/0/g'" +
-           " -e 's/@ac_cv_have_stdint_h@/1/g'" +
-           " -e 's/@ac_cv_have_systypes_h@/1/g'" +
-           " -e 's/@ac_cv_have_u_int16_t@/0/g'" +
-           " -e 's/@ac_cv_have_uint16_t@/1/g'" +
-           " -e 's/@ac_cv_have_unistd_h@/1/g'" +
-           " -e 's/@ac_google_end_namespace@/}/g'" +
-           " -e 's/@ac_google_namespace@/google/g'" +
-           " -e 's/@ac_google_start_namespace@/namespace google {/g'" +
-           " $< > $@"),
-)
-
-# Hardcoded ios arm config header for glog library.
-# TODO: This is a temporary workaround. We should generate the config
-# header by running the configure script with the right target toolchain.
-IOS_ARM_CONFIG = """
-/* define if glog doesnt use RTTI */
-/* #undef DISABLE_RTTI */
-
-/* Namespace for Google classes */
-#define GOOGLE_NAMESPACE google
-
-/* Define if you have the 'dladdr' function */
-#define HAVE_DLADDR 1
-
-/* Define to 1 if you have the <dlfcn.h> header file. */
-#define HAVE_DLFCN_H 1
-
-/* Define to 1 if you have the <execinfo.h> header file. */
-#define HAVE_EXECINFO_H 1
-
-/* Define if you have the 'fcntl' function */
-#define HAVE_FCNTL 1
-
-/* Define to 1 if you have the <glob.h> header file. */
-#define HAVE_GLOB_H 1
-
-/* Define to 1 if you have the <inttypes.h> header file. */
-#define HAVE_INTTYPES_H 1
-
-/* Define to 1 if you have the 'pthread' library (-lpthread). */
-#define HAVE_LIBPTHREAD 1
-
-/* Define to 1 if you have the <libunwind.h> header file. */
-#define HAVE_LIBUNWIND_H 1
-
-/* define if you have google gflags library */
-/* #undef HAVE_LIB_GFLAGS */
-
-/* define if you have google gmock library */
-/* #undef HAVE_LIB_GMOCK */
-
-/* define if you have google gtest library */
-/* #undef HAVE_LIB_GTEST */
-
-/* define if you have libunwind */
-/* #undef HAVE_LIB_UNWIND */
-
-/* Define to 1 if you have the <memory.h> header file. */
-#define HAVE_MEMORY_H 1
-
-/* define if the compiler implements namespaces */
-#define HAVE_NAMESPACES 1
-
-/* Define if you have the 'pread' function */
-#define HAVE_PREAD 1
-
-/* Define if you have POSIX threads libraries and header files. */
-#define HAVE_PTHREAD 1
-
-/* Define to 1 if you have the <pwd.h> header file. */
-#define HAVE_PWD_H 1
-
-/* Define if you have the 'pwrite' function */
-#define HAVE_PWRITE 1
-
-/* define if the compiler implements pthread_rwlock_* */
-#define HAVE_RWLOCK 1
-
-/* Define if you have the 'sigaction' function */
-#define HAVE_SIGACTION 1
-
-/* Define if you have the 'sigaltstack' function */
-#define HAVE_SIGALTSTACK 1
-
-/* Define to 1 if you have the <stdint.h> header file. */
-#define HAVE_STDINT_H 1
-
-/* Define to 1 if you have the <stdlib.h> header file. */
-#define HAVE_STDLIB_H 1
-
-/* Define to 1 if you have the <strings.h> header file. */
-#define HAVE_STRINGS_H 1
-
-/* Define to 1 if you have the <string.h> header file. */
-#define HAVE_STRING_H 1
-
-/* Define to 1 if you have the <syscall.h> header file. */
-/* #undef HAVE_SYSCALL_H */
-
-/* Define to 1 if you have the <syslog.h> header file. */
-#define HAVE_SYSLOG_H 1
-
-/* Define to 1 if you have the <sys/stat.h> header file. */
-#define HAVE_SYS_STAT_H 1
-
-/* Define to 1 if you have the <sys/syscall.h> header file. */
-#define HAVE_SYS_SYSCALL_H 1
-
-/* Define to 1 if you have the <sys/time.h> header file. */
-#define HAVE_SYS_TIME_H 1
-
-/* Define to 1 if you have the <sys/types.h> header file. */
-#define HAVE_SYS_TYPES_H 1
-
-/* Define to 1 if you have the <sys/ucontext.h> header file. */
-#define HAVE_SYS_UCONTEXT_H 1
-
-/* Define to 1 if you have the <sys/utsname.h> header file. */
-#define HAVE_SYS_UTSNAME_H 1
-
-/* Define to 1 if you have the <ucontext.h> header file. */
-/* #undef HAVE_UCONTEXT_H */
-
-/* Define to 1 if you have the <unistd.h> header file. */
-#define HAVE_UNISTD_H 1
-
-/* Define to 1 if you have the <unwind.h> header file. */
-#define HAVE_UNWIND_H 1
-
-/* define if the compiler supports using expression for operator */
-#define HAVE_USING_OPERATOR 1
-
-/* define if your compiler has __attribute__ */
-#define HAVE___ATTRIBUTE__ 1
-
-/* define if your compiler has __builtin_expect */
-#define HAVE___BUILTIN_EXPECT 1
-
-/* define if your compiler has __sync_val_compare_and_swap */
-#define HAVE___SYNC_VAL_COMPARE_AND_SWAP 1
-
-/* Define to the sub-directory in which libtool stores uninstalled libraries.
- */
-#define LT_OBJDIR ".libs/"
-
-/* Name of package */
-#define PACKAGE "glog"
-
-/* Define to the address where bug reports for this package should be sent. */
-#define PACKAGE_BUGREPORT "opensource@google.com"
-
-/* Define to the full name of this package. */
-#define PACKAGE_NAME "glog"
-
-/* Define to the full name and version of this package. */
-#define PACKAGE_STRING "glog 0.3.5"
-
-/* Define to the one symbol short name of this package. */
-#define PACKAGE_TARNAME "glog"
-
-/* Define to the home page for this package. */
-#define PACKAGE_URL ""
-
-/* Define to the version of this package. */
-#define PACKAGE_VERSION "0.3.5"
-
-/* How to access the PC from a struct ucontext */
-/* #undef PC_FROM_UCONTEXT */
-
-/* Define to necessary symbol if this constant uses a non-standard name on
- your system. */
-/* #undef PTHREAD_CREATE_JOINABLE */
-
-/* The size of 'void *', as computed by sizeof. */
-#define SIZEOF_VOID_P 8
-
-/* Define to 1 if you have the ANSI C header files. */
-/* #undef STDC_HEADERS */
-
-/* the namespace where STL code like vector<> is defined */
-#define STL_NAMESPACE std
-
-/* location of source code */
-#define TEST_SRC_DIR "external/com_google_glog"
-
-/* Version number of package */
-#define VERSION "0.3.5"
-
-/* Stops putting the code inside the Google namespace */
-#define _END_GOOGLE_NAMESPACE_ }
-
-/* Puts following code inside the Google namespace */
-#define _START_GOOGLE_NAMESPACE_ namespace google {
-"""
-
-genrule(
-    name = "gen_ios_arm_config",
-    outs = ["config.h.ios_arm"],
-    cmd = ("echo '%s' > $(location config.h.ios_arm)" % IOS_ARM_CONFIG),
-)
diff --git a/third_party/glog_no_gflags.BUILD b/third_party/glog_no_gflags.BUILD
new file mode 100644
index 000000000..c90719512
--- /dev/null
+++ b/third_party/glog_no_gflags.BUILD
@@ -0,0 +1,8 @@
+licenses(["notice"])
+
+load(":bazel/glog.bzl", "glog_library")
+
+# gflags is not needed on mobile platforms, and tried to link in
+# -lpthread, which breaks Android builds.
+# TODO: upstream.
+glog_library(with_gflags = 0)
diff --git a/third_party/opencv_macos.BUILD b/third_party/opencv_macos.BUILD
index be1733e04..546249754 100644
--- a/third_party/opencv_macos.BUILD
+++ b/third_party/opencv_macos.BUILD
@@ -5,25 +5,22 @@ licenses(["notice"])  # BSD license
 
 exports_files(["LICENSE"])
 
-# The following build rule assumes that OpenCV is installed by
-# 'brew install opencv@3' command on macos.
-# If you install OpenCV separately, please modify the build rule accordingly.
 cc_library(
     name = "opencv",
     srcs = glob(
         [
-            "local/opt/opencv@3/lib/libopencv_core.dylib",
-            "local/opt/opencv@3/lib/libopencv_calib3d.dylib",
-            "local/opt/opencv@3/lib/libopencv_features2d.dylib",
-            "local/opt/opencv@3/lib/libopencv_highgui.dylib",
-            "local/opt/opencv@3/lib/libopencv_imgcodecs.dylib",
-            "local/opt/opencv@3/lib/libopencv_imgproc.dylib",
-            "local/opt/opencv@3/lib/libopencv_video.dylib",
-            "local/opt/opencv@3/lib/libopencv_videoio.dylib",
+            "lib/libopencv_core.dylib",
+            "lib/libopencv_calib3d.dylib",
+            "lib/libopencv_features2d.dylib",
+            "lib/libopencv_highgui.dylib",
+            "lib/libopencv_imgcodecs.dylib",
+            "lib/libopencv_imgproc.dylib",
+            "lib/libopencv_video.dylib",
+            "lib/libopencv_videoio.dylib",
         ],
     ),
-    hdrs = glob(["local/opt/opencv@3/include/opencv2/**/*.h*"]),
-    includes = ["local/opt/opencv@3/include/"],
+    hdrs = glob(["include/opencv2/**/*.h*"]),
+    includes = ["include/"],
    linkstatic = 1,
     visibility = ["//visibility:public"],
 )
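Note: the new third_party/glog_no_gflags.BUILD above only takes effect once a repository rule in the WORKSPACE points a glog checkout at it via build_file; that WORKSPACE change is not shown in this diff. The sketch below is a hypothetical example of such wiring — the repository name, commit, and checksum are placeholder assumptions, not values from this patch:

    # WORKSPACE (sketch only; name, commit, and sha256 are assumed)
    load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")

    http_archive(
        name = "com_github_glog_glog_no_gflags",  # assumed repository name
        build_file = "@//third_party:glog_no_gflags.BUILD",
        strip_prefix = "glog-COMMIT",  # placeholder
        sha256 = "SHA256",             # placeholder
        urls = ["https://github.com/google/glog/archive/COMMIT.zip"],  # placeholder
    )

Mobile targets would then depend on @com_github_glog_glog_no_gflags//:glog, picking up glog_library(with_gflags = 0) from glog's own bazel/glog.bzl and avoiding the -lpthread link that gflags pulls in on Android.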
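Likewise, dropping the local/opt/opencv@3/ prefix from every glob in opencv_macos.BUILD implies that the matching WORKSPACE repository rule now points directly at the Homebrew OpenCV install prefix rather than at /usr. A minimal sketch of what that rule might look like — the repository name and path are assumptions based on a default 'brew install opencv@3':

    # WORKSPACE (sketch only; name and path are assumed)
    new_local_repository(
        name = "macos_opencv",
        build_file = "@//third_party:opencv_macos.BUILD",
        path = "/usr/local/opt/opencv@3",  # Homebrew install prefix; adjust to your install
    )

With the repository root at the install prefix, the BUILD file's lib/... and include/... globs resolve without hard-coding the Homebrew layout into every label.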