Merge branch 'master' into users/pmykhalevych/windows_add_attention_mesh_dll

This commit is contained in:
Pavlo-Ivan Mykhalevych 2021-12-10 16:05:55 +02:00
commit 1cf04343bc
563 changed files with 31743 additions and 4508 deletions


@ -45,7 +45,7 @@ Hair Segmentation
[Object Detection](https://google.github.io/mediapipe/solutions/object_detection) | ✅ | ✅ | ✅ | | | ✅
[Box Tracking](https://google.github.io/mediapipe/solutions/box_tracking) | ✅ | ✅ | ✅ | | |
[Instant Motion Tracking](https://google.github.io/mediapipe/solutions/instant_motion_tracking) | ✅ | | | | |
[Objectron](https://google.github.io/mediapipe/solutions/objectron) | ✅ | | ✅ | ✅ | |
[KNIFT](https://google.github.io/mediapipe/solutions/knift) | ✅ | | | | |
[AutoFlip](https://google.github.io/mediapipe/solutions/autoflip) | | | ✅ | | |
[MediaSequence](https://google.github.io/mediapipe/solutions/media_sequence) | | | ✅ | | |
@ -79,6 +79,13 @@ run code search using
## Publications
* [Bringing artworks to life with AR](https://developers.googleblog.com/2021/07/bringing-artworks-to-life-with-ar.html)
in Google Developers Blog
* [Prosthesis control via Mirru App using MediaPipe hand tracking](https://developers.googleblog.com/2021/05/control-your-mirru-prosthesis-with-mediapipe-hand-tracking.html)
in Google Developers Blog
* [SignAll SDK: Sign language interface using MediaPipe is now available for
developers](https://developers.googleblog.com/2021/04/signall-sdk-sign-language-interface-using-mediapipe-now-available.html)
in Google Developers Blog
* [MediaPipe Holistic - Simultaneous Face, Hand and Pose Prediction, on Device](https://ai.googleblog.com/2020/12/mediapipe-holistic-simultaneous-face.html)
in Google AI Blog
* [Background Features in Google Meet, Powered by Web ML](https://ai.googleblog.com/2020/10/background-features-in-google-meet.html)


@ -16,11 +16,11 @@ bazel_skylib_workspace()
load("@bazel_skylib//lib:versions.bzl", "versions")
versions.check(minimum_bazel_version = "3.7.2")
# ABSL cpp library lts_2021_03_24, patch 2.
http_archive(
name = "com_google_absl",
urls = [
"https://github.com/abseil/abseil-cpp/archive/refs/tags/20210324.2.tar.gz",
],
# Remove after https://github.com/abseil/abseil-cpp/issues/326 is solved.
patches = [
@ -29,8 +29,8 @@ http_archive(
patch_args = [
"-p1",
],
strip_prefix = "abseil-cpp-20210324.2",
sha256 = "59b862f50e710277f8ede96f083a5bb8d7c9595376146838b9580be90374ee1f"
)
http_archive(
@ -53,19 +53,12 @@ rules_foreign_cc_dependencies()
all_content = """filegroup(name = "all", srcs = glob(["**"]), visibility = ["//visibility:public"])"""
# GoogleTest/GoogleMock framework. Used by most unit-tests.
# Last updated 2021-07-02.
http_archive(
name = "com_google_googletest",
urls = ["https://github.com/google/googletest/archive/4ec4cd23f486bf70efcc5d2caa40f24368f752e3.zip"],
strip_prefix = "googletest-4ec4cd23f486bf70efcc5d2caa40f24368f752e3",
sha256 = "de682ea824bfffba05b4e33b67431c247397d6175962534305136aa06f92e049",
)
# Google Benchmark library.
@ -164,11 +157,11 @@ http_archive(
http_archive(
name = "pybind11",
urls = [
"https://storage.googleapis.com/mirror.tensorflow.org/github.com/pybind/pybind11/archive/v2.7.1.tar.gz",
"https://github.com/pybind/pybind11/archive/v2.7.1.tar.gz",
],
sha256 = "616d1c42e4cf14fa27b2a4ff759d7d7b33006fdc5ad8fd603bb2c22622f27020",
strip_prefix = "pybind11-2.7.1",
build_file = "@pybind11_bazel//:pybind11.BUILD",
)
@ -338,7 +331,10 @@ load("@rules_jvm_external//:defs.bzl", "maven_install")
maven_install(
artifacts = [
"androidx.concurrent:concurrent-futures:1.0.0-alpha03",
"androidx.lifecycle:lifecycle-common:2.3.1",
"androidx.activity:activity:1.2.2",
"androidx.exifinterface:exifinterface:1.3.3",
"androidx.fragment:fragment:1.3.4",
"androidx.annotation:annotation:aar:1.1.0",
"androidx.appcompat:appcompat:aar:1.1.0-rc01",
"androidx.camera:camera-core:1.0.0-beta10",
@ -353,9 +349,12 @@ maven_install(
"com.google.android.material:material:aar:1.0.0-rc01",
"com.google.auto.value:auto-value:1.8.1",
"com.google.auto.value:auto-value-annotations:1.8.1",
"com.google.code.findbugs:jsr305:latest.release",
"com.google.android.datatransport:transport-api:3.0.0",
"com.google.android.datatransport:transport-backend-cct:3.1.0",
"com.google.android.datatransport:transport-runtime:3.1.0",
"com.google.flogger:flogger-system-backend:0.6",
"com.google.flogger:flogger:0.6",
"com.google.guava:guava:27.0.1-android",
"com.google.guava:listenablefuture:1.0",
"junit:junit:4.12",
@ -383,9 +382,9 @@ http_archive(
)
# Tensorflow repo should always go after the other external dependencies.
# 2021-07-29
_TENSORFLOW_GIT_COMMIT = "52a2905cbc21034766c08041933053178c5d10e3"
_TENSORFLOW_SHA256 = "06d4691bcdb700f3275fa0971a1585221c2b9f3dffe867963be565a6643d7f56"
http_archive(
name = "org_tensorflow",
urls = [
@ -394,6 +393,8 @@ http_archive(
patches = [
"@//third_party:org_tensorflow_compatibility_fixes.diff",
"@//third_party:org_tensorflow_objc_cxx17.diff",
# Diff is generated with a script, don't update it manually.
"@//third_party:org_tensorflow_custom_ops.diff",
],
patch_args = [
"-p1",
@ -406,3 +407,18 @@ load("@org_tensorflow//tensorflow:workspace3.bzl", "tf_workspace3")
tf_workspace3()
load("@org_tensorflow//tensorflow:workspace2.bzl", "tf_workspace2")
tf_workspace2()
# Edge TPU
http_archive(
name = "libedgetpu",
sha256 = "14d5527a943a25bc648c28a9961f954f70ba4d79c0a9ca5ae226e1831d72fe80",
strip_prefix = "libedgetpu-3164995622300286ef2bb14d7fdc2792dae045b7",
urls = [
"https://github.com/google-coral/libedgetpu/archive/3164995622300286ef2bb14d7fdc2792dae045b7.tar.gz"
],
)
load("@libedgetpu//:workspace.bzl", "libedgetpu_dependencies")
libedgetpu_dependencies()
load("@coral_crosstool//:configure.bzl", "cc_crosstool")
cc_crosstool(name = "crosstool")


@ -16,12 +16,14 @@ nav_order: 1
Please follow instructions below to build Android example apps in the supported
MediaPipe [solutions](../solutions/solutions.md). To learn more about these
example apps, start from [Hello World! on Android](./hello_world_android.md).
To incorporate MediaPipe into Android Studio projects, see these
[instructions](./android_solutions.md) to use the MediaPipe Android Solution
APIs (currently in alpha) that are now available in
[Google's Maven Repository](https://maven.google.com/web/index.html?#com.google.mediapipe).
## Building Android example apps with Bazel
### Prerequisite
@ -51,16 +53,6 @@ $YOUR_INTENDED_API_LEVEL` in android_ndk_repository() and/or
android_sdk_repository() in the
[`WORKSPACE`](https://github.com/google/mediapipe/blob/master/WORKSPACE) file.
Please verify all the necessary packages are installed.
* Android SDK Platform API Level 28 or 29
* Android SDK Build-Tools 28 or 29
* Android SDK Platform-Tools 28 or 29
* Android SDK Tools 26.1.1
* Android NDK 19c or above
### Option 1: Build with Bazel in Command Line
Tip: You can run this
[script](https://github.com/google/mediapipe/blob/master/build_android_examples.sh)
to build (and install) all MediaPipe Android example apps.
@ -84,108 +76,3 @@ to build (and install) all MediaPipe Android example apps.
```bash
adb install bazel-bin/mediapipe/examples/android/src/java/com/google/mediapipe/apps/handtrackinggpu/handtrackinggpu.apk
```
### Option 2: Build with Bazel in Android Studio
The MediaPipe project can be imported into Android Studio using the Bazel
plugins. This allows the MediaPipe examples to be built and modified in Android
Studio.
To incorporate MediaPipe into an existing Android Studio project, see these
[instructions](./android_archive_library.md) that use Android Archive (AAR) and
Gradle.
The steps below use Android Studio 3.5 to build and install a MediaPipe example
app:
1. Install and launch Android Studio 3.5.
2. Select `Configure` -> `SDK Manager` -> `SDK Platforms`.
* Verify that Android SDK Platform API Level 28 or 29 is installed.
* Take note of the Android SDK Location, e.g.,
`/usr/local/home/Android/Sdk`.
3. Select `Configure` -> `SDK Manager` -> `SDK Tools`.
* Verify that Android SDK Build-Tools 28 or 29 is installed.
* Verify that Android SDK Platform-Tools 28 or 29 is installed.
* Verify that Android SDK Tools 26.1.1 is installed.
* Verify that Android NDK 19c or above is installed.
* Take note of the Android NDK Location, e.g.,
`/usr/local/home/Android/Sdk/ndk-bundle` or
`/usr/local/home/Android/Sdk/ndk/20.0.5594570`.
4. Set environment variables `$ANDROID_HOME` and `$ANDROID_NDK_HOME` to point
to the installed SDK and NDK.
```bash
export ANDROID_HOME=/usr/local/home/Android/Sdk
# If the NDK libraries are installed by a previous version of Android Studio, do
export ANDROID_NDK_HOME=/usr/local/home/Android/Sdk/ndk-bundle
# If the NDK libraries are installed by Android Studio 3.5, do
export ANDROID_NDK_HOME=/usr/local/home/Android/Sdk/ndk/<version number>
```
5. Select `Configure` -> `Plugins` to install `Bazel`.
6. On Linux, select `File` -> `Settings` -> `Bazel settings`. On macOS, select
`Android Studio` -> `Preferences` -> `Bazel settings`. Then, modify `Bazel
binary location` to be the same as the output of `$ which bazel`.
7. Select `Import Bazel Project`.
* Select `Workspace`: `/path/to/mediapipe` and select `Next`.
* Select `Generate from BUILD file`: `/path/to/mediapipe/BUILD` and select
`Next`.
* Modify `Project View` to be the following and select `Finish`.
```
directories:
# read project settings, e.g., .bazelrc
.
-mediapipe/objc
-mediapipe/examples/ios
targets:
//mediapipe/examples/android/...:all
//mediapipe/java/...:all
android_sdk_platform: android-29
sync_flags:
--host_crosstool_top=@bazel_tools//tools/cpp:toolchain
```
8. Select `Bazel` -> `Sync` -> `Sync project with Build files`.
Note: Even after doing step 4, if you still see the error: `"no such package
'@androidsdk//': Either the path attribute of android_sdk_repository or the
ANDROID_HOME environment variable must be set."`, please modify the
[`WORKSPACE`](https://github.com/google/mediapipe/blob/master/WORKSPACE)
file to point to your SDK and NDK library locations, as below:
```
android_sdk_repository(
name = "androidsdk",
path = "/path/to/android/sdk"
)
android_ndk_repository(
name = "androidndk",
path = "/path/to/android/ndk"
)
```
9. Connect an Android device to the workstation.
10. Select `Run...` -> `Edit Configurations...`.
* Select `Templates` -> `Bazel Command`.
* Enter Target Expression:
`//mediapipe/examples/android/src/java/com/google/mediapipe/apps/handtrackinggpu:handtrackinggpu`
* Enter Bazel command: `mobile-install`.
* Enter Bazel flags: `-c opt --config=android_arm64`.
* Press the `[+]` button to add the new configuration.
* Select `Run` to run the example app on the connected Android device.


@ -3,7 +3,7 @@ layout: default
title: MediaPipe Android Archive
parent: MediaPipe on Android
grand_parent: Getting Started
nav_order: 3
---
# MediaPipe Android Archive
@ -113,9 +113,9 @@ each project.
androidTestImplementation 'androidx.test.ext:junit:1.1.0'
androidTestImplementation 'androidx.test.espresso:espresso-core:3.1.1'
// MediaPipe deps
implementation 'com.google.flogger:flogger:latest.release'
implementation 'com.google.flogger:flogger-system-backend:latest.release'
implementation 'com.google.code.findbugs:jsr305:latest.release'
implementation 'com.google.guava:guava:27.0.1-android'
implementation 'com.google.protobuf:protobuf-java:3.11.4'
// CameraX core library


@ -0,0 +1,131 @@
---
layout: default
title: MediaPipe Android Solutions
parent: MediaPipe on Android
grand_parent: Getting Started
nav_order: 2
---
# MediaPipe Android Solutions
{: .no_toc }
1. TOC
{:toc}
---
MediaPipe Android Solution APIs (currently in alpha) are available in:
* [MediaPipe Face Detection](../solutions/face_detection#android-solution-api)
* [MediaPipe Face Mesh](../solutions/face_mesh#android-solution-api)
* [MediaPipe Hands](../solutions/hands#android-solution-api)
## Incorporation in Android Studio
Prebuilt packages of Android Solution APIs can be found in
[Google's Maven Repository](https://maven.google.com/web/index.html?#com.google.mediapipe).
To incorporate them into an Android Studio project, add the following into the
project's Gradle dependencies:
```
dependencies {
// MediaPipe solution-core is the foundation of any MediaPipe Solutions.
implementation 'com.google.mediapipe:solution-core:latest.release'
// Optional: MediaPipe Face Detection Solution.
implementation 'com.google.mediapipe:facedetection:latest.release'
// Optional: MediaPipe Face Mesh Solution.
implementation 'com.google.mediapipe:facemesh:latest.release'
// Optional: MediaPipe Hands Solution.
implementation 'com.google.mediapipe:hands:latest.release'
}
```
If you need further customization, instead of using the prebuilt Maven packages,
consider building a MediaPipe Android Archive (AAR) library locally from source
by following these [instructions](./android_archive_library.md).
## Building solution example apps
Detailed usage examples of the Android Solution APIs can be found in the
[source code](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/solutions)
of the solution example apps.
To build these apps:
1. Open Android Studio Arctic Fox on Linux, macOS, or Windows.
2. Import the `mediapipe/examples/android/solutions` directory into Android Studio.
![Screenshot](../images/import_mp_android_studio_project.png)
3. For Windows users, run `create_win_symlinks.bat` as administrator to create
res directory symlinks.
![Screenshot](../images/run_create_win_symlinks.png)
4. Select "File" -> "Sync Project with Gradle Files" to sync project.
5. Run solution example app in Android Studio.
![Screenshot](../images/run_android_solution_app.png)
6. (Optional) Run solutions on CPU.
MediaPipe solution example apps run the pipeline and model inference on GPU
by default. If needed, for example to run the apps on Android Emulator, set
the `RUN_ON_GPU` boolean variable to `false` in the app's
`MainActivity.java` to run the pipeline and model inference on CPU.
## MediaPipe Solution APIs Terms of Service
Last modified: November 12, 2021
Use of MediaPipe Solution APIs is subject to the
[Google APIs Terms of Service](https://developers.google.com/terms),
[Google API Services User Data Policy](https://developers.google.com/terms/api-services-user-data-policy),
and the terms below. Please check back from time to time as these terms and
policies are occasionally updated.
**Privacy**
When you use MediaPipe Solution APIs, processing of the input data (e.g. images,
video, text) fully happens on-device, and **MediaPipe does not send that input
data to Google servers**. As a result, you can use our APIs for processing data
that should not leave the device.
MediaPipe Android Solution APIs will contact Google servers from time to time in
order to receive things like bug fixes, updated models, and hardware accelerator
compatibility information. MediaPipe Android Solution APIs also send metrics
about the performance and utilization of the APIs in your app to Google. Google
uses this metrics data to measure performance, API usage, debug, maintain and
improve the APIs, and detect misuse or abuse, as further described in our
[Privacy Policy](https://policies.google.com/privacy).
**You are responsible for obtaining informed consent from your app users about
Google's processing of MediaPipe metrics data as required by applicable law.**
Data we collect may include the following, across all MediaPipe Android Solution
APIs:
- Device information (such as manufacturer, model, OS version and build) and
available ML hardware accelerators (GPU and DSP). Used for diagnostics and
usage analytics.
- App identification information (package name / bundle id, app version). Used
for diagnostics and usage analytics.
- API configuration (such as image format, resolution, and MediaPipe version
used). Used for diagnostics and usage analytics.
- Event type (such as initialize, download model, update, run, and detection).
Used for diagnostics and usage analytics.
- Error codes. Used for diagnostics.
- Performance metrics. Used for diagnostics.
- Per-installation identifiers that do not uniquely identify a user or
physical device. Used for operation of remote configuration and usage
analytics.
- Network request sender IP addresses. Used for remote configuration
diagnostics. Collected IP addresses are retained temporarily.


@ -103,7 +103,7 @@ monotonically increasing timestamps. By convention, realtime calculators and
graphs use the recording time or the presentation time as the timestamp for each
packet, with each timestamp representing microseconds since
`Jan/1/1970:00:00:00`. This allows packets from various sources to be processed
in a globally consistent order.
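As a rough illustration (a plain Python sketch, not a MediaPipe API; the helper name is made up), such a timestamp can be derived from the system clock as microseconds since the Unix epoch:
```python
import time

def capture_timestamp_us() -> int:
  # time.time() returns seconds since Jan/1/1970:00:00:00 as a float;
  # scale to microseconds and truncate to an integer packet timestamp.
  return int(time.time() * 1_000_000)

print(capture_timestamp_us())  # e.g. 1639146355000000
```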
Normally for offline processing, every input packet is processed and processing
continues as long as necessary. For online processing, it is often necessary to


@ -31,8 +31,8 @@ stream on an Android device.
## Setup
1. Install MediaPipe on your system, see
[MediaPipe installation guide](./install.md) for details.
2. Install Android Development SDK and Android NDK. See how to do so also in
[MediaPipe installation guide].
3. Enable [developer options] on your Android device.
@ -770,7 +770,6 @@ If you ran into any issues, please see the full code of the tutorial
[`ExternalTextureConverter`]:https://github.com/google/mediapipe/tree/master/mediapipe/java/com/google/mediapipe/components/ExternalTextureConverter.java
[`FrameLayout`]:https://developer.android.com/reference/android/widget/FrameLayout
[`FrameProcessor`]:https://github.com/google/mediapipe/tree/master/mediapipe/java/com/google/mediapipe/components/FrameProcessor.java
[MediaPipe installation guide]:./install.md
[`PermissionHelper`]: https://github.com/google/mediapipe/tree/master/mediapipe/java/com/google/mediapipe/components/PermissionHelper.java
[`SurfaceHolder.Callback`]:https://developer.android.com/reference/android/view/SurfaceHolder.Callback.html
[`SurfaceView`]:https://developer.android.com/reference/android/view/SurfaceView


@ -31,8 +31,8 @@ stream on an iOS device.
## Setup
1. Install MediaPipe on your system, see
[MediaPipe installation guide](./install.md) for details.
2. Setup your iOS device for development.
3. Setup [Bazel] on your system to build and deploy the iOS app.
@ -113,6 +113,10 @@ bazel to build the iOS application. The content of the
5. `Main.storyboard` and `Launch.storyboard`
6. `Assets.xcassets` directory.
Note: In newer versions of Xcode, you may see additional files `SceneDelegate.h`
and `SceneDelegate.m`. Make sure to copy them too and add them to the `BUILD`
file mentioned below.
Copy these files to a directory named `HelloWorld` in a location that can access
the MediaPipe source code. For example, the source code of the application that
we will build in this tutorial is located in
@ -247,6 +251,12 @@ We need to get frames from the `_cameraSource` into our application
`MPPInputSourceDelegate`. So our application `ViewController` can be a delegate
of `_cameraSource`.
Update the interface definition of `ViewController` accordingly:
```
@interface ViewController () <MPPInputSourceDelegate>
```
To handle camera setup and process incoming frames, we should use a queue
different from the main queue. Add the following to the implementation block of
the `ViewController`:
@ -288,6 +298,12 @@ utility called `MPPLayerRenderer` to display images on the screen. This utility
can be used to display `CVPixelBufferRef` objects, which is the type of the
images provided by `MPPCameraInputSource` to its delegates.
In `ViewController.m`, add the following import line:
```
#import "mediapipe/objc/MPPLayerRenderer.h"
```
To display images on the screen, we need to add a new `UIView` object called
`_liveView` to the `ViewController`.
@ -411,6 +427,12 @@ Objective-C++.
### Use the graph in `ViewController`
In `ViewController.m`, add the following import line:
```
#import "mediapipe/objc/MPPGraph.h"
```
Declare a static constant with the name of the graph, the input stream and the
output stream:
@ -549,6 +571,12 @@ method to receive packets on this output stream and display them on the screen:
}
```
Update the interface definition of `ViewController` with `MPPGraphDelegate`:
```
@interface ViewController () <MPPGraphDelegate, MPPInputSourceDelegate>
```
And that is all! Build and run the app on your iOS device. You should see the
results of running the edge detection graph on a live video feed. Congrats!
@ -560,6 +588,5 @@ appropriate `BUILD` file dependencies for the edge detection graph.
[Bazel]:https://bazel.build/
[`edge_detection_mobile_gpu.pbtxt`]:https://github.com/google/mediapipe/tree/master/mediapipe/graphs/edge_detection/edge_detection_mobile_gpu.pbtxt
[common]:https://github.com/google/mediapipe/tree/master/mediapipe/examples/ios/common
[helloworld]:https://github.com/google/mediapipe/tree/master/mediapipe/examples/ios/helloworld


@ -43,104 +43,189 @@ install --user six`.
3. Install OpenCV and FFmpeg.
**Option 1**. Use package manager tool to install the pre-compiled OpenCV
libraries. FFmpeg will be installed via `libopencv-video-dev`.
OS                   | OpenCV
-------------------- | ------
Debian 9 (stretch)   | 2.4
Debian 10 (buster)   | 3.2
Debian 11 (bullseye) | 4.5
Ubuntu 16.04 LTS     | 2.4
Ubuntu 18.04 LTS     | 3.2
Ubuntu 20.04 LTS     | 4.2
Ubuntu 21.04         | 4.5
```bash
$ sudo apt-get install -y \
    libopencv-core-dev \
    libopencv-highgui-dev \
    libopencv-calib3d-dev \
    libopencv-features2d-dev \
    libopencv-imgproc-dev \
    libopencv-video-dev
```
MediaPipe's [`opencv_linux.BUILD`] and [`WORKSPACE`] are already configured
for OpenCV 2/3 and should work correctly on any architecture:
```bash
# WORKSPACE
new_local_repository(
  name = "linux_opencv",
  build_file = "@//third_party:opencv_linux.BUILD",
  path = "/usr",
)
# opencv_linux.BUILD for OpenCV 2/3 installed from Debian package
cc_library(
  name = "opencv",
  linkopts = [
    "-l:libopencv_core.so",
    "-l:libopencv_calib3d.so",
    "-l:libopencv_features2d.so",
    "-l:libopencv_highgui.so",
    "-l:libopencv_imgcodecs.so",
    "-l:libopencv_imgproc.so",
    "-l:libopencv_video.so",
    "-l:libopencv_videoio.so",
  ],
)
```
For OpenCV 4 you need to modify [`opencv_linux.BUILD`] taking into account
current architecture:
```bash
# WORKSPACE
new_local_repository(
  name = "linux_opencv",
  build_file = "@//third_party:opencv_linux.BUILD",
  path = "/usr",
)
# opencv_linux.BUILD for OpenCV 4 installed from Debian package
cc_library(
  name = "opencv",
  hdrs = glob([
    # Uncomment according to your multiarch value (gcc -print-multiarch):
    #  "include/aarch64-linux-gnu/opencv4/opencv2/cvconfig.h",
    #  "include/arm-linux-gnueabihf/opencv4/opencv2/cvconfig.h",
    #  "include/x86_64-linux-gnu/opencv4/opencv2/cvconfig.h",
    "include/opencv4/opencv2/**/*.h*",
  ]),
  includes = [
    # Uncomment according to your multiarch value (gcc -print-multiarch):
    #  "include/aarch64-linux-gnu/opencv4/",
    #  "include/arm-linux-gnueabihf/opencv4/",
    #  "include/x86_64-linux-gnu/opencv4/",
    "include/opencv4/",
  ],
  linkopts = [
    "-l:libopencv_core.so",
    "-l:libopencv_calib3d.so",
    "-l:libopencv_features2d.so",
    "-l:libopencv_highgui.so",
    "-l:libopencv_imgcodecs.so",
    "-l:libopencv_imgproc.so",
    "-l:libopencv_video.so",
    "-l:libopencv_videoio.so",
  ],
)
```
**Option 2**. Run [`setup_opencv.sh`] to automatically build OpenCV from
source and modify MediaPipe's OpenCV config. This option will do all steps
defined in Option 3 automatically.
**Option 3**. Follow OpenCV's
[documentation](https://docs.opencv.org/3.4.6/d7/d9f/tutorial_linux_install.html)
to manually build OpenCV from source code.
You may need to modify [`WORKSPACE`] and [`opencv_linux.BUILD`] to point
MediaPipe to your own OpenCV libraries. Assume OpenCV would be installed to
`/usr/local/` which is recommended by default.
OpenCV 2/3 setup:
```bash
# WORKSPACE
new_local_repository(
  name = "linux_opencv",
  build_file = "@//third_party:opencv_linux.BUILD",
  path = "/usr/local",
)
# opencv_linux.BUILD for OpenCV 2/3 installed to /usr/local
cc_library(
  name = "opencv",
  linkopts = [
    "-L/usr/local/lib",
    "-l:libopencv_core.so",
    "-l:libopencv_calib3d.so",
    "-l:libopencv_features2d.so",
    "-l:libopencv_highgui.so",
    "-l:libopencv_imgcodecs.so",
    "-l:libopencv_imgproc.so",
    "-l:libopencv_video.so",
    "-l:libopencv_videoio.so",
  ],
)
```
OpenCV 4 setup:
```bash
# WORKSPACE
new_local_repository(
  name = "linux_opencv",
  build_file = "@//third_party:opencv_linux.BUILD",
  path = "/usr/local",
)
# opencv_linux.BUILD for OpenCV 4 installed to /usr/local
cc_library(
  name = "opencv",
  hdrs = glob([
    "include/opencv4/opencv2/**/*.h*",
  ]),
  includes = [
    "include/opencv4/",
  ],
  linkopts = [
    "-L/usr/local/lib",
    "-l:libopencv_core.so",
    "-l:libopencv_calib3d.so",
    "-l:libopencv_features2d.so",
    "-l:libopencv_highgui.so",
    "-l:libopencv_imgcodecs.so",
    "-l:libopencv_imgproc.so",
    "-l:libopencv_video.so",
    "-l:libopencv_videoio.so",
  ],
)
```
Current FFmpeg setup is defined in [`ffmpeg_linux.BUILD`] and should work
for any architecture:
```bash
# WORKSPACE
new_local_repository(
  name = "linux_ffmpeg",
  build_file = "@//third_party:ffmpeg_linux.BUILD",
  path = "/usr"
)
# ffmpeg_linux.BUILD for FFmpeg installed from Debian package
cc_library(
  name = "libffmpeg",
  linkopts = [
    "-l:libavcodec.so",
    "-l:libavformat.so",
    "-l:libavutil.so",
  ],
)
```
@ -711,7 +796,7 @@ This will use a Docker image that will isolate mediapipe's installation from the
```bash
$ docker run -it --name mediapipe mediapipe:latest
root@bca08b91ff63:/mediapipe# GLOG_logtostderr=1 bazelisk run --define MEDIAPIPE_DISABLE_GPU=1 mediapipe/examples/desktop/hello_world:hello_world
# Should print:
# Hello World!


@ -22,12 +22,23 @@ Solution | NPM Package | Example
[Face Detection][Fd-pg] | [@mediapipe/face_detection][Fd-npm] | [mediapipe.dev/demo/face_detection][Fd-demo]
[Hands][H-pg] | [@mediapipe/hands][H-npm] | [mediapipe.dev/demo/hands][H-demo]
[Holistic][Ho-pg] | [@mediapipe/holistic][Ho-npm] | [mediapipe.dev/demo/holistic][Ho-demo]
[Objectron][Ob-pg] | [@mediapipe/objectron][Ob-npm] | [mediapipe.dev/demo/objectron][Ob-demo]
[Pose][P-pg] | [@mediapipe/pose][P-npm] | [mediapipe.dev/demo/pose][P-demo]
[Selfie Segmentation][S-pg] | [@mediapipe/selfie_segmentation][S-npm] | [mediapipe.dev/demo/selfie_segmentation][S-demo]
Click on a solution link above for more information, including API and code
snippets.
### Supported platforms:
| Browser | Platform                | Notes                                                |
| ------- | ----------------------- | ---------------------------------------------------- |
| Chrome  | Android / Windows / Mac | Pixel 4 and older unsupported. Fuchsia unsupported.  |
| Chrome  | iOS                     | Camera unavailable in Chrome on iOS.                 |
| Safari  | iPad/iPhone/Mac         | iOS and Safari on iPad / iPhone / MacBook            |
The quickest way to get acclimated is to look at the examples above. Each demo
has a link to a [CodePen][codepen] so that you can edit the code and try it
yourself. We have included a number of utility packages to help you get started:
@ -67,33 +78,24 @@ affecting your work, restrict your request to a `<minor>` number. e.g.,
[F-pg]: ../solutions/face_mesh#javascript-solution-api
[Fd-pg]: ../solutions/face_detection#javascript-solution-api
[H-pg]: ../solutions/hands#javascript-solution-api
[Ob-pg]: ../solutions/objectron#javascript-solution-api
[P-pg]: ../solutions/pose#javascript-solution-api
[S-pg]: ../solutions/selfie_segmentation#javascript-solution-api
[Ho-npm]: https://www.npmjs.com/package/@mediapipe/holistic
[F-npm]: https://www.npmjs.com/package/@mediapipe/face_mesh
[Fd-npm]: https://www.npmjs.com/package/@mediapipe/face_detection
[H-npm]: https://www.npmjs.com/package/@mediapipe/hands
[Ob-npm]: https://www.npmjs.com/package/@mediapipe/objectron
[P-npm]: https://www.npmjs.com/package/@mediapipe/pose
[S-npm]: https://www.npmjs.com/package/@mediapipe/selfie_segmentation
[draw-npm]: https://www.npmjs.com/package/@mediapipe/drawing_utils
[cam-npm]: https://www.npmjs.com/package/@mediapipe/camera_utils
[ctrl-npm]: https://www.npmjs.com/package/@mediapipe/control_utils
[Ho-jsd]: https://www.jsdelivr.com/package/npm/@mediapipe/holistic
[F-jsd]: https://www.jsdelivr.com/package/npm/@mediapipe/face_mesh
[Fd-jsd]: https://www.jsdelivr.com/package/npm/@mediapipe/face_detection
[H-jsd]: https://www.jsdelivr.com/package/npm/@mediapipe/hands
[P-jsd]: https://www.jsdelivr.com/package/npm/@mediapipe/pose
[P-jsd]: https://www.jsdelivr.com/package/npm/@mediapipe/selfie_segmentation
[Ho-pen]: https://code.mediapipe.dev/codepen/holistic
[F-pen]: https://code.mediapipe.dev/codepen/face_mesh
[Fd-pen]: https://code.mediapipe.dev/codepen/face_detection
[H-pen]: https://code.mediapipe.dev/codepen/hands
[P-pen]: https://code.mediapipe.dev/codepen/pose
[S-pen]: https://code.mediapipe.dev/codepen/selfie_segmentation
[Ho-demo]: https://mediapipe.dev/demo/holistic
[F-demo]: https://mediapipe.dev/demo/face_mesh
[Fd-demo]: https://mediapipe.dev/demo/face_detection
[H-demo]: https://mediapipe.dev/demo/hands
[Ob-demo]: https://mediapipe.dev/demo/objectron
[P-demo]: https://mediapipe.dev/demo/pose
[S-demo]: https://mediapipe.dev/demo/selfie_segmentation
[npm]: https://www.npmjs.com/package/@mediapipe


@ -74,7 +74,7 @@ Mapping\[str, Packet\] | std::map<std::string, Packet> | create_st
np.ndarray<br>(cv.mat and PIL.Image) | mp::ImageFrame | create_image_frame(<br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;format=ImageFormat.SRGB,<br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;data=mat) | get_image_frame(packet)
np.ndarray | mp::Matrix | create_matrix(data) | get_matrix(packet)
Google Proto Message | Google Proto Message | create_proto(proto) | get_proto(packet)
List\[Proto\] | std::vector\<Proto\> | n/a | get_proto_list(packet)
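For example, a minimal sketch of the `create_matrix`/`get_matrix` row above (assuming the `mediapipe` Python package is installed and exposes the `packet_creator`/`packet_getter` helpers described in this table):
```python
import mediapipe as mp
import numpy as np

# Wrap a NumPy array in an mp::Matrix packet and read it back as np.ndarray.
data = np.array([[1.0, 2.0], [3.0, 4.0]])
packet = mp.packet_creator.create_matrix(data)
matrix = mp.packet_getter.get_matrix(packet)
print(matrix)
```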
It's not uncommon that users create custom C++ classes and send those into
the graphs and calculators. To allow the custom classes to be used in Python

Binary file not shown.


Binary file not shown.


Binary file not shown.

Binary file not shown.


Binary file not shown.


Binary file not shown.



@ -45,7 +45,7 @@ Hair Segmentation
[Object Detection](https://google.github.io/mediapipe/solutions/object_detection) | ✅ | ✅ | ✅ | | | ✅
[Box Tracking](https://google.github.io/mediapipe/solutions/box_tracking) | ✅ | ✅ | ✅ | | |
[Instant Motion Tracking](https://google.github.io/mediapipe/solutions/instant_motion_tracking) | ✅ | | | | |
[Objectron](https://google.github.io/mediapipe/solutions/objectron) | ✅ | | ✅ | ✅ | |
[KNIFT](https://google.github.io/mediapipe/solutions/knift) | ✅ | | | | |
[AutoFlip](https://google.github.io/mediapipe/solutions/autoflip) | | | ✅ | | |
[MediaSequence](https://google.github.io/mediapipe/solutions/media_sequence) | | | ✅ | | |
@ -79,6 +79,13 @@ run code search using
## Publications
* [Bringing artworks to life with AR](https://developers.googleblog.com/2021/07/bringing-artworks-to-life-with-ar.html)
in Google Developers Blog
* [Prosthesis control via Mirru App using MediaPipe hand tracking](https://developers.googleblog.com/2021/05/control-your-mirru-prosthesis-with-mediapipe-hand-tracking.html)
in Google Developers Blog
* [SignAll SDK: Sign language interface using MediaPipe is now available for
developers](https://developers.googleblog.com/2021/04/signall-sdk-sign-language-interface-using-mediapipe-now-available.html)
in Google Developers Blog
* [MediaPipe Holistic - Simultaneous Face, Hand and Pose Prediction, on Device](https://ai.googleblog.com/2020/12/mediapipe-holistic-simultaneous-face.html)
in Google AI Blog
* [Background Features in Google Meet, Powered by Web ML](https://ai.googleblog.com/2020/10/background-features-in-google-meet.html)


@ -121,12 +121,10 @@ with mp_face_detection.FaceDetection(
# If loading a video, use 'break' instead of 'continue'.
continue
# Flip the image horizontally for a later selfie-view display, and convert
# the BGR image to RGB.
image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)
# To improve performance, optionally mark the image as not writeable to
# pass by reference.
image.flags.writeable = False
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
results = face_detection.process(image)
# Draw the face detection annotations on the image.
@ -135,7 +133,8 @@ with mp_face_detection.FaceDetection(
if results.detections:
for detection in results.detections:
mp_drawing.draw_detection(image, detection)
# Flip the image horizontally for a selfie-view display.
cv2.imshow('MediaPipe Face Detection', cv2.flip(image, 1))
if cv2.waitKey(5) & 0xFF == 27:
break
cap.release()
@ -200,7 +199,7 @@ const faceDetection = new FaceDetection({locateFile: (file) => {
return `https://cdn.jsdelivr.net/npm/@mediapipe/face_detection@0.0/${file}`;
}});
faceDetection.setOptions({
modelSelection: 0,
minDetectionConfidence: 0.5
});
faceDetection.onResults(onResults);
@ -216,6 +215,214 @@ camera.start();
</script>
```
### Android Solution API
Please first follow general
[instructions](../getting_started/android_solutions.md) to add MediaPipe Gradle
dependencies and try the Android Solution API in the companion
[example Android Studio project](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/solutions/facedetection),
and learn more in the usage example below.
Supported configuration options:
* [staticImageMode](#static_image_mode)
* [modelSelection](#model_selection)
#### Camera Input
```java
// For camera input and result rendering with OpenGL.
FaceDetectionOptions faceDetectionOptions =
FaceDetectionOptions.builder()
.setStaticImageMode(false)
.setModelSelection(0).build();
FaceDetection faceDetection = new FaceDetection(this, faceDetectionOptions);
faceDetection.setErrorListener(
(message, e) -> Log.e(TAG, "MediaPipe Face Detection error:" + message));
// Initializes a new CameraInput instance and connects it to MediaPipe Face Detection Solution.
CameraInput cameraInput = new CameraInput(this);
cameraInput.setNewFrameListener(
textureFrame -> faceDetection.send(textureFrame));
// Initializes a new GlSurfaceView with a ResultGlRenderer<FaceDetectionResult> instance
// that provides the interfaces to run user-defined OpenGL rendering code.
// See mediapipe/examples/android/solutions/facedetection/src/main/java/com/google/mediapipe/examples/facedetection/FaceDetectionResultGlRenderer.java
// as an example.
SolutionGlSurfaceView<FaceDetectionResult> glSurfaceView =
new SolutionGlSurfaceView<>(
this, faceDetection.getGlContext(), faceDetection.getGlMajorVersion());
glSurfaceView.setSolutionResultRenderer(new FaceDetectionResultGlRenderer());
glSurfaceView.setRenderInputImage(true);
faceDetection.setResultListener(
faceDetectionResult -> {
if (faceDetectionResult.multiFaceDetections().isEmpty()) {
return;
}
RelativeKeypoint noseTip =
faceDetectionResult
.multiFaceDetections()
.get(0)
.getLocationData()
.getRelativeKeypoints(FaceKeypoint.NOSE_TIP);
Log.i(
TAG,
String.format(
"MediaPipe Face Detection nose tip normalized coordinates (value range: [0, 1]): x=%f, y=%f",
noseTip.getX(), noseTip.getY()));
// Request GL rendering.
glSurfaceView.setRenderData(faceDetectionResult);
glSurfaceView.requestRender();
});
// The runnable to start camera after the GLSurfaceView is attached.
glSurfaceView.post(
() ->
cameraInput.start(
this,
faceDetection.getGlContext(),
CameraInput.CameraFacing.FRONT,
glSurfaceView.getWidth(),
glSurfaceView.getHeight()));
```
#### Image Input
```java
// For reading images from gallery and drawing the output in an ImageView.
FaceDetectionOptions faceDetectionOptions =
FaceDetectionOptions.builder()
.setStaticImageMode(true)
.setModelSelection(0).build();
FaceDetection faceDetection = new FaceDetection(this, faceDetectionOptions);
// Connects MediaPipe Face Detection Solution to the user-defined ImageView
// instance that allows users to have the custom drawing of the output landmarks
// on it. See mediapipe/examples/android/solutions/facedetection/src/main/java/com/google/mediapipe/examples/facedetection/FaceDetectionResultImageView.java
// as an example.
FaceDetectionResultImageView imageView = new FaceDetectionResultImageView(this);
faceDetection.setResultListener(
faceDetectionResult -> {
if (faceDetectionResult.multiFaceDetections().isEmpty()) {
return;
}
int width = faceDetectionResult.inputBitmap().getWidth();
int height = faceDetectionResult.inputBitmap().getHeight();
RelativeKeypoint noseTip =
faceDetectionResult
.multiFaceDetections()
.get(0)
.getLocationData()
.getRelativeKeypoints(FaceKeypoint.NOSE_TIP);
Log.i(
TAG,
String.format(
"MediaPipe Face Detection nose tip coordinates (pixel values): x=%f, y=%f",
noseTip.getX() * width, noseTip.getY() * height));
// Request canvas drawing.
imageView.setFaceDetectionResult(faceDetectionResult);
runOnUiThread(() -> imageView.update());
});
faceDetection.setErrorListener(
(message, e) -> Log.e(TAG, "MediaPipe Face Detection error:" + message));
// ActivityResultLauncher to get an image from the gallery as Bitmap.
ActivityResultLauncher<Intent> imageGetter =
registerForActivityResult(
new ActivityResultContracts.StartActivityForResult(),
result -> {
Intent resultIntent = result.getData();
if (resultIntent != null && result.getResultCode() == RESULT_OK) {
Bitmap bitmap = null;
try {
bitmap =
MediaStore.Images.Media.getBitmap(
this.getContentResolver(), resultIntent.getData());
// Please also rotate the Bitmap based on its orientation.
} catch (IOException e) {
Log.e(TAG, "Bitmap reading error:" + e);
}
if (bitmap != null) {
faceDetection.send(bitmap);
}
}
});
Intent pickImageIntent = new Intent(Intent.ACTION_PICK);
pickImageIntent.setDataAndType(MediaStore.Images.Media.INTERNAL_CONTENT_URI, "image/*");
imageGetter.launch(pickImageIntent);
```
#### Video Input
```java
// For video input and result rendering with OpenGL.
FaceDetectionOptions faceDetectionOptions =
FaceDetectionOptions.builder()
.setStaticImageMode(false)
.setModelSelection(0).build();
FaceDetection faceDetection = new FaceDetection(this, faceDetectionOptions);
faceDetection.setErrorListener(
(message, e) -> Log.e(TAG, "MediaPipe Face Detection error:" + message));
// Initializes a new VideoInput instance and connects it to MediaPipe Face Detection Solution.
VideoInput videoInput = new VideoInput(this);
videoInput.setNewFrameListener(
textureFrame -> faceDetection.send(textureFrame));
// Initializes a new GlSurfaceView with a ResultGlRenderer<FaceDetectionResult> instance
// that provides the interfaces to run user-defined OpenGL rendering code.
// See mediapipe/examples/android/solutions/facedetection/src/main/java/com/google/mediapipe/examples/facedetection/FaceDetectionResultGlRenderer.java
// as an example.
SolutionGlSurfaceView<FaceDetectionResult> glSurfaceView =
new SolutionGlSurfaceView<>(
this, faceDetection.getGlContext(), faceDetection.getGlMajorVersion());
glSurfaceView.setSolutionResultRenderer(new FaceDetectionResultGlRenderer());
glSurfaceView.setRenderInputImage(true);
faceDetection.setResultListener(
faceDetectionResult -> {
if (faceDetectionResult.multiFaceDetections().isEmpty()) {
return;
}
RelativeKeypoint noseTip =
faceDetectionResult
.multiFaceDetections()
.get(0)
.getLocationData()
.getRelativeKeypoints(FaceKeypoint.NOSE_TIP);
Log.i(
TAG,
String.format(
"MediaPipe Face Detection nose tip normalized coordinates (value range: [0, 1]): x=%f, y=%f",
noseTip.getX(), noseTip.getY()));
// Request GL rendering.
glSurfaceView.setRenderData(faceDetectionResult);
glSurfaceView.requestRender();
});
ActivityResultLauncher<Intent> videoGetter =
registerForActivityResult(
new ActivityResultContracts.StartActivityForResult(),
result -> {
Intent resultIntent = result.getData();
if (resultIntent != null) {
if (result.getResultCode() == RESULT_OK) {
glSurfaceView.post(
() ->
videoInput.start(
this,
resultIntent.getData(),
faceDetection.getGlContext(),
glSurfaceView.getWidth(),
glSurfaceView.getHeight()));
}
}
});
Intent pickVideoIntent = new Intent(Intent.ACTION_PICK);
pickVideoIntent.setDataAndType(MediaStore.Video.Media.INTERNAL_CONTENT_URI, "video/*");
videoGetter.launch(pickVideoIntent);
```
## Example Apps
Please first see general instructions for


@ -111,6 +111,23 @@ You can find more information about the face landmark model in this
:------------------------------------------------------------------------: |
*Fig 2. Face landmarks: the red box indicates the cropped area as input to the landmark model, the red dots represent the 468 landmarks in 3D, and the green lines connecting landmarks illustrate the contours around the eyes, eyebrows, lips and the entire face.* |
#### Attention Mesh Model
In addition to the [Face Landmark Model](#face-landmark-model), we provide
another model that applies
[attention](https://en.wikipedia.org/wiki/Attention_(machine_learning)) to
semantically meaningful face regions, and therefore predicts landmarks more
accurately around the lips, eyes and irises, at the expense of more compute. It
enables applications like AR makeup and AR puppeteering.
The attention mesh model can be selected in the Solution APIs via the
[refine_landmarks](#refine_landmarks) option. You can also find more information
about the model in this [paper](https://arxiv.org/abs/2006.10962).
![attention_mesh_architecture.png](../images/attention_mesh_architecture.png) |
:---------------------------------------------------------------------------: |
*Fig 3. Attention Mesh: Overview of model architecture.* |
## Face Geometry Module

The [Face Landmark Model](#face-landmark-model) performs a single-camera face landmark
@ -145,8 +162,8 @@ be set freely, however for better results it is advised to set them as close to
the *real physical camera parameters* as possible.
![face_geometry_metric_3d_space.gif](../images/face_geometry_metric_3d_space.gif) |
:--------------------------------------------------------------------------------: |
*Fig 4. A visualization of multiple key elements in the Metric 3D space.* |
#### Canonical Face Model
@ -210,7 +227,7 @@ The effect renderer is implemented as a MediaPipe
| ![face_geometry_renderer.gif](../images/face_geometry_renderer.gif) |
| :---------------------------------------------------------------------------------: |
| *Fig 5. An example of face effects rendered by the Face Geometry Effect Renderer.* |
## Solution APIs
@ -234,6 +251,12 @@ unrelated, images. Default to `false`.
Maximum number of faces to detect. Default to `1`.
#### refine_landmarks
Whether to further refine the landmark coordinates around the eyes and lips, and
output additional landmarks around the irises by applying the
[Attention Mesh Model](#attention-mesh-model). Default to `false`.
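With the Python Solution API, for instance, this option is simply passed to the
`FaceMesh` constructor. A minimal sketch (the full examples below show the
complete processing loop):

```python
import mediapipe as mp

# Enabling refine_landmarks selects the attention mesh variant, which adds
# iris landmarks and refines the landmarks around the eyes and lips.
with mp.solutions.face_mesh.FaceMesh(
    static_image_mode=True,
    max_num_faces=1,
    refine_landmarks=True) as face_mesh:
  pass  # Call face_mesh.process(rgb_image) on an RGB image array.
```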
#### min_detection_confidence

Minimum confidence value (`[0.0, 1.0]`) from the face detection model for the
@ -271,6 +294,7 @@ Supported configuration options:
* [static_image_mode](#static_image_mode)
* [max_num_faces](#max_num_faces)
* [refine_landmarks](#refine_landmarks)
* [min_detection_confidence](#min_detection_confidence)
* [min_tracking_confidence](#min_tracking_confidence)
@ -278,6 +302,7 @@ Supported configuration options:
import cv2
import mediapipe as mp
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_face_mesh = mp.solutions.face_mesh

# For static images:
@ -286,6 +311,7 @@ drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1)
with mp_face_mesh.FaceMesh(
    static_image_mode=True,
    max_num_faces=1,
    refine_landmarks=True,
    min_detection_confidence=0.5) as face_mesh:
  for idx, file in enumerate(IMAGE_FILES):
    image = cv2.imread(file)
@ -301,15 +327,32 @@ with mp_face_mesh.FaceMesh(
      mp_drawing.draw_landmarks(
          image=annotated_image,
          landmark_list=face_landmarks,
          connections=mp_face_mesh.FACEMESH_TESSELATION,
          landmark_drawing_spec=None,
          connection_drawing_spec=mp_drawing_styles
          .get_default_face_mesh_tesselation_style())
      mp_drawing.draw_landmarks(
          image=annotated_image,
          landmark_list=face_landmarks,
          connections=mp_face_mesh.FACEMESH_CONTOURS,
          landmark_drawing_spec=None,
          connection_drawing_spec=mp_drawing_styles
          .get_default_face_mesh_contours_style())
      mp_drawing.draw_landmarks(
          image=annotated_image,
          landmark_list=face_landmarks,
          connections=mp_face_mesh.FACEMESH_IRISES,
          landmark_drawing_spec=None,
          connection_drawing_spec=mp_drawing_styles
          .get_default_face_mesh_iris_connections_style())
    cv2.imwrite('/tmp/annotated_image' + str(idx) + '.png', annotated_image)
# For webcam input:
drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1)
cap = cv2.VideoCapture(0)
with mp_face_mesh.FaceMesh(
    max_num_faces=1,
    refine_landmarks=True,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5) as face_mesh:
  while cap.isOpened():
@ -319,12 +362,10 @@ with mp_face_mesh.FaceMesh(
      # If loading a video, use 'break' instead of 'continue'.
      continue

    # To improve performance, optionally mark the image as not writeable to
    # pass by reference.
    image.flags.writeable = False
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = face_mesh.process(image)

    # Draw the face mesh annotations on the image.
@ -335,10 +376,26 @@ with mp_face_mesh.FaceMesh(
        mp_drawing.draw_landmarks(
            image=image,
            landmark_list=face_landmarks,
            connections=mp_face_mesh.FACEMESH_TESSELATION,
            landmark_drawing_spec=None,
            connection_drawing_spec=mp_drawing_styles
            .get_default_face_mesh_tesselation_style())
        mp_drawing.draw_landmarks(
            image=image,
            landmark_list=face_landmarks,
            connections=mp_face_mesh.FACEMESH_CONTOURS,
            landmark_drawing_spec=None,
            connection_drawing_spec=mp_drawing_styles
            .get_default_face_mesh_contours_style())
        mp_drawing.draw_landmarks(
            image=image,
            landmark_list=face_landmarks,
            connections=mp_face_mesh.FACEMESH_IRISES,
            landmark_drawing_spec=None,
            connection_drawing_spec=mp_drawing_styles
            .get_default_face_mesh_iris_connections_style())
    # Flip the image horizontally for a selfie-view display.
    cv2.imshow('MediaPipe Face Mesh', cv2.flip(image, 1))
    if cv2.waitKey(5) & 0xFF == 27:
      break
cap.release()
@ -353,6 +410,7 @@ and the following usage example.
Supported configuration options:

* [maxNumFaces](#max_num_faces)
* [refineLandmarks](#refine_landmarks)
* [minDetectionConfidence](#min_detection_confidence)
* [minTrackingConfidence](#min_tracking_confidence)
@ -393,8 +451,10 @@ function onResults(results) {
                     {color: '#C0C0C070', lineWidth: 1});
      drawConnectors(canvasCtx, landmarks, FACEMESH_RIGHT_EYE, {color: '#FF3030'});
      drawConnectors(canvasCtx, landmarks, FACEMESH_RIGHT_EYEBROW, {color: '#FF3030'});
      drawConnectors(canvasCtx, landmarks, FACEMESH_RIGHT_IRIS, {color: '#FF3030'});
      drawConnectors(canvasCtx, landmarks, FACEMESH_LEFT_EYE, {color: '#30FF30'});
      drawConnectors(canvasCtx, landmarks, FACEMESH_LEFT_EYEBROW, {color: '#30FF30'});
      drawConnectors(canvasCtx, landmarks, FACEMESH_LEFT_IRIS, {color: '#30FF30'});
      drawConnectors(canvasCtx, landmarks, FACEMESH_FACE_OVAL, {color: '#E0E0E0'});
      drawConnectors(canvasCtx, landmarks, FACEMESH_LIPS, {color: '#E0E0E0'});
    }
@ -407,6 +467,7 @@ const faceMesh = new FaceMesh({locateFile: (file) => {
}});
faceMesh.setOptions({
  maxNumFaces: 1,
  refineLandmarks: true,
  minDetectionConfidence: 0.5,
  minTrackingConfidence: 0.5
});
@ -423,6 +484,202 @@ camera.start();
</script>
```
### Android Solution API
Please first follow general
[instructions](../getting_started/android_solutions.md) to add MediaPipe Gradle
dependencies and try the Android Solution API in the companion
[example Android Studio project](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/solutions/facemesh),
and learn more in the usage example below.
Supported configuration options:
* [staticImageMode](#static_image_mode)
* [maxNumFaces](#max_num_faces)
* [refineLandmarks](#refine_landmarks)
* runOnGpu: Run the pipeline and the model inference on GPU or CPU.
#### Camera Input
```java
// For camera input and result rendering with OpenGL.
FaceMeshOptions faceMeshOptions =
FaceMeshOptions.builder()
.setStaticImageMode(false)
.setRefineLandmarks(true)
.setMaxNumFaces(1)
.setRunOnGpu(true).build();
FaceMesh faceMesh = new FaceMesh(this, faceMeshOptions);
faceMesh.setErrorListener(
(message, e) -> Log.e(TAG, "MediaPipe Face Mesh error:" + message));
// Initializes a new CameraInput instance and connects it to MediaPipe Face Mesh Solution.
CameraInput cameraInput = new CameraInput(this);
cameraInput.setNewFrameListener(
textureFrame -> faceMesh.send(textureFrame));
// Initializes a new GlSurfaceView with a ResultGlRenderer<FaceMeshResult> instance
// that provides the interfaces to run user-defined OpenGL rendering code.
// See mediapipe/examples/android/solutions/facemesh/src/main/java/com/google/mediapipe/examples/facemesh/FaceMeshResultGlRenderer.java
// as an example.
SolutionGlSurfaceView<FaceMeshResult> glSurfaceView =
new SolutionGlSurfaceView<>(
this, faceMesh.getGlContext(), faceMesh.getGlMajorVersion());
glSurfaceView.setSolutionResultRenderer(new FaceMeshResultGlRenderer());
glSurfaceView.setRenderInputImage(true);
faceMesh.setResultListener(
    faceMeshResult -> {
      if (faceMeshResult.multiFaceLandmarks().isEmpty()) {
        return;
      }
      NormalizedLandmark noseLandmark =
          faceMeshResult.multiFaceLandmarks().get(0).getLandmarkList().get(1);
Log.i(
TAG,
String.format(
"MediaPipe Face Mesh nose normalized coordinates (value range: [0, 1]): x=%f, y=%f",
noseLandmark.getX(), noseLandmark.getY()));
// Request GL rendering.
glSurfaceView.setRenderData(faceMeshResult);
glSurfaceView.requestRender();
});
// The runnable to start camera after the GLSurfaceView is attached.
glSurfaceView.post(
() ->
cameraInput.start(
this,
faceMesh.getGlContext(),
CameraInput.CameraFacing.FRONT,
glSurfaceView.getWidth(),
glSurfaceView.getHeight()));
```
#### Image Input
```java
// For reading images from gallery and drawing the output in an ImageView.
FaceMeshOptions faceMeshOptions =
FaceMeshOptions.builder()
.setStaticImageMode(true)
.setRefineLandmarks(true)
.setMaxNumFaces(1)
.setRunOnGpu(true).build();
FaceMesh faceMesh = new FaceMesh(this, faceMeshOptions);
// Connects MediaPipe Face Mesh Solution to the user-defined ImageView instance
// that allows users to have the custom drawing of the output landmarks on it.
// See mediapipe/examples/android/solutions/facemesh/src/main/java/com/google/mediapipe/examples/facemesh/FaceMeshResultImageView.java
// as an example.
FaceMeshResultImageView imageView = new FaceMeshResultImageView(this);
faceMesh.setResultListener(
    faceMeshResult -> {
      if (faceMeshResult.multiFaceLandmarks().isEmpty()) {
        return;
      }
      int width = faceMeshResult.inputBitmap().getWidth();
      int height = faceMeshResult.inputBitmap().getHeight();
      NormalizedLandmark noseLandmark =
          faceMeshResult.multiFaceLandmarks().get(0).getLandmarkList().get(1);
Log.i(
TAG,
String.format(
"MediaPipe Face Mesh nose coordinates (pixel values): x=%f, y=%f",
noseLandmark.getX() * width, noseLandmark.getY() * height));
// Request canvas drawing.
imageView.setFaceMeshResult(faceMeshResult);
runOnUiThread(() -> imageView.update());
});
faceMesh.setErrorListener(
(message, e) -> Log.e(TAG, "MediaPipe Face Mesh error:" + message));
// ActivityResultLauncher to get an image from the gallery as Bitmap.
ActivityResultLauncher<Intent> imageGetter =
registerForActivityResult(
new ActivityResultContracts.StartActivityForResult(),
result -> {
Intent resultIntent = result.getData();
if (resultIntent != null && result.getResultCode() == RESULT_OK) {
Bitmap bitmap = null;
try {
bitmap =
MediaStore.Images.Media.getBitmap(
this.getContentResolver(), resultIntent.getData());
// Please also rotate the Bitmap based on its orientation.
} catch (IOException e) {
Log.e(TAG, "Bitmap reading error:" + e);
}
if (bitmap != null) {
faceMesh.send(bitmap);
}
}
});
Intent pickImageIntent = new Intent(Intent.ACTION_PICK);
pickImageIntent.setDataAndType(MediaStore.Images.Media.INTERNAL_CONTENT_URI, "image/*");
imageGetter.launch(pickImageIntent);
```
#### Video Input
```java
// For video input and result rendering with OpenGL.
FaceMeshOptions faceMeshOptions =
FaceMeshOptions.builder()
.setStaticImageMode(false)
.setRefineLandmarks(true)
.setMaxNumFaces(1)
.setRunOnGpu(true).build();
FaceMesh faceMesh = new FaceMesh(this, faceMeshOptions);
faceMesh.setErrorListener(
(message, e) -> Log.e(TAG, "MediaPipe Face Mesh error:" + message));
// Initializes a new VideoInput instance and connects it to MediaPipe Face Mesh Solution.
VideoInput videoInput = new VideoInput(this);
videoInput.setNewFrameListener(
textureFrame -> faceMesh.send(textureFrame));
// Initializes a new GlSurfaceView with a ResultGlRenderer<FaceMeshResult> instance
// that provides the interfaces to run user-defined OpenGL rendering code.
// See mediapipe/examples/android/solutions/facemesh/src/main/java/com/google/mediapipe/examples/facemesh/FaceMeshResultGlRenderer.java
// as an example.
SolutionGlSurfaceView<FaceMeshResult> glSurfaceView =
new SolutionGlSurfaceView<>(
this, faceMesh.getGlContext(), faceMesh.getGlMajorVersion());
glSurfaceView.setSolutionResultRenderer(new FaceMeshResultGlRenderer());
glSurfaceView.setRenderInputImage(true);
faceMesh.setResultListener(
    faceMeshResult -> {
      if (faceMeshResult.multiFaceLandmarks().isEmpty()) {
        return;
      }
      NormalizedLandmark noseLandmark =
          faceMeshResult.multiFaceLandmarks().get(0).getLandmarkList().get(1);
Log.i(
TAG,
String.format(
"MediaPipe Face Mesh nose normalized coordinates (value range: [0, 1]): x=%f, y=%f",
noseLandmark.getX(), noseLandmark.getY()));
// Request GL rendering.
glSurfaceView.setRenderData(faceMeshResult);
glSurfaceView.requestRender();
});
ActivityResultLauncher<Intent> videoGetter =
registerForActivityResult(
new ActivityResultContracts.StartActivityForResult(),
result -> {
Intent resultIntent = result.getData();
if (resultIntent != null) {
if (result.getResultCode() == RESULT_OK) {
glSurfaceView.post(
() ->
videoInput.start(
this,
resultIntent.getData(),
faceMesh.getGlContext(),
glSurfaceView.getWidth(),
glSurfaceView.getHeight()));
}
}
});
Intent pickVideoIntent = new Intent(Intent.ACTION_PICK);
pickVideoIntent.setDataAndType(MediaStore.Video.Media.INTERNAL_CONTENT_URI, "video/*");
videoGetter.launch(pickVideoIntent);
```
## Example Apps

Please first see general instructions for
@ -91,8 +91,10 @@ To detect initial hand locations, we designed a
mobile real-time uses in a manner similar to the face detection model in
[MediaPipe Face Mesh](./face_mesh.md). Detecting hands is a decidedly complex
task: our
[lite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/palm_detection/palm_detection_lite.tflite)
and
[full model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/palm_detection/palm_detection_full.tflite)
have to work across a variety of hand sizes with a large scale span (~20x)
relative to the image frame and be able to detect occluded and self-occluded
hands. Whereas faces have high contrast patterns, e.g., in the eye and mouth
region, the lack of such features in hands makes it comparatively difficult to
@ -120,7 +122,7 @@ just 86.22%.
### Hand Landmark Model

After the palm detection over the whole image our subsequent hand landmark
[model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/hand_landmark/hand_landmark_full.tflite)
performs precise keypoint localization of 21 3D hand-knuckle coordinates inside
the detected hand regions via regression, that is direct coordinate prediction.
The model learns a consistent internal hand pose representation and is robust
@ -163,6 +165,11 @@ unrelated, images. Default to `false`.
Maximum number of hands to detect. Default to `2`.
#### model_complexity
Complexity of the hand landmark model: `0` or `1`. Landmark accuracy as well as
inference latency generally go up with the model complexity. Default to `1`.
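As a rough sketch with the Python Solution API (the webcam example below uses
the same option), a lighter model can be requested like this:

```python
import mediapipe as mp

# model_complexity=0 selects the lighter hand landmark model, trading some
# landmark accuracy for lower inference latency.
with mp.solutions.hands.Hands(model_complexity=0, max_num_hands=2) as hands:
  pass  # Call hands.process(rgb_image) on an RGB image array.
```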
#### min_detection_confidence

Minimum confidence value (`[0.0, 1.0]`) from the hand detection model for the
@ -190,6 +197,17 @@ of 21 hand landmarks and each landmark is composed of `x`, `y` and `z`. `x` and
and the smaller the value the closer the landmark is to the camera. The
magnitude of `z` uses roughly the same scale as `x`.
#### multi_hand_world_landmarks
Collection of detected/tracked hands, where each hand is represented as a list
of 21 hand landmarks in world coordinates. Each landmark consists of the
following:
* `x`, `y` and `z`: Real-world 3D coordinates in meters with the origin at the
hand's approximate geometric center.
* `visibility`: Identical to that defined in the corresponding
[multi_hand_landmarks](#multi_hand_landmarks).
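For example, with the Python Solution API the world landmarks can be read from
the same result object as the normalized landmarks. A minimal sketch, assuming
a hypothetical input image `hand.jpg`:

```python
import cv2
import mediapipe as mp

mp_hands = mp.solutions.hands
with mp_hands.Hands(static_image_mode=True, max_num_hands=2) as hands:
  image = cv2.imread('hand.jpg')  # Hypothetical input path.
  results = hands.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
  if results.multi_hand_world_landmarks:
    for hand_world_landmarks in results.multi_hand_world_landmarks:
      # World landmarks are in meters, with the origin at the hand's
      # approximate geometric center.
      wrist = hand_world_landmarks.landmark[mp_hands.HandLandmark.WRIST]
      print(f'Wrist (m): x={wrist.x:.3f}, y={wrist.y:.3f}, z={wrist.z:.3f}')
```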
#### multi_handedness

Collection of handedness of the detected/tracked hands (i.e. is it a left or
@ -212,6 +230,7 @@ Supported configuration options:
* [static_image_mode](#static_image_mode)
* [max_num_hands](#max_num_hands)
* [model_complexity](#model_complexity)
* [min_detection_confidence](#min_detection_confidence)
* [min_tracking_confidence](#min_tracking_confidence)
@ -219,6 +238,7 @@ Supported configuration options:
import cv2
import mediapipe as mp
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_hands = mp.solutions.hands

# For static images:
@ -248,13 +268,24 @@ with mp_hands.Hands(
          f'{hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].y * image_height})'
      )
      mp_drawing.draw_landmarks(
          annotated_image,
          hand_landmarks,
          mp_hands.HAND_CONNECTIONS,
          mp_drawing_styles.get_default_hand_landmarks_style(),
          mp_drawing_styles.get_default_hand_connections_style())
    cv2.imwrite(
        '/tmp/annotated_image' + str(idx) + '.png', cv2.flip(annotated_image, 1))
# Draw hand world landmarks.
if not results.multi_hand_world_landmarks:
continue
for hand_world_landmarks in results.multi_hand_world_landmarks:
mp_drawing.plot_landmarks(
hand_world_landmarks, mp_hands.HAND_CONNECTIONS, azimuth=5)
# For webcam input:
cap = cv2.VideoCapture(0)
with mp_hands.Hands(
    model_complexity=0,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5) as hands:
  while cap.isOpened():
@ -264,12 +295,10 @@ with mp_hands.Hands(
      # If loading a video, use 'break' instead of 'continue'.
      continue

    # To improve performance, optionally mark the image as not writeable to
    # pass by reference.
    image.flags.writeable = False
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = hands.process(image)

    # Draw the hand annotations on the image.
@ -278,8 +307,13 @@ with mp_hands.Hands(
    if results.multi_hand_landmarks:
      for hand_landmarks in results.multi_hand_landmarks:
        mp_drawing.draw_landmarks(
            image,
            hand_landmarks,
            mp_hands.HAND_CONNECTIONS,
            mp_drawing_styles.get_default_hand_landmarks_style(),
            mp_drawing_styles.get_default_hand_connections_style())
    # Flip the image horizontally for a selfie-view display.
    cv2.imshow('MediaPipe Hands', cv2.flip(image, 1))
    if cv2.waitKey(5) & 0xFF == 27:
      break
cap.release()
@ -294,6 +328,7 @@ and a [fun application], and the following usage example.
Supported configuration options:

* [maxNumHands](#max_num_hands)
* [modelComplexity](#model_complexity)
* [minDetectionConfidence](#min_detection_confidence)
* [minTrackingConfidence](#min_tracking_confidence)
@ -343,6 +378,7 @@ const hands = new Hands({locateFile: (file) => {
}});
hands.setOptions({
  maxNumHands: 2,
  modelComplexity: 1,
  minDetectionConfidence: 0.5,
  minTrackingConfidence: 0.5
});
@ -359,6 +395,207 @@ camera.start();
</script>
```
### Android Solution API
Please first follow general
[instructions](../getting_started/android_solutions.md) to add MediaPipe Gradle
dependencies and try the Android Solution API in the companion
[example Android Studio project](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/solutions/hands),
and learn more in the usage example below.
Supported configuration options:
* [staticImageMode](#static_image_mode)
* [maxNumHands](#max_num_hands)
* runOnGpu: Run the pipeline and the model inference on GPU or CPU.
#### Camera Input
```java
// For camera input and result rendering with OpenGL.
HandsOptions handsOptions =
HandsOptions.builder()
.setStaticImageMode(false)
.setMaxNumHands(2)
.setRunOnGpu(true).build();
Hands hands = new Hands(this, handsOptions);
hands.setErrorListener(
(message, e) -> Log.e(TAG, "MediaPipe Hands error:" + message));
// Initializes a new CameraInput instance and connects it to MediaPipe Hands Solution.
CameraInput cameraInput = new CameraInput(this);
cameraInput.setNewFrameListener(
textureFrame -> hands.send(textureFrame));
// Initializes a new GlSurfaceView with a ResultGlRenderer<HandsResult> instance
// that provides the interfaces to run user-defined OpenGL rendering code.
// See mediapipe/examples/android/solutions/hands/src/main/java/com/google/mediapipe/examples/hands/HandsResultGlRenderer.java
// as an example.
SolutionGlSurfaceView<HandsResult> glSurfaceView =
new SolutionGlSurfaceView<>(
this, hands.getGlContext(), hands.getGlMajorVersion());
glSurfaceView.setSolutionResultRenderer(new HandsResultGlRenderer());
glSurfaceView.setRenderInputImage(true);
hands.setResultListener(
handsResult -> {
      if (handsResult.multiHandLandmarks().isEmpty()) {
return;
}
NormalizedLandmark wristLandmark =
handsResult.multiHandLandmarks().get(0).getLandmarkList().get(HandLandmark.WRIST);
Log.i(
TAG,
String.format(
"MediaPipe Hand wrist normalized coordinates (value range: [0, 1]): x=%f, y=%f",
wristLandmark.getX(), wristLandmark.getY()));
// Request GL rendering.
glSurfaceView.setRenderData(handsResult);
glSurfaceView.requestRender();
});
// The runnable to start camera after the GLSurfaceView is attached.
glSurfaceView.post(
() ->
cameraInput.start(
this,
hands.getGlContext(),
CameraInput.CameraFacing.FRONT,
glSurfaceView.getWidth(),
glSurfaceView.getHeight()));
```
#### Image Input
```java
// For reading images from gallery and drawing the output in an ImageView.
HandsOptions handsOptions =
HandsOptions.builder()
.setStaticImageMode(true)
.setMaxNumHands(2)
.setRunOnGpu(true).build();
Hands hands = new Hands(this, handsOptions);
// Connects MediaPipe Hands Solution to the user-defined ImageView instance that
// allows users to have the custom drawing of the output landmarks on it.
// See mediapipe/examples/android/solutions/hands/src/main/java/com/google/mediapipe/examples/hands/HandsResultImageView.java
// as an example.
HandsResultImageView imageView = new HandsResultImageView(this);
hands.setResultListener(
handsResult -> {
      if (handsResult.multiHandLandmarks().isEmpty()) {
return;
}
int width = handsResult.inputBitmap().getWidth();
int height = handsResult.inputBitmap().getHeight();
NormalizedLandmark wristLandmark =
handsResult.multiHandLandmarks().get(0).getLandmarkList().get(HandLandmark.WRIST);
Log.i(
TAG,
String.format(
"MediaPipe Hand wrist coordinates (pixel values): x=%f, y=%f",
wristLandmark.getX() * width, wristLandmark.getY() * height));
// Request canvas drawing.
imageView.setHandsResult(handsResult);
runOnUiThread(() -> imageView.update());
});
hands.setErrorListener(
(message, e) -> Log.e(TAG, "MediaPipe Hands error:" + message));
// ActivityResultLauncher to get an image from the gallery as Bitmap.
ActivityResultLauncher<Intent> imageGetter =
registerForActivityResult(
new ActivityResultContracts.StartActivityForResult(),
result -> {
Intent resultIntent = result.getData();
if (resultIntent != null && result.getResultCode() == RESULT_OK) {
Bitmap bitmap = null;
try {
bitmap =
MediaStore.Images.Media.getBitmap(
this.getContentResolver(), resultIntent.getData());
// Please also rotate the Bitmap based on its orientation.
} catch (IOException e) {
Log.e(TAG, "Bitmap reading error:" + e);
}
if (bitmap != null) {
hands.send(bitmap);
}
}
});
Intent pickImageIntent = new Intent(Intent.ACTION_PICK);
pickImageIntent.setDataAndType(MediaStore.Images.Media.INTERNAL_CONTENT_URI, "image/*");
imageGetter.launch(pickImageIntent);
```
#### Video Input
```java
// For video input and result rendering with OpenGL.
HandsOptions handsOptions =
HandsOptions.builder()
.setStaticImageMode(false)
.setMaxNumHands(2)
.setRunOnGpu(true).build();
Hands hands = new Hands(this, handsOptions);
hands.setErrorListener(
(message, e) -> Log.e(TAG, "MediaPipe Hands error:" + message));
// Initializes a new VideoInput instance and connects it to MediaPipe Hands Solution.
VideoInput videoInput = new VideoInput(this);
videoInput.setNewFrameListener(
textureFrame -> hands.send(textureFrame));
// Initializes a new GlSurfaceView with a ResultGlRenderer<HandsResult> instance
// that provides the interfaces to run user-defined OpenGL rendering code.
// See mediapipe/examples/android/solutions/hands/src/main/java/com/google/mediapipe/examples/hands/HandsResultGlRenderer.java
// as an example.
SolutionGlSurfaceView<HandsResult> glSurfaceView =
new SolutionGlSurfaceView<>(
this, hands.getGlContext(), hands.getGlMajorVersion());
glSurfaceView.setSolutionResultRenderer(new HandsResultGlRenderer());
glSurfaceView.setRenderInputImage(true);
hands.setResultListener(
handsResult -> {
      if (handsResult.multiHandLandmarks().isEmpty()) {
return;
}
NormalizedLandmark wristLandmark =
handsResult.multiHandLandmarks().get(0).getLandmarkList().get(HandLandmark.WRIST);
Log.i(
TAG,
String.format(
"MediaPipe Hand wrist normalized coordinates (value range: [0, 1]): x=%f, y=%f",
wristLandmark.getX(), wristLandmark.getY()));
// Request GL rendering.
glSurfaceView.setRenderData(handsResult);
glSurfaceView.requestRender();
});
ActivityResultLauncher<Intent> videoGetter =
registerForActivityResult(
new ActivityResultContracts.StartActivityForResult(),
result -> {
Intent resultIntent = result.getData();
if (resultIntent != null) {
if (result.getResultCode() == RESULT_OK) {
glSurfaceView.post(
() ->
videoInput.start(
this,
resultIntent.getData(),
hands.getGlContext(),
glSurfaceView.getWidth(),
glSurfaceView.getHeight()));
}
}
});
Intent pickVideoIntent = new Intent(Intent.ACTION_PICK);
pickVideoIntent.setDataAndType(MediaStore.Video.Media.INTERNAL_CONTENT_URI, "video/*");
videoGetter.launch(pickVideoIntent);
```
## Example Apps

Please first see general instructions for
@ -147,6 +147,23 @@ If set to `true`, the solution filters pose landmarks across different input
images to reduce jitter, but ignored if [static_image_mode](#static_image_mode)
is also set to `true`. Default to `true`.
#### enable_segmentation
If set to `true`, in addition to the pose, face and hand landmarks the solution
also generates the segmentation mask. Default to `false`.
#### smooth_segmentation
If set to `true`, the solution filters segmentation masks across different input
images to reduce jitter. Ignored if [enable_segmentation](#enable_segmentation)
is `false` or [static_image_mode](#static_image_mode) is `true`. Default to
`true`.
#### refine_face_landmarks
Whether to further refine the landmark coordinates around the eyes and lips, and
output additional landmarks around the irises. Default to `false`.
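Taken together, these options are passed to the solution constructor. A minimal
Python sketch (the full examples below show the complete processing loop):

```python
import mediapipe as mp

# enable_segmentation adds results.segmentation_mask, smooth_segmentation
# temporally filters the mask (video only), and refine_face_landmarks switches
# to the attention-based face landmark model with iris landmarks.
with mp.solutions.holistic.Holistic(
    enable_segmentation=True,
    smooth_segmentation=True,
    refine_face_landmarks=True) as holistic:
  pass  # Call holistic.process(rgb_image) on an RGB image array.
```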
#### min_detection_confidence

Minimum confidence value (`[0.0, 1.0]`) from the person-detection model for the
@ -207,6 +224,15 @@ the camera. The magnitude of `z` uses roughly the same scale as `x`.
A list of 21 hand landmarks on the right hand, in the same representation as
[left_hand_landmarks](#left_hand_landmarks).
#### segmentation_mask
The output segmentation mask, predicted only when
[enable_segmentation](#enable_segmentation) is set to `true`. The mask has the
same width and height as the input image, and contains values in `[0.0, 1.0]`
where `1.0` and `0.0` indicate high certainty of a "human" and "background"
pixel respectively. Please refer to the platform-specific usage examples below
for usage details.
### Python Solution API

Please first follow general [instructions](../getting_started/python.md) to
@ -218,6 +244,9 @@ Supported configuration options:
* [static_image_mode](#static_image_mode)
* [model_complexity](#model_complexity)
* [smooth_landmarks](#smooth_landmarks)
* [enable_segmentation](#enable_segmentation)
* [smooth_segmentation](#smooth_segmentation)
* [refine_face_landmarks](#refine_face_landmarks)
* [min_detection_confidence](#min_detection_confidence)
* [min_tracking_confidence](#min_tracking_confidence)
@ -225,13 +254,16 @@ Supported configuration options:
import cv2
import mediapipe as mp
import numpy as np
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_holistic = mp.solutions.holistic

# For static images:
IMAGE_FILES = []
BG_COLOR = (192, 192, 192)  # Gray, used as the mask background color below.
with mp_holistic.Holistic(
    static_image_mode=True,
    model_complexity=2,
    enable_segmentation=True,
    refine_face_landmarks=True) as holistic:
  for idx, file in enumerate(IMAGE_FILES):
    image = cv2.imread(file)
    image_height, image_width, _ = image.shape
@ -244,16 +276,29 @@ with mp_holistic.Holistic(
        f'{results.pose_landmarks.landmark[mp_holistic.PoseLandmark.NOSE].x * image_width}, '
        f'{results.pose_landmarks.landmark[mp_holistic.PoseLandmark.NOSE].y * image_height})'
    )
    annotated_image = image.copy()
    # Draw segmentation on the image.
    # To improve segmentation around boundaries, consider applying a joint
    # bilateral filter to "results.segmentation_mask" with "image".
    condition = np.stack((results.segmentation_mask,) * 3, axis=-1) > 0.1
    bg_image = np.zeros(image.shape, dtype=np.uint8)
    bg_image[:] = BG_COLOR
    annotated_image = np.where(condition, annotated_image, bg_image)
    # Draw pose, left and right hands, and face landmarks on the image.
    mp_drawing.draw_landmarks(
        annotated_image,
        results.face_landmarks,
        mp_holistic.FACEMESH_TESSELATION,
        landmark_drawing_spec=None,
        connection_drawing_spec=mp_drawing_styles
        .get_default_face_mesh_tesselation_style())
    mp_drawing.draw_landmarks(
        annotated_image,
        results.pose_landmarks,
        mp_holistic.POSE_CONNECTIONS,
        landmark_drawing_spec=mp_drawing_styles.
        get_default_pose_landmarks_style())
    cv2.imwrite('/tmp/annotated_image' + str(idx) + '.png', annotated_image)
    # Plot pose world landmarks.
    mp_drawing.plot_landmarks(
@ -271,26 +316,30 @@ with mp_holistic.Holistic(
      # If loading a video, use 'break' instead of 'continue'.
      continue

    # To improve performance, optionally mark the image as not writeable to
    # pass by reference.
    image.flags.writeable = False
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = holistic.process(image)

    # Draw landmark annotation on the image.
    image.flags.writeable = True
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    mp_drawing.draw_landmarks(
        image,
        results.face_landmarks,
        mp_holistic.FACEMESH_CONTOURS,
        landmark_drawing_spec=None,
        connection_drawing_spec=mp_drawing_styles
        .get_default_face_mesh_contours_style())
    mp_drawing.draw_landmarks(
        image,
        results.pose_landmarks,
        mp_holistic.POSE_CONNECTIONS,
        landmark_drawing_spec=mp_drawing_styles
        .get_default_pose_landmarks_style())
    # Flip the image horizontally for a selfie-view display.
    cv2.imshow('MediaPipe Holistic', cv2.flip(image, 1))
    if cv2.waitKey(5) & 0xFF == 27:
      break
cap.release()
@ -306,6 +355,9 @@ Supported configuration options:
* [modelComplexity](#model_complexity)
* [smoothLandmarks](#smooth_landmarks)
* [enableSegmentation](#enable_segmentation)
* [smoothSegmentation](#smooth_segmentation)
* [refineFaceLandmarks](#refine_face_landmarks)
* [minDetectionConfidence](#min_detection_confidence)
* [minTrackingConfidence](#min_tracking_confidence)
@ -338,8 +390,20 @@ const canvasCtx = canvasElement.getContext('2d');
function onResults(results) {
  canvasCtx.save();
  canvasCtx.clearRect(0, 0, canvasElement.width, canvasElement.height);
canvasCtx.drawImage(results.segmentationMask, 0, 0,
canvasElement.width, canvasElement.height);
// Only overwrite existing pixels.
canvasCtx.globalCompositeOperation = 'source-in';
canvasCtx.fillStyle = '#00FF00';
canvasCtx.fillRect(0, 0, canvasElement.width, canvasElement.height);
// Only overwrite missing pixels.
canvasCtx.globalCompositeOperation = 'destination-atop';
  canvasCtx.drawImage(
      results.image, 0, 0, canvasElement.width, canvasElement.height);
canvasCtx.globalCompositeOperation = 'source-over';
  drawConnectors(canvasCtx, results.poseLandmarks, POSE_CONNECTIONS,
                 {color: '#00FF00', lineWidth: 4});
  drawLandmarks(canvasCtx, results.poseLandmarks,
@ -363,6 +427,9 @@ const holistic = new Holistic({locateFile: (file) => {
holistic.setOptions({
  modelComplexity: 1,
  smoothLandmarks: true,
enableSegmentation: true,
smoothSegmentation: true,
refineFaceLandmarks: true,
  minDetectionConfidence: 0.5,
  minTrackingConfidence: 0.5
});
@ -41,7 +41,10 @@ one over the other.
* Face landmark model:
  [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_landmark/face_landmark.tflite),
  [TF.js model](https://tfhub.dev/mediapipe/facemesh/1)
* Face landmark model w/ attention (aka Attention Mesh):
[TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_landmark/face_landmark_with_attention.tflite)
* [Model card](https://mediapipe.page.link/facemesh-mc),
[Model card (w/ attention)](https://mediapipe.page.link/attentionmesh-mc)
### [Iris](https://google.github.io/mediapipe/solutions/iris)
@ -52,13 +55,14 @@ one over the other.
### [Hands](https://google.github.io/mediapipe/solutions/hands)

* Palm detection model:
  [TFLite model (lite)](https://github.com/google/mediapipe/tree/master/mediapipe/modules/palm_detection/palm_detection_lite.tflite),
  [TFLite model (full)](https://github.com/google/mediapipe/tree/master/mediapipe/modules/palm_detection/palm_detection_full.tflite),
  [TF.js model](https://tfhub.dev/mediapipe/handdetector/1)
* Hand landmark model:
  [TFLite model (lite)](https://github.com/google/mediapipe/tree/master/mediapipe/modules/hand_landmark/hand_landmark_lite.tflite),
  [TFLite model (full)](https://github.com/google/mediapipe/tree/master/mediapipe/modules/hand_landmark/hand_landmark_full.tflite),
  [TF.js model](https://tfhub.dev/mediapipe/handskeleton/1)
* [Model card](https://mediapipe.page.link/handmc)
### [Pose](https://google.github.io/mediapipe/solutions/pose)
@ -224,29 +224,33 @@ where object detection simply runs on every image. Default to `0.99`.
#### model_name
Name of the model to use for predicting 3D bounding box landmarks. Currently
supports `{'Shoe', 'Chair', 'Cup', 'Camera'}`. Default to `Shoe`.
#### focal_length
By default, the camera focal length is defined in [NDC space](#ndc-space), i.e.,
`(fx, fy)`. Default to `(1.0, 1.0)`. To specify the focal length in
[pixel space](#pixel-space) instead, i.e., `(fx_pixel, fy_pixel)`, users should
provide [`image_size`](#image_size) = `(image_width, image_height)` to enable
conversions inside the API. For further details about NDC and pixel space,
please see [Coordinate Systems](#coordinate-systems).
#### principal_point
By default, the camera principal point is defined in [NDC space](#ndc-space),
i.e., `(px, py)`. Default to `(0.0, 0.0)`. To specify the principal point in
[pixel space](#pixel-space) instead, i.e., `(px_pixel, py_pixel)`, users should
provide [`image_size`](#image_size) = `(image_width, image_height)` to enable
conversions inside the API. For further details about NDC and pixel space,
please see [Coordinate Systems](#coordinate-systems).
#### image_size
**Specify only when [`focal_length`](#focal_length) and
[`principal_point`](#principal_point) are specified in pixel space.**

Size of the input image, i.e., `(image_width, image_height)`.
### Output
@ -334,11 +338,10 @@ with mp_objectron.Objectron(static_image_mode=False,
      # If loading a video, use 'break' instead of 'continue'.
      continue

    # To improve performance, optionally mark the image as not writeable to
    # pass by reference.
    image.flags.writeable = False
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = objectron.process(image)
    # Draw the box landmarks on the image.
@ -350,12 +353,96 @@ with mp_objectron.Objectron(static_image_mode=False,
          image, detected_object.landmarks_2d, mp_objectron.BOX_CONNECTIONS)
        mp_drawing.draw_axis(image, detected_object.rotation,
                             detected_object.translation)
    # Flip the image horizontally for a selfie-view display.
    cv2.imshow('MediaPipe Objectron', cv2.flip(image, 1))
    if cv2.waitKey(5) & 0xFF == 27:
      break
cap.release()
```
## JavaScript Solution API
Please first see general [introduction](../getting_started/javascript.md) on
MediaPipe in JavaScript, then learn more in the companion [web demo](#resources)
and the following usage example.
Supported configuration options:
* [staticImageMode](#static_image_mode)
* [maxNumObjects](#max_num_objects)
* [minDetectionConfidence](#min_detection_confidence)
* [minTrackingConfidence](#min_tracking_confidence)
* [modelName](#model_name)
* [focalLength](#focal_length)
* [principalPoint](#principal_point)
* [imageSize](#image_size)
```html
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<script src="https://cdn.jsdelivr.net/npm/@mediapipe/camera_utils/camera_utils.js" crossorigin="anonymous"></script>
<script src="https://cdn.jsdelivr.net/npm/@mediapipe/control_utils/control_utils.js" crossorigin="anonymous"></script>
<script src="https://cdn.jsdelivr.net/npm/@mediapipe/drawing_utils/control_utils_3d.js" crossorigin="anonymous"></script>
<script src="https://cdn.jsdelivr.net/npm/@mediapipe/drawing_utils/drawing_utils.js" crossorigin="anonymous"></script>
<script src="https://cdn.jsdelivr.net/npm/@mediapipe/objectron/objectron.js" crossorigin="anonymous"></script>
</head>
<body>
<div class="container">
<video class="input_video"></video>
<canvas class="output_canvas" width="1280px" height="720px"></canvas>
</div>
</body>
</html>
```
```javascript
<script type="module">
const videoElement = document.getElementsByClassName('input_video')[0];
const canvasElement = document.getElementsByClassName('output_canvas')[0];
const canvasCtx = canvasElement.getContext('2d');
function onResults(results) {
canvasCtx.save();
canvasCtx.drawImage(
results.image, 0, 0, canvasElement.width, canvasElement.height);
if (!!results.objectDetections) {
for (const detectedObject of results.objectDetections) {
// Reformat keypoint information as landmarks, for easy drawing.
      const landmarks =
          detectedObject.keypoints.map(x => x.point2d);
// Draw bounding box.
drawingUtils.drawConnectors(canvasCtx, landmarks,
mpObjectron.BOX_CONNECTIONS, {color: '#FF0000'});
// Draw centroid.
drawingUtils.drawLandmarks(canvasCtx, [landmarks[0]], {color: '#FFFFFF'});
}
}
canvasCtx.restore();
}
const objectron = new Objectron({locateFile: (file) => {
return `https://cdn.jsdelivr.net/npm/@mediapipe/objectron/${file}`;
}});
objectron.setOptions({
modelName: 'Chair',
maxNumObjects: 3,
});
objectron.onResults(onResults);
const camera = new Camera(videoElement, {
onFrame: async () => {
await objectron.send({image: videoElement});
},
width: 1280,
height: 720
});
camera.start();
</script>
```
## Example Apps

Please first see general instructions for
@ -442,7 +529,7 @@ Example app bounding boxes are rendered with [GlAnimationOverlayCalculator](http
> ```
> and then run
>
> ```bash
> bazel run -c opt mediapipe/graphs/object_detection_3d/obj_parser:ObjParser -- input_dir=[INTERMEDIATE_OUTPUT_DIR] output_dir=[OUTPUT_DIR]
> ```
> INPUT_DIR should be the folder with initial asset .obj files to be processed,
@ -561,11 +648,15 @@ py = -py_pixel * 2.0 / image_height + 1.0
  [Announcing the Objectron Dataset](https://ai.googleblog.com/2020/11/announcing-objectron-dataset.html)
* Google AI Blog:
  [Real-Time 3D Object Detection on Mobile Devices with MediaPipe](https://ai.googleblog.com/2020/03/real-time-3d-object-detection-on-mobile.html)
* Paper: [Objectron: A Large Scale Dataset of Object-Centric Videos in the
  Wild with Pose Annotations](https://arxiv.org/abs/2012.09988), to appear in
  CVPR 2021
* Paper: [MobilePose: Real-Time Pose Estimation for Unseen Objects with Weak
  Shape Supervision](https://arxiv.org/abs/2003.03522)
* Paper:
  [Instant 3D Object Tracking with Applications in Augmented Reality](https://drive.google.com/open?id=1O_zHmlgXIzAdKljp20U_JUkEHOGG52R8)
  ([presentation](https://www.youtube.com/watch?v=9ndF1AIo7h0)), Fourth
  Workshop on Computer Vision for AR/VR, CVPR 2020
* [Models and model cards](./models.md#objectron)
* [Web demo](https://code.mediapipe.dev/codepen/objectron)
* [Python Colab](https://mediapipe.page.link/objectron_py_colab)
@ -30,7 +30,8 @@ overlay of digital content and information on top of the physical world in
augmented reality.
MediaPipe Pose is an ML solution for high-fidelity body pose tracking, inferring
33 3D landmarks and a background segmentation mask on the whole body from RGB
video frames utilizing our
[BlazePose](https://ai.googleblog.com/2020/08/on-device-real-time-body-pose-tracking.html)
research that also powers the
[ML Kit Pose Detection API](https://developers.google.com/ml-kit/vision/pose-detection).
@ -49,11 +50,11 @@ The solution utilizes a two-step detector-tracker ML pipeline, proven to be
effective in our [MediaPipe Hands](./hands.md) and
[MediaPipe Face Mesh](./face_mesh.md) solutions. Using a detector, the pipeline
first locates the person/pose region-of-interest (ROI) within the frame. The
tracker subsequently predicts the pose landmarks and segmentation mask within
the ROI using the ROI-cropped frame as input. Note that for video use cases the
detector is invoked only as needed, i.e., for the very first frame and when the
tracker could no longer identify body pose presence in the previous frame. For
other frames the pipeline simply derives the ROI from the previous frame's pose
landmarks.
The pipeline is implemented as a MediaPipe
@ -87,11 +88,11 @@ from [COCO topology](https://cocodataset.org/#keypoints-2020).
Method | Yoga <br/> [`mAP`] | Yoga <br/> [`PCK@0.2`] | Dance <br/> [`mAP`] | Dance <br/> [`PCK@0.2`] | HIIT <br/> [`mAP`] | HIIT <br/> [`PCK@0.2`]
------ | -----------------: | ---------------------: | ------------------: | ----------------------: | -----------------: | ---------------------:
BlazePose GHUM Heavy | 68.1 | **96.4** | 73.0 | **97.2** | 74.0 | **97.5**
BlazePose GHUM Full | 62.6 | **95.5** | 67.4 | **96.3** | 68.0 | **95.7**
BlazePose GHUM Lite | 45.0 | **90.2** | 53.6 | **92.5** | 53.8 | **93.5**
[AlphaPose ResNet50](https://github.com/MVIG-SJTU/AlphaPose) | 63.4 | **96.0** | 57.8 | **95.5** | 63.4 | **96.0**
[Apple Vision](https://developer.apple.com/documentation/vision/detecting_human_body_poses_in_images) | 32.8 | **82.7** | 36.4 | **91.4** | 44.5 | **88.6**
![pose_tracking_pck_chart.png](../images/mobile/pose_tracking_pck_chart.png) | ![pose_tracking_pck_chart.png](../images/mobile/pose_tracking_pck_chart.png) |
:--------------------------------------------------------------------------: | :--------------------------------------------------------------------------: |
@ -100,11 +101,11 @@ BlazePose.Lite
We designed our models specifically for live perception use cases, so all of We designed our models specifically for live perception use cases, so all of
them work in real-time on the majority of modern devices. them work in real-time on the majority of modern devices.
Method | Latency <br/> Pixel 3 [TFLite GPU](https://www.tensorflow.org/lite/performance/gpu_advanced) | Latency <br/> MacBook Pro (15-inch 2017) Method | Latency <br/> Pixel 3 [TFLite GPU](https://www.tensorflow.org/lite/performance/gpu_advanced) | Latency <br/> MacBook Pro (15-inch 2017)
--------------- | -------------------------------------------------------------------------------------------: | ---------------------------------------: -------------------- | -------------------------------------------------------------------------------------------: | ---------------------------------------:
BlazePose.Heavy | 53 ms | 38 ms BlazePose GHUM Heavy | 53 ms | 38 ms
BlazePose.Full | 25 ms | 27 ms BlazePose GHUM Full | 25 ms | 27 ms
BlazePose.Lite | 20 ms | 25 ms BlazePose GHUM Lite | 20 ms | 25 ms
## Models ## Models
@ -124,21 +125,24 @@ hip midpoints.
:----------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------: |
*Fig 3. Vitruvian man aligned via two virtual keypoints predicted by BlazePose detector in addition to the face bounding box.* | *Fig 3. Vitruvian man aligned via two virtual keypoints predicted by BlazePose detector in addition to the face bounding box.* |
### Pose Landmark Model (BlazePose GHUM 3D) ### Pose Landmark Model (BlazePose [GHUM](https://github.com/google-research/google-research/tree/master/ghum) 3D)
The landmark model in MediaPipe Pose predicts the location of 33 pose landmarks The landmark model in MediaPipe Pose predicts the location of 33 pose landmarks
(see figure below). (see figure below).
Please find more detail in the
[BlazePose Google AI Blog](https://ai.googleblog.com/2020/08/on-device-real-time-body-pose-tracking.html),
this [paper](https://arxiv.org/abs/2006.10204) and
[the model card](./models.md#pose), and the attributes in each landmark
[below](#pose_landmarks).
![pose_tracking_full_body_landmarks.png](../images/mobile/pose_tracking_full_body_landmarks.png) | ![pose_tracking_full_body_landmarks.png](../images/mobile/pose_tracking_full_body_landmarks.png) |
:----------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------: |
*Fig 4. 33 pose landmarks.* | *Fig 4. 33 pose landmarks.* |
Optionally, MediaPipe Pose can predict a full-body
[segmentation mask](#segmentation_mask) represented as a two-class segmentation
(human or background).
Please find more detail in the
[BlazePose Google AI Blog](https://ai.googleblog.com/2020/08/on-device-real-time-body-pose-tracking.html),
this [paper](https://arxiv.org/abs/2006.10204),
[the model card](./models.md#pose) and the [Output](#output) section below.
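For a concrete look at the 33 landmarks and their attributes, here is a minimal sketch built on the Python Solution API described below; `example.jpg` is a placeholder path you would replace with your own image.

```python
import cv2
import mediapipe as mp

mp_pose = mp.solutions.pose

# Minimal sketch: print the 33 predicted landmarks of a single image.
# 'example.jpg' is a placeholder; replace it with your own image path.
with mp_pose.Pose(static_image_mode=True) as pose:
  image = cv2.imread('example.jpg')
  results = pose.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
  if results.pose_landmarks:
    for idx, landmark in enumerate(results.pose_landmarks.landmark):
      # x/y are normalized image coordinates; visibility is a [0.0, 1.0]
      # likelihood of the landmark being visible.
      print(mp_pose.PoseLandmark(idx).name,
            landmark.x, landmark.y, landmark.z, landmark.visibility)
```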
## Solution APIs ## Solution APIs
### Cross-platform Configuration Options ### Cross-platform Configuration Options
@ -167,6 +171,18 @@ If set to `true`, the solution filters pose landmarks across different input
images to reduce jitter, but ignored if [static_image_mode](#static_image_mode) images to reduce jitter, but ignored if [static_image_mode](#static_image_mode)
is also set to `true`. Default to `true`. is also set to `true`. Default to `true`.
#### enable_segmentation
If set to `true`, in addition to the pose landmarks, the solution also generates
the segmentation mask. Default to `false`.
#### smooth_segmentation
If set to `true`, the solution filters segmentation masks across different input
images to reduce jitter. Ignored if [enable_segmentation](#enable_segmentation)
is `false` or [static_image_mode](#static_image_mode) is `true`. Default to
`true`.
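As a quick reference, a minimal sketch of enabling these two options through the Python Solution API (shown in full below); the option values are only illustrative.

```python
import mediapipe as mp

# Minimal sketch: request a segmentation mask and temporal filtering of it.
with mp.solutions.pose.Pose(
    static_image_mode=False,     # video mode, so smoothing options apply
    enable_segmentation=True,    # also produce results.segmentation_mask
    smooth_segmentation=True,    # filter the mask across frames to reduce jitter
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5) as pose:
  pass  # feed frames to pose.process(...) as shown in the examples below.
```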
#### min_detection_confidence #### min_detection_confidence
Minimum confidence value (`[0.0, 1.0]`) from the person-detection model for the Minimum confidence value (`[0.0, 1.0]`) from the person-detection model for the
@ -211,6 +227,19 @@ the following:
* `visibility`: Identical to that defined in the corresponding * `visibility`: Identical to that defined in the corresponding
[pose_landmarks](#pose_landmarks). [pose_landmarks](#pose_landmarks).
#### segmentation_mask
The output segmentation mask, predicted only when
[enable_segmentation](#enable_segmentation) is set to `true`. The mask has the
same width and height as the input image, and contains values in `[0.0, 1.0]`
where `1.0` and `0.0` indicate high certainty of a "human" and "background"
pixel respectively. Please refer to the platform-specific usage examples below
for usage details.
*Fig 6. Example of MediaPipe Pose segmentation mask.* |
:---------------------------------------------------: |
<video autoplay muted loop preload style="height: auto; width: 480px"><source src="../images/mobile/pose_segmentation.mp4" type="video/mp4"></video> |
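As a small illustration of consuming `segmentation_mask`, the sketch below binarizes the soft mask and composites the person onto a solid background; the 0.5 threshold and the gray color are arbitrary choices, and the platform examples below show a more complete treatment.

```python
import numpy as np

def composite_on_gray(image, segmentation_mask, threshold=0.5):
  """Minimal sketch: place the segmented person onto a solid gray background.

  `image` is the original frame and `segmentation_mask` is
  `results.segmentation_mask`; the 0.5 threshold is an arbitrary choice.
  """
  # Broadcast the single-channel mask to 3 channels and threshold it.
  condition = np.stack((segmentation_mask,) * 3, axis=-1) > threshold
  background = np.zeros(image.shape, dtype=np.uint8)
  background[:] = (192, 192, 192)  # gray
  return np.where(condition, image, background)
```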
### Python Solution API ### Python Solution API
Please first follow general [instructions](../getting_started/python.md) to Please first follow general [instructions](../getting_started/python.md) to
@ -222,6 +251,8 @@ Supported configuration options:
* [static_image_mode](#static_image_mode) * [static_image_mode](#static_image_mode)
* [model_complexity](#model_complexity) * [model_complexity](#model_complexity)
* [smooth_landmarks](#smooth_landmarks) * [smooth_landmarks](#smooth_landmarks)
* [enable_segmentation](#enable_segmentation)
* [smooth_segmentation](#smooth_segmentation)
* [min_detection_confidence](#min_detection_confidence) * [min_detection_confidence](#min_detection_confidence)
* [min_tracking_confidence](#min_tracking_confidence) * [min_tracking_confidence](#min_tracking_confidence)
@ -229,13 +260,16 @@ Supported configuration options:
import cv2 import cv2
import mediapipe as mp import mediapipe as mp
import numpy as np
mp_drawing = mp.solutions.drawing_utils mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_pose = mp.solutions.pose mp_pose = mp.solutions.pose
# For static images: # For static images:
IMAGE_FILES = [] IMAGE_FILES = []
BG_COLOR = (192, 192, 192) # gray
with mp_pose.Pose( with mp_pose.Pose(
static_image_mode=True, static_image_mode=True,
model_complexity=2, model_complexity=2,
enable_segmentation=True,
min_detection_confidence=0.5) as pose: min_detection_confidence=0.5) as pose:
for idx, file in enumerate(IMAGE_FILES): for idx, file in enumerate(IMAGE_FILES):
image = cv2.imread(file) image = cv2.imread(file)
@ -247,13 +281,24 @@ with mp_pose.Pose(
continue continue
print( print(
f'Nose coordinates: (' f'Nose coordinates: ('
f'{results.pose_landmarks.landmark[mp_holistic.PoseLandmark.NOSE].x * image_width}, ' f'{results.pose_landmarks.landmark[mp_pose.PoseLandmark.NOSE].x * image_width}, '
f'{results.pose_landmarks.landmark[mp_holistic.PoseLandmark.NOSE].y * image_height})' f'{results.pose_landmarks.landmark[mp_pose.PoseLandmark.NOSE].y * image_height})'
) )
# Draw pose landmarks on the image.
annotated_image = image.copy() annotated_image = image.copy()
# Draw segmentation on the image.
# To improve segmentation around boundaries, consider applying a joint
# bilateral filter to "results.segmentation_mask" with "image".
condition = np.stack((results.segmentation_mask,) * 3, axis=-1) > 0.1
bg_image = np.zeros(image.shape, dtype=np.uint8)
bg_image[:] = BG_COLOR
annotated_image = np.where(condition, annotated_image, bg_image)
# Draw pose landmarks on the image.
mp_drawing.draw_landmarks( mp_drawing.draw_landmarks(
annotated_image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS) annotated_image,
results.pose_landmarks,
mp_pose.POSE_CONNECTIONS,
landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style())
cv2.imwrite('/tmp/annotated_image' + str(idx) + '.png', annotated_image) cv2.imwrite('/tmp/annotated_image' + str(idx) + '.png', annotated_image)
# Plot pose world landmarks. # Plot pose world landmarks.
mp_drawing.plot_landmarks( mp_drawing.plot_landmarks(
@ -271,20 +316,22 @@ with mp_pose.Pose(
# If loading a video, use 'break' instead of 'continue'. # If loading a video, use 'break' instead of 'continue'.
continue continue
# Flip the image horizontally for a later selfie-view display, and convert
# the BGR image to RGB.
image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)
# To improve performance, optionally mark the image as not writeable to # To improve performance, optionally mark the image as not writeable to
# pass by reference. # pass by reference.
image.flags.writeable = False image.flags.writeable = False
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
results = pose.process(image) results = pose.process(image)
# Draw the pose annotation on the image. # Draw the pose annotation on the image.
image.flags.writeable = True image.flags.writeable = True
image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
mp_drawing.draw_landmarks( mp_drawing.draw_landmarks(
image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS) image,
cv2.imshow('MediaPipe Pose', image) results.pose_landmarks,
mp_pose.POSE_CONNECTIONS,
landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style())
# Flip the image horizontally for a selfie-view display.
cv2.imshow('MediaPipe Pose', cv2.flip(image, 1))
if cv2.waitKey(5) & 0xFF == 27: if cv2.waitKey(5) & 0xFF == 27:
break break
cap.release() cap.release()
@ -300,6 +347,8 @@ Supported configuration options:
* [modelComplexity](#model_complexity) * [modelComplexity](#model_complexity)
* [smoothLandmarks](#smooth_landmarks) * [smoothLandmarks](#smooth_landmarks)
* [enableSegmentation](#enable_segmentation)
* [smoothSegmentation](#smooth_segmentation)
* [minDetectionConfidence](#min_detection_confidence) * [minDetectionConfidence](#min_detection_confidence)
* [minTrackingConfidence](#min_tracking_confidence) * [minTrackingConfidence](#min_tracking_confidence)
@ -319,6 +368,7 @@ Supported configuration options:
<div class="container"> <div class="container">
<video class="input_video"></video> <video class="input_video"></video>
<canvas class="output_canvas" width="1280px" height="720px"></canvas> <canvas class="output_canvas" width="1280px" height="720px"></canvas>
<div class="landmark-grid-container"></div>
</div> </div>
</body> </body>
</html> </html>
@ -340,8 +390,20 @@ function onResults(results) {
canvasCtx.save(); canvasCtx.save();
canvasCtx.clearRect(0, 0, canvasElement.width, canvasElement.height); canvasCtx.clearRect(0, 0, canvasElement.width, canvasElement.height);
canvasCtx.drawImage(results.segmentationMask, 0, 0,
canvasElement.width, canvasElement.height);
// Only overwrite existing pixels.
canvasCtx.globalCompositeOperation = 'source-in';
canvasCtx.fillStyle = '#00FF00';
canvasCtx.fillRect(0, 0, canvasElement.width, canvasElement.height);
// Only overwrite missing pixels.
canvasCtx.globalCompositeOperation = 'destination-atop';
canvasCtx.drawImage( canvasCtx.drawImage(
results.image, 0, 0, canvasElement.width, canvasElement.height); results.image, 0, 0, canvasElement.width, canvasElement.height);
canvasCtx.globalCompositeOperation = 'source-over';
drawConnectors(canvasCtx, results.poseLandmarks, POSE_CONNECTIONS, drawConnectors(canvasCtx, results.poseLandmarks, POSE_CONNECTIONS,
{color: '#00FF00', lineWidth: 4}); {color: '#00FF00', lineWidth: 4});
drawLandmarks(canvasCtx, results.poseLandmarks, drawLandmarks(canvasCtx, results.poseLandmarks,
@ -357,6 +419,8 @@ const pose = new Pose({locateFile: (file) => {
pose.setOptions({ pose.setOptions({
modelComplexity: 1, modelComplexity: 1,
smoothLandmarks: true, smoothLandmarks: true,
enableSegmentation: true,
smoothSegmentation: true,
minDetectionConfidence: 0.5, minDetectionConfidence: 0.5,
minTrackingConfidence: 0.5 minTrackingConfidence: 0.5
}); });
@ -422,6 +486,7 @@ on how to build MediaPipe examples.
[BlazePose: On-device Real-time Body Pose Tracking](https://arxiv.org/abs/2006.10204) [BlazePose: On-device Real-time Body Pose Tracking](https://arxiv.org/abs/2006.10204)
([presentation](https://youtu.be/YPpUOTRn5tA)) ([presentation](https://youtu.be/YPpUOTRn5tA))
* [Models and model cards](./models.md#pose) * [Models and model cards](./models.md#pose)
* [GHUM & GHUML: Generative 3D Human Shape and Articulated Pose Models](https://github.com/google-research/google-research/tree/master/ghum)
* [Web demo](https://code.mediapipe.dev/codepen/pose) * [Web demo](https://code.mediapipe.dev/codepen/pose)
* [Python Colab](https://mediapipe.page.link/pose_py_colab) * [Python Colab](https://mediapipe.page.link/pose_py_colab)
View File
@ -96,6 +96,7 @@ Supported configuration options:
```python ```python
import cv2 import cv2
import mediapipe as mp import mediapipe as mp
import numpy as np
mp_drawing = mp.solutions.drawing_utils mp_drawing = mp.solutions.drawing_utils
mp_selfie_segmentation = mp.solutions.selfie_segmentation mp_selfie_segmentation = mp.solutions.selfie_segmentation
@ -261,7 +262,7 @@ to visualize its associated subgraphs, please see
[(or download prebuilt ARM64 APK)](https://drive.google.com/file/d/1DoeyGzMmWUsjfVgZfGGecrn7GKzYcEAo/view?usp=sharing) [(or download prebuilt ARM64 APK)](https://drive.google.com/file/d/1DoeyGzMmWUsjfVgZfGGecrn7GKzYcEAo/view?usp=sharing)
[`mediapipe/examples/android/src/java/com/google/mediapipe/apps/selfiesegmentationgpu:selfiesegmentationgpu`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/selfiesegmentationgpu/BUILD) [`mediapipe/examples/android/src/java/com/google/mediapipe/apps/selfiesegmentationgpu:selfiesegmentationgpu`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/selfiesegmentationgpu/BUILD)
* iOS target: * iOS target:
[`mediapipe/examples/ios/selfiesegmentationgpu:SelfieSegmentationGpuApp`](http:/mediapipe/examples/ios/selfiesegmentationgpu/BUILD) [`mediapipe/examples/ios/selfiesegmentationgpu:SelfieSegmentationGpuApp`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/ios/selfiesegmentationgpu/BUILD)
### Desktop ### Desktop
View File
@ -13,6 +13,9 @@ has_toc: false
{:toc} {:toc}
--- ---
MediaPipe offers open source cross-platform, customizable ML solutions for live
and streaming media.
<!-- []() in the first cell is needed to preserve table formatting in GitHub Pages. --> <!-- []() in the first cell is needed to preserve table formatting in GitHub Pages. -->
<!-- Whenever this table is updated, paste a copy to ../external_index.md. --> <!-- Whenever this table is updated, paste a copy to ../external_index.md. -->
@ -29,7 +32,7 @@ has_toc: false
[Object Detection](https://google.github.io/mediapipe/solutions/object_detection) | ✅ | ✅ | ✅ | | | ✅ [Object Detection](https://google.github.io/mediapipe/solutions/object_detection) | ✅ | ✅ | ✅ | | | ✅
[Box Tracking](https://google.github.io/mediapipe/solutions/box_tracking) | ✅ | ✅ | ✅ | | | [Box Tracking](https://google.github.io/mediapipe/solutions/box_tracking) | ✅ | ✅ | ✅ | | |
[Instant Motion Tracking](https://google.github.io/mediapipe/solutions/instant_motion_tracking) | ✅ | | | | | [Instant Motion Tracking](https://google.github.io/mediapipe/solutions/instant_motion_tracking) | ✅ | | | | |
[Objectron](https://google.github.io/mediapipe/solutions/objectron) | ✅ | | ✅ | ✅ | | [Objectron](https://google.github.io/mediapipe/solutions/objectron) | ✅ | | ✅ | ✅ | |
[KNIFT](https://google.github.io/mediapipe/solutions/knift) | ✅ | | | | | [KNIFT](https://google.github.io/mediapipe/solutions/knift) | ✅ | | | | |
[AutoFlip](https://google.github.io/mediapipe/solutions/autoflip) | | | ✅ | | | [AutoFlip](https://google.github.io/mediapipe/solutions/autoflip) | | | ✅ | | |
[MediaSequence](https://google.github.io/mediapipe/solutions/media_sequence) | | | ✅ | | | [MediaSequence](https://google.github.io/mediapipe/solutions/media_sequence) | | | ✅ | | |
View File
@ -140,6 +140,16 @@ mediapipe_proto_library(
], ],
) )
mediapipe_proto_library(
name = "graph_profile_calculator_proto",
srcs = ["graph_profile_calculator.proto"],
visibility = ["//visibility:public"],
deps = [
"//mediapipe/framework:calculator_options_proto",
"//mediapipe/framework:calculator_proto",
],
)
cc_library( cc_library(
name = "add_header_calculator", name = "add_header_calculator",
srcs = ["add_header_calculator.cc"], srcs = ["add_header_calculator.cc"],
@ -521,9 +531,13 @@ cc_test(
":split_vector_calculator", ":split_vector_calculator",
"//mediapipe/framework:calculator_framework", "//mediapipe/framework:calculator_framework",
"//mediapipe/framework:calculator_runner", "//mediapipe/framework:calculator_runner",
"//mediapipe/framework/api2:node",
"//mediapipe/framework/api2:port",
"//mediapipe/framework/port:gtest_main", "//mediapipe/framework/port:gtest_main",
"//mediapipe/framework/port:parse_text_proto", "//mediapipe/framework/port:parse_text_proto",
"//mediapipe/framework/port:status", "//mediapipe/framework/port:status",
"@com_google_absl//absl/status",
"@com_google_absl//absl/types:optional",
], ],
) )
@ -1200,3 +1214,45 @@ cc_test(
"@com_google_absl//absl/strings", "@com_google_absl//absl/strings",
], ],
) )
cc_library(
name = "graph_profile_calculator",
srcs = ["graph_profile_calculator.cc"],
visibility = ["//visibility:public"],
deps = [
":graph_profile_calculator_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework:calculator_profile_cc_proto",
"//mediapipe/framework/api2:node",
"//mediapipe/framework/api2:packet",
"//mediapipe/framework/api2:port",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
],
alwayslink = 1,
)
cc_test(
name = "graph_profile_calculator_test",
srcs = ["graph_profile_calculator_test.cc"],
deps = [
":graph_profile_calculator",
"//mediapipe/framework:calculator_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework:calculator_profile_cc_proto",
"//mediapipe/framework:test_calculators",
"//mediapipe/framework/deps:clock",
"//mediapipe/framework/deps:message_matchers",
"//mediapipe/framework/port:core_proto",
"//mediapipe/framework/port:gtest_main",
"//mediapipe/framework/port:integral_types",
"//mediapipe/framework/port:logging",
"//mediapipe/framework/port:parse_text_proto",
"//mediapipe/framework/port:threadpool",
"//mediapipe/framework/tool:simulation_clock_executor",
"//mediapipe/framework/tool:sink",
"@com_google_absl//absl/status",
"@com_google_absl//absl/strings",
"@com_google_absl//absl/time",
],
)
View File
@ -24,6 +24,9 @@
namespace mediapipe { namespace mediapipe {
constexpr char kDataTag[] = "DATA";
constexpr char kHeaderTag[] = "HEADER";
class AddHeaderCalculatorTest : public ::testing::Test {}; class AddHeaderCalculatorTest : public ::testing::Test {};
TEST_F(AddHeaderCalculatorTest, HeaderStream) { TEST_F(AddHeaderCalculatorTest, HeaderStream) {
@ -36,11 +39,11 @@ TEST_F(AddHeaderCalculatorTest, HeaderStream) {
CalculatorRunner runner(node); CalculatorRunner runner(node);
// Set header and add 5 packets. // Set header and add 5 packets.
runner.MutableInputs()->Tag("HEADER").header = runner.MutableInputs()->Tag(kHeaderTag).header =
Adopt(new std::string("my_header")); Adopt(new std::string("my_header"));
for (int i = 0; i < 5; ++i) { for (int i = 0; i < 5; ++i) {
Packet packet = Adopt(new int(i)).At(Timestamp(i * 1000)); Packet packet = Adopt(new int(i)).At(Timestamp(i * 1000));
runner.MutableInputs()->Tag("DATA").packets.push_back(packet); runner.MutableInputs()->Tag(kDataTag).packets.push_back(packet);
} }
// Run calculator. // Run calculator.
@ -85,13 +88,14 @@ TEST_F(AddHeaderCalculatorTest, NoPacketsOnHeaderStream) {
CalculatorRunner runner(node); CalculatorRunner runner(node);
// Set header and add 5 packets. // Set header and add 5 packets.
runner.MutableInputs()->Tag("HEADER").header = runner.MutableInputs()->Tag(kHeaderTag).header =
Adopt(new std::string("my_header")); Adopt(new std::string("my_header"));
runner.MutableInputs()->Tag("HEADER").packets.push_back( runner.MutableInputs()
Adopt(new std::string("not allowed"))); ->Tag(kHeaderTag)
.packets.push_back(Adopt(new std::string("not allowed")));
for (int i = 0; i < 5; ++i) { for (int i = 0; i < 5; ++i) {
Packet packet = Adopt(new int(i)).At(Timestamp(i * 1000)); Packet packet = Adopt(new int(i)).At(Timestamp(i * 1000));
runner.MutableInputs()->Tag("DATA").packets.push_back(packet); runner.MutableInputs()->Tag(kDataTag).packets.push_back(packet);
} }
// Run calculator. // Run calculator.
@ -108,11 +112,11 @@ TEST_F(AddHeaderCalculatorTest, InputSidePacket) {
CalculatorRunner runner(node); CalculatorRunner runner(node);
// Set header and add 5 packets. // Set header and add 5 packets.
runner.MutableSidePackets()->Tag("HEADER") = runner.MutableSidePackets()->Tag(kHeaderTag) =
Adopt(new std::string("my_header")); Adopt(new std::string("my_header"));
for (int i = 0; i < 5; ++i) { for (int i = 0; i < 5; ++i) {
Packet packet = Adopt(new int(i)).At(Timestamp(i * 1000)); Packet packet = Adopt(new int(i)).At(Timestamp(i * 1000));
runner.MutableInputs()->Tag("DATA").packets.push_back(packet); runner.MutableInputs()->Tag(kDataTag).packets.push_back(packet);
} }
// Run calculator. // Run calculator.
@ -143,13 +147,13 @@ TEST_F(AddHeaderCalculatorTest, UsingBothSideInputAndStream) {
CalculatorRunner runner(node); CalculatorRunner runner(node);
// Set both headers and add 5 packets. // Set both headers and add 5 packets.
runner.MutableSidePackets()->Tag("HEADER") = runner.MutableSidePackets()->Tag(kHeaderTag) =
Adopt(new std::string("my_header")); Adopt(new std::string("my_header"));
runner.MutableSidePackets()->Tag("HEADER") = runner.MutableSidePackets()->Tag(kHeaderTag) =
Adopt(new std::string("my_header")); Adopt(new std::string("my_header"));
for (int i = 0; i < 5; ++i) { for (int i = 0; i < 5; ++i) {
Packet packet = Adopt(new int(i)).At(Timestamp(i * 1000)); Packet packet = Adopt(new int(i)).At(Timestamp(i * 1000));
runner.MutableInputs()->Tag("DATA").packets.push_back(packet); runner.MutableInputs()->Tag(kDataTag).packets.push_back(packet);
} }
// Run should fail because header can only be provided one way. // Run should fail because header can only be provided one way.
View File
@ -42,4 +42,13 @@ REGISTER_CALCULATOR(BeginLoopDetectionCalculator);
typedef BeginLoopCalculator<std::vector<Matrix>> BeginLoopMatrixCalculator; typedef BeginLoopCalculator<std::vector<Matrix>> BeginLoopMatrixCalculator;
REGISTER_CALCULATOR(BeginLoopMatrixCalculator); REGISTER_CALCULATOR(BeginLoopMatrixCalculator);
// A calculator to process std::vector<std::vector<Matrix>>.
typedef BeginLoopCalculator<std::vector<std::vector<Matrix>>>
BeginLoopMatrixVectorCalculator;
REGISTER_CALCULATOR(BeginLoopMatrixVectorCalculator);
// A calculator to process std::vector<uint64_t>.
typedef BeginLoopCalculator<std::vector<uint64_t>> BeginLoopUint64tCalculator;
REGISTER_CALCULATOR(BeginLoopUint64tCalculator);
} // namespace mediapipe } // namespace mediapipe
View File
@ -19,6 +19,13 @@
namespace mediapipe { namespace mediapipe {
constexpr char kIncrementTag[] = "INCREMENT";
constexpr char kInitialValueTag[] = "INITIAL_VALUE";
constexpr char kBatchSizeTag[] = "BATCH_SIZE";
constexpr char kErrorCountTag[] = "ERROR_COUNT";
constexpr char kMaxCountTag[] = "MAX_COUNT";
constexpr char kErrorOnOpenTag[] = "ERROR_ON_OPEN";
// Source calculator that produces MAX_COUNT*BATCH_SIZE int packets of // Source calculator that produces MAX_COUNT*BATCH_SIZE int packets of
// sequential numbers from INITIAL_VALUE (default 0) with a common // sequential numbers from INITIAL_VALUE (default 0) with a common
// difference of INCREMENT (default 1) between successive numbers (with // difference of INCREMENT (default 1) between successive numbers (with
@ -33,53 +40,53 @@ class CountingSourceCalculator : public CalculatorBase {
static absl::Status GetContract(CalculatorContract* cc) { static absl::Status GetContract(CalculatorContract* cc) {
cc->Outputs().Index(0).Set<int>(); cc->Outputs().Index(0).Set<int>();
if (cc->InputSidePackets().HasTag("ERROR_ON_OPEN")) { if (cc->InputSidePackets().HasTag(kErrorOnOpenTag)) {
cc->InputSidePackets().Tag("ERROR_ON_OPEN").Set<bool>(); cc->InputSidePackets().Tag(kErrorOnOpenTag).Set<bool>();
} }
RET_CHECK(cc->InputSidePackets().HasTag("MAX_COUNT") || RET_CHECK(cc->InputSidePackets().HasTag(kMaxCountTag) ||
cc->InputSidePackets().HasTag("ERROR_COUNT")); cc->InputSidePackets().HasTag(kErrorCountTag));
if (cc->InputSidePackets().HasTag("MAX_COUNT")) { if (cc->InputSidePackets().HasTag(kMaxCountTag)) {
cc->InputSidePackets().Tag("MAX_COUNT").Set<int>(); cc->InputSidePackets().Tag(kMaxCountTag).Set<int>();
} }
if (cc->InputSidePackets().HasTag("ERROR_COUNT")) { if (cc->InputSidePackets().HasTag(kErrorCountTag)) {
cc->InputSidePackets().Tag("ERROR_COUNT").Set<int>(); cc->InputSidePackets().Tag(kErrorCountTag).Set<int>();
} }
if (cc->InputSidePackets().HasTag("BATCH_SIZE")) { if (cc->InputSidePackets().HasTag(kBatchSizeTag)) {
cc->InputSidePackets().Tag("BATCH_SIZE").Set<int>(); cc->InputSidePackets().Tag(kBatchSizeTag).Set<int>();
} }
if (cc->InputSidePackets().HasTag("INITIAL_VALUE")) { if (cc->InputSidePackets().HasTag(kInitialValueTag)) {
cc->InputSidePackets().Tag("INITIAL_VALUE").Set<int>(); cc->InputSidePackets().Tag(kInitialValueTag).Set<int>();
} }
if (cc->InputSidePackets().HasTag("INCREMENT")) { if (cc->InputSidePackets().HasTag(kIncrementTag)) {
cc->InputSidePackets().Tag("INCREMENT").Set<int>(); cc->InputSidePackets().Tag(kIncrementTag).Set<int>();
} }
return absl::OkStatus(); return absl::OkStatus();
} }
absl::Status Open(CalculatorContext* cc) override { absl::Status Open(CalculatorContext* cc) override {
if (cc->InputSidePackets().HasTag("ERROR_ON_OPEN") && if (cc->InputSidePackets().HasTag(kErrorOnOpenTag) &&
cc->InputSidePackets().Tag("ERROR_ON_OPEN").Get<bool>()) { cc->InputSidePackets().Tag(kErrorOnOpenTag).Get<bool>()) {
return absl::NotFoundError("expected error"); return absl::NotFoundError("expected error");
} }
if (cc->InputSidePackets().HasTag("ERROR_COUNT")) { if (cc->InputSidePackets().HasTag(kErrorCountTag)) {
error_count_ = cc->InputSidePackets().Tag("ERROR_COUNT").Get<int>(); error_count_ = cc->InputSidePackets().Tag(kErrorCountTag).Get<int>();
RET_CHECK_LE(0, error_count_); RET_CHECK_LE(0, error_count_);
} }
if (cc->InputSidePackets().HasTag("MAX_COUNT")) { if (cc->InputSidePackets().HasTag(kMaxCountTag)) {
max_count_ = cc->InputSidePackets().Tag("MAX_COUNT").Get<int>(); max_count_ = cc->InputSidePackets().Tag(kMaxCountTag).Get<int>();
RET_CHECK_LE(0, max_count_); RET_CHECK_LE(0, max_count_);
} }
if (cc->InputSidePackets().HasTag("BATCH_SIZE")) { if (cc->InputSidePackets().HasTag(kBatchSizeTag)) {
batch_size_ = cc->InputSidePackets().Tag("BATCH_SIZE").Get<int>(); batch_size_ = cc->InputSidePackets().Tag(kBatchSizeTag).Get<int>();
RET_CHECK_LT(0, batch_size_); RET_CHECK_LT(0, batch_size_);
} }
if (cc->InputSidePackets().HasTag("INITIAL_VALUE")) { if (cc->InputSidePackets().HasTag(kInitialValueTag)) {
counter_ = cc->InputSidePackets().Tag("INITIAL_VALUE").Get<int>(); counter_ = cc->InputSidePackets().Tag(kInitialValueTag).Get<int>();
} }
if (cc->InputSidePackets().HasTag("INCREMENT")) { if (cc->InputSidePackets().HasTag(kIncrementTag)) {
increment_ = cc->InputSidePackets().Tag("INCREMENT").Get<int>(); increment_ = cc->InputSidePackets().Tag(kIncrementTag).Get<int>();
RET_CHECK_LT(0, increment_); RET_CHECK_LT(0, increment_);
} }
RET_CHECK(error_count_ >= 0 || max_count_ >= 0); RET_CHECK(error_count_ >= 0 || max_count_ >= 0);
View File
@ -35,11 +35,14 @@
// } // }
namespace mediapipe { namespace mediapipe {
constexpr char kFloatVectorTag[] = "FLOAT_VECTOR";
constexpr char kEncodedTag[] = "ENCODED";
class DequantizeByteArrayCalculator : public CalculatorBase { class DequantizeByteArrayCalculator : public CalculatorBase {
public: public:
static absl::Status GetContract(CalculatorContract* cc) { static absl::Status GetContract(CalculatorContract* cc) {
cc->Inputs().Tag("ENCODED").Set<std::string>(); cc->Inputs().Tag(kEncodedTag).Set<std::string>();
cc->Outputs().Tag("FLOAT_VECTOR").Set<std::vector<float>>(); cc->Outputs().Tag(kFloatVectorTag).Set<std::vector<float>>();
return absl::OkStatus(); return absl::OkStatus();
} }
@ -66,7 +69,7 @@ class DequantizeByteArrayCalculator : public CalculatorBase {
absl::Status Process(CalculatorContext* cc) final { absl::Status Process(CalculatorContext* cc) final {
const std::string& encoded = const std::string& encoded =
cc->Inputs().Tag("ENCODED").Value().Get<std::string>(); cc->Inputs().Tag(kEncodedTag).Value().Get<std::string>();
std::vector<float> float_vector; std::vector<float> float_vector;
float_vector.reserve(encoded.length()); float_vector.reserve(encoded.length());
for (int i = 0; i < encoded.length(); ++i) { for (int i = 0; i < encoded.length(); ++i) {
@ -74,7 +77,7 @@ class DequantizeByteArrayCalculator : public CalculatorBase {
static_cast<unsigned char>(encoded.at(i)) * scalar_ + bias_); static_cast<unsigned char>(encoded.at(i)) * scalar_ + bias_);
} }
cc->Outputs() cc->Outputs()
.Tag("FLOAT_VECTOR") .Tag(kFloatVectorTag)
.AddPacket(MakePacket<std::vector<float>>(float_vector) .AddPacket(MakePacket<std::vector<float>>(float_vector)
.At(cc->InputTimestamp())); .At(cc->InputTimestamp()));
return absl::OkStatus(); return absl::OkStatus();
View File
@ -25,6 +25,9 @@
namespace mediapipe { namespace mediapipe {
constexpr char kFloatVectorTag[] = "FLOAT_VECTOR";
constexpr char kEncodedTag[] = "ENCODED";
TEST(QuantizeFloatVectorCalculatorTest, WrongConfig) { TEST(QuantizeFloatVectorCalculatorTest, WrongConfig) {
CalculatorGraphConfig::Node node_config = CalculatorGraphConfig::Node node_config =
ParseTextProtoOrDie<CalculatorGraphConfig::Node>(R"pb( ParseTextProtoOrDie<CalculatorGraphConfig::Node>(R"pb(
@ -39,8 +42,10 @@ TEST(QuantizeFloatVectorCalculatorTest, WrongConfig) {
)pb"); )pb");
CalculatorRunner runner(node_config); CalculatorRunner runner(node_config);
std::string empty_string; std::string empty_string;
runner.MutableInputs()->Tag("ENCODED").packets.push_back( runner.MutableInputs()
MakePacket<std::string>(empty_string).At(Timestamp(0))); ->Tag(kEncodedTag)
.packets.push_back(
MakePacket<std::string>(empty_string).At(Timestamp(0)));
auto status = runner.Run(); auto status = runner.Run();
EXPECT_FALSE(status.ok()); EXPECT_FALSE(status.ok());
EXPECT_THAT( EXPECT_THAT(
@ -64,8 +69,10 @@ TEST(QuantizeFloatVectorCalculatorTest, WrongConfig2) {
)pb"); )pb");
CalculatorRunner runner(node_config); CalculatorRunner runner(node_config);
std::string empty_string; std::string empty_string;
runner.MutableInputs()->Tag("ENCODED").packets.push_back( runner.MutableInputs()
MakePacket<std::string>(empty_string).At(Timestamp(0))); ->Tag(kEncodedTag)
.packets.push_back(
MakePacket<std::string>(empty_string).At(Timestamp(0)));
auto status = runner.Run(); auto status = runner.Run();
EXPECT_FALSE(status.ok()); EXPECT_FALSE(status.ok());
EXPECT_THAT( EXPECT_THAT(
@ -89,8 +96,10 @@ TEST(QuantizeFloatVectorCalculatorTest, WrongConfig3) {
)pb"); )pb");
CalculatorRunner runner(node_config); CalculatorRunner runner(node_config);
std::string empty_string; std::string empty_string;
runner.MutableInputs()->Tag("ENCODED").packets.push_back( runner.MutableInputs()
MakePacket<std::string>(empty_string).At(Timestamp(0))); ->Tag(kEncodedTag)
.packets.push_back(
MakePacket<std::string>(empty_string).At(Timestamp(0)));
auto status = runner.Run(); auto status = runner.Run();
EXPECT_FALSE(status.ok()); EXPECT_FALSE(status.ok());
EXPECT_THAT( EXPECT_THAT(
@ -114,14 +123,16 @@ TEST(DequantizeByteArrayCalculatorTest, TestDequantization) {
)pb"); )pb");
CalculatorRunner runner(node_config); CalculatorRunner runner(node_config);
unsigned char input[4] = {0x7F, 0xFF, 0x00, 0x01}; unsigned char input[4] = {0x7F, 0xFF, 0x00, 0x01};
runner.MutableInputs()->Tag("ENCODED").packets.push_back( runner.MutableInputs()
MakePacket<std::string>( ->Tag(kEncodedTag)
std::string(reinterpret_cast<char const*>(input), 4)) .packets.push_back(
.At(Timestamp(0))); MakePacket<std::string>(
std::string(reinterpret_cast<char const*>(input), 4))
.At(Timestamp(0)));
auto status = runner.Run(); auto status = runner.Run();
MP_ASSERT_OK(runner.Run()); MP_ASSERT_OK(runner.Run());
const std::vector<Packet>& outputs = const std::vector<Packet>& outputs =
runner.Outputs().Tag("FLOAT_VECTOR").packets; runner.Outputs().Tag(kFloatVectorTag).packets;
EXPECT_EQ(1, outputs.size()); EXPECT_EQ(1, outputs.size());
const std::vector<float>& result = outputs[0].Get<std::vector<float>>(); const std::vector<float>& result = outputs[0].Get<std::vector<float>>();
ASSERT_FALSE(result.empty()); ASSERT_FALSE(result.empty());
View File
@ -24,6 +24,11 @@
namespace mediapipe { namespace mediapipe {
constexpr char kFinishedTag[] = "FINISHED";
constexpr char kAllowTag[] = "ALLOW";
constexpr char kMaxInFlightTag[] = "MAX_IN_FLIGHT";
constexpr char kOptionsTag[] = "OPTIONS";
// FlowLimiterCalculator is used to limit the number of frames in flight // FlowLimiterCalculator is used to limit the number of frames in flight
// by dropping input frames when necessary. // by dropping input frames when necessary.
// //
@ -69,16 +74,19 @@ class FlowLimiterCalculator : public CalculatorBase {
public: public:
static absl::Status GetContract(CalculatorContract* cc) { static absl::Status GetContract(CalculatorContract* cc) {
auto& side_inputs = cc->InputSidePackets(); auto& side_inputs = cc->InputSidePackets();
side_inputs.Tag("OPTIONS").Set<FlowLimiterCalculatorOptions>().Optional(); side_inputs.Tag(kOptionsTag).Set<FlowLimiterCalculatorOptions>().Optional();
cc->Inputs().Tag("OPTIONS").Set<FlowLimiterCalculatorOptions>().Optional(); cc->Inputs()
.Tag(kOptionsTag)
.Set<FlowLimiterCalculatorOptions>()
.Optional();
RET_CHECK_GE(cc->Inputs().NumEntries(""), 1); RET_CHECK_GE(cc->Inputs().NumEntries(""), 1);
for (int i = 0; i < cc->Inputs().NumEntries(""); ++i) { for (int i = 0; i < cc->Inputs().NumEntries(""); ++i) {
cc->Inputs().Get("", i).SetAny(); cc->Inputs().Get("", i).SetAny();
cc->Outputs().Get("", i).SetSameAs(&(cc->Inputs().Get("", i))); cc->Outputs().Get("", i).SetSameAs(&(cc->Inputs().Get("", i)));
} }
cc->Inputs().Get("FINISHED", 0).SetAny(); cc->Inputs().Get("FINISHED", 0).SetAny();
cc->InputSidePackets().Tag("MAX_IN_FLIGHT").Set<int>().Optional(); cc->InputSidePackets().Tag(kMaxInFlightTag).Set<int>().Optional();
cc->Outputs().Tag("ALLOW").Set<bool>().Optional(); cc->Outputs().Tag(kAllowTag).Set<bool>().Optional();
cc->SetInputStreamHandler("ImmediateInputStreamHandler"); cc->SetInputStreamHandler("ImmediateInputStreamHandler");
cc->SetProcessTimestampBounds(true); cc->SetProcessTimestampBounds(true);
return absl::OkStatus(); return absl::OkStatus();
@ -87,9 +95,9 @@ class FlowLimiterCalculator : public CalculatorBase {
absl::Status Open(CalculatorContext* cc) final { absl::Status Open(CalculatorContext* cc) final {
options_ = cc->Options<FlowLimiterCalculatorOptions>(); options_ = cc->Options<FlowLimiterCalculatorOptions>();
options_ = tool::RetrieveOptions(options_, cc->InputSidePackets()); options_ = tool::RetrieveOptions(options_, cc->InputSidePackets());
if (cc->InputSidePackets().HasTag("MAX_IN_FLIGHT")) { if (cc->InputSidePackets().HasTag(kMaxInFlightTag)) {
options_.set_max_in_flight( options_.set_max_in_flight(
cc->InputSidePackets().Tag("MAX_IN_FLIGHT").Get<int>()); cc->InputSidePackets().Tag(kMaxInFlightTag).Get<int>());
} }
input_queues_.resize(cc->Inputs().NumEntries("")); input_queues_.resize(cc->Inputs().NumEntries(""));
RET_CHECK_OK(CopyInputHeadersToOutputs(cc->Inputs(), &(cc->Outputs()))); RET_CHECK_OK(CopyInputHeadersToOutputs(cc->Inputs(), &(cc->Outputs())));
@ -104,8 +112,8 @@ class FlowLimiterCalculator : public CalculatorBase {
// Outputs a packet indicating whether a frame was sent or dropped. // Outputs a packet indicating whether a frame was sent or dropped.
void SendAllow(bool allow, Timestamp ts, CalculatorContext* cc) { void SendAllow(bool allow, Timestamp ts, CalculatorContext* cc) {
if (cc->Outputs().HasTag("ALLOW")) { if (cc->Outputs().HasTag(kAllowTag)) {
cc->Outputs().Tag("ALLOW").AddPacket(MakePacket<bool>(allow).At(ts)); cc->Outputs().Tag(kAllowTag).AddPacket(MakePacket<bool>(allow).At(ts));
} }
} }
@ -155,7 +163,7 @@ class FlowLimiterCalculator : public CalculatorBase {
options_ = tool::RetrieveOptions(options_, cc->Inputs()); options_ = tool::RetrieveOptions(options_, cc->Inputs());
// Process the FINISHED input stream. // Process the FINISHED input stream.
Packet finished_packet = cc->Inputs().Tag("FINISHED").Value(); Packet finished_packet = cc->Inputs().Tag(kFinishedTag).Value();
if (finished_packet.Timestamp() == cc->InputTimestamp()) { if (finished_packet.Timestamp() == cc->InputTimestamp()) {
while (!frames_in_flight_.empty() && while (!frames_in_flight_.empty() &&
frames_in_flight_.front() <= finished_packet.Timestamp()) { frames_in_flight_.front() <= finished_packet.Timestamp()) {
@ -210,8 +218,8 @@ class FlowLimiterCalculator : public CalculatorBase {
Timestamp bound = Timestamp bound =
cc->Inputs().Get("", 0).Value().Timestamp().NextAllowedInStream(); cc->Inputs().Get("", 0).Value().Timestamp().NextAllowedInStream();
SetNextTimestampBound(bound, &cc->Outputs().Get("", 0)); SetNextTimestampBound(bound, &cc->Outputs().Get("", 0));
if (cc->Outputs().HasTag("ALLOW")) { if (cc->Outputs().HasTag(kAllowTag)) {
SetNextTimestampBound(bound, &cc->Outputs().Tag("ALLOW")); SetNextTimestampBound(bound, &cc->Outputs().Tag(kAllowTag));
} }
} }
View File
@ -30,7 +30,7 @@ message FlowLimiterCalculatorOptions {
optional int32 max_in_flight = 1 [default = 1]; optional int32 max_in_flight = 1 [default = 1];
// The maximum number of frames queued waiting for processing. // The maximum number of frames queued waiting for processing.
// The default value limits to 1 frame awaiting processing. // The default value limits to 0 frames awaiting processing.
optional int32 max_in_queue = 2 [default = 0]; optional int32 max_in_queue = 2 [default = 0];
// The maximum time in microseconds to wait for a frame to finish processing. // The maximum time in microseconds to wait for a frame to finish processing.
View File
@ -36,6 +36,13 @@
namespace mediapipe { namespace mediapipe {
namespace { namespace {
constexpr char kDropTimestampsTag[] = "DROP_TIMESTAMPS";
constexpr char kClockTag[] = "CLOCK";
constexpr char kWarmupTimeTag[] = "WARMUP_TIME";
constexpr char kSleepTimeTag[] = "SLEEP_TIME";
constexpr char kPacketTag[] = "PACKET";
// A simple Semaphore for synchronizing test threads. // A simple Semaphore for synchronizing test threads.
class AtomicSemaphore { class AtomicSemaphore {
public: public:
@ -204,17 +211,17 @@ TEST_F(FlowLimiterCalculatorSemaphoreTest, FramesDropped) {
class SleepCalculator : public CalculatorBase { class SleepCalculator : public CalculatorBase {
public: public:
static absl::Status GetContract(CalculatorContract* cc) { static absl::Status GetContract(CalculatorContract* cc) {
cc->Inputs().Tag("PACKET").SetAny(); cc->Inputs().Tag(kPacketTag).SetAny();
cc->Outputs().Tag("PACKET").SetSameAs(&cc->Inputs().Tag("PACKET")); cc->Outputs().Tag(kPacketTag).SetSameAs(&cc->Inputs().Tag(kPacketTag));
cc->InputSidePackets().Tag("SLEEP_TIME").Set<int64>(); cc->InputSidePackets().Tag(kSleepTimeTag).Set<int64>();
cc->InputSidePackets().Tag("WARMUP_TIME").Set<int64>(); cc->InputSidePackets().Tag(kWarmupTimeTag).Set<int64>();
cc->InputSidePackets().Tag("CLOCK").Set<mediapipe::Clock*>(); cc->InputSidePackets().Tag(kClockTag).Set<mediapipe::Clock*>();
cc->SetTimestampOffset(0); cc->SetTimestampOffset(0);
return absl::OkStatus(); return absl::OkStatus();
} }
absl::Status Open(CalculatorContext* cc) final { absl::Status Open(CalculatorContext* cc) final {
clock_ = cc->InputSidePackets().Tag("CLOCK").Get<mediapipe::Clock*>(); clock_ = cc->InputSidePackets().Tag(kClockTag).Get<mediapipe::Clock*>();
return absl::OkStatus(); return absl::OkStatus();
} }
@ -222,10 +229,12 @@ class SleepCalculator : public CalculatorBase {
++packet_count; ++packet_count;
absl::Duration sleep_time = absl::Microseconds( absl::Duration sleep_time = absl::Microseconds(
packet_count == 1 packet_count == 1
? cc->InputSidePackets().Tag("WARMUP_TIME").Get<int64>() ? cc->InputSidePackets().Tag(kWarmupTimeTag).Get<int64>()
: cc->InputSidePackets().Tag("SLEEP_TIME").Get<int64>()); : cc->InputSidePackets().Tag(kSleepTimeTag).Get<int64>());
clock_->Sleep(sleep_time); clock_->Sleep(sleep_time);
cc->Outputs().Tag("PACKET").AddPacket(cc->Inputs().Tag("PACKET").Value()); cc->Outputs()
.Tag(kPacketTag)
.AddPacket(cc->Inputs().Tag(kPacketTag).Value());
return absl::OkStatus(); return absl::OkStatus();
} }
@ -240,24 +249,27 @@ REGISTER_CALCULATOR(SleepCalculator);
class DropCalculator : public CalculatorBase { class DropCalculator : public CalculatorBase {
public: public:
static absl::Status GetContract(CalculatorContract* cc) { static absl::Status GetContract(CalculatorContract* cc) {
cc->Inputs().Tag("PACKET").SetAny(); cc->Inputs().Tag(kPacketTag).SetAny();
cc->Outputs().Tag("PACKET").SetSameAs(&cc->Inputs().Tag("PACKET")); cc->Outputs().Tag(kPacketTag).SetSameAs(&cc->Inputs().Tag(kPacketTag));
cc->InputSidePackets().Tag("DROP_TIMESTAMPS").Set<bool>(); cc->InputSidePackets().Tag(kDropTimestampsTag).Set<bool>();
cc->SetProcessTimestampBounds(true); cc->SetProcessTimestampBounds(true);
return absl::OkStatus(); return absl::OkStatus();
} }
absl::Status Process(CalculatorContext* cc) final { absl::Status Process(CalculatorContext* cc) final {
if (!cc->Inputs().Tag("PACKET").Value().IsEmpty()) { if (!cc->Inputs().Tag(kPacketTag).Value().IsEmpty()) {
++packet_count; ++packet_count;
} }
bool drop = (packet_count == 3); bool drop = (packet_count == 3);
if (!drop && !cc->Inputs().Tag("PACKET").Value().IsEmpty()) { if (!drop && !cc->Inputs().Tag(kPacketTag).Value().IsEmpty()) {
cc->Outputs().Tag("PACKET").AddPacket(cc->Inputs().Tag("PACKET").Value()); cc->Outputs()
.Tag(kPacketTag)
.AddPacket(cc->Inputs().Tag(kPacketTag).Value());
} }
if (!drop || !cc->InputSidePackets().Tag("DROP_TIMESTAMPS").Get<bool>()) { if (!drop || !cc->InputSidePackets().Tag(kDropTimestampsTag).Get<bool>()) {
cc->Outputs().Tag("PACKET").SetNextTimestampBound( cc->Outputs()
cc->InputTimestamp().NextAllowedInStream()); .Tag(kPacketTag)
.SetNextTimestampBound(cc->InputTimestamp().NextAllowedInStream());
} }
return absl::OkStatus(); return absl::OkStatus();
} }
View File
@ -21,6 +21,11 @@
namespace mediapipe { namespace mediapipe {
namespace { namespace {
constexpr char kStateChangeTag[] = "STATE_CHANGE";
constexpr char kDisallowTag[] = "DISALLOW";
constexpr char kAllowTag[] = "ALLOW";
enum GateState { enum GateState {
GATE_UNINITIALIZED, GATE_UNINITIALIZED,
GATE_ALLOW, GATE_ALLOW,
@ -59,8 +64,9 @@ std::string ToString(GateState state) {
// ALLOW or DISALLOW can also be specified as an input side packet. The rules // ALLOW or DISALLOW can also be specified as an input side packet. The rules
// for evaluation remain the same as above. // for evaluation remain the same as above.
// //
// ALLOW/DISALLOW inputs must be specified either using input stream or // ALLOW/DISALLOW inputs must be specified either using input stream or via
// via input side packet but not both. // input side packet but not both. If neither is specified, the behavior is then
// determined by the "allow" field in the calculator options.
// //
// Intended to be used with the default input stream handler, which synchronizes // Intended to be used with the default input stream handler, which synchronizes
// all data input streams with the ALLOW/DISALLOW control input stream. // all data input streams with the ALLOW/DISALLOW control input stream.
@ -83,30 +89,33 @@ class GateCalculator : public CalculatorBase {
GateCalculator() {} GateCalculator() {}
static absl::Status CheckAndInitAllowDisallowInputs(CalculatorContract* cc) { static absl::Status CheckAndInitAllowDisallowInputs(CalculatorContract* cc) {
bool input_via_side_packet = cc->InputSidePackets().HasTag("ALLOW") || bool input_via_side_packet = cc->InputSidePackets().HasTag(kAllowTag) ||
cc->InputSidePackets().HasTag("DISALLOW"); cc->InputSidePackets().HasTag(kDisallowTag);
bool input_via_stream = bool input_via_stream =
cc->Inputs().HasTag("ALLOW") || cc->Inputs().HasTag("DISALLOW"); cc->Inputs().HasTag(kAllowTag) || cc->Inputs().HasTag(kDisallowTag);
// Only one of input_side_packet or input_stream may specify ALLOW/DISALLOW
// input.
RET_CHECK(input_via_side_packet ^ input_via_stream);
// Only one of input_side_packet or input_stream may specify
// ALLOW/DISALLOW input.
if (input_via_side_packet) { if (input_via_side_packet) {
RET_CHECK(cc->InputSidePackets().HasTag("ALLOW") ^ RET_CHECK(!input_via_stream);
cc->InputSidePackets().HasTag("DISALLOW")); RET_CHECK(cc->InputSidePackets().HasTag(kAllowTag) ^
cc->InputSidePackets().HasTag(kDisallowTag));
if (cc->InputSidePackets().HasTag("ALLOW")) { if (cc->InputSidePackets().HasTag(kAllowTag)) {
cc->InputSidePackets().Tag("ALLOW").Set<bool>(); cc->InputSidePackets().Tag(kAllowTag).Set<bool>().Optional();
} else { } else {
cc->InputSidePackets().Tag("DISALLOW").Set<bool>(); cc->InputSidePackets().Tag(kDisallowTag).Set<bool>().Optional();
} }
} else { }
RET_CHECK(cc->Inputs().HasTag("ALLOW") ^ cc->Inputs().HasTag("DISALLOW")); if (input_via_stream) {
RET_CHECK(!input_via_side_packet);
RET_CHECK(cc->Inputs().HasTag(kAllowTag) ^
cc->Inputs().HasTag(kDisallowTag));
if (cc->Inputs().HasTag("ALLOW")) { if (cc->Inputs().HasTag(kAllowTag)) {
cc->Inputs().Tag("ALLOW").Set<bool>(); cc->Inputs().Tag(kAllowTag).Set<bool>();
} else { } else {
cc->Inputs().Tag("DISALLOW").Set<bool>(); cc->Inputs().Tag(kDisallowTag).Set<bool>();
} }
} }
return absl::OkStatus(); return absl::OkStatus();
@ -125,23 +134,22 @@ class GateCalculator : public CalculatorBase {
cc->Outputs().Get("", i).SetSameAs(&cc->Inputs().Get("", i)); cc->Outputs().Get("", i).SetSameAs(&cc->Inputs().Get("", i));
} }
if (cc->Outputs().HasTag("STATE_CHANGE")) { if (cc->Outputs().HasTag(kStateChangeTag)) {
cc->Outputs().Tag("STATE_CHANGE").Set<bool>(); cc->Outputs().Tag(kStateChangeTag).Set<bool>();
} }
return absl::OkStatus(); return absl::OkStatus();
} }
absl::Status Open(CalculatorContext* cc) final { absl::Status Open(CalculatorContext* cc) final {
use_side_packet_for_allow_disallow_ = false; if (cc->InputSidePackets().HasTag(kAllowTag)) {
if (cc->InputSidePackets().HasTag("ALLOW")) {
use_side_packet_for_allow_disallow_ = true; use_side_packet_for_allow_disallow_ = true;
allow_by_side_packet_decision_ = allow_by_side_packet_decision_ =
cc->InputSidePackets().Tag("ALLOW").Get<bool>(); cc->InputSidePackets().Tag(kAllowTag).Get<bool>();
} else if (cc->InputSidePackets().HasTag("DISALLOW")) { } else if (cc->InputSidePackets().HasTag(kDisallowTag)) {
use_side_packet_for_allow_disallow_ = true; use_side_packet_for_allow_disallow_ = true;
allow_by_side_packet_decision_ = allow_by_side_packet_decision_ =
!cc->InputSidePackets().Tag("DISALLOW").Get<bool>(); !cc->InputSidePackets().Tag(kDisallowTag).Get<bool>();
} }
cc->SetOffset(TimestampDiff(0)); cc->SetOffset(TimestampDiff(0));
@ -152,26 +160,34 @@ class GateCalculator : public CalculatorBase {
const auto& options = cc->Options<::mediapipe::GateCalculatorOptions>(); const auto& options = cc->Options<::mediapipe::GateCalculatorOptions>();
empty_packets_as_allow_ = options.empty_packets_as_allow(); empty_packets_as_allow_ = options.empty_packets_as_allow();
if (!use_side_packet_for_allow_disallow_ &&
!cc->Inputs().HasTag(kAllowTag) && !cc->Inputs().HasTag(kDisallowTag)) {
use_option_for_allow_disallow_ = true;
allow_by_option_decision_ = options.allow();
}
return absl::OkStatus(); return absl::OkStatus();
} }
absl::Status Process(CalculatorContext* cc) final { absl::Status Process(CalculatorContext* cc) final {
bool allow = empty_packets_as_allow_; bool allow = empty_packets_as_allow_;
if (use_side_packet_for_allow_disallow_) { if (use_option_for_allow_disallow_) {
allow = allow_by_option_decision_;
} else if (use_side_packet_for_allow_disallow_) {
allow = allow_by_side_packet_decision_; allow = allow_by_side_packet_decision_;
} else { } else {
if (cc->Inputs().HasTag("ALLOW") && if (cc->Inputs().HasTag(kAllowTag) &&
!cc->Inputs().Tag("ALLOW").IsEmpty()) { !cc->Inputs().Tag(kAllowTag).IsEmpty()) {
allow = cc->Inputs().Tag("ALLOW").Get<bool>(); allow = cc->Inputs().Tag(kAllowTag).Get<bool>();
} }
if (cc->Inputs().HasTag("DISALLOW") && if (cc->Inputs().HasTag(kDisallowTag) &&
!cc->Inputs().Tag("DISALLOW").IsEmpty()) { !cc->Inputs().Tag(kDisallowTag).IsEmpty()) {
allow = !cc->Inputs().Tag("DISALLOW").Get<bool>(); allow = !cc->Inputs().Tag(kDisallowTag).Get<bool>();
} }
} }
const GateState new_gate_state = allow ? GATE_ALLOW : GATE_DISALLOW; const GateState new_gate_state = allow ? GATE_ALLOW : GATE_DISALLOW;
if (cc->Outputs().HasTag("STATE_CHANGE")) { if (cc->Outputs().HasTag(kStateChangeTag)) {
if (last_gate_state_ != GATE_UNINITIALIZED && if (last_gate_state_ != GATE_UNINITIALIZED &&
last_gate_state_ != new_gate_state) { last_gate_state_ != new_gate_state) {
VLOG(2) << "State transition in " << cc->NodeName() << " @ " VLOG(2) << "State transition in " << cc->NodeName() << " @ "
@ -179,7 +195,7 @@ class GateCalculator : public CalculatorBase {
<< ToString(last_gate_state_) << " to " << ToString(last_gate_state_) << " to "
<< ToString(new_gate_state); << ToString(new_gate_state);
cc->Outputs() cc->Outputs()
.Tag("STATE_CHANGE") .Tag(kStateChangeTag)
.AddPacket(MakePacket<bool>(allow).At(cc->InputTimestamp())); .AddPacket(MakePacket<bool>(allow).At(cc->InputTimestamp()));
} }
} }
@ -211,8 +227,10 @@ class GateCalculator : public CalculatorBase {
GateState last_gate_state_ = GATE_UNINITIALIZED; GateState last_gate_state_ = GATE_UNINITIALIZED;
int num_data_streams_; int num_data_streams_;
bool empty_packets_as_allow_; bool empty_packets_as_allow_;
bool use_side_packet_for_allow_disallow_; bool use_side_packet_for_allow_disallow_ = false;
bool allow_by_side_packet_decision_; bool allow_by_side_packet_decision_;
bool use_option_for_allow_disallow_ = false;
bool allow_by_option_decision_;
}; };
REGISTER_CALCULATOR(GateCalculator); REGISTER_CALCULATOR(GateCalculator);
View File
@ -29,4 +29,8 @@ message GateCalculatorOptions {
// disallowing the corresponding packets in the data input streams. Setting // disallowing the corresponding packets in the data input streams. Setting
// this option to true inverts that, allowing the data packets to go through. // this option to true inverts that, allowing the data packets to go through.
optional bool empty_packets_as_allow = 1; optional bool empty_packets_as_allow = 1;
// Whether to allow or disallow the input streams to pass when no
// ALLOW/DISALLOW input or side input is specified.
optional bool allow = 2 [default = false];
} }
View File
@ -22,6 +22,9 @@ namespace mediapipe {
namespace { namespace {
constexpr char kDisallowTag[] = "DISALLOW";
constexpr char kAllowTag[] = "ALLOW";
class GateCalculatorTest : public ::testing::Test { class GateCalculatorTest : public ::testing::Test {
protected: protected:
// Helper to run a graph and return status. // Helper to run a graph and return status.
@ -110,6 +113,68 @@ TEST_F(GateCalculatorTest, InvalidInputs) {
)"))); )")));
} }
TEST_F(GateCalculatorTest, AllowByALLOWOptionToTrue) {
SetRunner(R"(
calculator: "GateCalculator"
input_stream: "test_input"
output_stream: "test_output"
options: {
[mediapipe.GateCalculatorOptions.ext] {
allow: true
}
}
)");
constexpr int64 kTimestampValue0 = 42;
RunTimeStep(kTimestampValue0, true);
constexpr int64 kTimestampValue1 = 43;
RunTimeStep(kTimestampValue1, false);
const std::vector<Packet>& output = runner()->Outputs().Get("", 0).packets;
ASSERT_EQ(2, output.size());
EXPECT_EQ(kTimestampValue0, output[0].Timestamp().Value());
EXPECT_EQ(kTimestampValue1, output[1].Timestamp().Value());
EXPECT_EQ(true, output[0].Get<bool>());
EXPECT_EQ(false, output[1].Get<bool>());
}
TEST_F(GateCalculatorTest, DisallowByALLOWOptionSetToFalse) {
SetRunner(R"(
calculator: "GateCalculator"
input_stream: "test_input"
output_stream: "test_output"
options: {
[mediapipe.GateCalculatorOptions.ext] {
allow: false
}
}
)");
constexpr int64 kTimestampValue0 = 42;
RunTimeStep(kTimestampValue0, true);
constexpr int64 kTimestampValue1 = 43;
RunTimeStep(kTimestampValue1, false);
const std::vector<Packet>& output = runner()->Outputs().Get("", 0).packets;
ASSERT_EQ(0, output.size());
}
TEST_F(GateCalculatorTest, DisallowByALLOWOptionNotSet) {
SetRunner(R"(
calculator: "GateCalculator"
input_stream: "test_input"
output_stream: "test_output"
)");
constexpr int64 kTimestampValue0 = 42;
RunTimeStep(kTimestampValue0, true);
constexpr int64 kTimestampValue1 = 43;
RunTimeStep(kTimestampValue1, false);
const std::vector<Packet>& output = runner()->Outputs().Get("", 0).packets;
ASSERT_EQ(0, output.size());
}
TEST_F(GateCalculatorTest, AllowByALLOWSidePacketSetToTrue) { TEST_F(GateCalculatorTest, AllowByALLOWSidePacketSetToTrue) {
SetRunner(R"( SetRunner(R"(
calculator: "GateCalculator" calculator: "GateCalculator"
@ -117,7 +182,7 @@ TEST_F(GateCalculatorTest, AllowByALLOWSidePacketSetToTrue) {
input_stream: "test_input" input_stream: "test_input"
output_stream: "test_output" output_stream: "test_output"
)"); )");
runner()->MutableSidePackets()->Tag("ALLOW") = Adopt(new bool(true)); runner()->MutableSidePackets()->Tag(kAllowTag) = Adopt(new bool(true));
constexpr int64 kTimestampValue0 = 42; constexpr int64 kTimestampValue0 = 42;
RunTimeStep(kTimestampValue0, true); RunTimeStep(kTimestampValue0, true);
@ -139,7 +204,7 @@ TEST_F(GateCalculatorTest, AllowByDisallowSidePacketSetToFalse) {
input_stream: "test_input" input_stream: "test_input"
output_stream: "test_output" output_stream: "test_output"
)"); )");
runner()->MutableSidePackets()->Tag("DISALLOW") = Adopt(new bool(false)); runner()->MutableSidePackets()->Tag(kDisallowTag) = Adopt(new bool(false));
constexpr int64 kTimestampValue0 = 42; constexpr int64 kTimestampValue0 = 42;
RunTimeStep(kTimestampValue0, true); RunTimeStep(kTimestampValue0, true);
@ -161,7 +226,7 @@ TEST_F(GateCalculatorTest, DisallowByALLOWSidePacketSetToFalse) {
input_stream: "test_input" input_stream: "test_input"
output_stream: "test_output" output_stream: "test_output"
)"); )");
runner()->MutableSidePackets()->Tag("ALLOW") = Adopt(new bool(false)); runner()->MutableSidePackets()->Tag(kAllowTag) = Adopt(new bool(false));
constexpr int64 kTimestampValue0 = 42; constexpr int64 kTimestampValue0 = 42;
RunTimeStep(kTimestampValue0, true); RunTimeStep(kTimestampValue0, true);
@ -179,7 +244,7 @@ TEST_F(GateCalculatorTest, DisallowByDISALLOWSidePacketSetToTrue) {
input_stream: "test_input" input_stream: "test_input"
output_stream: "test_output" output_stream: "test_output"
)"); )");
runner()->MutableSidePackets()->Tag("DISALLOW") = Adopt(new bool(true)); runner()->MutableSidePackets()->Tag(kDisallowTag) = Adopt(new bool(true));
constexpr int64 kTimestampValue0 = 42; constexpr int64 kTimestampValue0 = 42;
RunTimeStep(kTimestampValue0, true); RunTimeStep(kTimestampValue0, true);
View File
@ -0,0 +1,70 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <memory>
#include "mediapipe/calculators/core/graph_profile_calculator.pb.h"
#include "mediapipe/framework/api2/node.h"
#include "mediapipe/framework/api2/packet.h"
#include "mediapipe/framework/api2/port.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_profile.pb.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
namespace mediapipe {
namespace api2 {
// This calculator periodically copies the GraphProfile from
// mediapipe::GraphProfiler::CaptureProfile to the "PROFILE" output stream.
//
// Example config:
// node {
// calculator: "GraphProfileCalculator"
// output_stream: "FRAME:any_frame"
// output_stream: "PROFILE:graph_profile"
// }
//
class GraphProfileCalculator : public Node {
public:
static constexpr Input<AnyType>::Multiple kFrameIn{"FRAME"};
static constexpr Output<GraphProfile> kProfileOut{"PROFILE"};
MEDIAPIPE_NODE_CONTRACT(kFrameIn, kProfileOut);
static absl::Status UpdateContract(CalculatorContract* cc) {
return absl::OkStatus();
}
absl::Status Process(CalculatorContext* cc) final {
auto options = cc->Options<::mediapipe::GraphProfileCalculatorOptions>();
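    // Capture a profile for the first input timestamp, and afterwards whenever
    // at least profile_interval microseconds have elapsed since the last one.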
if (prev_profile_ts_ == Timestamp::Unset() ||
cc->InputTimestamp() - prev_profile_ts_ >= options.profile_interval()) {
prev_profile_ts_ = cc->InputTimestamp();
GraphProfile result;
MP_RETURN_IF_ERROR(cc->GetProfilingContext()->CaptureProfile(&result));
kProfileOut(cc).Send(result);
}
return absl::OkStatus();
}
private:
Timestamp prev_profile_ts_;
};
MEDIAPIPE_REGISTER_NODE(GraphProfileCalculator);
} // namespace api2
} // namespace mediapipe
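A minimal usage sketch (the stream name "input_frames" and the sink wiring are illustrative, not part of this change; the options extension and profiler flags mirror the test added further down):

CalculatorGraphConfig config =
    mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(R"pb(
      input_stream: "input_frames"
      node {
        calculator: "GraphProfileCalculator"
        options: {
          [mediapipe.GraphProfileCalculatorOptions.ext]: { profile_interval: 25000 }
        }
        input_stream: "FRAME:input_frames"
        output_stream: "PROFILE:graph_profile"
      }
    )pb");
// The test below enables the profiler so CaptureProfile has data to report.
config.mutable_profiler_config()->set_enable_profiler(true);
std::vector<Packet> profile_packets;
tool::AddVectorSink("graph_profile", &config, &profile_packets);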

View File

@ -0,0 +1,30 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe;
import "mediapipe/framework/calculator.proto";
option objc_class_prefix = "MediaPipe";
message GraphProfileCalculatorOptions {
extend mediapipe.CalculatorOptions {
optional GraphProfileCalculatorOptions ext = 367481815;
}
// The interval in microseconds between successive reported GraphProfiles.
optional int64 profile_interval = 1 [default = 1000000];
}

View File

@ -0,0 +1,211 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <memory>
#include <string>
#include <vector>
#include "absl/status/status.h"
#include "absl/strings/str_cat.h"
#include "absl/time/time.h"
#include "mediapipe/framework/calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_profile.pb.h"
#include "mediapipe/framework/deps/clock.h"
#include "mediapipe/framework/deps/message_matchers.h"
#include "mediapipe/framework/port/gmock.h"
#include "mediapipe/framework/port/gtest.h"
#include "mediapipe/framework/port/integral_types.h"
#include "mediapipe/framework/port/logging.h"
#include "mediapipe/framework/port/parse_text_proto.h"
#include "mediapipe/framework/port/proto_ns.h"
#include "mediapipe/framework/port/status_matchers.h"
#include "mediapipe/framework/port/threadpool.h"
#include "mediapipe/framework/tool/simulation_clock_executor.h"
// Tests for GraphProfileCalculator.
using testing::ElementsAre;
namespace mediapipe {
namespace {
constexpr char kClockTag[] = "CLOCK";
using mediapipe::Clock;
// A Calculator with a fixed Process call latency.
class SleepCalculator : public CalculatorBase {
public:
static absl::Status GetContract(CalculatorContract* cc) {
cc->InputSidePackets().Tag(kClockTag).Set<std::shared_ptr<Clock>>();
cc->Inputs().Index(0).SetAny();
cc->Outputs().Index(0).SetSameAs(&cc->Inputs().Index(0));
cc->SetTimestampOffset(TimestampDiff(0));
return absl::OkStatus();
}
absl::Status Open(CalculatorContext* cc) final {
clock_ =
cc->InputSidePackets().Tag(kClockTag).Get<std::shared_ptr<Clock>>();
return absl::OkStatus();
}
absl::Status Process(CalculatorContext* cc) final {
clock_->Sleep(absl::Milliseconds(5));
cc->Outputs().Index(0).AddPacket(cc->Inputs().Index(0).Value());
return absl::OkStatus();
}
std::shared_ptr<::mediapipe::Clock> clock_ = nullptr;
};
REGISTER_CALCULATOR(SleepCalculator);
// Tests showing GraphProfileCalculator reporting GraphProfile output packets.
class GraphProfileCalculatorTest : public ::testing::Test {
protected:
void SetUpProfileGraph() {
ASSERT_TRUE(proto_ns::TextFormat::ParseFromString(R"(
input_stream: "input_packets_0"
node {
calculator: 'SleepCalculator'
input_side_packet: 'CLOCK:sync_clock'
input_stream: 'input_packets_0'
output_stream: 'output_packets_1'
}
node {
calculator: "GraphProfileCalculator"
options: {
[mediapipe.GraphProfileCalculatorOptions.ext]: {
profile_interval: 25000
}
}
input_stream: "FRAME:output_packets_1"
output_stream: "PROFILE:output_packets_0"
}
)",
&graph_config_));
}
static Packet PacketAt(int64 ts) {
return Adopt(new int64(999)).At(Timestamp(ts));
}
static Packet None() { return Packet().At(Timestamp::OneOverPostStream()); }
static bool IsNone(const Packet& packet) {
return packet.Timestamp() == Timestamp::OneOverPostStream();
}
// Return the values of the timestamps of a vector of Packets.
static std::vector<int64> TimestampValues(
const std::vector<Packet>& packets) {
std::vector<int64> result;
for (const Packet& p : packets) {
result.push_back(p.Timestamp().Value());
}
return result;
}
// Runs a CalculatorGraph with a series of packet sets.
// Returns a vector of packets from each graph output stream.
void RunGraph(const std::vector<std::vector<Packet>>& input_sets,
std::vector<Packet>* output_packets) {
// Register output packet observers.
tool::AddVectorSink("output_packets_0", &graph_config_, output_packets);
// Start running the graph.
std::shared_ptr<SimulationClockExecutor> executor(
new SimulationClockExecutor(3 /*num_threads*/));
CalculatorGraph graph;
MP_ASSERT_OK(graph.SetExecutor("", executor));
graph.profiler()->SetClock(executor->GetClock());
MP_ASSERT_OK(graph.Initialize(graph_config_));
executor->GetClock()->ThreadStart();
MP_ASSERT_OK(graph.StartRun({
{"sync_clock",
Adopt(new std::shared_ptr<::mediapipe::Clock>(executor->GetClock()))},
}));
// Send each packet to the graph in the specified order.
for (int t = 0; t < input_sets.size(); t++) {
const std::vector<Packet>& input_set = input_sets[t];
for (int i = 0; i < input_set.size(); i++) {
const Packet& packet = input_set[i];
if (!IsNone(packet)) {
MP_EXPECT_OK(graph.AddPacketToInputStream(
absl::StrCat("input_packets_", i), packet));
}
executor->GetClock()->Sleep(absl::Milliseconds(10));
}
}
MP_ASSERT_OK(graph.CloseAllInputStreams());
executor->GetClock()->Sleep(absl::Milliseconds(100));
executor->GetClock()->ThreadFinish();
MP_ASSERT_OK(graph.WaitUntilDone());
}
CalculatorGraphConfig graph_config_;
};
TEST_F(GraphProfileCalculatorTest, GraphProfile) {
SetUpProfileGraph();
auto profiler_config = graph_config_.mutable_profiler_config();
profiler_config->set_enable_profiler(true);
profiler_config->set_trace_enabled(false);
profiler_config->set_trace_log_disabled(true);
profiler_config->set_enable_stream_latency(true);
profiler_config->set_calculator_filter(".*Calculator");
// Run the graph with a series of packet sets.
std::vector<std::vector<Packet>> input_sets = {
{PacketAt(10000)}, //
{PacketAt(20000)}, //
{PacketAt(30000)}, //
{PacketAt(40000)},
};
std::vector<Packet> output_packets;
RunGraph(input_sets, &output_packets);
// Validate the output packets.
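  // A profile is expected at ts 10000 (first packet) and at ts 40000, the first
  // packet arriving at least profile_interval (25000 us) after the previous
  // profile; ts 20000 and 30000 fall inside the interval.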
EXPECT_THAT(TimestampValues(output_packets), //
ElementsAre(10000, 40000));
GraphProfile expected_profile =
mediapipe::ParseTextProtoOrDie<GraphProfile>(R"pb(
calculator_profiles {
name: "GraphProfileCalculator"
open_runtime: 0
process_runtime { total: 0 count: 3 }
process_input_latency { total: 15000 count: 3 }
process_output_latency { total: 15000 count: 3 }
input_stream_profiles {
name: "output_packets_1"
back_edge: false
latency { total: 0 count: 3 }
}
}
calculator_profiles {
name: "SleepCalculator"
open_runtime: 0
process_runtime { total: 15000 count: 3 }
process_input_latency { total: 0 count: 3 }
process_output_latency { total: 15000 count: 3 }
input_stream_profiles {
name: "input_packets_0"
back_edge: false
latency { total: 0 count: 3 }
}
})pb");
EXPECT_THAT(output_packets[1].Get<GraphProfile>(),
mediapipe::EqualsProto(expected_profile));
}
} // namespace
} // namespace mediapipe

View File

@ -29,6 +29,9 @@
namespace mediapipe { namespace mediapipe {
namespace { namespace {
constexpr char kMinuendTag[] = "MINUEND";
constexpr char kSubtrahendTag[] = "SUBTRAHEND";
// A 3x4 Matrix of random integers in [0,1000). // A 3x4 Matrix of random integers in [0,1000).
const char kMatrixText[] = const char kMatrixText[] =
"rows: 3\n" "rows: 3\n"
@ -104,12 +107,13 @@ TEST(MatrixSubtractCalculatorTest, SubtractFromInput) {
CalculatorRunner runner(node_config); CalculatorRunner runner(node_config);
Matrix* side_matrix = new Matrix(); Matrix* side_matrix = new Matrix();
MatrixFromTextProto(kMatrixText, side_matrix); MatrixFromTextProto(kMatrixText, side_matrix);
runner.MutableSidePackets()->Tag("SUBTRAHEND") = Adopt(side_matrix); runner.MutableSidePackets()->Tag(kSubtrahendTag) = Adopt(side_matrix);
Matrix* input_matrix = new Matrix(); Matrix* input_matrix = new Matrix();
MatrixFromTextProto(kMatrixText2, input_matrix); MatrixFromTextProto(kMatrixText2, input_matrix);
runner.MutableInputs()->Tag("MINUEND").packets.push_back( runner.MutableInputs()
Adopt(input_matrix).At(Timestamp(0))); ->Tag(kMinuendTag)
.packets.push_back(Adopt(input_matrix).At(Timestamp(0)));
MP_ASSERT_OK(runner.Run()); MP_ASSERT_OK(runner.Run());
EXPECT_EQ(1, runner.Outputs().Index(0).packets.size()); EXPECT_EQ(1, runner.Outputs().Index(0).packets.size());
@ -133,12 +137,12 @@ TEST(MatrixSubtractCalculatorTest, SubtractFromSideMatrix) {
CalculatorRunner runner(node_config); CalculatorRunner runner(node_config);
Matrix* side_matrix = new Matrix(); Matrix* side_matrix = new Matrix();
MatrixFromTextProto(kMatrixText, side_matrix); MatrixFromTextProto(kMatrixText, side_matrix);
runner.MutableSidePackets()->Tag("MINUEND") = Adopt(side_matrix); runner.MutableSidePackets()->Tag(kMinuendTag) = Adopt(side_matrix);
Matrix* input_matrix = new Matrix(); Matrix* input_matrix = new Matrix();
MatrixFromTextProto(kMatrixText2, input_matrix); MatrixFromTextProto(kMatrixText2, input_matrix);
runner.MutableInputs() runner.MutableInputs()
->Tag("SUBTRAHEND") ->Tag(kSubtrahendTag)
.packets.push_back(Adopt(input_matrix).At(Timestamp(0))); .packets.push_back(Adopt(input_matrix).At(Timestamp(0)));
MP_ASSERT_OK(runner.Run()); MP_ASSERT_OK(runner.Run());

View File

@ -14,7 +14,11 @@
#include <memory> #include <memory>
#include "absl/status/status.h"
#include "absl/types/optional.h"
#include "mediapipe/calculators/core/split_vector_calculator.h" #include "mediapipe/calculators/core/split_vector_calculator.h"
#include "mediapipe/framework/api2/node.h"
#include "mediapipe/framework/api2/port.h"
#include "mediapipe/framework/calculator_framework.h" #include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_runner.h" #include "mediapipe/framework/calculator_runner.h"
#include "mediapipe/framework/port/gtest.h" #include "mediapipe/framework/port/gtest.h"
@ -301,4 +305,99 @@ TEST(MuxCalculatorTest, DiscardSkippedInputs_MuxInputStreamHandler) {
} }
} // namespace } // namespace
class PassThroughAndTsBoundUpdateNode : public mediapipe::api2::Node {
public:
static constexpr mediapipe::api2::Input<int> kInValue{"VALUE"};
static constexpr mediapipe::api2::Output<int> kOutValue{"VALUE"};
static constexpr mediapipe::api2::Output<int> kOutTsBoundUpdate{
"TS_BOUND_UPDATE"};
MEDIAPIPE_NODE_CONTRACT(kInValue, kOutValue, kOutTsBoundUpdate);
absl::Status Process(CalculatorContext* cc) override {
kOutValue(cc).Send(kInValue(cc));
kOutTsBoundUpdate(cc).SetNextTimestampBound(
cc->InputTimestamp().NextAllowedInStream());
return absl::OkStatus();
}
};
MEDIAPIPE_REGISTER_NODE(PassThroughAndTsBoundUpdateNode);
class ToOptionalNode : public mediapipe::api2::Node {
public:
static constexpr mediapipe::api2::Input<int> kTick{"TICK"};
static constexpr mediapipe::api2::Input<int> kInValue{"VALUE"};
static constexpr mediapipe::api2::Output<absl::optional<int>> kOutValue{
"OUTPUT"};
MEDIAPIPE_NODE_CONTRACT(kTick, kInValue, kOutValue);
absl::Status Process(CalculatorContext* cc) override {
if (kInValue(cc).IsEmpty()) {
kOutValue(cc).Send(absl::nullopt);
} else {
kOutValue(cc).Send({kInValue(cc).Get()});
}
return absl::OkStatus();
}
};
MEDIAPIPE_REGISTER_NODE(ToOptionalNode);
namespace {
TEST(MuxCalculatorTest, HandleTimestampBoundUpdates) {
CalculatorGraphConfig config =
mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(
R"pb(
input_stream: "select"
node {
calculator: "PassThroughAndTsBoundUpdateNode"
input_stream: "VALUE:select"
output_stream: "VALUE:select_ps"
output_stream: "TS_BOUND_UPDATE:ts_bound_update"
}
node {
calculator: "MuxCalculator"
input_stream: "INPUT:0:select_ps"
input_stream: "INPUT:1:ts_bound_update"
input_stream: "SELECT:select"
output_stream: "OUTPUT:select_or_ts_bound_update"
}
node {
calculator: "ToOptionalNode"
input_stream: "TICK:select"
input_stream: "VALUE:select_or_ts_bound_update"
output_stream: "OUTPUT:output"
}
)pb");
std::vector<Packet> output_packets;
tool::AddVectorSink("output", &config, &output_packets);
CalculatorGraph graph;
MP_ASSERT_OK(graph.Initialize(config));
MP_ASSERT_OK(graph.StartRun({}));
auto send_value_fn = [&](int value, Timestamp ts) -> absl::Status {
MP_RETURN_IF_ERROR(
graph.AddPacketToInputStream("select", MakePacket<int>(value).At(ts)));
return graph.WaitUntilIdle();
};
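  // select == 0 routes the packet on "select_ps" through the mux, so a value is
  // expected; select == 1 routes "ts_bound_update", which carries only a
  // timestamp bound update, so the mux output is empty at that timestamp and
  // ToOptionalNode emits absl::nullopt.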
MP_ASSERT_OK(send_value_fn(0, Timestamp(1)));
ASSERT_EQ(output_packets.size(), 1);
EXPECT_EQ(output_packets[0].Get<absl::optional<int>>(), 0);
MP_ASSERT_OK(send_value_fn(1, Timestamp(2)));
ASSERT_EQ(output_packets.size(), 2);
EXPECT_EQ(output_packets[1].Get<absl::optional<int>>(), absl::nullopt);
MP_ASSERT_OK(send_value_fn(0, Timestamp(3)));
ASSERT_EQ(output_packets.size(), 3);
EXPECT_EQ(output_packets[2].Get<absl::optional<int>>(), 0);
MP_ASSERT_OK(graph.CloseAllInputStreams());
MP_ASSERT_OK(graph.WaitUntilDone());
}
} // namespace
} // namespace mediapipe } // namespace mediapipe

View File

@ -60,7 +60,10 @@ class PacketClonerCalculator : public CalculatorBase {
const auto calculator_options = const auto calculator_options =
cc->Options<mediapipe::PacketClonerCalculatorOptions>(); cc->Options<mediapipe::PacketClonerCalculatorOptions>();
output_only_when_all_inputs_received_ = output_only_when_all_inputs_received_ =
calculator_options.output_only_when_all_inputs_received(); calculator_options.output_only_when_all_inputs_received() ||
calculator_options.output_packets_only_when_all_inputs_received();
output_empty_packets_before_all_inputs_received_ =
calculator_options.output_packets_only_when_all_inputs_received();
// Parse input streams. // Parse input streams.
tick_signal_index_ = cc->Inputs().NumEntries() - 1; tick_signal_index_ = cc->Inputs().NumEntries() - 1;
@ -88,6 +91,9 @@ class PacketClonerCalculator : public CalculatorBase {
// Return if one of the input is null. // Return if one of the input is null.
for (int i = 0; i < tick_signal_index_; ++i) { for (int i = 0; i < tick_signal_index_; ++i) {
if (current_[i].IsEmpty()) { if (current_[i].IsEmpty()) {
if (output_empty_packets_before_all_inputs_received_) {
SetAllNextTimestampBounds(cc);
}
return absl::OkStatus(); return absl::OkStatus();
} }
} }
@ -107,9 +113,17 @@ class PacketClonerCalculator : public CalculatorBase {
} }
private: private:
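  // Advances the timestamp bound of every data output to the next allowed
  // timestamp, so downstream calculators are not blocked while packets are
  // being withheld.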
void SetAllNextTimestampBounds(CalculatorContext* cc) {
for (int j = 0; j < tick_signal_index_; ++j) {
cc->Outputs().Index(j).SetNextTimestampBound(
cc->InputTimestamp().NextAllowedInStream());
}
}
std::vector<Packet> current_; std::vector<Packet> current_;
int tick_signal_index_; int tick_signal_index_;
bool output_only_when_all_inputs_received_; bool output_only_when_all_inputs_received_;
bool output_empty_packets_before_all_inputs_received_;
}; };
REGISTER_CALCULATOR(PacketClonerCalculator); REGISTER_CALCULATOR(PacketClonerCalculator);

View File

@ -28,4 +28,9 @@ message PacketClonerCalculatorOptions {
// When true, this calculator will drop received TICK packets if any input // When true, this calculator will drop received TICK packets if any input
// stream hasn't received a packet yet. // stream hasn't received a packet yet.
optional bool output_only_when_all_inputs_received = 1 [default = false]; optional bool output_only_when_all_inputs_received = 1 [default = false];
// Similar to the above, but also transmits empty packets for all streams
// before all inputs are received.
optional bool output_packets_only_when_all_inputs_received = 2
[default = false];
} }
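A hedged sketch of how the new flag could be set; the stream names are invented, and the `ext` extension name is assumed to follow the usual MediaPipe options convention (it is not shown in this diff):

CalculatorGraphConfig::Node node =
    mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig::Node>(R"pb(
      calculator: "PacketClonerCalculator"
      input_stream: "cloned_value"  # data input
      input_stream: "tick"          # last input acts as the tick signal
      output_stream: "cloned_value_at_tick"
      options {
        [mediapipe.PacketClonerCalculatorOptions.ext] {
          output_packets_only_when_all_inputs_received: true
        }
      }
    )pb");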

View File

@ -17,6 +17,9 @@
namespace mediapipe { namespace mediapipe {
constexpr char kPresenceTag[] = "PRESENCE";
constexpr char kPacketTag[] = "PACKET";
// For each non-empty input packet, emits a single output packet containing a // For each non-empty input packet, emits a single output packet containing a
// boolean value "true", and "false" in response to empty packets (a.k.a. timestamp // boolean value "true", and "false" in response to empty packets (a.k.a. timestamp
// bound updates). This can be used to "flag" the presence of an arbitrary packet // bound updates). This can be used to "flag" the presence of an arbitrary packet
@ -58,8 +61,8 @@ namespace mediapipe {
class PacketPresenceCalculator : public CalculatorBase { class PacketPresenceCalculator : public CalculatorBase {
public: public:
static absl::Status GetContract(CalculatorContract* cc) { static absl::Status GetContract(CalculatorContract* cc) {
cc->Inputs().Tag("PACKET").SetAny(); cc->Inputs().Tag(kPacketTag).SetAny();
cc->Outputs().Tag("PRESENCE").Set<bool>(); cc->Outputs().Tag(kPresenceTag).Set<bool>();
// Process() function is invoked in response to input stream timestamp // Process() function is invoked in response to input stream timestamp
// bound updates. // bound updates.
cc->SetProcessTimestampBounds(true); cc->SetProcessTimestampBounds(true);
@ -73,8 +76,8 @@ class PacketPresenceCalculator : public CalculatorBase {
absl::Status Process(CalculatorContext* cc) final { absl::Status Process(CalculatorContext* cc) final {
cc->Outputs() cc->Outputs()
.Tag("PRESENCE") .Tag(kPresenceTag)
.AddPacket(MakePacket<bool>(!cc->Inputs().Tag("PACKET").IsEmpty()) .AddPacket(MakePacket<bool>(!cc->Inputs().Tag(kPacketTag).IsEmpty())
.At(cc->InputTimestamp())); .At(cc->InputTimestamp()));
return absl::OkStatus(); return absl::OkStatus();
} }

View File

@ -39,6 +39,11 @@ namespace mediapipe {
REGISTER_CALCULATOR(PacketResamplerCalculator); REGISTER_CALCULATOR(PacketResamplerCalculator);
namespace { namespace {
constexpr char kSeedTag[] = "SEED";
constexpr char kVideoHeaderTag[] = "VIDEO_HEADER";
constexpr char kOptionsTag[] = "OPTIONS";
// Returns a TimestampDiff (assuming microseconds) corresponding to the // Returns a TimestampDiff (assuming microseconds) corresponding to the
// given time in seconds. // given time in seconds.
TimestampDiff TimestampDiffFromSeconds(double seconds) { TimestampDiff TimestampDiffFromSeconds(double seconds) {
@ -50,16 +55,16 @@ TimestampDiff TimestampDiffFromSeconds(double seconds) {
absl::Status PacketResamplerCalculator::GetContract(CalculatorContract* cc) { absl::Status PacketResamplerCalculator::GetContract(CalculatorContract* cc) {
const auto& resampler_options = const auto& resampler_options =
cc->Options<PacketResamplerCalculatorOptions>(); cc->Options<PacketResamplerCalculatorOptions>();
if (cc->InputSidePackets().HasTag("OPTIONS")) { if (cc->InputSidePackets().HasTag(kOptionsTag)) {
cc->InputSidePackets().Tag("OPTIONS").Set<CalculatorOptions>(); cc->InputSidePackets().Tag(kOptionsTag).Set<CalculatorOptions>();
} }
CollectionItemId input_data_id = cc->Inputs().GetId("DATA", 0); CollectionItemId input_data_id = cc->Inputs().GetId("DATA", 0);
if (!input_data_id.IsValid()) { if (!input_data_id.IsValid()) {
input_data_id = cc->Inputs().GetId("", 0); input_data_id = cc->Inputs().GetId("", 0);
} }
cc->Inputs().Get(input_data_id).SetAny(); cc->Inputs().Get(input_data_id).SetAny();
if (cc->Inputs().HasTag("VIDEO_HEADER")) { if (cc->Inputs().HasTag(kVideoHeaderTag)) {
cc->Inputs().Tag("VIDEO_HEADER").Set<VideoHeader>(); cc->Inputs().Tag(kVideoHeaderTag).Set<VideoHeader>();
} }
CollectionItemId output_data_id = cc->Outputs().GetId("DATA", 0); CollectionItemId output_data_id = cc->Outputs().GetId("DATA", 0);
@ -67,15 +72,15 @@ absl::Status PacketResamplerCalculator::GetContract(CalculatorContract* cc) {
output_data_id = cc->Outputs().GetId("", 0); output_data_id = cc->Outputs().GetId("", 0);
} }
cc->Outputs().Get(output_data_id).SetSameAs(&cc->Inputs().Get(input_data_id)); cc->Outputs().Get(output_data_id).SetSameAs(&cc->Inputs().Get(input_data_id));
if (cc->Outputs().HasTag("VIDEO_HEADER")) { if (cc->Outputs().HasTag(kVideoHeaderTag)) {
cc->Outputs().Tag("VIDEO_HEADER").Set<VideoHeader>(); cc->Outputs().Tag(kVideoHeaderTag).Set<VideoHeader>();
} }
if (resampler_options.jitter() != 0.0) { if (resampler_options.jitter() != 0.0) {
RET_CHECK_GT(resampler_options.jitter(), 0.0); RET_CHECK_GT(resampler_options.jitter(), 0.0);
RET_CHECK_LE(resampler_options.jitter(), 1.0); RET_CHECK_LE(resampler_options.jitter(), 1.0);
RET_CHECK(cc->InputSidePackets().HasTag("SEED")); RET_CHECK(cc->InputSidePackets().HasTag(kSeedTag));
cc->InputSidePackets().Tag("SEED").Set<std::string>(); cc->InputSidePackets().Tag(kSeedTag).Set<std::string>();
} }
return absl::OkStatus(); return absl::OkStatus();
} }
@ -143,9 +148,9 @@ absl::Status PacketResamplerCalculator::Open(CalculatorContext* cc) {
absl::Status PacketResamplerCalculator::Process(CalculatorContext* cc) { absl::Status PacketResamplerCalculator::Process(CalculatorContext* cc) {
if (cc->InputTimestamp() == Timestamp::PreStream() && if (cc->InputTimestamp() == Timestamp::PreStream() &&
cc->Inputs().UsesTags() && cc->Inputs().HasTag("VIDEO_HEADER") && cc->Inputs().UsesTags() && cc->Inputs().HasTag(kVideoHeaderTag) &&
!cc->Inputs().Tag("VIDEO_HEADER").IsEmpty()) { !cc->Inputs().Tag(kVideoHeaderTag).IsEmpty()) {
video_header_ = cc->Inputs().Tag("VIDEO_HEADER").Get<VideoHeader>(); video_header_ = cc->Inputs().Tag(kVideoHeaderTag).Get<VideoHeader>();
video_header_.frame_rate = frame_rate_; video_header_.frame_rate = frame_rate_;
if (cc->Inputs().Get(input_data_id_).IsEmpty()) { if (cc->Inputs().Get(input_data_id_).IsEmpty()) {
return absl::OkStatus(); return absl::OkStatus();
@ -234,7 +239,7 @@ absl::Status LegacyJitterWithReflectionStrategy::Open(CalculatorContext* cc) {
"ignored, because we are adding jitter."; "ignored, because we are adding jitter.";
} }
const auto& seed = cc->InputSidePackets().Tag("SEED").Get<std::string>(); const auto& seed = cc->InputSidePackets().Tag(kSeedTag).Get<std::string>();
random_ = CreateSecureRandom(seed); random_ = CreateSecureRandom(seed);
if (random_ == nullptr) { if (random_ == nullptr) {
return absl::InvalidArgumentError( return absl::InvalidArgumentError(
@ -357,7 +362,7 @@ absl::Status ReproducibleJitterWithReflectionStrategy::Open(
"ignored, because we are adding jitter."; "ignored, because we are adding jitter.";
} }
const auto& seed = cc->InputSidePackets().Tag("SEED").Get<std::string>(); const auto& seed = cc->InputSidePackets().Tag(kSeedTag).Get<std::string>();
random_ = CreateSecureRandom(seed); random_ = CreateSecureRandom(seed);
if (random_ == nullptr) { if (random_ == nullptr) {
return absl::InvalidArgumentError( return absl::InvalidArgumentError(
@ -504,7 +509,7 @@ absl::Status JitterWithoutReflectionStrategy::Open(CalculatorContext* cc) {
"ignored, because we are adding jitter."; "ignored, because we are adding jitter.";
} }
const auto& seed = cc->InputSidePackets().Tag("SEED").Get<std::string>(); const auto& seed = cc->InputSidePackets().Tag(kSeedTag).Get<std::string>();
random_ = CreateSecureRandom(seed); random_ = CreateSecureRandom(seed);
if (random_ == nullptr) { if (random_ == nullptr) {
return absl::InvalidArgumentError( return absl::InvalidArgumentError(
@ -635,9 +640,9 @@ absl::Status NoJitterStrategy::Process(CalculatorContext* cc) {
base_timestamp_ + base_timestamp_ +
TimestampDiffFromSeconds(first_index / calculator_->frame_rate_); TimestampDiffFromSeconds(first_index / calculator_->frame_rate_);
} }
if (cc->Outputs().UsesTags() && cc->Outputs().HasTag("VIDEO_HEADER")) { if (cc->Outputs().UsesTags() && cc->Outputs().HasTag(kVideoHeaderTag)) {
cc->Outputs() cc->Outputs()
.Tag("VIDEO_HEADER") .Tag(kVideoHeaderTag)
.Add(new VideoHeader(calculator_->video_header_), .Add(new VideoHeader(calculator_->video_header_),
Timestamp::PreStream()); Timestamp::PreStream());
} }
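For reference, a hedged example node using the tags above; the stream and side-packet names are illustrative, and the SEED side packet is required only because jitter is non-zero (per GetContract above):

CalculatorGraphConfig::Node node =
    mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig::Node>(R"pb(
      calculator: "PacketResamplerCalculator"
      input_stream: "DATA:input_frames"
      output_stream: "DATA:resampled_frames"
      input_side_packet: "SEED:resampler_seed"
      options {
        [mediapipe.PacketResamplerCalculatorOptions.ext] {
          frame_rate: 30
          jitter: 0.25
        }
      }
    )pb");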

View File

@ -32,6 +32,12 @@ namespace mediapipe {
using ::testing::ElementsAre; using ::testing::ElementsAre;
namespace { namespace {
constexpr char kOptionsTag[] = "OPTIONS";
constexpr char kSeedTag[] = "SEED";
constexpr char kVideoHeaderTag[] = "VIDEO_HEADER";
constexpr char kDataTag[] = "DATA";
// A simple version of CalculatorRunner with built-in convenience // A simple version of CalculatorRunner with built-in convenience
// methods for setting inputs from a vector and checking outputs // methods for setting inputs from a vector and checking outputs
// against expected outputs (both timestamps and contents). // against expected outputs (both timestamps and contents).
@ -464,7 +470,7 @@ TEST(PacketResamplerCalculatorTest, SetVideoHeader) {
)pb")); )pb"));
for (const int64 ts : {0, 5000, 10010, 15001, 19990}) { for (const int64 ts : {0, 5000, 10010, 15001, 19990}) {
runner.MutableInputs()->Tag("DATA").packets.push_back( runner.MutableInputs()->Tag(kDataTag).packets.push_back(
Adopt(new std::string(absl::StrCat("Frame #", ts))).At(Timestamp(ts))); Adopt(new std::string(absl::StrCat("Frame #", ts))).At(Timestamp(ts)));
} }
VideoHeader video_header_in; VideoHeader video_header_in;
@ -474,16 +480,16 @@ TEST(PacketResamplerCalculatorTest, SetVideoHeader) {
video_header_in.duration = 1.0; video_header_in.duration = 1.0;
video_header_in.format = ImageFormat::SRGB; video_header_in.format = ImageFormat::SRGB;
runner.MutableInputs() runner.MutableInputs()
->Tag("VIDEO_HEADER") ->Tag(kVideoHeaderTag)
.packets.push_back( .packets.push_back(
Adopt(new VideoHeader(video_header_in)).At(Timestamp::PreStream())); Adopt(new VideoHeader(video_header_in)).At(Timestamp::PreStream()));
MP_ASSERT_OK(runner.Run()); MP_ASSERT_OK(runner.Run());
ASSERT_EQ(1, runner.Outputs().Tag("VIDEO_HEADER").packets.size()); ASSERT_EQ(1, runner.Outputs().Tag(kVideoHeaderTag).packets.size());
EXPECT_EQ(Timestamp::PreStream(), EXPECT_EQ(Timestamp::PreStream(),
runner.Outputs().Tag("VIDEO_HEADER").packets[0].Timestamp()); runner.Outputs().Tag(kVideoHeaderTag).packets[0].Timestamp());
const VideoHeader& video_header_out = const VideoHeader& video_header_out =
runner.Outputs().Tag("VIDEO_HEADER").packets[0].Get<VideoHeader>(); runner.Outputs().Tag(kVideoHeaderTag).packets[0].Get<VideoHeader>();
EXPECT_EQ(video_header_in.width, video_header_out.width); EXPECT_EQ(video_header_in.width, video_header_out.width);
EXPECT_EQ(video_header_in.height, video_header_out.height); EXPECT_EQ(video_header_in.height, video_header_out.height);
EXPECT_DOUBLE_EQ(50.0, video_header_out.frame_rate); EXPECT_DOUBLE_EQ(50.0, video_header_out.frame_rate);
@ -725,7 +731,7 @@ TEST(PacketResamplerCalculatorTest, OptionsSidePacket) {
[mediapipe.PacketResamplerCalculatorOptions.ext] { [mediapipe.PacketResamplerCalculatorOptions.ext] {
frame_rate: 30 frame_rate: 30
})pb")); })pb"));
runner.MutableSidePackets()->Tag("OPTIONS") = Adopt(options); runner.MutableSidePackets()->Tag(kOptionsTag) = Adopt(options);
runner.SetInput({-222, 15000, 32000, 49999, 150000}); runner.SetInput({-222, 15000, 32000, 49999, 150000});
MP_ASSERT_OK(runner.Run()); MP_ASSERT_OK(runner.Run());
EXPECT_EQ(6, runner.Outputs().Index(0).packets.size()); EXPECT_EQ(6, runner.Outputs().Index(0).packets.size());
@ -740,7 +746,7 @@ TEST(PacketResamplerCalculatorTest, OptionsSidePacket) {
frame_rate: 30 frame_rate: 30
base_timestamp: 0 base_timestamp: 0
})pb")); })pb"));
runner.MutableSidePackets()->Tag("OPTIONS") = Adopt(options); runner.MutableSidePackets()->Tag(kOptionsTag) = Adopt(options);
runner.SetInput({-222, 15000, 32000, 49999, 150000}); runner.SetInput({-222, 15000, 32000, 49999, 150000});
MP_ASSERT_OK(runner.Run()); MP_ASSERT_OK(runner.Run());

View File

@ -217,6 +217,7 @@ absl::Status PacketThinnerCalculator::Open(CalculatorContext* cc) {
header->format = video_header.format; header->format = video_header.format;
header->width = video_header.width; header->width = video_header.width;
header->height = video_header.height; header->height = video_header.height;
header->duration = video_header.duration;
header->frame_rate = new_frame_rate; header->frame_rate = new_frame_rate;
cc->Outputs().Index(0).SetHeader(Adopt(header.release())); cc->Outputs().Index(0).SetHeader(Adopt(header.release()));
} else { } else {

View File

@ -29,6 +29,8 @@
namespace mediapipe { namespace mediapipe {
namespace { namespace {
constexpr char kPeriodTag[] = "PERIOD";
// A simple version of CalculatorRunner with built-in convenience methods for // A simple version of CalculatorRunner with built-in convenience methods for
// setting inputs from a vector and checking outputs against a vector of // setting inputs from a vector and checking outputs against a vector of
// expected outputs. // expected outputs.
@ -121,7 +123,7 @@ TEST(PacketThinnerCalculatorTest, ASyncUniformStreamThinningTestBySidePacket) {
SimpleRunner runner(node); SimpleRunner runner(node);
runner.SetInput({2, 4, 6, 8, 10, 12, 14}); runner.SetInput({2, 4, 6, 8, 10, 12, 14});
runner.MutableSidePackets()->Tag("PERIOD") = MakePacket<int64>(5); runner.MutableSidePackets()->Tag(kPeriodTag) = MakePacket<int64>(5);
MP_ASSERT_OK(runner.Run()); MP_ASSERT_OK(runner.Run());
const std::vector<int64> expected_timestamps = {2, 8, 14}; const std::vector<int64> expected_timestamps = {2, 8, 14};
@ -160,7 +162,7 @@ TEST(PacketThinnerCalculatorTest, SyncUniformStreamThinningTestBySidePacket1) {
SimpleRunner runner(node); SimpleRunner runner(node);
runner.SetInput({2, 4, 6, 8, 10, 12, 14}); runner.SetInput({2, 4, 6, 8, 10, 12, 14});
runner.MutableSidePackets()->Tag("PERIOD") = MakePacket<int64>(5); runner.MutableSidePackets()->Tag(kPeriodTag) = MakePacket<int64>(5);
MP_ASSERT_OK(runner.Run()); MP_ASSERT_OK(runner.Run());
const std::vector<int64> expected_timestamps = {2, 6, 10, 14}; const std::vector<int64> expected_timestamps = {2, 6, 10, 14};

View File

@ -39,6 +39,8 @@ using ::testing::Pair;
using ::testing::Value; using ::testing::Value;
namespace { namespace {
constexpr char kDisallowTag[] = "DISALLOW";
// Returns the timestamp values for a vector of Packets. // Returns the timestamp values for a vector of Packets.
// TODO: put this kind of test util in a common place. // TODO: put this kind of test util in a common place.
std::vector<int64> TimestampValues(const std::vector<Packet>& packets) { std::vector<int64> TimestampValues(const std::vector<Packet>& packets) {
@ -702,14 +704,14 @@ class DroppingGateCalculator : public CalculatorBase {
public: public:
static absl::Status GetContract(CalculatorContract* cc) { static absl::Status GetContract(CalculatorContract* cc) {
cc->Inputs().Index(0).SetAny(); cc->Inputs().Index(0).SetAny();
cc->Inputs().Tag("DISALLOW").Set<bool>(); cc->Inputs().Tag(kDisallowTag).Set<bool>();
cc->Outputs().Index(0).SetSameAs(&cc->Inputs().Index(0)); cc->Outputs().Index(0).SetSameAs(&cc->Inputs().Index(0));
return absl::OkStatus(); return absl::OkStatus();
} }
absl::Status Process(CalculatorContext* cc) final { absl::Status Process(CalculatorContext* cc) final {
if (!cc->Inputs().Index(0).IsEmpty() && if (!cc->Inputs().Index(0).IsEmpty() &&
!cc->Inputs().Tag("DISALLOW").Get<bool>()) { !cc->Inputs().Tag(kDisallowTag).Get<bool>()) {
cc->Outputs().Index(0).AddPacket(cc->Inputs().Index(0).Value()); cc->Outputs().Index(0).AddPacket(cc->Inputs().Index(0).Value());
} }
return absl::OkStatus(); return absl::OkStatus();

View File

@ -41,11 +41,14 @@
// } // }
namespace mediapipe { namespace mediapipe {
constexpr char kEncodedTag[] = "ENCODED";
constexpr char kFloatVectorTag[] = "FLOAT_VECTOR";
class QuantizeFloatVectorCalculator : public CalculatorBase { class QuantizeFloatVectorCalculator : public CalculatorBase {
public: public:
static absl::Status GetContract(CalculatorContract* cc) { static absl::Status GetContract(CalculatorContract* cc) {
cc->Inputs().Tag("FLOAT_VECTOR").Set<std::vector<float>>(); cc->Inputs().Tag(kFloatVectorTag).Set<std::vector<float>>();
cc->Outputs().Tag("ENCODED").Set<std::string>(); cc->Outputs().Tag(kEncodedTag).Set<std::string>();
return absl::OkStatus(); return absl::OkStatus();
} }
@ -70,7 +73,7 @@ class QuantizeFloatVectorCalculator : public CalculatorBase {
absl::Status Process(CalculatorContext* cc) final { absl::Status Process(CalculatorContext* cc) final {
const std::vector<float>& float_vector = const std::vector<float>& float_vector =
cc->Inputs().Tag("FLOAT_VECTOR").Value().Get<std::vector<float>>(); cc->Inputs().Tag(kFloatVectorTag).Value().Get<std::vector<float>>();
int feature_size = float_vector.size(); int feature_size = float_vector.size();
std::string encoded_features; std::string encoded_features;
encoded_features.reserve(feature_size); encoded_features.reserve(feature_size);
@ -86,8 +89,10 @@ class QuantizeFloatVectorCalculator : public CalculatorBase {
(old_value - min_quantized_value_) * (255.0 / range_)); (old_value - min_quantized_value_) * (255.0 / range_));
encoded_features += encoded; encoded_features += encoded;
} }
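      // For example, with min_quantized_value_ = -64 and range_ = 128, an input
      // of 32.0f maps to (32 - (-64)) * 255 / 128 = 191.25 in the loop above,
      // before being narrowed to a single quantized byte.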
cc->Outputs().Tag("ENCODED").AddPacket( cc->Outputs()
MakePacket<std::string>(encoded_features).At(cc->InputTimestamp())); .Tag(kEncodedTag)
.AddPacket(
MakePacket<std::string>(encoded_features).At(cc->InputTimestamp()));
return absl::OkStatus(); return absl::OkStatus();
} }

View File

@ -25,6 +25,9 @@
namespace mediapipe { namespace mediapipe {
constexpr char kEncodedTag[] = "ENCODED";
constexpr char kFloatVectorTag[] = "FLOAT_VECTOR";
TEST(QuantizeFloatVectorCalculatorTest, WrongConfig) { TEST(QuantizeFloatVectorCalculatorTest, WrongConfig) {
CalculatorGraphConfig::Node node_config = CalculatorGraphConfig::Node node_config =
ParseTextProtoOrDie<CalculatorGraphConfig::Node>(R"pb( ParseTextProtoOrDie<CalculatorGraphConfig::Node>(R"pb(
@ -40,7 +43,7 @@ TEST(QuantizeFloatVectorCalculatorTest, WrongConfig) {
CalculatorRunner runner(node_config); CalculatorRunner runner(node_config);
std::vector<float> empty_vector; std::vector<float> empty_vector;
runner.MutableInputs() runner.MutableInputs()
->Tag("FLOAT_VECTOR") ->Tag(kFloatVectorTag)
.packets.push_back( .packets.push_back(
MakePacket<std::vector<float>>(empty_vector).At(Timestamp(0))); MakePacket<std::vector<float>>(empty_vector).At(Timestamp(0)));
auto status = runner.Run(); auto status = runner.Run();
@ -67,7 +70,7 @@ TEST(QuantizeFloatVectorCalculatorTest, WrongConfig2) {
CalculatorRunner runner(node_config); CalculatorRunner runner(node_config);
std::vector<float> empty_vector; std::vector<float> empty_vector;
runner.MutableInputs() runner.MutableInputs()
->Tag("FLOAT_VECTOR") ->Tag(kFloatVectorTag)
.packets.push_back( .packets.push_back(
MakePacket<std::vector<float>>(empty_vector).At(Timestamp(0))); MakePacket<std::vector<float>>(empty_vector).At(Timestamp(0)));
auto status = runner.Run(); auto status = runner.Run();
@ -94,7 +97,7 @@ TEST(QuantizeFloatVectorCalculatorTest, WrongConfig3) {
CalculatorRunner runner(node_config); CalculatorRunner runner(node_config);
std::vector<float> empty_vector; std::vector<float> empty_vector;
runner.MutableInputs() runner.MutableInputs()
->Tag("FLOAT_VECTOR") ->Tag(kFloatVectorTag)
.packets.push_back( .packets.push_back(
MakePacket<std::vector<float>>(empty_vector).At(Timestamp(0))); MakePacket<std::vector<float>>(empty_vector).At(Timestamp(0)));
auto status = runner.Run(); auto status = runner.Run();
@ -121,11 +124,12 @@ TEST(QuantizeFloatVectorCalculatorTest, TestEmptyVector) {
CalculatorRunner runner(node_config); CalculatorRunner runner(node_config);
std::vector<float> empty_vector; std::vector<float> empty_vector;
runner.MutableInputs() runner.MutableInputs()
->Tag("FLOAT_VECTOR") ->Tag(kFloatVectorTag)
.packets.push_back( .packets.push_back(
MakePacket<std::vector<float>>(empty_vector).At(Timestamp(0))); MakePacket<std::vector<float>>(empty_vector).At(Timestamp(0)));
MP_ASSERT_OK(runner.Run()); MP_ASSERT_OK(runner.Run());
const std::vector<Packet>& outputs = runner.Outputs().Tag("ENCODED").packets; const std::vector<Packet>& outputs =
runner.Outputs().Tag(kEncodedTag).packets;
EXPECT_EQ(1, outputs.size()); EXPECT_EQ(1, outputs.size());
EXPECT_TRUE(outputs[0].Get<std::string>().empty()); EXPECT_TRUE(outputs[0].Get<std::string>().empty());
EXPECT_EQ(Timestamp(0), outputs[0].Timestamp()); EXPECT_EQ(Timestamp(0), outputs[0].Timestamp());
@ -147,11 +151,12 @@ TEST(QuantizeFloatVectorCalculatorTest, TestNonEmptyVector) {
CalculatorRunner runner(node_config); CalculatorRunner runner(node_config);
std::vector<float> vector = {0.0f, -64.0f, 64.0f, -32.0f, 32.0f}; std::vector<float> vector = {0.0f, -64.0f, 64.0f, -32.0f, 32.0f};
runner.MutableInputs() runner.MutableInputs()
->Tag("FLOAT_VECTOR") ->Tag(kFloatVectorTag)
.packets.push_back( .packets.push_back(
MakePacket<std::vector<float>>(vector).At(Timestamp(0))); MakePacket<std::vector<float>>(vector).At(Timestamp(0)));
MP_ASSERT_OK(runner.Run()); MP_ASSERT_OK(runner.Run());
const std::vector<Packet>& outputs = runner.Outputs().Tag("ENCODED").packets; const std::vector<Packet>& outputs =
runner.Outputs().Tag(kEncodedTag).packets;
EXPECT_EQ(1, outputs.size()); EXPECT_EQ(1, outputs.size());
const std::string& result = outputs[0].Get<std::string>(); const std::string& result = outputs[0].Get<std::string>();
ASSERT_FALSE(result.empty()); ASSERT_FALSE(result.empty());
@ -185,11 +190,12 @@ TEST(QuantizeFloatVectorCalculatorTest, TestSaturation) {
CalculatorRunner runner(node_config); CalculatorRunner runner(node_config);
std::vector<float> vector = {-65.0f, 65.0f}; std::vector<float> vector = {-65.0f, 65.0f};
runner.MutableInputs() runner.MutableInputs()
->Tag("FLOAT_VECTOR") ->Tag(kFloatVectorTag)
.packets.push_back( .packets.push_back(
MakePacket<std::vector<float>>(vector).At(Timestamp(0))); MakePacket<std::vector<float>>(vector).At(Timestamp(0)));
MP_ASSERT_OK(runner.Run()); MP_ASSERT_OK(runner.Run());
const std::vector<Packet>& outputs = runner.Outputs().Tag("ENCODED").packets; const std::vector<Packet>& outputs =
runner.Outputs().Tag(kEncodedTag).packets;
EXPECT_EQ(1, outputs.size()); EXPECT_EQ(1, outputs.size());
const std::string& result = outputs[0].Get<std::string>(); const std::string& result = outputs[0].Get<std::string>();
ASSERT_FALSE(result.empty()); ASSERT_FALSE(result.empty());

View File

@ -23,6 +23,9 @@
namespace mediapipe { namespace mediapipe {
constexpr char kAllowTag[] = "ALLOW";
constexpr char kMaxInFlightTag[] = "MAX_IN_FLIGHT";
// RealTimeFlowLimiterCalculator is used to limit the number of pipelined // RealTimeFlowLimiterCalculator is used to limit the number of pipelined
// processing operations in a section of the graph. // processing operations in a section of the graph.
// //
@ -86,11 +89,11 @@ class RealTimeFlowLimiterCalculator : public CalculatorBase {
cc->Outputs().Get("", i).SetSameAs(&(cc->Inputs().Get("", i))); cc->Outputs().Get("", i).SetSameAs(&(cc->Inputs().Get("", i)));
} }
cc->Inputs().Get("FINISHED", 0).SetAny(); cc->Inputs().Get("FINISHED", 0).SetAny();
if (cc->InputSidePackets().HasTag("MAX_IN_FLIGHT")) { if (cc->InputSidePackets().HasTag(kMaxInFlightTag)) {
cc->InputSidePackets().Tag("MAX_IN_FLIGHT").Set<int>(); cc->InputSidePackets().Tag(kMaxInFlightTag).Set<int>();
} }
if (cc->Outputs().HasTag("ALLOW")) { if (cc->Outputs().HasTag(kAllowTag)) {
cc->Outputs().Tag("ALLOW").Set<bool>(); cc->Outputs().Tag(kAllowTag).Set<bool>();
} }
cc->SetInputStreamHandler("ImmediateInputStreamHandler"); cc->SetInputStreamHandler("ImmediateInputStreamHandler");
@ -101,8 +104,8 @@ class RealTimeFlowLimiterCalculator : public CalculatorBase {
absl::Status Open(CalculatorContext* cc) final { absl::Status Open(CalculatorContext* cc) final {
finished_id_ = cc->Inputs().GetId("FINISHED", 0); finished_id_ = cc->Inputs().GetId("FINISHED", 0);
max_in_flight_ = 1; max_in_flight_ = 1;
if (cc->InputSidePackets().HasTag("MAX_IN_FLIGHT")) { if (cc->InputSidePackets().HasTag(kMaxInFlightTag)) {
max_in_flight_ = cc->InputSidePackets().Tag("MAX_IN_FLIGHT").Get<int>(); max_in_flight_ = cc->InputSidePackets().Tag(kMaxInFlightTag).Get<int>();
} }
RET_CHECK_GE(max_in_flight_, 1); RET_CHECK_GE(max_in_flight_, 1);
num_in_flight_ = 0; num_in_flight_ = 0;
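For orientation, a hypothetical wiring of this calculator (all stream names are invented; the FINISHED input is typically a back edge from the end of the throttled section, and the back-edge annotation shown is an assumption, not part of this diff):

CalculatorGraphConfig::Node node =
    mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig::Node>(R"pb(
      calculator: "RealTimeFlowLimiterCalculator"
      input_stream: "raw_frames"           # throttled data input
      input_stream: "FINISHED:detections"  # completion signal for in-flight work
      input_stream_info: { tag_index: "FINISHED" back_edge: true }
      input_side_packet: "MAX_IN_FLIGHT:max_in_flight"
      output_stream: "gated_frames"
      output_stream: "ALLOW:allow"
    )pb");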

View File

@ -33,6 +33,9 @@
namespace mediapipe { namespace mediapipe {
namespace { namespace {
constexpr char kFinishedTag[] = "FINISHED";
// A simple Semaphore for synchronizing test threads. // A simple Semaphore for synchronizing test threads.
class AtomicSemaphore { class AtomicSemaphore {
public: public:
@ -112,7 +115,7 @@ TEST(RealTimeFlowLimiterCalculator, BasicTest) {
Timestamp timestamp = Timestamp timestamp =
Timestamp((i + 1) * Timestamp::kTimestampUnitsPerSecond); Timestamp((i + 1) * Timestamp::kTimestampUnitsPerSecond);
runner.MutableInputs() runner.MutableInputs()
->Tag("FINISHED") ->Tag(kFinishedTag)
.packets.push_back(MakePacket<bool>(true).At(timestamp)); .packets.push_back(MakePacket<bool>(true).At(timestamp));
} }

View File

@ -22,6 +22,8 @@ namespace mediapipe {
namespace { namespace {
constexpr char kPacketOffsetTag[] = "PACKET_OFFSET";
// Adds packets containing integers equal to their original timestamp. // Adds packets containing integers equal to their original timestamp.
void AddPackets(CalculatorRunner* runner) { void AddPackets(CalculatorRunner* runner) {
for (int i = 0; i < 10; ++i) { for (int i = 0; i < 10; ++i) {
@ -111,7 +113,7 @@ TEST(SequenceShiftCalculatorTest, SidePacketOffset) {
CalculatorRunner runner(node); CalculatorRunner runner(node);
AddPackets(&runner); AddPackets(&runner);
runner.MutableSidePackets()->Tag("PACKET_OFFSET") = Adopt(new int(-2)); runner.MutableSidePackets()->Tag(kPacketOffsetTag) = Adopt(new int(-2));
MP_ASSERT_OK(runner.Run()); MP_ASSERT_OK(runner.Run());
const std::vector<Packet>& input_packets = const std::vector<Packet>& input_packets =
runner.MutableInputs()->Index(0).packets; runner.MutableInputs()->Index(0).packets;

View File

@ -80,4 +80,7 @@ typedef SplitVectorCalculator<mediapipe::ClassificationList, false>
SplitClassificationListVectorCalculator; SplitClassificationListVectorCalculator;
REGISTER_CALCULATOR(SplitClassificationListVectorCalculator); REGISTER_CALCULATOR(SplitClassificationListVectorCalculator);
typedef SplitVectorCalculator<uint64_t, false> SplitUint64tVectorCalculator;
REGISTER_CALCULATOR(SplitUint64tVectorCalculator);
} // namespace mediapipe } // namespace mediapipe

View File

@ -661,3 +661,138 @@ cc_test(
"//mediapipe/framework/port:parse_text_proto", "//mediapipe/framework/port:parse_text_proto",
], ],
) )
cc_library(
name = "affine_transformation",
hdrs = ["affine_transformation.h"],
deps = ["@com_google_absl//absl/status:statusor"],
)
cc_library(
name = "affine_transformation_runner_gl",
srcs = ["affine_transformation_runner_gl.cc"],
hdrs = ["affine_transformation_runner_gl.h"],
deps = [
":affine_transformation",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/port:ret_check",
"//mediapipe/gpu:gl_calculator_helper",
"//mediapipe/gpu:gl_simple_shaders",
"//mediapipe/gpu:gpu_buffer",
"//mediapipe/gpu:gpu_origin_cc_proto",
"//mediapipe/gpu:shader_util",
"@com_google_absl//absl/memory",
"@com_google_absl//absl/status",
"@com_google_absl//absl/status:statusor",
"@eigen_archive//:eigen3",
],
)
cc_library(
name = "affine_transformation_runner_opencv",
srcs = ["affine_transformation_runner_opencv.cc"],
hdrs = ["affine_transformation_runner_opencv.h"],
deps = [
":affine_transformation",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/formats:image_frame",
"//mediapipe/framework/formats:image_frame_opencv",
"//mediapipe/framework/port:opencv_core",
"//mediapipe/framework/port:opencv_imgproc",
"//mediapipe/framework/port:ret_check",
"@com_google_absl//absl/memory",
"@com_google_absl//absl/status:statusor",
"@eigen_archive//:eigen3",
],
)
mediapipe_proto_library(
name = "warp_affine_calculator_proto",
srcs = ["warp_affine_calculator.proto"],
visibility = ["//visibility:public"],
deps = [
"//mediapipe/framework:calculator_options_proto",
"//mediapipe/framework:calculator_proto",
"//mediapipe/gpu:gpu_origin_proto",
],
)
cc_library(
name = "warp_affine_calculator",
srcs = ["warp_affine_calculator.cc"],
hdrs = ["warp_affine_calculator.h"],
visibility = ["//visibility:public"],
deps = [
":affine_transformation",
":affine_transformation_runner_opencv",
":warp_affine_calculator_cc_proto",
"@com_google_absl//absl/status",
"@com_google_absl//absl/status:statusor",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/api2:node",
"//mediapipe/framework/api2:port",
"//mediapipe/framework/formats:image",
"//mediapipe/framework/formats:image_frame",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
] + select({
"//mediapipe/gpu:disable_gpu": [],
"//conditions:default": [
"//mediapipe/gpu:gl_calculator_helper",
"//mediapipe/gpu:gpu_buffer",
":affine_transformation_runner_gl",
],
}),
alwayslink = 1,
)
cc_test(
name = "warp_affine_calculator_test",
srcs = ["warp_affine_calculator_test.cc"],
data = [
"//mediapipe/calculators/tensor:testdata/image_to_tensor/input.jpg",
"//mediapipe/calculators/tensor:testdata/image_to_tensor/large_sub_rect.png",
"//mediapipe/calculators/tensor:testdata/image_to_tensor/large_sub_rect_border_zero.png",
"//mediapipe/calculators/tensor:testdata/image_to_tensor/large_sub_rect_keep_aspect.png",
"//mediapipe/calculators/tensor:testdata/image_to_tensor/large_sub_rect_keep_aspect_border_zero.png",
"//mediapipe/calculators/tensor:testdata/image_to_tensor/large_sub_rect_keep_aspect_with_rotation.png",
"//mediapipe/calculators/tensor:testdata/image_to_tensor/large_sub_rect_keep_aspect_with_rotation_border_zero.png",
"//mediapipe/calculators/tensor:testdata/image_to_tensor/medium_sub_rect_keep_aspect.png",
"//mediapipe/calculators/tensor:testdata/image_to_tensor/medium_sub_rect_keep_aspect_border_zero.png",
"//mediapipe/calculators/tensor:testdata/image_to_tensor/medium_sub_rect_keep_aspect_with_rotation.png",
"//mediapipe/calculators/tensor:testdata/image_to_tensor/medium_sub_rect_keep_aspect_with_rotation_border_zero.png",
"//mediapipe/calculators/tensor:testdata/image_to_tensor/medium_sub_rect_with_rotation.png",
"//mediapipe/calculators/tensor:testdata/image_to_tensor/medium_sub_rect_with_rotation_border_zero.png",
"//mediapipe/calculators/tensor:testdata/image_to_tensor/noop_except_range.png",
],
tags = ["desktop_only_test"],
deps = [
":affine_transformation",
":warp_affine_calculator",
"//mediapipe/calculators/image:image_transformation_calculator",
"//mediapipe/calculators/tensor:image_to_tensor_converter",
"//mediapipe/calculators/tensor:image_to_tensor_utils",
"//mediapipe/calculators/util:from_image_calculator",
"//mediapipe/calculators/util:to_image_calculator",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework:calculator_runner",
"//mediapipe/framework/deps:file_path",
"//mediapipe/framework/formats:image",
"//mediapipe/framework/formats:image_format_cc_proto",
"//mediapipe/framework/formats:image_frame",
"//mediapipe/framework/formats:image_frame_opencv",
"//mediapipe/framework/formats:rect_cc_proto",
"//mediapipe/framework/formats:tensor",
"//mediapipe/framework/port:gtest_main",
"//mediapipe/framework/port:integral_types",
"//mediapipe/framework/port:opencv_core",
"//mediapipe/framework/port:opencv_imgcodecs",
"//mediapipe/framework/port:opencv_imgproc",
"//mediapipe/framework/port:parse_text_proto",
"//mediapipe/gpu:gpu_buffer_to_image_frame_calculator",
"//mediapipe/gpu:image_frame_to_gpu_buffer_calculator",
"@com_google_absl//absl/flags:flag",
"@com_google_absl//absl/memory",
"@com_google_absl//absl/strings",
],
)

View File

@ -0,0 +1,55 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MEDIAPIPE_CALCULATORS_IMAGE_AFFINE_TRANSFORMATION_H_
#define MEDIAPIPE_CALCULATORS_IMAGE_AFFINE_TRANSFORMATION_H_
#include <array>
#include "absl/status/statusor.h"
namespace mediapipe {
class AffineTransformation {
public:
// Pixel extrapolation method.
  // When converting an image to a tensor, the tensor may need to read pixels
  // outside the image boundaries. The border mode specifies how such pixels
  // are calculated.
enum class BorderMode { kZero, kReplicate };
struct Size {
int width;
int height;
};
template <typename InputT, typename OutputT>
class Runner {
public:
virtual ~Runner() = default;
    // Transforms input into output using @matrix as follows:
// output(x, y) = input(matrix[0] * x + matrix[1] * y + matrix[3],
// matrix[4] * x + matrix[5] * y + matrix[7])
// where x and y ranges are defined by @output_size.
virtual absl::StatusOr<OutputT> Run(const InputT& input,
const std::array<float, 16>& matrix,
const Size& output_size,
BorderMode border_mode) = 0;
};
};
} // namespace mediapipe
#endif // MEDIAPIPE_CALCULATORS_IMAGE_AFFINE_TRANSFORMATION_H_
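As a worked illustration of the mapping documented in Runner::Run (the values s, tx, ty below are placeholders): rows 0 and 1 of the row-major 4x4 matrix pick the input coordinates sampled for each output pixel, so a uniform scale s with translation (tx, ty) looks like:

// Sketch only: output(x, y) = input(s * x + tx, s * y + ty).
#include <array>
const float s = 0.5f, tx = 10.0f, ty = 20.0f;  // placeholder values
std::array<float, 16> matrix = {
    s,    0.0f, 0.0f, tx,   // matrix[0], matrix[1], matrix[2], matrix[3]
    0.0f, s,    0.0f, ty,   // matrix[4], matrix[5], matrix[6], matrix[7]
    0.0f, 0.0f, 1.0f, 0.0f,
    0.0f, 0.0f, 0.0f, 1.0f};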

View File

@ -0,0 +1,354 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/calculators/image/affine_transformation_runner_gl.h"
#include <memory>
#include <optional>
#include "Eigen/Core"
#include "Eigen/Geometry"
#include "Eigen/LU"
#include "absl/memory/memory.h"
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "mediapipe/calculators/image/affine_transformation.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/gpu/gl_calculator_helper.h"
#include "mediapipe/gpu/gl_simple_shaders.h"
#include "mediapipe/gpu/gpu_buffer.h"
#include "mediapipe/gpu/gpu_origin.pb.h"
#include "mediapipe/gpu/shader_util.h"
namespace mediapipe {
namespace {
using mediapipe::GlCalculatorHelper;
using mediapipe::GlhCreateProgram;
using mediapipe::GlTexture;
using mediapipe::GpuBuffer;
using mediapipe::GpuOrigin;
bool IsMatrixVerticalFlipNeeded(GpuOrigin::Mode gpu_origin) {
switch (gpu_origin) {
case GpuOrigin::DEFAULT:
case GpuOrigin::CONVENTIONAL:
#ifdef __APPLE__
return false;
#else
return true;
#endif // __APPLE__
case GpuOrigin::TOP_LEFT:
return false;
}
}
#ifdef __APPLE__
#define GL_CLAMP_TO_BORDER_MAY_BE_SUPPORTED 0
#else
#define GL_CLAMP_TO_BORDER_MAY_BE_SUPPORTED 1
#endif // __APPLE__
bool IsGlClampToBorderSupported(const mediapipe::GlContext& gl_context) {
return gl_context.gl_major_version() > 3 ||
(gl_context.gl_major_version() == 3 &&
gl_context.gl_minor_version() >= 2);
}
constexpr int kAttribVertex = 0;
constexpr int kAttribTexturePosition = 1;
constexpr int kNumAttributes = 2;
class GlTextureWarpAffineRunner
: public AffineTransformation::Runner<GpuBuffer,
std::unique_ptr<GpuBuffer>> {
public:
GlTextureWarpAffineRunner(std::shared_ptr<GlCalculatorHelper> gl_helper,
GpuOrigin::Mode gpu_origin)
: gl_helper_(gl_helper), gpu_origin_(gpu_origin) {}
absl::Status Init() {
return gl_helper_->RunInGlContext([this]() -> absl::Status {
const GLint attr_location[kNumAttributes] = {
kAttribVertex,
kAttribTexturePosition,
};
const GLchar* attr_name[kNumAttributes] = {
"position",
"texture_coordinate",
};
constexpr GLchar kVertShader[] = R"(
in vec4 position;
in mediump vec4 texture_coordinate;
out mediump vec2 sample_coordinate;
uniform mat4 transform_matrix;
void main() {
gl_Position = position;
vec4 tc = transform_matrix * texture_coordinate;
sample_coordinate = tc.xy;
}
)";
constexpr GLchar kFragShader[] = R"(
DEFAULT_PRECISION(mediump, float)
in vec2 sample_coordinate;
uniform sampler2D input_texture;
#ifdef GL_ES
#define fragColor gl_FragColor
#else
out vec4 fragColor;
#endif // defined(GL_ES);
void main() {
vec4 color = texture2D(input_texture, sample_coordinate);
#ifdef CUSTOM_ZERO_BORDER_MODE
float out_of_bounds =
float(sample_coordinate.x < 0.0 || sample_coordinate.x > 1.0 ||
sample_coordinate.y < 0.0 || sample_coordinate.y > 1.0);
color = mix(color, vec4(0.0, 0.0, 0.0, 0.0), out_of_bounds);
#endif // defined(CUSTOM_ZERO_BORDER_MODE)
fragColor = color;
}
)";
// Create program and set parameters.
auto create_fn = [&](const std::string& vs,
const std::string& fs) -> absl::StatusOr<Program> {
GLuint program = 0;
GlhCreateProgram(vs.c_str(), fs.c_str(), kNumAttributes, &attr_name[0],
attr_location, &program);
RET_CHECK(program) << "Problem initializing warp affine program.";
glUseProgram(program);
glUniform1i(glGetUniformLocation(program, "input_texture"), 1);
GLint matrix_id = glGetUniformLocation(program, "transform_matrix");
return Program{.id = program, .matrix_id = matrix_id};
};
const std::string vert_src =
absl::StrCat(mediapipe::kMediaPipeVertexShaderPreamble, kVertShader);
const std::string frag_src = absl::StrCat(
mediapipe::kMediaPipeFragmentShaderPreamble, kFragShader);
ASSIGN_OR_RETURN(program_, create_fn(vert_src, frag_src));
auto create_custom_zero_fn = [&]() -> absl::StatusOr<Program> {
std::string custom_zero_border_mode_def = R"(
#define CUSTOM_ZERO_BORDER_MODE
)";
const std::string frag_custom_zero_src =
absl::StrCat(mediapipe::kMediaPipeFragmentShaderPreamble,
custom_zero_border_mode_def, kFragShader);
return create_fn(vert_src, frag_custom_zero_src);
};
#if GL_CLAMP_TO_BORDER_MAY_BE_SUPPORTED
if (!IsGlClampToBorderSupported(gl_helper_->GetGlContext())) {
ASSIGN_OR_RETURN(program_custom_zero_, create_custom_zero_fn());
}
#else
ASSIGN_OR_RETURN(program_custom_zero_, create_custom_zero_fn());
#endif // GL_CLAMP_TO_BORDER_MAY_BE_SUPPORTED
glGenFramebuffers(1, &framebuffer_);
// vertex storage
glGenBuffers(2, vbo_);
glGenVertexArrays(1, &vao_);
// vbo 0
glBindBuffer(GL_ARRAY_BUFFER, vbo_[0]);
glBufferData(GL_ARRAY_BUFFER, sizeof(mediapipe::kBasicSquareVertices),
mediapipe::kBasicSquareVertices, GL_STATIC_DRAW);
// vbo 1
glBindBuffer(GL_ARRAY_BUFFER, vbo_[1]);
glBufferData(GL_ARRAY_BUFFER, sizeof(mediapipe::kBasicTextureVertices),
mediapipe::kBasicTextureVertices, GL_STATIC_DRAW);
glBindBuffer(GL_ARRAY_BUFFER, 0);
return absl::OkStatus();
});
}
absl::StatusOr<std::unique_ptr<GpuBuffer>> Run(
const GpuBuffer& input, const std::array<float, 16>& matrix,
const AffineTransformation::Size& size,
AffineTransformation::BorderMode border_mode) override {
std::unique_ptr<GpuBuffer> gpu_buffer;
MP_RETURN_IF_ERROR(
gl_helper_->RunInGlContext([this, &input, &matrix, &size, &border_mode,
&gpu_buffer]() -> absl::Status {
auto input_texture = gl_helper_->CreateSourceTexture(input);
auto output_texture = gl_helper_->CreateDestinationTexture(
size.width, size.height, input.format());
MP_RETURN_IF_ERROR(
RunInternal(input_texture, matrix, border_mode, &output_texture));
gpu_buffer = output_texture.GetFrame<GpuBuffer>();
return absl::OkStatus();
}));
return gpu_buffer;
}
absl::Status RunInternal(const GlTexture& texture,
const std::array<float, 16>& matrix,
AffineTransformation::BorderMode border_mode,
GlTexture* output) {
glDisable(GL_DEPTH_TEST);
glBindFramebuffer(GL_FRAMEBUFFER, framebuffer_);
glViewport(0, 0, output->width(), output->height());
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, output->name());
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
output->name(), 0);
glActiveTexture(GL_TEXTURE1);
glBindTexture(texture.target(), texture.name());
// a) Filtering.
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
// b) Clamping.
std::optional<Program> program = program_;
switch (border_mode) {
case AffineTransformation::BorderMode::kReplicate: {
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
break;
}
case AffineTransformation::BorderMode::kZero: {
#if GL_CLAMP_TO_BORDER_MAY_BE_SUPPORTED
if (program_custom_zero_) {
program = program_custom_zero_;
} else {
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_BORDER);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_BORDER);
glTexParameterfv(GL_TEXTURE_2D, GL_TEXTURE_BORDER_COLOR,
std::array<float, 4>{0.0f, 0.0f, 0.0f, 0.0f}.data());
}
#else
RET_CHECK(program_custom_zero_)
<< "Program must have been initialized.";
program = program_custom_zero_;
#endif // GL_CLAMP_TO_BORDER_MAY_BE_SUPPORTED
break;
}
}
glUseProgram(program->id);
Eigen::Matrix<float, 4, 4, Eigen::RowMajor> eigen_mat(matrix.data());
if (IsMatrixVerticalFlipNeeded(gpu_origin_)) {
      // @matrix describes the affine transformation in terms of a TOP-LEFT
      // origin, so when the GPU origin convention requires it, an extra
      // vertical flip is applied before and after the transform (see the note
      // following this block).
const Eigen::Matrix<float, 4, 4, Eigen::RowMajor> flip_y(
{{1.0f, 0.0f, 0.0f, 0.0f},
{0.0f, -1.0f, 0.0f, 1.0f},
{0.0f, 0.0f, 1.0f, 0.0f},
{0.0f, 0.0f, 0.0f, 1.0f}});
eigen_mat = flip_y * eigen_mat * flip_y;
}
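    // Illustrative note (not part of the transformation logic): flip_y maps
    // y -> 1 - y in normalized coordinates, so conjugating the matrix as
    // flip_y * M * flip_y re-expresses it for the flipped axis. E.g. a pure
    // translation of +0.25 along y in top-left terms becomes a translation of
    // -0.25 along y in bottom-left terms.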
    // If the GL context is ES2, the 'transpose' GLboolean passed to
    // glUniformMatrix4fv must be GL_FALSE, otherwise an INVALID_VALUE error is
    // reported. Hence the matrix is transposed here and always passed with
    // transpose == GL_FALSE.
eigen_mat.transposeInPlace();
glUniformMatrix4fv(program->matrix_id, 1, GL_FALSE, eigen_mat.data());
// vao
glBindVertexArray(vao_);
// vbo 0
glBindBuffer(GL_ARRAY_BUFFER, vbo_[0]);
glEnableVertexAttribArray(kAttribVertex);
glVertexAttribPointer(kAttribVertex, 2, GL_FLOAT, 0, 0, nullptr);
// vbo 1
glBindBuffer(GL_ARRAY_BUFFER, vbo_[1]);
glEnableVertexAttribArray(kAttribTexturePosition);
glVertexAttribPointer(kAttribTexturePosition, 2, GL_FLOAT, 0, 0, nullptr);
// draw
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
// Resetting to MediaPipe texture param defaults.
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glDisableVertexAttribArray(kAttribVertex);
glDisableVertexAttribArray(kAttribTexturePosition);
glBindBuffer(GL_ARRAY_BUFFER, 0);
glBindVertexArray(0);
glActiveTexture(GL_TEXTURE1);
glBindTexture(GL_TEXTURE_2D, 0);
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, 0);
return absl::OkStatus();
}
~GlTextureWarpAffineRunner() override {
gl_helper_->RunInGlContext([this]() {
// Release OpenGL resources.
if (framebuffer_ != 0) glDeleteFramebuffers(1, &framebuffer_);
if (program_.id != 0) glDeleteProgram(program_.id);
if (program_custom_zero_ && program_custom_zero_->id != 0) {
glDeleteProgram(program_custom_zero_->id);
}
if (vao_ != 0) glDeleteVertexArrays(1, &vao_);
glDeleteBuffers(2, vbo_);
});
}
private:
struct Program {
GLuint id;
GLint matrix_id;
};
std::shared_ptr<GlCalculatorHelper> gl_helper_;
GpuOrigin::Mode gpu_origin_;
GLuint vao_ = 0;
GLuint vbo_[2] = {0, 0};
Program program_;
std::optional<Program> program_custom_zero_;
GLuint framebuffer_ = 0;
};
#undef GL_CLAMP_TO_BORDER_MAY_BE_SUPPORTED
} // namespace
absl::StatusOr<std::unique_ptr<
AffineTransformation::Runner<GpuBuffer, std::unique_ptr<GpuBuffer>>>>
CreateAffineTransformationGlRunner(
std::shared_ptr<GlCalculatorHelper> gl_helper, GpuOrigin::Mode gpu_origin) {
auto runner =
absl::make_unique<GlTextureWarpAffineRunner>(gl_helper, gpu_origin);
MP_RETURN_IF_ERROR(runner->Init());
return runner;
}
} // namespace mediapipe

View File

@ -0,0 +1,36 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MEDIAPIPE_CALCULATORS_IMAGE_AFFINE_TRANSFORMATION_RUNNER_GL_H_
#define MEDIAPIPE_CALCULATORS_IMAGE_AFFINE_TRANSFORMATION_RUNNER_GL_H_
#include <memory>
#include "absl/status/statusor.h"
#include "mediapipe/calculators/image/affine_transformation.h"
#include "mediapipe/gpu/gl_calculator_helper.h"
#include "mediapipe/gpu/gpu_buffer.h"
#include "mediapipe/gpu/gpu_origin.pb.h"
namespace mediapipe {
absl::StatusOr<std::unique_ptr<AffineTransformation::Runner<
mediapipe::GpuBuffer, std::unique_ptr<mediapipe::GpuBuffer>>>>
CreateAffineTransformationGlRunner(
std::shared_ptr<mediapipe::GlCalculatorHelper> gl_helper,
mediapipe::GpuOrigin::Mode gpu_origin);
} // namespace mediapipe
#endif // MEDIAPIPE_CALCULATORS_IMAGE_AFFINE_TRANSFORMATION_RUNNER_GL_H_

View File

@ -0,0 +1,160 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/calculators/image/affine_transformation_runner_opencv.h"
#include <memory>
#include "absl/memory/memory.h"
#include "absl/status/statusor.h"
#include "mediapipe/calculators/image/affine_transformation.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/image_frame_opencv.h"
#include "mediapipe/framework/port/opencv_core_inc.h"
#include "mediapipe/framework/port/opencv_imgproc_inc.h"
#include "mediapipe/framework/port/ret_check.h"
namespace mediapipe {
namespace {
cv::BorderTypes GetBorderModeForOpenCv(
AffineTransformation::BorderMode border_mode) {
switch (border_mode) {
case AffineTransformation::BorderMode::kZero:
return cv::BORDER_CONSTANT;
case AffineTransformation::BorderMode::kReplicate:
return cv::BORDER_REPLICATE;
}
}
class OpenCvRunner
: public AffineTransformation::Runner<ImageFrame, ImageFrame> {
public:
absl::StatusOr<ImageFrame> Run(
const ImageFrame& input, const std::array<float, 16>& matrix,
const AffineTransformation::Size& size,
AffineTransformation::BorderMode border_mode) override {
    // OpenCV warpAffine works in absolute coordinates, so the transform (which
    // accepts and produces relative coordinates) should be adjusted to first
    // normalize coordinates and then scale them.
// clang-format off
cv::Matx44f normalize_dst_coordinate({
1.0f / size.width, 0.0f, 0.0f, 0.0f,
0.0f, 1.0f / size.height, 0.0f, 0.0f,
0.0f, 0.0f, 1.0f, 0.0f,
0.0f, 0.0f, 0.0f, 1.0f});
cv::Matx44f scale_src_coordinate({
1.0f * input.Width(), 0.0f, 0.0f, 0.0f,
0.0f, 1.0f * input.Height(), 0.0f, 0.0f,
0.0f, 0.0f, 1.0f, 0.0f,
0.0f, 0.0f, 0.0f, 1.0f});
// clang-format on
cv::Matx44f adjust_dst_coordinate;
cv::Matx44f adjust_src_coordinate;
// TODO: update to always use accurate implementation.
constexpr bool kOpenCvCompatibility = true;
if (kOpenCvCompatibility) {
adjust_dst_coordinate = normalize_dst_coordinate;
adjust_src_coordinate = scale_src_coordinate;
} else {
      // To do an accurate affine image transformation and keep "on-cpu" and
      // "on-gpu" calculations aligned, an extra offset is required to select
      // the correct pixels.
//
      // Each destination pixel corresponds to some region of pixels in the
      // source image. (In case of downscaling there can be more than one
      // pixel.) The offset for x and y is calculated so that the pixel in the
      // middle of that region is selected.
//
      // For simplicity's sake, let's consider downscaling from 100x50 to 10x10
// without a rotation:
// 1. Each destination pixel corresponds to 10x5 region
// X range: [0, .. , 9]
// Y range: [0, .. , 4]
// 2. Considering we have __discrete__ pixels, the center of the region is
// between (4, 2) and (5, 2) pixels, let's assume it's a "pixel"
// (4.5, 2).
// 3. When using the above as an offset for every pixel select while
// downscaling, resulting pixels are:
// (4.5, 2), (14.5, 2), .. , (94.5, 2)
// (4.5, 7), (14.5, 7), .. , (94.5, 7)
// ..
// (4.5, 47), (14.5, 47), .., (94.5, 47)
// instead of:
      //     (0, 0), (10, 0), .. , (90, 0)
      //     (0, 5), (10, 5), .. , (90, 5)
// ..
// (0, 45), (10, 45), .., (90, 45)
// The latter looks shifted.
//
// Offsets are needed, so that __discrete__ pixel at (0, 0) corresponds to
// the same pixel as would __non discrete__ pixel at (0.5, 0.5). Hence,
// transformation matrix should shift coordinates by (0.5, 0.5) as the
// very first step.
//
// Due to the above shift, transformed coordinates would be valid for
// float coordinates where pixel (0, 0) spans [0.0, 1.0) x [0.0, 1.0).
      // To make it valid for __discrete__ pixels, the transformation matrix
      // should shift coordinates by (-0.5f, -0.5f) as the very last step. (E.g.
      // if we get (0.5f, 0.5f), then it's the (0, 0) __discrete__ pixel.)
// clang-format off
cv::Matx44f shift_dst({1.0f, 0.0f, 0.0f, 0.5f,
0.0f, 1.0f, 0.0f, 0.5f,
0.0f, 0.0f, 1.0f, 0.0f,
0.0f, 0.0f, 0.0f, 1.0f});
cv::Matx44f shift_src({1.0f, 0.0f, 0.0f, -0.5f,
0.0f, 1.0f, 0.0f, -0.5f,
0.0f, 0.0f, 1.0f, 0.0f,
0.0f, 0.0f, 0.0f, 1.0f});
// clang-format on
adjust_dst_coordinate = normalize_dst_coordinate * shift_dst;
adjust_src_coordinate = shift_src * scale_src_coordinate;
}
cv::Matx44f transform(matrix.data());
cv::Matx44f transform_absolute =
adjust_src_coordinate * transform * adjust_dst_coordinate;
cv::Mat in_mat = formats::MatView(&input);
cv::Mat cv_affine_transform(2, 3, CV_32F);
cv_affine_transform.at<float>(0, 0) = transform_absolute.val[0];
cv_affine_transform.at<float>(0, 1) = transform_absolute.val[1];
cv_affine_transform.at<float>(0, 2) = transform_absolute.val[3];
cv_affine_transform.at<float>(1, 0) = transform_absolute.val[4];
cv_affine_transform.at<float>(1, 1) = transform_absolute.val[5];
cv_affine_transform.at<float>(1, 2) = transform_absolute.val[7];
ImageFrame out_image(input.Format(), size.width, size.height);
cv::Mat out_mat = formats::MatView(&out_image);
cv::warpAffine(in_mat, out_mat, cv_affine_transform,
cv::Size(out_mat.cols, out_mat.rows),
/*flags=*/cv::INTER_LINEAR | cv::WARP_INVERSE_MAP,
GetBorderModeForOpenCv(border_mode));
return out_image;
}
};
} // namespace
absl::StatusOr<
std::unique_ptr<AffineTransformation::Runner<ImageFrame, ImageFrame>>>
CreateAffineTransformationOpenCvRunner() {
return absl::make_unique<OpenCvRunner>();
}
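// A usage sketch (illustrative only; `input` and `matrix` are assumed to be an
// ImageFrame and a row-major 4x4 transform in relative coordinates, and the
// 256x256 output size is arbitrary):
//
//   ASSIGN_OR_RETURN(auto runner, CreateAffineTransformationOpenCvRunner());
//   AffineTransformation::Size out_size;
//   out_size.width = 256;
//   out_size.height = 256;
//   ASSIGN_OR_RETURN(
//       ImageFrame output,
//       runner->Run(input, matrix, out_size,
//                   AffineTransformation::BorderMode::kReplicate));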
} // namespace mediapipe

View File

@ -0,0 +1,32 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MEDIAPIPE_CALCULATORS_IMAGE_AFFINE_TRANSFORMATION_RUNNER_OPENCV_H_
#define MEDIAPIPE_CALCULATORS_IMAGE_AFFINE_TRANSFORMATION_RUNNER_OPENCV_H_
#include <memory>
#include "absl/status/statusor.h"
#include "mediapipe/calculators/image/affine_transformation.h"
#include "mediapipe/framework/formats/image_frame.h"
namespace mediapipe {
absl::StatusOr<
std::unique_ptr<AffineTransformation::Runner<ImageFrame, ImageFrame>>>
CreateAffineTransformationOpenCvRunner();
} // namespace mediapipe
#endif // MEDIAPIPE_CALCULATORS_IMAGE_AFFINE_TRANSFORMATION_RUNNER_OPENCV_H_

View File

@ -240,7 +240,7 @@ absl::Status BilateralFilterCalculator::RenderCpu(CalculatorContext* cc) {
auto input_mat = mediapipe::formats::MatView(&input_frame); auto input_mat = mediapipe::formats::MatView(&input_frame);
// Only 1 or 3 channel images supported by OpenCV. // Only 1 or 3 channel images supported by OpenCV.
if ((input_mat.channels() == 1 || input_mat.channels() == 3)) { if (!(input_mat.channels() == 1 || input_mat.channels() == 3)) {
return absl::InternalError( return absl::InternalError(
"CPU filtering supports only 1 or 3 channel input images."); "CPU filtering supports only 1 or 3 channel input images.");
} }

View File

@ -36,7 +36,7 @@ using GpuBuffer = mediapipe::GpuBuffer;
// stored on the target storage (CPU vs GPU) specified in the calculator option. // stored on the target storage (CPU vs GPU) specified in the calculator option.
// //
// The clone shares ownership of the input pixel data on the existing storage. // The clone shares ownership of the input pixel data on the existing storage.
// If the target storage is diffrent from the existing one, then the data is // If the target storage is different from the existing one, then the data is
// further copied there. // further copied there.
// //
// Example usage: // Example usage:

View File

@ -480,8 +480,7 @@ RectSpec ImageCroppingCalculator::GetCropSpecs(const CalculatorContext* cc,
if (cc->Inputs().HasTag(kRectTag)) { if (cc->Inputs().HasTag(kRectTag)) {
const auto& rect = cc->Inputs().Tag(kRectTag).Get<Rect>(); const auto& rect = cc->Inputs().Tag(kRectTag).Get<Rect>();
// Only use the rect if it is valid. // Only use the rect if it is valid.
if (rect.width() > 0 && rect.height() > 0 && rect.x_center() >= 0 && if (rect.width() > 0 && rect.height() > 0) {
rect.y_center() >= 0) {
x_center = rect.x_center(); x_center = rect.x_center();
y_center = rect.y_center(); y_center = rect.y_center();
crop_width = rect.width(); crop_width = rect.width();

View File

@ -102,6 +102,10 @@ mediapipe::ScaleMode_Mode ParseScaleMode(
// IMAGE: ImageFrame representing the input image. // IMAGE: ImageFrame representing the input image.
// IMAGE_GPU: GpuBuffer representing the input image. // IMAGE_GPU: GpuBuffer representing the input image.
// //
// OUTPUT_DIMENSIONS (optional): The output width and height in pixels as
// pair<int, int>. If set, it will override the corresponding fields in the
// calculator options and the input side packet.
//
// ROTATION_DEGREES (optional): The counterclockwise rotation angle in // ROTATION_DEGREES (optional): The counterclockwise rotation angle in
// degrees. This allows different rotation angles for different frames. It has // degrees. This allows different rotation angles for different frames. It has
// to be a multiple of 90 degrees. If provided, it overrides the // to be a multiple of 90 degrees. If provided, it overrides the
@ -221,6 +225,10 @@ absl::Status ImageTransformationCalculator::GetContract(
} }
#endif // !MEDIAPIPE_DISABLE_GPU #endif // !MEDIAPIPE_DISABLE_GPU
if (cc->Inputs().HasTag("OUTPUT_DIMENSIONS")) {
cc->Inputs().Tag("OUTPUT_DIMENSIONS").Set<std::pair<int, int>>();
}
if (cc->Inputs().HasTag("ROTATION_DEGREES")) { if (cc->Inputs().HasTag("ROTATION_DEGREES")) {
cc->Inputs().Tag("ROTATION_DEGREES").Set<int>(); cc->Inputs().Tag("ROTATION_DEGREES").Set<int>();
} }
@ -329,6 +337,16 @@ absl::Status ImageTransformationCalculator::Process(CalculatorContext* cc) {
!cc->Inputs().Tag("FLIP_VERTICALLY").IsEmpty()) { !cc->Inputs().Tag("FLIP_VERTICALLY").IsEmpty()) {
flip_vertically_ = cc->Inputs().Tag("FLIP_VERTICALLY").Get<bool>(); flip_vertically_ = cc->Inputs().Tag("FLIP_VERTICALLY").Get<bool>();
} }
if (cc->Inputs().HasTag("OUTPUT_DIMENSIONS")) {
if (cc->Inputs().Tag("OUTPUT_DIMENSIONS").IsEmpty()) {
return absl::OkStatus();
} else {
const auto& image_size =
cc->Inputs().Tag("OUTPUT_DIMENSIONS").Get<std::pair<int, int>>();
output_width_ = image_size.first;
output_height_ = image_size.second;
}
}
if (use_gpu_) { if (use_gpu_) {
#if !MEDIAPIPE_DISABLE_GPU #if !MEDIAPIPE_DISABLE_GPU
@ -491,6 +509,14 @@ absl::Status ImageTransformationCalculator::RenderGpu(CalculatorContext* cc) {
ComputeOutputDimensions(input_width, input_height, &output_width, ComputeOutputDimensions(input_width, input_height, &output_width,
&output_height); &output_height);
if (scale_mode_ == mediapipe::ScaleMode_Mode_FILL_AND_CROP) {
const float scale =
std::min(static_cast<float>(output_width_) / input_width,
static_cast<float>(output_height_) / input_height);
output_width = std::round(input_width * scale);
output_height = std::round(input_height * scale);
}
if (cc->Outputs().HasTag("LETTERBOX_PADDING")) { if (cc->Outputs().HasTag("LETTERBOX_PADDING")) {
auto padding = absl::make_unique<std::array<float, 4>>(); auto padding = absl::make_unique<std::array<float, 4>>();
ComputeOutputLetterboxPadding(input_width, input_height, output_width, ComputeOutputLetterboxPadding(input_width, input_height, output_width,

View File

@ -262,6 +262,7 @@ absl::Status ScaleImageCalculator::InitializeFrameInfo(CalculatorContext* cc) {
scale_image::FindOutputDimensions(crop_width_, crop_height_, // scale_image::FindOutputDimensions(crop_width_, crop_height_, //
options_.target_width(), // options_.target_width(), //
options_.target_height(), // options_.target_height(), //
options_.target_max_area(), //
options_.preserve_aspect_ratio(), // options_.preserve_aspect_ratio(), //
options_.scale_to_multiple_of(), // options_.scale_to_multiple_of(), //
&output_width_, &output_height_)); &output_width_, &output_height_));

View File

@ -28,6 +28,11 @@ message ScaleImageCalculatorOptions {
optional int32 target_width = 1; optional int32 target_width = 1;
optional int32 target_height = 2; optional int32 target_height = 2;
  // If set, then a target_width and target_height are calculated automatically
  // such that the output area stays below the target max area. Aspect ratio
  // preservation cannot be disabled.
optional int32 target_max_area = 15;
// If true, the image is scaled up or down proportionally so that it // If true, the image is scaled up or down proportionally so that it
// fits inside the box represented by target_width and target_height. // fits inside the box represented by target_width and target_height.
// Otherwise it is scaled to fit target_width and target_height // Otherwise it is scaled to fit target_width and target_height

View File

@ -92,12 +92,21 @@ absl::Status FindOutputDimensions(int input_width, //
int input_height, // int input_height, //
int target_width, // int target_width, //
int target_height, // int target_height, //
int target_max_area, //
bool preserve_aspect_ratio, // bool preserve_aspect_ratio, //
int scale_to_multiple_of, // int scale_to_multiple_of, //
int* output_width, int* output_height) { int* output_width, int* output_height) {
CHECK(output_width); CHECK(output_width);
CHECK(output_height); CHECK(output_height);
if (target_max_area > 0 && input_width * input_height > target_max_area) {
preserve_aspect_ratio = true;
target_height = static_cast<int>(sqrt(static_cast<double>(target_max_area) /
(static_cast<double>(input_width) /
static_cast<double>(input_height))));
target_width = -1; // Resize width to preserve aspect ratio.
}
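  // Illustrative arithmetic (assumed values): for a 1920x1080 input with
  // target_max_area = 518400, the aspect ratio is 16:9, so target_height =
  // sqrt(518400 / (1920.0 / 1080.0)) = 540; the width is then derived below
  // from the preserved aspect ratio, yielding 960x540 = 518400 pixels.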
if (preserve_aspect_ratio) { if (preserve_aspect_ratio) {
RET_CHECK(scale_to_multiple_of == 2) RET_CHECK(scale_to_multiple_of == 2)
<< "FindOutputDimensions always outputs width and height that are " << "FindOutputDimensions always outputs width and height that are "
@ -164,5 +173,17 @@ absl::Status FindOutputDimensions(int input_width, //
<< "Unable to set output dimensions based on target dimensions."; << "Unable to set output dimensions based on target dimensions.";
} }
absl::Status FindOutputDimensions(int input_width, //
int input_height, //
int target_width, //
int target_height, //
bool preserve_aspect_ratio, //
int scale_to_multiple_of, //
int* output_width, int* output_height) {
return FindOutputDimensions(
input_width, input_height, target_width, target_height, -1,
preserve_aspect_ratio, scale_to_multiple_of, output_width, output_height);
}
} // namespace scale_image } // namespace scale_image
} // namespace mediapipe } // namespace mediapipe

View File

@ -34,15 +34,25 @@ absl::Status FindCropDimensions(int input_width, int input_height, //
int* crop_width, int* crop_height, // int* crop_width, int* crop_height, //
int* col_start, int* row_start); int* col_start, int* row_start);
// Given an input width and height, a target width and height, whether to // Given an input width and height, a target width and height or max area,
// preserve the aspect ratio, and whether to round-down to the multiple of a // whether to preserve the aspect ratio, and whether to round-down to the
// given number nearest to the targets, determine the output width and height. // multiple of a given number nearest to the targets, determine the output width
// If target_width or target_height is non-positive, then they will be set to // and height. If target_width or target_height is non-positive, then they will
// the input_width and input_height respectively. If scale_to_multiple_of is // be set to the input_width and input_height respectively. If target_area is
// less than 1, it will be treated like 1. The output_width and // non-positive, then it will be ignored. If scale_to_multiple_of is less than
// output_height will be reduced as necessary to preserve_aspect_ratio if the // 1, it will be treated like 1. The output_width and output_height will be
// option is specified. If preserving the aspect ratio is desired, you must set // reduced as necessary to preserve_aspect_ratio if the option is specified. If
// scale_to_multiple_of to 2. // preserving the aspect ratio is desired, you must set scale_to_multiple_of
// to 2.
absl::Status FindOutputDimensions(int input_width, int input_height, //
int target_width,
int target_height, //
int target_max_area, //
bool preserve_aspect_ratio, //
int scale_to_multiple_of, //
int* output_width, int* output_height);
// Backwards compatible helper.
absl::Status FindOutputDimensions(int input_width, int input_height, // absl::Status FindOutputDimensions(int input_width, int input_height, //
int target_width, int target_width,
int target_height, // int target_height, //

View File

@ -79,49 +79,49 @@ TEST(ScaleImageUtilsTest, FindOutputDimensionsPreserveRatio) {
int output_width; int output_width;
int output_height; int output_height;
// Not scale. // Not scale.
MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, -1, true, 2, &output_width, MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, -1, -1, true, 2,
&output_height)); &output_width, &output_height));
EXPECT_EQ(200, output_width); EXPECT_EQ(200, output_width);
EXPECT_EQ(100, output_height); EXPECT_EQ(100, output_height);
// Not scale with odd input size. // Not scale with odd input size.
MP_ASSERT_OK(FindOutputDimensions(201, 101, -1, -1, false, 1, &output_width, MP_ASSERT_OK(FindOutputDimensions(201, 101, -1, -1, -1, false, 1,
&output_height)); &output_width, &output_height));
EXPECT_EQ(201, output_width); EXPECT_EQ(201, output_width);
EXPECT_EQ(101, output_height); EXPECT_EQ(101, output_height);
// Scale down by 1/2. // Scale down by 1/2.
MP_ASSERT_OK(FindOutputDimensions(200, 100, 100, -1, true, 2, &output_width, MP_ASSERT_OK(FindOutputDimensions(200, 100, 100, -1, -1, true, 2,
&output_height)); &output_width, &output_height));
EXPECT_EQ(100, output_width); EXPECT_EQ(100, output_width);
EXPECT_EQ(50, output_height); EXPECT_EQ(50, output_height);
// Scale up, doubling dimensions. // Scale up, doubling dimensions.
MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, 200, true, 2, &output_width, MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, 200, -1, true, 2,
&output_height)); &output_width, &output_height));
EXPECT_EQ(400, output_width); EXPECT_EQ(400, output_width);
EXPECT_EQ(200, output_height); EXPECT_EQ(200, output_height);
// Fits a 2:1 image into a 150 x 150 box. Output dimensions are always // Fits a 2:1 image into a 150 x 150 box. Output dimensions are always
  // divisible by 2. // divisible by 2.
MP_ASSERT_OK(FindOutputDimensions(200, 100, 150, 150, true, 2, &output_width, MP_ASSERT_OK(FindOutputDimensions(200, 100, 150, 150, -1, true, 2,
&output_height)); &output_width, &output_height));
EXPECT_EQ(150, output_width); EXPECT_EQ(150, output_width);
EXPECT_EQ(74, output_height); EXPECT_EQ(74, output_height);
// Fits a 2:1 image into a 400 x 50 box. // Fits a 2:1 image into a 400 x 50 box.
MP_ASSERT_OK(FindOutputDimensions(200, 100, 400, 50, true, 2, &output_width, MP_ASSERT_OK(FindOutputDimensions(200, 100, 400, 50, -1, true, 2,
&output_height)); &output_width, &output_height));
EXPECT_EQ(100, output_width); EXPECT_EQ(100, output_width);
EXPECT_EQ(50, output_height); EXPECT_EQ(50, output_height);
  // Scale to multiple number with odd target size. // Scale to multiple number with odd target size.
MP_ASSERT_OK(FindOutputDimensions(200, 100, 101, -1, true, 2, &output_width, MP_ASSERT_OK(FindOutputDimensions(200, 100, 101, -1, -1, true, 2,
&output_height)); &output_width, &output_height));
EXPECT_EQ(100, output_width); EXPECT_EQ(100, output_width);
EXPECT_EQ(50, output_height); EXPECT_EQ(50, output_height);
  // Scale to multiple number with odd target size. // Scale to multiple number with odd target size.
MP_ASSERT_OK(FindOutputDimensions(200, 100, 101, -1, true, 2, &output_width, MP_ASSERT_OK(FindOutputDimensions(200, 100, 101, -1, -1, true, 2,
&output_height)); &output_width, &output_height));
EXPECT_EQ(100, output_width); EXPECT_EQ(100, output_width);
EXPECT_EQ(50, output_height); EXPECT_EQ(50, output_height);
// Scale to odd size. // Scale to odd size.
MP_ASSERT_OK(FindOutputDimensions(200, 100, 151, 101, false, 1, &output_width, MP_ASSERT_OK(FindOutputDimensions(200, 100, 151, 101, -1, false, 1,
&output_height)); &output_width, &output_height));
EXPECT_EQ(151, output_width); EXPECT_EQ(151, output_width);
EXPECT_EQ(101, output_height); EXPECT_EQ(101, output_height);
} }
@ -131,18 +131,18 @@ TEST(ScaleImageUtilsTest, FindOutputDimensionsNoAspectRatio) {
int output_width; int output_width;
int output_height; int output_height;
// Scale width only. // Scale width only.
MP_ASSERT_OK(FindOutputDimensions(200, 100, 100, -1, false, 2, &output_width, MP_ASSERT_OK(FindOutputDimensions(200, 100, 100, -1, -1, false, 2,
&output_height)); &output_width, &output_height));
EXPECT_EQ(100, output_width); EXPECT_EQ(100, output_width);
EXPECT_EQ(100, output_height); EXPECT_EQ(100, output_height);
// Scale height only. // Scale height only.
MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, 200, false, 2, &output_width, MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, 200, -1, false, 2,
&output_height)); &output_width, &output_height));
EXPECT_EQ(200, output_width); EXPECT_EQ(200, output_width);
EXPECT_EQ(200, output_height); EXPECT_EQ(200, output_height);
// Scale both dimensions. // Scale both dimensions.
MP_ASSERT_OK(FindOutputDimensions(200, 100, 150, 200, false, 2, &output_width, MP_ASSERT_OK(FindOutputDimensions(200, 100, 150, 200, -1, false, 2,
&output_height)); &output_width, &output_height));
EXPECT_EQ(150, output_width); EXPECT_EQ(150, output_width);
EXPECT_EQ(200, output_height); EXPECT_EQ(200, output_height);
} }
@ -152,41 +152,78 @@ TEST(ScaleImageUtilsTest, FindOutputDimensionsDownScaleToMultipleOf) {
int output_width; int output_width;
int output_height; int output_height;
// Set no targets, downscale to a multiple of 8. // Set no targets, downscale to a multiple of 8.
MP_ASSERT_OK(FindOutputDimensions(100, 100, -1, -1, false, 8, &output_width, MP_ASSERT_OK(FindOutputDimensions(100, 100, -1, -1, -1, false, 8,
&output_height)); &output_width, &output_height));
EXPECT_EQ(96, output_width); EXPECT_EQ(96, output_width);
EXPECT_EQ(96, output_height); EXPECT_EQ(96, output_height);
// Set width target, downscale to a multiple of 8. // Set width target, downscale to a multiple of 8.
MP_ASSERT_OK(FindOutputDimensions(200, 100, 100, -1, false, 8, &output_width, MP_ASSERT_OK(FindOutputDimensions(200, 100, 100, -1, -1, false, 8,
&output_height)); &output_width, &output_height));
EXPECT_EQ(96, output_width); EXPECT_EQ(96, output_width);
EXPECT_EQ(96, output_height); EXPECT_EQ(96, output_height);
// Set height target, downscale to a multiple of 8. // Set height target, downscale to a multiple of 8.
MP_ASSERT_OK(FindOutputDimensions(201, 101, -1, 201, false, 8, &output_width, MP_ASSERT_OK(FindOutputDimensions(201, 101, -1, 201, -1, false, 8,
&output_height)); &output_width, &output_height));
EXPECT_EQ(200, output_width); EXPECT_EQ(200, output_width);
EXPECT_EQ(200, output_height); EXPECT_EQ(200, output_height);
// Set both targets, downscale to a multiple of 8. // Set both targets, downscale to a multiple of 8.
MP_ASSERT_OK(FindOutputDimensions(200, 100, 150, 200, false, 8, &output_width, MP_ASSERT_OK(FindOutputDimensions(200, 100, 150, 200, -1, false, 8,
&output_height)); &output_width, &output_height));
EXPECT_EQ(144, output_width); EXPECT_EQ(144, output_width);
EXPECT_EQ(200, output_height); EXPECT_EQ(200, output_height);
// Doesn't throw error if keep aspect is true and downscale multiple is 2. // Doesn't throw error if keep aspect is true and downscale multiple is 2.
MP_ASSERT_OK(FindOutputDimensions(200, 100, 400, 200, true, 2, &output_width, MP_ASSERT_OK(FindOutputDimensions(200, 100, 400, 200, -1, true, 2,
&output_height)); &output_width, &output_height));
EXPECT_EQ(400, output_width); EXPECT_EQ(400, output_width);
EXPECT_EQ(200, output_height); EXPECT_EQ(200, output_height);
// Throws error if keep aspect is true, but downscale multiple is not 2. // Throws error if keep aspect is true, but downscale multiple is not 2.
ASSERT_THAT(FindOutputDimensions(200, 100, 400, 200, true, 4, &output_width, ASSERT_THAT(FindOutputDimensions(200, 100, 400, 200, -1, true, 4,
&output_height), &output_width, &output_height),
testing::Not(testing::status::IsOk())); testing::Not(testing::status::IsOk()));
// Downscaling to multiple ignored if multiple is less than 2. // Downscaling to multiple ignored if multiple is less than 2.
MP_ASSERT_OK(FindOutputDimensions(200, 100, 401, 201, false, 1, &output_width, MP_ASSERT_OK(FindOutputDimensions(200, 100, 401, 201, -1, false, 1,
&output_height)); &output_width, &output_height));
EXPECT_EQ(401, output_width); EXPECT_EQ(401, output_width);
EXPECT_EQ(201, output_height); EXPECT_EQ(201, output_height);
} }
// Tests scaling down to a target maximum area.
TEST(ScaleImageUtilsTest, FindOutputDimensionsMaxArea) {
int output_width;
int output_height;
// Smaller area.
MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, -1, 9000, false, 2,
&output_width, &output_height));
EXPECT_NEAR(
200 / 100,
static_cast<double>(output_width) / static_cast<double>(output_height),
0.1f);
EXPECT_LE(output_width * output_height, 9000);
// Close to original area.
MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, -1, 19999, false, 2,
&output_width, &output_height));
EXPECT_NEAR(
200.0 / 100.0,
static_cast<double>(output_width) / static_cast<double>(output_height),
0.1f);
EXPECT_LE(output_width * output_height, 19999);
// Don't scale with larger area.
MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, -1, 20001, false, 2,
&output_width, &output_height));
EXPECT_EQ(200, output_width);
EXPECT_EQ(100, output_height);
// Don't scale with equal area.
MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, -1, 20000, false, 2,
&output_width, &output_height));
EXPECT_EQ(200, output_width);
EXPECT_EQ(100, output_height);
// Don't scale at all.
MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, -1, -1, false, 2,
&output_width, &output_height));
EXPECT_EQ(200, output_width);
EXPECT_EQ(100, output_height);
}
} // namespace } // namespace
} // namespace scale_image } // namespace scale_image
} // namespace mediapipe } // namespace mediapipe

View File

@ -53,7 +53,7 @@ enum { ATTRIB_VERTEX, ATTRIB_TEXTURE_POSITION, NUM_ATTRIBUTES };
// The alpha channel can be set to a single value, or come from an image mask. // The alpha channel can be set to a single value, or come from an image mask.
// If the input image has an alpha channel, it will be updated. // If the input image has an alpha channel, it will be updated.
// If the input image doesn't have an alpha channel, one will be added. // If the input image doesn't have an alpha channel, one will be added.
// Adding alpha channel to a Grayscale (single channel) input is not suported. // Adding alpha channel to a Grayscale (single channel) input is not supported.
// //
// Inputs: // Inputs:
// One of the following two IMAGE tags: // One of the following two IMAGE tags:

View File

@ -0,0 +1,211 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/calculators/image/warp_affine_calculator.h"
#include <array>
#include <cstdint>
#include <memory>
#include "mediapipe/calculators/image/affine_transformation.h"
#if !MEDIAPIPE_DISABLE_GPU
#include "mediapipe/calculators/image/affine_transformation_runner_gl.h"
#endif // !MEDIAPIPE_DISABLE_GPU
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "mediapipe/calculators/image/affine_transformation_runner_opencv.h"
#include "mediapipe/calculators/image/warp_affine_calculator.pb.h"
#include "mediapipe/framework/api2/node.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/image.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/port/ret_check.h"
#if !MEDIAPIPE_DISABLE_GPU
#include "mediapipe/gpu/gl_calculator_helper.h"
#include "mediapipe/gpu/gpu_buffer.h"
#endif // !MEDIAPIPE_DISABLE_GPU
namespace mediapipe {
namespace {
AffineTransformation::BorderMode GetBorderMode(
mediapipe::WarpAffineCalculatorOptions::BorderMode border_mode) {
switch (border_mode) {
case mediapipe::WarpAffineCalculatorOptions::BORDER_ZERO:
return AffineTransformation::BorderMode::kZero;
case mediapipe::WarpAffineCalculatorOptions::BORDER_UNSPECIFIED:
case mediapipe::WarpAffineCalculatorOptions::BORDER_REPLICATE:
return AffineTransformation::BorderMode::kReplicate;
}
}
template <typename ImageT>
class WarpAffineRunnerHolder {};
template <>
class WarpAffineRunnerHolder<ImageFrame> {
public:
using RunnerType = AffineTransformation::Runner<ImageFrame, ImageFrame>;
absl::Status Open(CalculatorContext* cc) { return absl::OkStatus(); }
absl::StatusOr<RunnerType*> GetRunner() {
if (!runner_) {
ASSIGN_OR_RETURN(runner_, CreateAffineTransformationOpenCvRunner());
}
return runner_.get();
}
private:
std::unique_ptr<RunnerType> runner_;
};
#if !MEDIAPIPE_DISABLE_GPU
template <>
class WarpAffineRunnerHolder<mediapipe::GpuBuffer> {
public:
using RunnerType =
AffineTransformation::Runner<mediapipe::GpuBuffer,
std::unique_ptr<mediapipe::GpuBuffer>>;
absl::Status Open(CalculatorContext* cc) {
gpu_origin_ =
cc->Options<mediapipe::WarpAffineCalculatorOptions>().gpu_origin();
gl_helper_ = std::make_shared<mediapipe::GlCalculatorHelper>();
return gl_helper_->Open(cc);
}
absl::StatusOr<RunnerType*> GetRunner() {
if (!runner_) {
ASSIGN_OR_RETURN(
runner_, CreateAffineTransformationGlRunner(gl_helper_, gpu_origin_));
}
return runner_.get();
}
private:
mediapipe::GpuOrigin::Mode gpu_origin_;
std::shared_ptr<mediapipe::GlCalculatorHelper> gl_helper_;
std::unique_ptr<RunnerType> runner_;
};
#endif // !MEDIAPIPE_DISABLE_GPU
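// Holder for the unified mediapipe::Image type: its runner dispatches to the
// GPU (GL) runner for GPU-backed images and to the CPU (OpenCV) runner
// otherwise, wrapping the CPU pixel data into an ImageFrame view.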
template <>
class WarpAffineRunnerHolder<mediapipe::Image> {
public:
absl::Status Open(CalculatorContext* cc) { return runner_.Open(cc); }
absl::StatusOr<
AffineTransformation::Runner<mediapipe::Image, mediapipe::Image>*>
GetRunner() {
return &runner_;
}
private:
class Runner : public AffineTransformation::Runner<mediapipe::Image,
mediapipe::Image> {
public:
absl::Status Open(CalculatorContext* cc) {
MP_RETURN_IF_ERROR(cpu_holder_.Open(cc));
#if !MEDIAPIPE_DISABLE_GPU
MP_RETURN_IF_ERROR(gpu_holder_.Open(cc));
#endif // !MEDIAPIPE_DISABLE_GPU
return absl::OkStatus();
}
absl::StatusOr<mediapipe::Image> Run(
const mediapipe::Image& input, const std::array<float, 16>& matrix,
const AffineTransformation::Size& size,
AffineTransformation::BorderMode border_mode) override {
if (input.UsesGpu()) {
#if !MEDIAPIPE_DISABLE_GPU
ASSIGN_OR_RETURN(auto* runner, gpu_holder_.GetRunner());
ASSIGN_OR_RETURN(auto result, runner->Run(input.GetGpuBuffer(), matrix,
size, border_mode));
return mediapipe::Image(*result);
#else
return absl::UnavailableError("GPU support is disabled");
#endif // !MEDIAPIPE_DISABLE_GPU
}
ASSIGN_OR_RETURN(auto* runner, cpu_holder_.GetRunner());
const auto& frame_ptr = input.GetImageFrameSharedPtr();
// Wrap image into image frame.
const ImageFrame image_frame(frame_ptr->Format(), frame_ptr->Width(),
frame_ptr->Height(), frame_ptr->WidthStep(),
const_cast<uint8_t*>(frame_ptr->PixelData()),
[](uint8* data) {});
ASSIGN_OR_RETURN(auto result,
runner->Run(image_frame, matrix, size, border_mode));
return mediapipe::Image(std::make_shared<ImageFrame>(std::move(result)));
}
private:
WarpAffineRunnerHolder<ImageFrame> cpu_holder_;
#if !MEDIAPIPE_DISABLE_GPU
WarpAffineRunnerHolder<mediapipe::GpuBuffer> gpu_holder_;
#endif // !MEDIAPIPE_DISABLE_GPU
};
Runner runner_;
};
template <typename InterfaceT>
class WarpAffineCalculatorImpl : public mediapipe::api2::NodeImpl<InterfaceT> {
public:
#if !MEDIAPIPE_DISABLE_GPU
static absl::Status UpdateContract(CalculatorContract* cc) {
if constexpr (std::is_same_v<InterfaceT, WarpAffineCalculatorGpu> ||
std::is_same_v<InterfaceT, WarpAffineCalculator>) {
MP_RETURN_IF_ERROR(mediapipe::GlCalculatorHelper::UpdateContract(cc));
}
return absl::OkStatus();
}
#endif // !MEDIAPIPE_DISABLE_GPU
absl::Status Open(CalculatorContext* cc) override { return holder_.Open(cc); }
absl::Status Process(CalculatorContext* cc) override {
if (InterfaceT::kInImage(cc).IsEmpty() ||
InterfaceT::kMatrix(cc).IsEmpty() ||
InterfaceT::kOutputSize(cc).IsEmpty()) {
return absl::OkStatus();
}
const std::array<float, 16>& transform = *InterfaceT::kMatrix(cc);
auto [out_width, out_height] = *InterfaceT::kOutputSize(cc);
AffineTransformation::Size output_size;
output_size.width = out_width;
output_size.height = out_height;
ASSIGN_OR_RETURN(auto* runner, holder_.GetRunner());
ASSIGN_OR_RETURN(
auto result,
runner->Run(
*InterfaceT::kInImage(cc), transform, output_size,
GetBorderMode(cc->Options<mediapipe::WarpAffineCalculatorOptions>()
.border_mode())));
InterfaceT::kOutImage(cc).Send(std::move(result));
return absl::OkStatus();
}
private:
WarpAffineRunnerHolder<typename decltype(InterfaceT::kInImage)::PayloadT>
holder_;
};
} // namespace
MEDIAPIPE_NODE_IMPLEMENTATION(
WarpAffineCalculatorImpl<WarpAffineCalculatorCpu>);
#if !MEDIAPIPE_DISABLE_GPU
MEDIAPIPE_NODE_IMPLEMENTATION(
WarpAffineCalculatorImpl<WarpAffineCalculatorGpu>);
#endif // !MEDIAPIPE_DISABLE_GPU
MEDIAPIPE_NODE_IMPLEMENTATION(WarpAffineCalculatorImpl<WarpAffineCalculator>);
} // namespace mediapipe

View File

@ -0,0 +1,94 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MEDIAPIPE_CALCULATORS_IMAGE_WARP_AFFINE_CALCULATOR_H_
#define MEDIAPIPE_CALCULATORS_IMAGE_WARP_AFFINE_CALCULATOR_H_
#include "mediapipe/framework/api2/node.h"
#include "mediapipe/framework/api2/port.h"
#include "mediapipe/framework/formats/image.h"
#include "mediapipe/framework/formats/image_frame.h"
#if !MEDIAPIPE_DISABLE_GPU
#include "mediapipe/gpu/gpu_buffer.h"
#endif // !MEDIAPIPE_DISABLE_GPU
namespace mediapipe {
// Runs affine transformation.
//
// Input:
// IMAGE - Image/ImageFrame/GpuBuffer
//
// MATRIX - std::array<float, 16>
//   Used as follows:
//     output(x, y) = input(matrix[0] * x + matrix[1] * y + matrix[3],
//                          matrix[4] * x + matrix[5] * y + matrix[7])
//   where the x and y ranges are defined by @OUTPUT_SIZE (an illustrative
//   matrix is sketched after the usage example below).
//
// OUTPUT_SIZE - std::pair<int, int>
// Size of the output image.
//
// Output:
// IMAGE - Image/ImageFrame/GpuBuffer
//
// Note:
// - Output image type and format are the same as the input one.
//
// Usage example:
// node {
// calculator: "WarpAffineCalculator(Cpu|Gpu)"
// input_stream: "IMAGE:image"
// input_stream: "MATRIX:matrix"
// input_stream: "OUTPUT_SIZE:size"
// output_stream: "IMAGE:transformed_image"
// options: {
// [mediapipe.WarpAffineCalculatorOptions.ext] {
// border_mode: BORDER_ZERO
// }
// }
// }
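//
// Illustrative MATRIX value (a sketch, assuming the matrix operates on
// normalized coordinates as in the CPU runner): cropping the centered half of
// the input without rotation could use
//
//   std::array<float, 16> matrix = {0.5f, 0.0f, 0.0f, 0.25f,
//                                   0.0f, 0.5f, 0.0f, 0.25f,
//                                   0.0f, 0.0f, 1.0f, 0.0f,
//                                   0.0f, 0.0f, 0.0f, 1.0f};
//
// which, per the formula above, maps output (x, y) to input
// (0.5 * x + 0.25, 0.5 * y + 0.25). In practice such matrices are typically
// produced by helpers like GetRotatedSubRectToRectTransformMatrix (see the
// calculator tests).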
template <typename ImageT>
class WarpAffineCalculatorIntf : public mediapipe::api2::NodeIntf {
public:
static constexpr mediapipe::api2::Input<ImageT> kInImage{"IMAGE"};
static constexpr mediapipe::api2::Input<std::array<float, 16>> kMatrix{
"MATRIX"};
static constexpr mediapipe::api2::Input<std::pair<int, int>> kOutputSize{
"OUTPUT_SIZE"};
static constexpr mediapipe::api2::Output<ImageT> kOutImage{"IMAGE"};
};
class WarpAffineCalculatorCpu : public WarpAffineCalculatorIntf<ImageFrame> {
public:
MEDIAPIPE_NODE_INTERFACE(WarpAffineCalculatorCpu, kInImage, kMatrix,
kOutputSize, kOutImage);
};
#if !MEDIAPIPE_DISABLE_GPU
class WarpAffineCalculatorGpu
: public WarpAffineCalculatorIntf<mediapipe::GpuBuffer> {
public:
MEDIAPIPE_NODE_INTERFACE(WarpAffineCalculatorGpu, kInImage, kMatrix,
kOutputSize, kOutImage);
};
#endif // !MEDIAPIPE_DISABLE_GPU
class WarpAffineCalculator : public WarpAffineCalculatorIntf<mediapipe::Image> {
public:
MEDIAPIPE_NODE_INTERFACE(WarpAffineCalculator, kInImage, kMatrix, kOutputSize,
kOutImage);
};
} // namespace mediapipe
#endif // MEDIAPIPE_CALCULATORS_IMAGE_WARP_AFFINE_CALCULATOR_H_

View File

@ -0,0 +1,46 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe;
import "mediapipe/framework/calculator.proto";
import "mediapipe/gpu/gpu_origin.proto";
message WarpAffineCalculatorOptions {
extend CalculatorOptions {
optional WarpAffineCalculatorOptions ext = 373693895;
}
// Pixel extrapolation methods. See @border_mode.
enum BorderMode {
BORDER_UNSPECIFIED = 0;
BORDER_ZERO = 1;
BORDER_REPLICATE = 2;
}
  // Pixel extrapolation method.
  // When applying the affine transformation, output pixels may map to
  // coordinates outside the input image boundaries. Border mode specifies how
  // such pixels are calculated.
//
// BORDER_REPLICATE is used by default.
optional BorderMode border_mode = 1;
// For CONVENTIONAL mode for OpenGL, input image starts at bottom and needs
// to be flipped vertically as tensors are expected to start at top.
// (DEFAULT or unset interpreted as CONVENTIONAL.)
optional GpuOrigin.Mode gpu_origin = 2;
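
  // Example (illustrative) of setting these options in a graph config:
  //
  //   options: {
  //     [mediapipe.WarpAffineCalculatorOptions.ext] {
  //       border_mode: BORDER_ZERO
  //       gpu_origin: TOP_LEFT
  //     }
  //   }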
}

View File

@ -0,0 +1,615 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <cmath>
#include <vector>
#include "absl/flags/flag.h"
#include "absl/memory/memory.h"
#include "absl/strings/substitute.h"
#include "mediapipe/calculators/image/affine_transformation.h"
#include "mediapipe/calculators/tensor/image_to_tensor_converter.h"
#include "mediapipe/calculators/tensor/image_to_tensor_utils.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_runner.h"
#include "mediapipe/framework/deps/file_path.h"
#include "mediapipe/framework/formats/image.h"
#include "mediapipe/framework/formats/image_format.pb.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/image_frame_opencv.h"
#include "mediapipe/framework/formats/rect.pb.h"
#include "mediapipe/framework/formats/tensor.h"
#include "mediapipe/framework/port/gtest.h"
#include "mediapipe/framework/port/integral_types.h"
#include "mediapipe/framework/port/opencv_core_inc.h"
#include "mediapipe/framework/port/opencv_imgcodecs_inc.h"
#include "mediapipe/framework/port/opencv_imgproc_inc.h"
#include "mediapipe/framework/port/parse_text_proto.h"
#include "mediapipe/framework/port/status_matchers.h"
namespace mediapipe {
namespace {
cv::Mat GetRgb(absl::string_view path) {
cv::Mat bgr = cv::imread(file::JoinPath("./", path));
cv::Mat rgb(bgr.rows, bgr.cols, CV_8UC3);
int from_to[] = {0, 2, 1, 1, 2, 0};
cv::mixChannels(&bgr, 1, &rgb, 1, from_to, 3);
return rgb;
}
cv::Mat GetRgba(absl::string_view path) {
cv::Mat bgr = cv::imread(file::JoinPath("./", path));
cv::Mat rgba(bgr.rows, bgr.cols, CV_8UC4, cv::Scalar(0, 0, 0, 0));
int from_to[] = {0, 2, 1, 1, 2, 0};
  cv::mixChannels(&bgr, 1, &rgba, 1, from_to, 3);
  return rgba;
}
// Test template.
// No processing/assertions should be done after the function is invoked.
void RunTest(const std::string& graph_text, const std::string& tag,
const cv::Mat& input, cv::Mat expected_result,
float similarity_threshold, std::array<float, 16> matrix,
int out_width, int out_height,
absl::optional<AffineTransformation::BorderMode> border_mode) {
std::string border_mode_str;
if (border_mode) {
switch (*border_mode) {
case AffineTransformation::BorderMode::kReplicate:
border_mode_str = "border_mode: BORDER_REPLICATE";
break;
case AffineTransformation::BorderMode::kZero:
border_mode_str = "border_mode: BORDER_ZERO";
break;
}
}
auto graph_config = mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(
absl::Substitute(graph_text, /*$0=*/border_mode_str));
std::vector<Packet> output_packets;
tool::AddVectorSink("output_image", &graph_config, &output_packets);
// Run the graph.
CalculatorGraph graph;
MP_ASSERT_OK(graph.Initialize(graph_config));
MP_ASSERT_OK(graph.StartRun({}));
ImageFrame input_image(
input.channels() == 4 ? ImageFormat::SRGBA : ImageFormat::SRGB,
input.cols, input.rows, input.step, input.data, [](uint8*) {});
MP_ASSERT_OK(graph.AddPacketToInputStream(
"input_image",
MakePacket<ImageFrame>(std::move(input_image)).At(Timestamp(0))));
MP_ASSERT_OK(graph.AddPacketToInputStream(
"matrix",
MakePacket<std::array<float, 16>>(std::move(matrix)).At(Timestamp(0))));
MP_ASSERT_OK(graph.AddPacketToInputStream(
"output_size", MakePacket<std::pair<int, int>>(
std::pair<int, int>(out_width, out_height))
.At(Timestamp(0))));
MP_ASSERT_OK(graph.WaitUntilIdle());
ASSERT_THAT(output_packets, testing::SizeIs(1));
// Get and process results.
const ImageFrame& out_frame = output_packets[0].Get<ImageFrame>();
cv::Mat result = formats::MatView(&out_frame);
double similarity =
1.0 - cv::norm(result, expected_result, cv::NORM_RELATIVE | cv::NORM_L2);
EXPECT_GE(similarity, similarity_threshold);
// Fully close graph at end, otherwise calculator+tensors are destroyed
// after calling WaitUntilDone().
MP_ASSERT_OK(graph.CloseInputStream("input_image"));
MP_ASSERT_OK(graph.CloseInputStream("matrix"));
MP_ASSERT_OK(graph.CloseInputStream("output_size"));
MP_ASSERT_OK(graph.WaitUntilDone());
}
enum class InputType { kImageFrame, kImage };
// Similarity is checked against OpenCV results always, and due to differences
// in how OpenCV and GL treat pixels there are two thresholds.
// TODO: update to have just one threshold when OpenCV
// implementation is updated.
struct SimilarityConfig {
double threshold_on_cpu;
double threshold_on_gpu;
};
void RunTest(cv::Mat input, cv::Mat expected_result,
const SimilarityConfig& similarity, std::array<float, 16> matrix,
int out_width, int out_height,
absl::optional<AffineTransformation::BorderMode> border_mode) {
RunTest(R"(
input_stream: "input_image"
input_stream: "output_size"
input_stream: "matrix"
node {
calculator: "WarpAffineCalculatorCpu"
input_stream: "IMAGE:input_image"
input_stream: "MATRIX:matrix"
input_stream: "OUTPUT_SIZE:output_size"
output_stream: "IMAGE:output_image"
options {
[mediapipe.WarpAffineCalculatorOptions.ext] {
$0 # border mode
}
}
}
)",
"cpu", input, expected_result, similarity.threshold_on_cpu, matrix,
out_width, out_height, border_mode);
RunTest(R"(
input_stream: "input_image"
input_stream: "output_size"
input_stream: "matrix"
node {
calculator: "ToImageCalculator"
input_stream: "IMAGE_CPU:input_image"
output_stream: "IMAGE:input_image_unified"
}
node {
calculator: "WarpAffineCalculator"
input_stream: "IMAGE:input_image_unified"
input_stream: "MATRIX:matrix"
input_stream: "OUTPUT_SIZE:output_size"
output_stream: "IMAGE:output_image_unified"
options {
[mediapipe.WarpAffineCalculatorOptions.ext] {
$0 # border mode
}
}
}
node {
calculator: "FromImageCalculator"
input_stream: "IMAGE:output_image_unified"
output_stream: "IMAGE_CPU:output_image"
}
)",
"cpu_image", input, expected_result, similarity.threshold_on_cpu,
matrix, out_width, out_height, border_mode);
RunTest(R"(
input_stream: "input_image"
input_stream: "output_size"
input_stream: "matrix"
node {
calculator: "ImageFrameToGpuBufferCalculator"
input_stream: "input_image"
output_stream: "input_image_gpu"
}
node {
calculator: "WarpAffineCalculatorGpu"
input_stream: "IMAGE:input_image_gpu"
input_stream: "MATRIX:matrix"
input_stream: "OUTPUT_SIZE:output_size"
output_stream: "IMAGE:output_image_gpu"
options {
[mediapipe.WarpAffineCalculatorOptions.ext] {
$0 # border mode
gpu_origin: TOP_LEFT
}
}
}
node {
calculator: "GpuBufferToImageFrameCalculator"
input_stream: "output_image_gpu"
output_stream: "output_image"
}
)",
"gpu", input, expected_result, similarity.threshold_on_gpu, matrix,
out_width, out_height, border_mode);
RunTest(R"(
input_stream: "input_image"
input_stream: "output_size"
input_stream: "matrix"
node {
calculator: "ImageFrameToGpuBufferCalculator"
input_stream: "input_image"
output_stream: "input_image_gpu"
}
node {
calculator: "ToImageCalculator"
input_stream: "IMAGE_GPU:input_image_gpu"
output_stream: "IMAGE:input_image_unified"
}
node {
calculator: "WarpAffineCalculator"
input_stream: "IMAGE:input_image_unified"
input_stream: "MATRIX:matrix"
input_stream: "OUTPUT_SIZE:output_size"
output_stream: "IMAGE:output_image_unified"
options {
[mediapipe.WarpAffineCalculatorOptions.ext] {
$0 # border mode
gpu_origin: TOP_LEFT
}
}
}
node {
calculator: "FromImageCalculator"
input_stream: "IMAGE:output_image_unified"
output_stream: "IMAGE_GPU:output_image_gpu"
}
node {
calculator: "GpuBufferToImageFrameCalculator"
input_stream: "output_image_gpu"
output_stream: "output_image"
}
)",
"gpu_image", input, expected_result, similarity.threshold_on_gpu,
matrix, out_width, out_height, border_mode);
}
std::array<float, 16> GetMatrix(cv::Mat input, mediapipe::NormalizedRect roi,
bool keep_aspect_ratio, int out_width,
int out_height) {
std::array<float, 16> transform_mat;
mediapipe::RotatedRect roi_absolute =
mediapipe::GetRoi(input.cols, input.rows, roi);
mediapipe::PadRoi(out_width, out_height, keep_aspect_ratio, &roi_absolute)
.IgnoreError();
mediapipe::GetRotatedSubRectToRectTransformMatrix(
roi_absolute, input.cols, input.rows,
/*flip_horizontaly=*/false, &transform_mat);
return transform_mat;
}
TEST(WarpAffineCalculatorTest, MediumSubRectKeepAspect) {
mediapipe::NormalizedRect roi;
roi.set_x_center(0.65f);
roi.set_y_center(0.4f);
roi.set_width(0.5f);
roi.set_height(0.5f);
roi.set_rotation(0);
auto input = GetRgb(
"/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/input.jpg");
auto expected_output = GetRgb(
"/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/medium_sub_rect_keep_aspect.png");
int out_width = 256;
int out_height = 256;
bool keep_aspect_ratio = true;
std::optional<AffineTransformation::BorderMode> border_mode = {};
RunTest(input, expected_output,
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.82},
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
out_width, out_height, border_mode);
}
TEST(WarpAffineCalculatorTest, MediumSubRectKeepAspectBorderZero) {
mediapipe::NormalizedRect roi;
roi.set_x_center(0.65f);
roi.set_y_center(0.4f);
roi.set_width(0.5f);
roi.set_height(0.5f);
roi.set_rotation(0);
auto input = GetRgb(
"/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/input.jpg");
auto expected_output = GetRgb(
"/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/"
"medium_sub_rect_keep_aspect_border_zero.png");
int out_width = 256;
int out_height = 256;
bool keep_aspect_ratio = true;
std::optional<AffineTransformation::BorderMode> border_mode =
AffineTransformation::BorderMode::kZero;
RunTest(input, expected_output,
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.81},
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
out_width, out_height, border_mode);
}
TEST(WarpAffineCalculatorTest, MediumSubRectKeepAspectWithRotation) {
mediapipe::NormalizedRect roi;
roi.set_x_center(0.65f);
roi.set_y_center(0.4f);
roi.set_width(0.5f);
roi.set_height(0.5f);
roi.set_rotation(M_PI * 90.0f / 180.0f);
auto input = GetRgb(
"/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/input.jpg");
auto expected_output = GetRgb(
"/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/"
"medium_sub_rect_keep_aspect_with_rotation.png");
int out_width = 256;
int out_height = 256;
bool keep_aspect_ratio = true;
std::optional<AffineTransformation::BorderMode> border_mode =
AffineTransformation::BorderMode::kReplicate;
RunTest(input, expected_output,
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.77},
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
out_width, out_height, border_mode);
}
TEST(WarpAffineCalculatorTest, MediumSubRectKeepAspectWithRotationBorderZero) {
mediapipe::NormalizedRect roi;
roi.set_x_center(0.65f);
roi.set_y_center(0.4f);
roi.set_width(0.5f);
roi.set_height(0.5f);
roi.set_rotation(M_PI * 90.0f / 180.0f);
auto input = GetRgb(
"/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/input.jpg");
auto expected_output = GetRgb(
"/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/"
"medium_sub_rect_keep_aspect_with_rotation_border_zero.png");
int out_width = 256;
int out_height = 256;
bool keep_aspect_ratio = true;
std::optional<AffineTransformation::BorderMode> border_mode =
AffineTransformation::BorderMode::kZero;
RunTest(input, expected_output,
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.75},
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
out_width, out_height, border_mode);
}
TEST(WarpAffineCalculatorTest, MediumSubRectWithRotation) {
mediapipe::NormalizedRect roi;
roi.set_x_center(0.65f);
roi.set_y_center(0.4f);
roi.set_width(0.5f);
roi.set_height(0.5f);
roi.set_rotation(M_PI * -45.0f / 180.0f);
auto input = GetRgb(
"/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/input.jpg");
auto expected_output = GetRgb(
"/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/medium_sub_rect_with_rotation.png");
int out_width = 256;
int out_height = 256;
bool keep_aspect_ratio = false;
std::optional<AffineTransformation::BorderMode> border_mode =
AffineTransformation::BorderMode::kReplicate;
RunTest(input, expected_output,
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.81},
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
out_width, out_height, border_mode);
}
TEST(WarpAffineCalculatorTest, MediumSubRectWithRotationBorderZero) {
mediapipe::NormalizedRect roi;
roi.set_x_center(0.65f);
roi.set_y_center(0.4f);
roi.set_width(0.5f);
roi.set_height(0.5f);
roi.set_rotation(M_PI * -45.0f / 180.0f);
auto input = GetRgb(
"/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/input.jpg");
auto expected_output = GetRgb(
"/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/"
"medium_sub_rect_with_rotation_border_zero.png");
int out_width = 256;
int out_height = 256;
bool keep_aspect_ratio = false;
std::optional<AffineTransformation::BorderMode> border_mode =
AffineTransformation::BorderMode::kZero;
RunTest(input, expected_output,
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.80},
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
out_width, out_height, border_mode);
}
TEST(WarpAffineCalculatorTest, LargeSubRect) {
mediapipe::NormalizedRect roi;
roi.set_x_center(0.5f);
roi.set_y_center(0.5f);
roi.set_width(1.5f);
roi.set_height(1.1f);
roi.set_rotation(0);
auto input = GetRgb(
"/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/input.jpg");
auto expected_output = GetRgb(
"/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/large_sub_rect.png");
int out_width = 128;
int out_height = 128;
bool keep_aspect_ratio = false;
std::optional<AffineTransformation::BorderMode> border_mode =
AffineTransformation::BorderMode::kReplicate;
RunTest(input, expected_output,
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.95},
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
out_width, out_height, border_mode);
}
TEST(WarpAffineCalculatorTest, LargeSubRectBorderZero) {
mediapipe::NormalizedRect roi;
roi.set_x_center(0.5f);
roi.set_y_center(0.5f);
roi.set_width(1.5f);
roi.set_height(1.1f);
roi.set_rotation(0);
auto input = GetRgb(
"/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/input.jpg");
auto expected_output = GetRgb(
"/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/large_sub_rect_border_zero.png");
int out_width = 128;
int out_height = 128;
bool keep_aspect_ratio = false;
std::optional<AffineTransformation::BorderMode> border_mode =
AffineTransformation::BorderMode::kZero;
RunTest(input, expected_output,
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.92},
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
out_width, out_height, border_mode);
}
TEST(WarpAffineCalculatorTest, LargeSubRectKeepAspect) {
mediapipe::NormalizedRect roi;
roi.set_x_center(0.5f);
roi.set_y_center(0.5f);
roi.set_width(1.5f);
roi.set_height(1.1f);
roi.set_rotation(0);
auto input = GetRgb(
"/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/input.jpg");
auto expected_output = GetRgb(
"/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/large_sub_rect_keep_aspect.png");
int out_width = 128;
int out_height = 128;
bool keep_aspect_ratio = true;
std::optional<AffineTransformation::BorderMode> border_mode =
AffineTransformation::BorderMode::kReplicate;
RunTest(input, expected_output,
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.97},
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
out_width, out_height, border_mode);
}
TEST(WarpAffineCalculatorTest, LargeSubRectKeepAspectBorderZero) {
mediapipe::NormalizedRect roi;
roi.set_x_center(0.5f);
roi.set_y_center(0.5f);
roi.set_width(1.5f);
roi.set_height(1.1f);
roi.set_rotation(0);
auto input = GetRgb(
"/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/input.jpg");
auto expected_output = GetRgb(
"/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/"
"large_sub_rect_keep_aspect_border_zero.png");
int out_width = 128;
int out_height = 128;
bool keep_aspect_ratio = true;
std::optional<AffineTransformation::BorderMode> border_mode =
AffineTransformation::BorderMode::kZero;
RunTest(input, expected_output,
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.97},
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
out_width, out_height, border_mode);
}
TEST(WarpAffineCalculatorTest, LargeSubRectKeepAspectWithRotation) {
mediapipe::NormalizedRect roi;
roi.set_x_center(0.5f);
roi.set_y_center(0.5f);
roi.set_width(1.5f);
roi.set_height(1.1f);
roi.set_rotation(M_PI * -15.0f / 180.0f);
auto input = GetRgba(
"/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/input.jpg");
auto expected_output = GetRgba(
"/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/"
"large_sub_rect_keep_aspect_with_rotation.png");
int out_width = 128;
int out_height = 128;
bool keep_aspect_ratio = true;
std::optional<AffineTransformation::BorderMode> border_mode = {};
RunTest(input, expected_output,
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.91},
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
out_width, out_height, border_mode);
}
TEST(WarpAffineCalculatorTest, LargeSubRectKeepAspectWithRotationBorderZero) {
mediapipe::NormalizedRect roi;
roi.set_x_center(0.5f);
roi.set_y_center(0.5f);
roi.set_width(1.5f);
roi.set_height(1.1f);
roi.set_rotation(M_PI * -15.0f / 180.0f);
auto input = GetRgba(
"/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/input.jpg");
auto expected_output = GetRgba(
"/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/"
"large_sub_rect_keep_aspect_with_rotation_border_zero.png");
int out_width = 128;
int out_height = 128;
bool keep_aspect_ratio = true;
std::optional<AffineTransformation::BorderMode> border_mode =
AffineTransformation::BorderMode::kZero;
RunTest(input, expected_output,
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.88},
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
out_width, out_height, border_mode);
}
TEST(WarpAffineCalculatorTest, NoOp) {
mediapipe::NormalizedRect roi;
roi.set_x_center(0.5f);
roi.set_y_center(0.5f);
roi.set_width(1.0f);
roi.set_height(1.0f);
roi.set_rotation(0);
auto input = GetRgba(
"/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/input.jpg");
auto expected_output = GetRgba(
"/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/noop_except_range.png");
int out_width = 64;
int out_height = 128;
bool keep_aspect_ratio = true;
std::optional<AffineTransformation::BorderMode> border_mode =
AffineTransformation::BorderMode::kReplicate;
RunTest(input, expected_output,
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.99},
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
out_width, out_height, border_mode);
}
TEST(WarpAffineCalculatorTest, NoOpBorderZero) {
mediapipe::NormalizedRect roi;
roi.set_x_center(0.5f);
roi.set_y_center(0.5f);
roi.set_width(1.0f);
roi.set_height(1.0f);
roi.set_rotation(0);
auto input = GetRgba(
"/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/input.jpg");
auto expected_output = GetRgba(
"/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/noop_except_range.png");
int out_width = 64;
int out_height = 128;
bool keep_aspect_ratio = true;
std::optional<AffineTransformation::BorderMode> border_mode =
AffineTransformation::BorderMode::kZero;
RunTest(input, expected_output,
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.99},
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
out_width, out_height, border_mode);
}
} // namespace
} // namespace mediapipe


@ -26,6 +26,11 @@ licenses(["notice"])
package(default_visibility = ["//visibility:private"]) package(default_visibility = ["//visibility:private"])
exports_files(
glob(["testdata/image_to_tensor/*"]),
visibility = ["//mediapipe/calculators/image:__subpackages__"],
)
selects.config_setting_group( selects.config_setting_group(
name = "compute_shader_unavailable", name = "compute_shader_unavailable",
match_any = [ match_any = [
@ -351,6 +356,57 @@ cc_library(
alwayslink = 1, alwayslink = 1,
) )
mediapipe_proto_library(
name = "landmarks_to_tensor_calculator_proto",
srcs = ["landmarks_to_tensor_calculator.proto"],
visibility = ["//visibility:public"],
deps = [
"//mediapipe/framework:calculator_options_proto",
"//mediapipe/framework:calculator_proto",
],
)
cc_library(
name = "landmarks_to_tensor_calculator",
srcs = ["landmarks_to_tensor_calculator.cc"],
hdrs = ["landmarks_to_tensor_calculator.h"],
copts = select({
"//mediapipe:apple": [
"-x objective-c++",
"-fobjc-arc", # enable reference-counting
],
"//conditions:default": [],
}),
visibility = ["//visibility:public"],
deps = [
":landmarks_to_tensor_calculator_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/api2:node",
"//mediapipe/framework/formats:landmark_cc_proto",
"//mediapipe/framework/formats:tensor",
"//mediapipe/framework/port:ret_check",
],
alwayslink = 1,
)
cc_test(
name = "landmarks_to_tensor_calculator_test",
srcs = ["landmarks_to_tensor_calculator_test.cc"],
deps = [
":landmarks_to_tensor_calculator",
":landmarks_to_tensor_calculator_cc_proto",
"//mediapipe/framework:calculator_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework:calculator_runner",
"//mediapipe/framework/formats:landmark_cc_proto",
"//mediapipe/framework/formats:tensor",
"//mediapipe/framework/port:gtest_main",
"//mediapipe/framework/port:parse_text_proto",
"@com_google_absl//absl/memory",
"@com_google_googletest//:gtest_main",
],
)
mediapipe_proto_library( mediapipe_proto_library(
name = "tensors_to_floats_calculator_proto", name = "tensors_to_floats_calculator_proto",
srcs = ["tensors_to_floats_calculator.proto"], srcs = ["tensors_to_floats_calculator.proto"],


@ -87,9 +87,9 @@ using GpuBuffer = mediapipe::GpuBuffer;
// TENSORS - std::vector<Tensor> // TENSORS - std::vector<Tensor>
// Vector containing a single Tensor populated with an extracted RGB image. // Vector containing a single Tensor populated with an extracted RGB image.
// MATRIX - std::array<float, 16> @Optional // MATRIX - std::array<float, 16> @Optional
// An std::array<float, 16> representing a 4x4 row-major-order matrix which // An std::array<float, 16> representing a 4x4 row-major-order matrix that
// can be used to map a point on the output tensor to a point on the input // maps a point on the input image to a point on the output tensor, and
// image. // can be used to reverse the mapping by inverting the matrix.
// LETTERBOX_PADDING - std::array<float, 4> @Optional // LETTERBOX_PADDING - std::array<float, 4> @Optional
// An std::array<float, 4> representing the letterbox padding from the 4 // An std::array<float, 4> representing the letterbox padding from the 4
// sides ([left, top, right, bottom]) of the output image, normalized to // sides ([left, top, right, bottom]) of the output image, normalized to

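As a minimal sketch of the MATRIX output documented in the hunk above: the snippet below applies a 4x4 row-major matrix to a 2D point in homogeneous coordinates, and inverting the matrix gives the reverse mapping. The helper name and the assumption that the matrix is affine (so w stays 1) are illustrative only, not part of the calculator's API.

#include <array>

// Illustrative helper (not part of MediaPipe): applies a 4x4 row-major
// matrix to a 2D point, treating it as (x, y, 0, 1) in homogeneous
// coordinates.
std::array<float, 2> ApplyRowMajor4x4(const std::array<float, 16>& m,
                                      float x, float y) {
  const float out_x = m[0] * x + m[1] * y + m[3];     // row 0
  const float out_y = m[4] * x + m[5] * y + m[7];     // row 1
  const float out_w = m[12] * x + m[13] * y + m[15];  // row 3
  // For an affine transform out_w is expected to be 1; the division keeps
  // the sketch valid for general matrices.
  return {out_x / out_w, out_y / out_w};
}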

@ -33,7 +33,7 @@ class InferenceCalculatorSelectorImpl
absl::StatusOr<CalculatorGraphConfig> GetConfig( absl::StatusOr<CalculatorGraphConfig> GetConfig(
const CalculatorGraphConfig::Node& subgraph_node) { const CalculatorGraphConfig::Node& subgraph_node) {
const auto& options = const auto& options =
Subgraph::GetOptions<::mediapipe::InferenceCalculatorOptions>( Subgraph::GetOptions<mediapipe::InferenceCalculatorOptions>(
subgraph_node); subgraph_node);
std::vector<absl::string_view> impls; std::vector<absl::string_view> impls;
const bool should_use_gpu = const bool should_use_gpu =


@ -99,8 +99,11 @@ class InferenceCalculator : public NodeIntf {
kSideInCustomOpResolver{"CUSTOM_OP_RESOLVER"}; kSideInCustomOpResolver{"CUSTOM_OP_RESOLVER"};
static constexpr SideInput<TfLiteModelPtr>::Optional kSideInModel{"MODEL"}; static constexpr SideInput<TfLiteModelPtr>::Optional kSideInModel{"MODEL"};
static constexpr Output<std::vector<Tensor>> kOutTensors{"TENSORS"}; static constexpr Output<std::vector<Tensor>> kOutTensors{"TENSORS"};
static constexpr SideInput<
mediapipe::InferenceCalculatorOptions::Delegate>::Optional kDelegate{
"DELEGATE"};
MEDIAPIPE_NODE_CONTRACT(kInTensors, kSideInCustomOpResolver, kSideInModel, MEDIAPIPE_NODE_CONTRACT(kInTensors, kSideInCustomOpResolver, kSideInModel,
kOutTensors); kOutTensors, kDelegate);
protected: protected:
using TfLiteDelegatePtr = using TfLiteDelegatePtr =

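A rough sketch of how the new optional DELEGATE input side packet above could be fed from application code, assuming the graph's InferenceCalculator node declares input_side_packet: "DELEGATE:delegate_options". The function name, the side packet name and the XNNPACK thread count are placeholders, not values taken from this change.

#include <map>
#include <string>

#include "absl/status/status.h"
#include "mediapipe/calculators/tensor/inference_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"

// Hypothetical helper: supplies a Delegate proto as the "delegate_options"
// side packet when starting an already-initialized CalculatorGraph.
absl::Status StartRunWithDelegateSidePacket(mediapipe::CalculatorGraph& graph) {
  mediapipe::InferenceCalculatorOptions::Delegate delegate_options;
  delegate_options.mutable_xnnpack()->set_num_threads(4);
  return graph.StartRun(
      {{"delegate_options",
        mediapipe::MakePacket<mediapipe::InferenceCalculatorOptions::Delegate>(
            delegate_options)}});
}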

@ -18,6 +18,9 @@ package mediapipe;
import "mediapipe/framework/calculator.proto"; import "mediapipe/framework/calculator.proto";
option java_package = "com.google.mediapipe.calculator.proto";
option java_outer_classname = "InferenceCalculatorProto";
// Full Example: // Full Example:
// //
// node { // node {
@ -31,7 +34,6 @@ import "mediapipe/framework/calculator.proto";
// } // }
// } // }
// } // }
//
message InferenceCalculatorOptions { message InferenceCalculatorOptions {
extend mediapipe.CalculatorOptions { extend mediapipe.CalculatorOptions {
optional InferenceCalculatorOptions ext = 336783863; optional InferenceCalculatorOptions ext = 336783863;
@ -66,10 +68,55 @@ message InferenceCalculatorOptions {
// Load pre-compiled serialized binary cache to accelerate init process. // Load pre-compiled serialized binary cache to accelerate init process.
// Only available for OpenCL delegate on Android. // Only available for OpenCL delegate on Android.
// Kernel caching will only be enabled if this path is set. // Kernel caching will only be enabled if this path is set.
//
// NOTE: binary cache usage may be skipped if a valid serialized model,
// specified by "serialized_model_dir", exists.
//
// TODO: update to cached_kernel_dir
optional string cached_kernel_path = 2; optional string cached_kernel_path = 2;
// A directory to load a pre-compiled serialized model from and save it to,
// used to accelerate the init process.
//
// NOTE: available for OpenCL delegate on Android only when
// "use_advanced_gpu_api" is set to true and "model_token" is set
// properly.
//
// NOTE: serialized model takes precedence over binary cache
// specified by "cached_kernel_path", which still can be used if
// serialized model is invalid or missing.
optional string serialized_model_dir = 7;
// Unique token identifying the model. Used in conjunction with
// "serialized_model_dir". It is the caller's responsibility to ensure
// there is no clash of the tokens.
optional string model_token = 8;
// Encapsulated compilation/runtime tradeoffs.
enum InferenceUsage {
UNSPECIFIED = 0;
// InferenceRunner will be used only once. Therefore, it is important to
// minimize bootstrap time as well.
FAST_SINGLE_ANSWER = 1;
// Prefer maximizing the throughput. Same inference runner will be used
// repeatedly on different inputs.
SUSTAINED_SPEED = 2;
}
optional InferenceUsage usage = 5 [default = SUSTAINED_SPEED];
} }
// Android only. // Android only.
message Nnapi {} message Nnapi {
// Directory to store compilation cache. If unspecified, NNAPI will not
// try caching the compilation.
optional string cache_dir = 1;
// Unique token identifying the model. It is the caller's responsibility
// to ensure there is no clash of the tokens. If unspecified, NNAPI will
// not try caching the compilation.
optional string model_token = 2;
}
message Xnnpack { message Xnnpack {
// Number of threads for XNNPACK delegate. (By default, calculator tries // Number of threads for XNNPACK delegate. (By default, calculator tries
// to choose optimal number of threads depending on the device.) // to choose optimal number of threads depending on the device.)

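A sketch, with placeholder paths, token and stream names, of how the GPU serialization and usage fields documented above might appear in a node's options; it mirrors the text-proto style used by the tests in this change rather than a configuration taken from it.

#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/parse_text_proto.h"

// Hypothetical node configuration exercising delegate.gpu.serialized_model_dir,
// model_token and usage; every path, token and stream name is a placeholder.
mediapipe::CalculatorGraphConfig::Node MakeGpuInferenceNodeSketch() {
  return mediapipe::ParseTextProtoOrDie<mediapipe::CalculatorGraphConfig::Node>(
      R"pb(
        calculator: "InferenceCalculator"
        input_stream: "TENSORS:input_tensors"
        output_stream: "TENSORS:output_tensors"
        options {
          [mediapipe.InferenceCalculatorOptions.ext] {
            model_path: "model.tflite"
            delegate {
              gpu {
                use_advanced_gpu_api: true
                serialized_model_dir: "/data/local/tmp/mediapipe_cache"
                model_token: "example_model_v1"
                usage: FAST_SINGLE_ANSWER
              }
            }
          }
        }
      )pb");
}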

@ -50,11 +50,13 @@ int GetXnnpackDefaultNumThreads() {
// Returns number of threads to configure XNNPACK delegate with. // Returns number of threads to configure XNNPACK delegate with.
// Returns user provided value if specified. Otherwise, tries to choose optimal // Returns user provided value if specified. Otherwise, tries to choose optimal
// number of threads depending on the device. // number of threads depending on the device.
int GetXnnpackNumThreads(const mediapipe::InferenceCalculatorOptions& opts) { int GetXnnpackNumThreads(
const bool opts_has_delegate,
const mediapipe::InferenceCalculatorOptions::Delegate& opts_delegate) {
static constexpr int kDefaultNumThreads = -1; static constexpr int kDefaultNumThreads = -1;
if (opts.has_delegate() && opts.delegate().has_xnnpack() && if (opts_has_delegate && opts_delegate.has_xnnpack() &&
opts.delegate().xnnpack().num_threads() != kDefaultNumThreads) { opts_delegate.xnnpack().num_threads() != kDefaultNumThreads) {
return opts.delegate().xnnpack().num_threads(); return opts_delegate.xnnpack().num_threads();
} }
return GetXnnpackDefaultNumThreads(); return GetXnnpackDefaultNumThreads();
} }
@ -73,6 +75,7 @@ class InferenceCalculatorCpuImpl
private: private:
absl::Status LoadModel(CalculatorContext* cc); absl::Status LoadModel(CalculatorContext* cc);
absl::Status LoadDelegate(CalculatorContext* cc); absl::Status LoadDelegate(CalculatorContext* cc);
absl::Status LoadDelegateAndAllocateTensors(CalculatorContext* cc);
// TfLite requires us to keep the model alive as long as the interpreter is. // TfLite requires us to keep the model alive as long as the interpreter is.
Packet<TfLiteModelPtr> model_packet_; Packet<TfLiteModelPtr> model_packet_;
@ -91,8 +94,7 @@ absl::Status InferenceCalculatorCpuImpl::UpdateContract(
absl::Status InferenceCalculatorCpuImpl::Open(CalculatorContext* cc) { absl::Status InferenceCalculatorCpuImpl::Open(CalculatorContext* cc) {
MP_RETURN_IF_ERROR(LoadModel(cc)); MP_RETURN_IF_ERROR(LoadModel(cc));
MP_RETURN_IF_ERROR(LoadDelegate(cc)); return LoadDelegateAndAllocateTensors(cc);
return absl::OkStatus();
} }
absl::Status InferenceCalculatorCpuImpl::Process(CalculatorContext* cc) { absl::Status InferenceCalculatorCpuImpl::Process(CalculatorContext* cc) {
@ -156,34 +158,61 @@ absl::Status InferenceCalculatorCpuImpl::LoadModel(CalculatorContext* cc) {
cc->Options<mediapipe::InferenceCalculatorOptions>().cpu_num_thread()); cc->Options<mediapipe::InferenceCalculatorOptions>().cpu_num_thread());
#endif // __EMSCRIPTEN__ #endif // __EMSCRIPTEN__
return absl::OkStatus();
}
absl::Status InferenceCalculatorCpuImpl::LoadDelegateAndAllocateTensors(
CalculatorContext* cc) {
MP_RETURN_IF_ERROR(LoadDelegate(cc));
// AllocateTensors() can be called only after ModifyGraphWithDelegate.
RET_CHECK_EQ(interpreter_->AllocateTensors(), kTfLiteOk); RET_CHECK_EQ(interpreter_->AllocateTensors(), kTfLiteOk);
// TODO: Support quantized tensors. // TODO: Support quantized tensors.
CHECK(interpreter_->tensor(interpreter_->inputs()[0])->quantization.type != RET_CHECK_NE(
kTfLiteAffineQuantization); interpreter_->tensor(interpreter_->inputs()[0])->quantization.type,
kTfLiteAffineQuantization);
return absl::OkStatus(); return absl::OkStatus();
} }
absl::Status InferenceCalculatorCpuImpl::LoadDelegate(CalculatorContext* cc) { absl::Status InferenceCalculatorCpuImpl::LoadDelegate(CalculatorContext* cc) {
const auto& calculator_opts = const auto& calculator_opts =
cc->Options<mediapipe::InferenceCalculatorOptions>(); cc->Options<mediapipe::InferenceCalculatorOptions>();
if (calculator_opts.has_delegate() && auto opts_delegate = calculator_opts.delegate();
calculator_opts.delegate().has_tflite()) { if (!kDelegate(cc).IsEmpty()) {
mediapipe::InferenceCalculatorOptions::Delegate input_side_packet_delegate =
kDelegate(cc).Get();
CHECK(input_side_packet_delegate.has_tflite() ||
input_side_packet_delegate.has_xnnpack() ||
input_side_packet_delegate.has_nnapi() ||
input_side_packet_delegate.delegate_case() ==
mediapipe::InferenceCalculatorOptions::Delegate::DELEGATE_NOT_SET)
<< "inference_calculator_cpu only supports delegate input side packet "
<< "for TFLite, XNNPack and Nnapi";
opts_delegate.MergeFrom(input_side_packet_delegate);
}
const bool opts_has_delegate =
calculator_opts.has_delegate() || !kDelegate(cc).IsEmpty();
if (opts_has_delegate && opts_delegate.has_tflite()) {
// Default tflite inference requested - no need to modify graph. // Default tflite inference requested - no need to modify graph.
return absl::OkStatus(); return absl::OkStatus();
} }
#if defined(MEDIAPIPE_ANDROID) #if defined(MEDIAPIPE_ANDROID)
const bool nnapi_requested = calculator_opts.has_delegate() const bool nnapi_requested = opts_has_delegate ? opts_delegate.has_nnapi()
? calculator_opts.delegate().has_nnapi() : calculator_opts.use_nnapi();
: calculator_opts.use_nnapi();
if (nnapi_requested) { if (nnapi_requested) {
// Attempt to use NNAPI. // Attempt to use NNAPI.
// If not supported, the default CPU delegate will be created and used. // If not supported, the default CPU delegate will be created and used.
interpreter_->SetAllowFp16PrecisionForFp32(1); interpreter_->SetAllowFp16PrecisionForFp32(1);
delegate_ = TfLiteDelegatePtr(tflite::NnApiDelegate(), [](TfLiteDelegate*) { tflite::StatefulNnApiDelegate::Options options;
// No need to free according to tflite::NnApiDelegate() documentation. const auto& nnapi = opts_delegate.nnapi();
}); // Set up cache_dir and model_token for NNAPI compilation cache.
options.cache_dir =
nnapi.has_cache_dir() ? nnapi.cache_dir().c_str() : nullptr;
options.model_token =
nnapi.has_model_token() ? nnapi.model_token().c_str() : nullptr;
delegate_ = TfLiteDelegatePtr(new tflite::StatefulNnApiDelegate(options),
[](TfLiteDelegate*) {});
RET_CHECK_EQ(interpreter_->ModifyGraphWithDelegate(delegate_.get()), RET_CHECK_EQ(interpreter_->ModifyGraphWithDelegate(delegate_.get()),
kTfLiteOk); kTfLiteOk);
return absl::OkStatus(); return absl::OkStatus();
@ -193,13 +222,13 @@ absl::Status InferenceCalculatorCpuImpl::LoadDelegate(CalculatorContext* cc) {
#if defined(__EMSCRIPTEN__) #if defined(__EMSCRIPTEN__)
const bool use_xnnpack = true; const bool use_xnnpack = true;
#else #else
const bool use_xnnpack = calculator_opts.has_delegate() && const bool use_xnnpack = opts_has_delegate && opts_delegate.has_xnnpack();
calculator_opts.delegate().has_xnnpack();
#endif // defined(__EMSCRIPTEN__) #endif // defined(__EMSCRIPTEN__)
if (use_xnnpack) { if (use_xnnpack) {
TfLiteXNNPackDelegateOptions xnnpack_opts{}; TfLiteXNNPackDelegateOptions xnnpack_opts{};
xnnpack_opts.num_threads = GetXnnpackNumThreads(calculator_opts); xnnpack_opts.num_threads =
GetXnnpackNumThreads(opts_has_delegate, opts_delegate);
delegate_ = TfLiteDelegatePtr(TfLiteXNNPackDelegateCreate(&xnnpack_opts), delegate_ = TfLiteDelegatePtr(TfLiteXNNPackDelegateCreate(&xnnpack_opts),
&TfLiteXNNPackDelegateDelete); &TfLiteXNNPackDelegateDelete);
RET_CHECK_EQ(interpreter_->ModifyGraphWithDelegate(delegate_.get()), RET_CHECK_EQ(interpreter_->ModifyGraphWithDelegate(delegate_.get()),


@ -18,7 +18,9 @@
#include <vector> #include <vector>
#include "absl/memory/memory.h" #include "absl/memory/memory.h"
#include "absl/status/status.h"
#include "mediapipe/calculators/tensor/inference_calculator.h" #include "mediapipe/calculators/tensor/inference_calculator.h"
#include "mediapipe/framework/deps/file_path.h"
#include "mediapipe/util/tflite/config.h" #include "mediapipe/util/tflite/config.h"
#if MEDIAPIPE_TFLITE_GL_INFERENCE #if MEDIAPIPE_TFLITE_GL_INFERENCE
@ -48,10 +50,11 @@ class InferenceCalculatorGlImpl
absl::Status Close(CalculatorContext* cc) override; absl::Status Close(CalculatorContext* cc) override;
private: private:
absl::Status ReadKernelsFromFile(); absl::Status ReadGpuCaches();
absl::Status WriteKernelsToFile(); absl::Status SaveGpuCaches();
absl::Status LoadModel(CalculatorContext* cc); absl::Status LoadModel(CalculatorContext* cc);
absl::Status LoadDelegate(CalculatorContext* cc); absl::Status LoadDelegate(CalculatorContext* cc);
absl::Status LoadDelegateAndAllocateTensors(CalculatorContext* cc);
absl::Status InitTFLiteGPURunner(CalculatorContext* cc); absl::Status InitTFLiteGPURunner(CalculatorContext* cc);
// TfLite requires us to keep the model alive as long as the interpreter is. // TfLite requires us to keep the model alive as long as the interpreter is.
@ -65,6 +68,8 @@ class InferenceCalculatorGlImpl
bool allow_precision_loss_ = false; bool allow_precision_loss_ = false;
mediapipe::InferenceCalculatorOptions::Delegate::Gpu::Api mediapipe::InferenceCalculatorOptions::Delegate::Gpu::Api
tflite_gpu_runner_api_; tflite_gpu_runner_api_;
mediapipe::InferenceCalculatorOptions::Delegate::Gpu::InferenceUsage
tflite_gpu_runner_usage_;
#endif // MEDIAPIPE_TFLITE_GL_INFERENCE #endif // MEDIAPIPE_TFLITE_GL_INFERENCE
#if MEDIAPIPE_TFLITE_GPU_SUPPORTED #if MEDIAPIPE_TFLITE_GPU_SUPPORTED
@ -78,6 +83,8 @@ class InferenceCalculatorGlImpl
bool use_kernel_caching_ = false; bool use_kernel_caching_ = false;
std::string cached_kernel_filename_; std::string cached_kernel_filename_;
bool use_serialized_model_ = false;
std::string serialized_model_path_;
}; };
absl::Status InferenceCalculatorGlImpl::UpdateContract(CalculatorContract* cc) { absl::Status InferenceCalculatorGlImpl::UpdateContract(CalculatorContract* cc) {
@ -91,22 +98,43 @@ absl::Status InferenceCalculatorGlImpl::UpdateContract(CalculatorContract* cc) {
absl::Status InferenceCalculatorGlImpl::Open(CalculatorContext* cc) { absl::Status InferenceCalculatorGlImpl::Open(CalculatorContext* cc) {
const auto& options = cc->Options<::mediapipe::InferenceCalculatorOptions>(); const auto& options = cc->Options<::mediapipe::InferenceCalculatorOptions>();
use_advanced_gpu_api_ = options.has_delegate() && mediapipe::InferenceCalculatorOptions::Delegate delegate = options.delegate();
options.delegate().has_gpu() && if (!kDelegate(cc).IsEmpty()) {
options.delegate().gpu().use_advanced_gpu_api(); mediapipe::InferenceCalculatorOptions::Delegate input_side_packet_delegate =
allow_precision_loss_ = options.delegate().gpu().allow_precision_loss(); kDelegate(cc).Get();
tflite_gpu_runner_api_ = options.delegate().gpu().api(); CHECK(input_side_packet_delegate.has_gpu() ||
use_kernel_caching_ = use_advanced_gpu_api_ && input_side_packet_delegate.delegate_case() ==
options.delegate().gpu().has_cached_kernel_path(); mediapipe::InferenceCalculatorOptions::Delegate::DELEGATE_NOT_SET)
<< "inference_calculator_gl only supports delegate input side packet "
<< "for Gpu";
delegate.MergeFrom(input_side_packet_delegate);
}
const bool has_delegate = options.has_delegate() || !kDelegate(cc).IsEmpty();
use_advanced_gpu_api_ = has_delegate && delegate.has_gpu() &&
delegate.gpu().use_advanced_gpu_api();
allow_precision_loss_ = delegate.gpu().allow_precision_loss();
tflite_gpu_runner_api_ = delegate.gpu().api();
tflite_gpu_runner_usage_ = delegate.gpu().usage();
use_kernel_caching_ =
use_advanced_gpu_api_ && delegate.gpu().has_cached_kernel_path();
use_serialized_model_ = use_advanced_gpu_api_ &&
delegate.gpu().has_serialized_model_dir() &&
delegate.gpu().has_model_token();
use_gpu_delegate_ = !use_advanced_gpu_api_; use_gpu_delegate_ = !use_advanced_gpu_api_;
if (use_kernel_caching_) { if (use_kernel_caching_) {
#ifdef MEDIAPIPE_ANDROID #ifdef MEDIAPIPE_ANDROID
cached_kernel_filename_ = options.delegate().gpu().cached_kernel_path() + cached_kernel_filename_ = delegate.gpu().cached_kernel_path() +
mediapipe::File::Basename(options.model_path()) + mediapipe::File::Basename(options.model_path()) +
".ker"; ".ker";
#endif // MEDIAPIPE_ANDROID #endif // MEDIAPIPE_ANDROID
} }
if (use_serialized_model_) {
#ifdef MEDIAPIPE_ANDROID
serialized_model_path_ = mediapipe::file::JoinPath(
delegate.gpu().serialized_model_dir(), delegate.gpu().model_token());
#endif // MEDIAPIPE_ANDROID
}
// When use_advanced_gpu_api_, model loading is handled in InitTFLiteGPURunner // When use_advanced_gpu_api_, model loading is handled in InitTFLiteGPURunner
// for everything. // for everything.
@ -115,10 +143,11 @@ absl::Status InferenceCalculatorGlImpl::Open(CalculatorContext* cc) {
} }
MP_RETURN_IF_ERROR(gpu_helper_.Open(cc)); MP_RETURN_IF_ERROR(gpu_helper_.Open(cc));
MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext([this, MP_RETURN_IF_ERROR(
&cc]() -> ::mediapipe::Status { gpu_helper_.RunInGlContext([this, &cc]() -> ::mediapipe::Status {
return use_advanced_gpu_api_ ? InitTFLiteGPURunner(cc) : LoadDelegate(cc); return use_advanced_gpu_api_ ? InitTFLiteGPURunner(cc)
})); : LoadDelegateAndAllocateTensors(cc);
}));
return absl::OkStatus(); return absl::OkStatus();
} }
@ -193,7 +222,7 @@ absl::Status InferenceCalculatorGlImpl::Process(CalculatorContext* cc) {
return absl::OkStatus(); return absl::OkStatus();
} }
absl::Status InferenceCalculatorGlImpl::WriteKernelsToFile() { absl::Status InferenceCalculatorGlImpl::SaveGpuCaches() {
#ifdef MEDIAPIPE_ANDROID #ifdef MEDIAPIPE_ANDROID
if (use_kernel_caching_) { if (use_kernel_caching_) {
// Save kernel file. // Save kernel file.
@ -203,12 +232,22 @@ absl::Status InferenceCalculatorGlImpl::WriteKernelsToFile() {
MP_RETURN_IF_ERROR( MP_RETURN_IF_ERROR(
mediapipe::file::SetContents(cached_kernel_filename_, cache_str)); mediapipe::file::SetContents(cached_kernel_filename_, cache_str));
} }
if (use_serialized_model_) {
// Save serialized model file.
ASSIGN_OR_RETURN(std::vector<uint8_t> serialized_model_vec,
tflite_gpu_runner_->GetSerializedModel());
absl::string_view serialized_model(
reinterpret_cast<char*>(serialized_model_vec.data()),
serialized_model_vec.size());
MP_RETURN_IF_ERROR(
mediapipe::file::SetContents(serialized_model_path_, serialized_model));
}
#endif // MEDIAPIPE_ANDROID #endif // MEDIAPIPE_ANDROID
return absl::OkStatus(); return absl::OkStatus();
} }
absl::Status InferenceCalculatorGlImpl::Close(CalculatorContext* cc) { absl::Status InferenceCalculatorGlImpl::Close(CalculatorContext* cc) {
MP_RETURN_IF_ERROR(WriteKernelsToFile()); MP_RETURN_IF_ERROR(SaveGpuCaches());
if (use_gpu_delegate_) { if (use_gpu_delegate_) {
MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext([this]() -> Status { MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext([this]() -> Status {
gpu_buffers_in_.clear(); gpu_buffers_in_.clear();
@ -222,17 +261,24 @@ absl::Status InferenceCalculatorGlImpl::Close(CalculatorContext* cc) {
return absl::OkStatus(); return absl::OkStatus();
} }
absl::Status InferenceCalculatorGlImpl::ReadKernelsFromFile() { absl::Status InferenceCalculatorGlImpl::ReadGpuCaches() {
#ifdef MEDIAPIPE_ANDROID #ifdef MEDIAPIPE_ANDROID
if (use_kernel_caching_) { if (use_kernel_caching_ && File::Exists(cached_kernel_filename_)) {
// Load pre-compiled kernel file. // Load pre-compiled kernel file.
if (mediapipe::File::Exists(cached_kernel_filename_)) { std::string cache_str;
std::string cache_str; MP_RETURN_IF_ERROR(
MP_RETURN_IF_ERROR( mediapipe::file::GetContents(cached_kernel_filename_, &cache_str));
mediapipe::file::GetContents(cached_kernel_filename_, &cache_str)); std::vector<uint8_t> cache_vec(cache_str.begin(), cache_str.end());
std::vector<uint8_t> cache_vec(cache_str.begin(), cache_str.end()); tflite_gpu_runner_->SetSerializedBinaryCache(std::move(cache_vec));
tflite_gpu_runner_->SetSerializedBinaryCache(std::move(cache_vec)); }
} if (use_serialized_model_ && File::Exists(serialized_model_path_)) {
// Load serialized model file.
std::string serialized_model_str;
MP_RETURN_IF_ERROR(
file::GetContents(serialized_model_path_, &serialized_model_str));
std::vector<uint8_t> serialized_model_vec(serialized_model_str.begin(),
serialized_model_str.end());
tflite_gpu_runner_->SetSerializedModel(std::move(serialized_model_vec));
} }
#endif // MEDIAPIPE_ANDROID #endif // MEDIAPIPE_ANDROID
return absl::OkStatus(); return absl::OkStatus();
@ -253,9 +299,27 @@ absl::Status InferenceCalculatorGlImpl::InitTFLiteGPURunner(
: tflite::gpu::InferencePriority::MAX_PRECISION; : tflite::gpu::InferencePriority::MAX_PRECISION;
options.priority2 = tflite::gpu::InferencePriority::AUTO; options.priority2 = tflite::gpu::InferencePriority::AUTO;
options.priority3 = tflite::gpu::InferencePriority::AUTO; options.priority3 = tflite::gpu::InferencePriority::AUTO;
options.usage = tflite::gpu::InferenceUsage::SUSTAINED_SPEED; switch (tflite_gpu_runner_usage_) {
case mediapipe::InferenceCalculatorOptions::Delegate::Gpu::
FAST_SINGLE_ANSWER: {
options.usage = tflite::gpu::InferenceUsage::FAST_SINGLE_ANSWER;
break;
}
case mediapipe::InferenceCalculatorOptions::Delegate::Gpu::
SUSTAINED_SPEED: {
options.usage = tflite::gpu::InferenceUsage::SUSTAINED_SPEED;
break;
}
case mediapipe::InferenceCalculatorOptions::Delegate::Gpu::UNSPECIFIED: {
return absl::InternalError("inference usage needs to be specified.");
}
}
tflite_gpu_runner_ = std::make_unique<tflite::gpu::TFLiteGPURunner>(options); tflite_gpu_runner_ = std::make_unique<tflite::gpu::TFLiteGPURunner>(options);
switch (tflite_gpu_runner_api_) { switch (tflite_gpu_runner_api_) {
case mediapipe::InferenceCalculatorOptions::Delegate::Gpu::ANY: {
// Do not need to force any specific API.
break;
}
case mediapipe::InferenceCalculatorOptions::Delegate::Gpu::OPENGL: { case mediapipe::InferenceCalculatorOptions::Delegate::Gpu::OPENGL: {
tflite_gpu_runner_->ForceOpenGL(); tflite_gpu_runner_->ForceOpenGL();
break; break;
@ -264,10 +328,6 @@ absl::Status InferenceCalculatorGlImpl::InitTFLiteGPURunner(
tflite_gpu_runner_->ForceOpenCL(); tflite_gpu_runner_->ForceOpenCL();
break; break;
} }
case mediapipe::InferenceCalculatorOptions::Delegate::Gpu::ANY: {
// Do not need to force any specific API.
break;
}
} }
MP_RETURN_IF_ERROR(tflite_gpu_runner_->InitializeWithModel( MP_RETURN_IF_ERROR(tflite_gpu_runner_->InitializeWithModel(
model, op_resolver, /*allow_quant_ops=*/true)); model, op_resolver, /*allow_quant_ops=*/true));
@ -282,7 +342,7 @@ absl::Status InferenceCalculatorGlImpl::InitTFLiteGPURunner(
tflite_gpu_runner_->GetOutputShapes()[i].c}; tflite_gpu_runner_->GetOutputShapes()[i].c};
} }
MP_RETURN_IF_ERROR(ReadKernelsFromFile()); MP_RETURN_IF_ERROR(ReadGpuCaches());
MP_RETURN_IF_ERROR(tflite_gpu_runner_->Build()); MP_RETURN_IF_ERROR(tflite_gpu_runner_->Build());
@ -306,11 +366,19 @@ absl::Status InferenceCalculatorGlImpl::LoadModel(CalculatorContext* cc) {
cc->Options<mediapipe::InferenceCalculatorOptions>().cpu_num_thread()); cc->Options<mediapipe::InferenceCalculatorOptions>().cpu_num_thread());
#endif // __EMSCRIPTEN__ #endif // __EMSCRIPTEN__
return absl::OkStatus();
}
absl::Status InferenceCalculatorGlImpl::LoadDelegateAndAllocateTensors(
CalculatorContext* cc) {
MP_RETURN_IF_ERROR(LoadDelegate(cc));
// AllocateTensors() can be called only after ModifyGraphWithDelegate.
RET_CHECK_EQ(interpreter_->AllocateTensors(), kTfLiteOk); RET_CHECK_EQ(interpreter_->AllocateTensors(), kTfLiteOk);
// TODO: Support quantized tensors. // TODO: Support quantized tensors.
CHECK(interpreter_->tensor(interpreter_->inputs()[0])->quantization.type != RET_CHECK_NE(
kTfLiteAffineQuantization); interpreter_->tensor(interpreter_->inputs()[0])->quantization.type,
kTfLiteAffineQuantization);
return absl::OkStatus(); return absl::OkStatus();
} }


@ -92,6 +92,7 @@ class InferenceCalculatorMetalImpl
private: private:
absl::Status LoadModel(CalculatorContext* cc); absl::Status LoadModel(CalculatorContext* cc);
absl::Status LoadDelegate(CalculatorContext* cc); absl::Status LoadDelegate(CalculatorContext* cc);
absl::Status LoadDelegateAndAllocateTensors(CalculatorContext* cc);
// TfLite requires us to keep the model alive as long as the interpreter is. // TfLite requires us to keep the model alive as long as the interpreter is.
Packet<TfLiteModelPtr> model_packet_; Packet<TfLiteModelPtr> model_packet_;
@ -130,8 +131,7 @@ absl::Status InferenceCalculatorMetalImpl::Open(CalculatorContext* cc) {
gpu_helper_ = [[MPPMetalHelper alloc] initWithCalculatorContext:cc]; gpu_helper_ = [[MPPMetalHelper alloc] initWithCalculatorContext:cc];
RET_CHECK(gpu_helper_); RET_CHECK(gpu_helper_);
MP_RETURN_IF_ERROR(LoadDelegate(cc)); return LoadDelegateAndAllocateTensors(cc);
return absl::OkStatus();
} }
absl::Status InferenceCalculatorMetalImpl::Process(CalculatorContext* cc) { absl::Status InferenceCalculatorMetalImpl::Process(CalculatorContext* cc) {
@ -212,11 +212,19 @@ absl::Status InferenceCalculatorMetalImpl::LoadModel(CalculatorContext* cc) {
interpreter_->SetNumThreads( interpreter_->SetNumThreads(
cc->Options<mediapipe::InferenceCalculatorOptions>().cpu_num_thread()); cc->Options<mediapipe::InferenceCalculatorOptions>().cpu_num_thread());
return absl::OkStatus();
}
absl::Status InferenceCalculatorMetalImpl::LoadDelegateAndAllocateTensors(
CalculatorContext* cc) {
MP_RETURN_IF_ERROR(LoadDelegate(cc));
// AllocateTensors() can be called only after ModifyGraphWithDelegate.
RET_CHECK_EQ(interpreter_->AllocateTensors(), kTfLiteOk); RET_CHECK_EQ(interpreter_->AllocateTensors(), kTfLiteOk);
// TODO: Support quantized tensors. // TODO: Support quantized tensors.
CHECK(interpreter_->tensor(interpreter_->inputs()[0])->quantization.type != RET_CHECK_NE(
kTfLiteAffineQuantization); interpreter_->tensor(interpreter_->inputs()[0])->quantization.type,
kTfLiteAffineQuantization);
return absl::OkStatus(); return absl::OkStatus();
} }
@ -236,6 +244,7 @@ absl::Status InferenceCalculatorMetalImpl::LoadDelegate(CalculatorContext* cc) {
TfLiteDelegatePtr(TFLGpuDelegateCreate(&options), &TFLGpuDelegateDelete); TfLiteDelegatePtr(TFLGpuDelegateCreate(&options), &TFLGpuDelegateDelete);
RET_CHECK_EQ(interpreter_->ModifyGraphWithDelegate(delegate_.get()), RET_CHECK_EQ(interpreter_->ModifyGraphWithDelegate(delegate_.get()),
kTfLiteOk); kTfLiteOk);
id<MTLDevice> device = gpu_helper_.mtlDevice; id<MTLDevice> device = gpu_helper_.mtlDevice;
// Get input image sizes. // Get input image sizes.


@ -0,0 +1,101 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/calculators/tensor/landmarks_to_tensor_calculator.h"
#include <memory>
#include "mediapipe/calculators/tensor/landmarks_to_tensor_calculator.pb.h"
#include "mediapipe/framework/api2/node.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/formats/tensor.h"
#include "mediapipe/framework/port/ret_check.h"
namespace mediapipe {
namespace api2 {
namespace {
float GetAttribute(
const Landmark& landmark,
const LandmarksToTensorCalculatorOptions::Attribute& attribute) {
switch (attribute) {
case LandmarksToTensorCalculatorOptions::X:
return landmark.x();
case LandmarksToTensorCalculatorOptions::Y:
return landmark.y();
case LandmarksToTensorCalculatorOptions::Z:
return landmark.z();
case LandmarksToTensorCalculatorOptions::VISIBILITY:
return landmark.visibility();
case LandmarksToTensorCalculatorOptions::PRESENCE:
return landmark.presence();
}
}
} // namespace
class LandmarksToTensorCalculatorImpl
: public NodeImpl<LandmarksToTensorCalculator> {
public:
absl::Status Open(CalculatorContext* cc) override {
options_ = cc->Options<LandmarksToTensorCalculatorOptions>();
RET_CHECK(options_.attributes_size() > 0)
<< "At least one attribute must be specified";
return absl::OkStatus();
}
absl::Status Process(CalculatorContext* cc) override {
if (kInLandmarkList(cc).IsEmpty()) {
return absl::OkStatus();
}
// Get input landmarks.
const auto& in_landmarks = *kInLandmarkList(cc);
// Determine tensor shape.
const int n_landmarks = in_landmarks.landmark_size();
const int n_attributes = options_.attributes_size();
auto tensor_shape = options_.flatten()
? Tensor::Shape{1, n_landmarks * n_attributes}
: Tensor::Shape{1, n_landmarks, n_attributes};
// Create an empty tensor.
Tensor tensor(Tensor::ElementType::kFloat32, tensor_shape);
auto* buffer = tensor.GetCpuWriteView().buffer<float>();
// Fill tensor with landmark attributes.
for (int i = 0; i < n_landmarks; ++i) {
for (int j = 0; j < n_attributes; ++j) {
buffer[i * n_attributes + j] =
GetAttribute(in_landmarks.landmark(i), options_.attributes(j));
}
}
// Return vector with a single tensor.
auto result = std::vector<Tensor>();
result.push_back(std::move(tensor));
kOutTensors(cc).Send(std::move(result));
return absl::OkStatus();
}
private:
LandmarksToTensorCalculatorOptions options_;
};
MEDIAPIPE_NODE_IMPLEMENTATION(LandmarksToTensorCalculatorImpl);
} // namespace api2
} // namespace mediapipe


@ -0,0 +1,61 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MEDIAPIPE_CALCULATORS_LANDMARKS_TO_TENSOR_CALCULATOR_H_
#define MEDIAPIPE_CALCULATORS_LANDMARKS_TO_TENSOR_CALCULATOR_H_
#include <memory>
#include "mediapipe/framework/api2/node.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/formats/tensor.h"
namespace mediapipe {
namespace api2 {
// A calculator for converting landmarks into a Tensor.
//
// Input:
// LANDMARKS - LandmarkList
// Landmarks to be converted into a Tensor.
//
// Output:
// TENSORS - std::vector<Tensor>
// Vector containing a single Tensor populated with landmark values.
//
// Example:
// node {
// calculator: "LandmarksToTensorCalculator"
// input_stream: "LANDMARKS:landmarks"
// output_stream: "TENSORS:tensors"
// options: {
// [mediapipe.LandmarksToTensorCalculatorOptions.ext] {
// attributes: [X, Y, Z, VISIBILITY, PRESENCE]
// # flatten: true
// }
// }
// }
class LandmarksToTensorCalculator : public NodeIntf {
public:
static constexpr Input<LandmarkList>::Optional kInLandmarkList{"LANDMARKS"};
static constexpr Output<std::vector<Tensor>> kOutTensors{"TENSORS"};
MEDIAPIPE_NODE_INTERFACE(LandmarksToTensorCalculator, kInLandmarkList,
kOutTensors);
};
} // namespace api2
} // namespace mediapipe
#endif // MEDIAPIPE_CALCULATORS_LANDMARKS_TO_TENSOR_CALCULATOR_H_


@ -0,0 +1,44 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// The option proto for the LandmarksToTensorCalculator.
syntax = "proto2";
package mediapipe;
import "mediapipe/framework/calculator.proto";
message LandmarksToTensorCalculatorOptions {
extend mediapipe.CalculatorOptions {
optional LandmarksToTensorCalculatorOptions ext = 394810235;
}
enum Attribute {
X = 0;
Y = 1;
Z = 2;
VISIBILITY = 3;
PRESENCE = 4;
}
// Subset and order of attributes as they should appear in the output Tensor.
// Should contain at least one attribute.
repeated Attribute attributes = 1;
// Collapses all landmark attributes into a one dimensional tensor (i.e.
// switches from (n_landmarks, n_attributes) to (n_landmarks * n_attributes)
// representation).
optional bool flatten = 2 [default = false];
}


@ -0,0 +1,155 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <vector>
#include "absl/memory/memory.h"
#include "mediapipe/calculators/tensor/landmarks_to_tensor_calculator.pb.h"
#include "mediapipe/framework/calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_runner.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/formats/tensor.h"
#include "mediapipe/framework/port/gmock.h"
#include "mediapipe/framework/port/gtest.h"
#include "mediapipe/framework/port/parse_text_proto.h"
#include "mediapipe/framework/port/status_matchers.h"
namespace mediapipe {
namespace {
using ::mediapipe::ParseTextProtoOrDie;
using Node = ::mediapipe::CalculatorGraphConfig::Node;
void RunLandmarks(mediapipe::CalculatorRunner* runner,
const LandmarkList& landmarks) {
runner->MutableInputs()
->Tag("LANDMARKS")
.packets.push_back(MakePacket<LandmarkList>(landmarks).At(Timestamp(0)));
MP_ASSERT_OK(runner->Run());
}
const Tensor& GetOutputTensor(mediapipe::CalculatorRunner* runner) {
const auto& output_packets = runner->Outputs().Tag("TENSORS").packets;
EXPECT_EQ(output_packets.size(), 1);
const auto& tensors = output_packets[0].Get<std::vector<Tensor>>();
EXPECT_EQ(tensors.size(), 1);
return tensors[0];
}
void ValidateTensor(const Tensor& tensor,
const std::vector<int>& expected_shape,
const std::vector<float>& expected_values) {
EXPECT_EQ(tensor.shape().dims, expected_shape);
EXPECT_EQ(tensor.shape().num_elements(), expected_values.size());
auto* tensor_buffer = tensor.GetCpuReadView().buffer<float>();
const std::vector<float> tensor_values(
tensor_buffer, tensor_buffer + tensor.shape().num_elements());
EXPECT_THAT(tensor_values, testing::ElementsAreArray(expected_values));
}
TEST(LandmarksToTensorCalculatorTest, AllAttributes) {
mediapipe::CalculatorRunner runner(ParseTextProtoOrDie<Node>(R"pb(
calculator: "LandmarksToTensorCalculator"
input_stream: "LANDMARKS:landmarks"
output_stream: "TENSORS:tensors"
options: {
[mediapipe.LandmarksToTensorCalculatorOptions.ext] {
attributes: [ X, Y, Z, VISIBILITY, PRESENCE ]
}
}
)pb"));
LandmarkList landmarks;
auto* landmark1 = landmarks.add_landmark();
landmark1->set_x(1.0f);
landmark1->set_y(2.0f);
landmark1->set_z(3.0f);
landmark1->set_visibility(4.0f);
landmark1->set_presence(5.0f);
auto* landmark2 = landmarks.add_landmark();
landmark2->set_x(6.0f);
landmark2->set_y(7.0f);
landmark2->set_z(8.0f);
landmark2->set_visibility(9.0f);
landmark2->set_presence(10.0f);
RunLandmarks(&runner, landmarks);
const auto& tensor = GetOutputTensor(&runner);
ValidateTensor(tensor, /*expected_shape=*/{1, 2, 5}, /*expected_values=*/
{1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f});
}
TEST(LandmarksToTensorCalculatorTest, XYZAttributes) {
mediapipe::CalculatorRunner runner(ParseTextProtoOrDie<Node>(R"pb(
calculator: "LandmarksToTensorCalculator"
input_stream: "LANDMARKS:landmarks"
output_stream: "TENSORS:tensors"
options: {
[mediapipe.LandmarksToTensorCalculatorOptions.ext] {
attributes: [ X, Y, Z ]
}
}
)pb"));
LandmarkList landmarks;
auto* landmark1 = landmarks.add_landmark();
landmark1->set_x(1.0f);
landmark1->set_y(2.0f);
landmark1->set_z(3.0f);
auto* landmark2 = landmarks.add_landmark();
landmark2->set_x(6.0f);
landmark2->set_y(7.0f);
landmark2->set_z(8.0f);
RunLandmarks(&runner, landmarks);
const auto& tensor = GetOutputTensor(&runner);
ValidateTensor(tensor, /*expected_shape=*/{1, 2, 3}, /*expected_values=*/
{1.0f, 2.0f, 3.0f, 6.0f, 7.0f, 8.0f});
}
TEST(LandmarksToTensorCalculatorTest, XYZAttributes_Flatten) {
mediapipe::CalculatorRunner runner(ParseTextProtoOrDie<Node>(R"pb(
calculator: "LandmarksToTensorCalculator"
input_stream: "LANDMARKS:landmarks"
output_stream: "TENSORS:tensors"
options: {
[mediapipe.LandmarksToTensorCalculatorOptions.ext] {
attributes: [ X, Y, Z ]
flatten: true
}
}
)pb"));
LandmarkList landmarks;
auto* landmark1 = landmarks.add_landmark();
landmark1->set_x(1.0f);
landmark1->set_y(2.0f);
landmark1->set_z(3.0f);
auto* landmark2 = landmarks.add_landmark();
landmark2->set_x(6.0f);
landmark2->set_y(7.0f);
landmark2->set_z(8.0f);
RunLandmarks(&runner, landmarks);
const auto& tensor = GetOutputTensor(&runner);
ValidateTensor(tensor, /*expected_shape=*/{1, 6}, /*expected_values=*/
{1.0f, 2.0f, 3.0f, 6.0f, 7.0f, 8.0f});
}
} // namespace
} // namespace mediapipe


@ -517,8 +517,8 @@ absl::Status TensorConverterCalculator::InitGpu(CalculatorContext* cc) {
uniform sampler2D frame; uniform sampler2D frame;
void main() { void main() {
$1 // flip vec2 coord = $1
vec4 pixel = texture2D(frame, sample_coordinate); vec4 pixel = texture2D(frame, coord);
$2 // normalize [-1,1] $2 // normalize [-1,1]
fragColor.r = pixel.r; // r channel fragColor.r = pixel.r; // r channel
$3 // g & b channels $3 // g & b channels
@ -526,8 +526,9 @@ absl::Status TensorConverterCalculator::InitGpu(CalculatorContext* cc) {
})", })",
/*$0=*/single_channel ? "vec1" : "vec4", /*$0=*/single_channel ? "vec1" : "vec4",
/*$1=*/ /*$1=*/
flip_vertically_ ? "sample_coordinate.y = 1.0 - sample_coordinate.y;" flip_vertically_
: "", ? "vec2(sample_coordinate.x, 1.0 - sample_coordinate.y);"
: "sample_coordinate;",
/*$2=*/output_range_.has_value() /*$2=*/output_range_.has_value()
? absl::Substitute("pixel = pixel * float($0) + float($1);", ? absl::Substitute("pixel = pixel * float($0) + float($1);",
(output_range_->second - output_range_->first), (output_range_->second - output_range_->first),


@ -670,7 +670,8 @@ absl::Status TensorsToDetectionsCalculator::ConvertToDetections(
detection_boxes[box_offset + 2], detection_boxes[box_offset + 3], detection_boxes[box_offset + 2], detection_boxes[box_offset + 3],
detection_scores[i], detection_classes[i], options_.flip_vertically()); detection_scores[i], detection_classes[i], options_.flip_vertically());
const auto& bbox = detection.location_data().relative_bounding_box(); const auto& bbox = detection.location_data().relative_bounding_box();
if (bbox.width() < 0 || bbox.height() < 0) { if (bbox.width() < 0 || bbox.height() < 0 || std::isnan(bbox.width()) ||
std::isnan(bbox.height())) {
// Decoded detection boxes could have negative values for width/height due // Decoded detection boxes could have negative values for width/height due
// to model prediction. Filter out those boxes since some downstream // to model prediction. Filter out those boxes since some downstream
// calculators may assume non-negative values. (b/171391719) // calculators may assume non-negative values. (b/171391719)


@ -138,7 +138,6 @@ using ::tflite::gpu::gl::GlShader;
// } // }
// } // }
// //
// Currently only OpenGLES 3.1 and CPU backends supported.
// TODO Refactor and add support for other backends/platforms. // TODO Refactor and add support for other backends/platforms.
// //
class TensorsToSegmentationCalculator : public CalculatorBase { class TensorsToSegmentationCalculator : public CalculatorBase {


@ -88,6 +88,13 @@ proto_library(
deps = ["//mediapipe/framework:calculator_proto"], deps = ["//mediapipe/framework:calculator_proto"],
) )
proto_library(
name = "tensor_to_vector_string_calculator_options_proto",
srcs = ["tensor_to_vector_string_calculator_options.proto"],
visibility = ["//visibility:public"],
deps = ["//mediapipe/framework:calculator_proto"],
)
proto_library( proto_library(
name = "unpack_media_sequence_calculator_proto", name = "unpack_media_sequence_calculator_proto",
srcs = ["unpack_media_sequence_calculator.proto"], srcs = ["unpack_media_sequence_calculator.proto"],
@ -257,6 +264,14 @@ mediapipe_cc_proto_library(
deps = [":tensor_to_vector_float_calculator_options_proto"], deps = [":tensor_to_vector_float_calculator_options_proto"],
) )
mediapipe_cc_proto_library(
name = "tensor_to_vector_string_calculator_options_cc_proto",
srcs = ["tensor_to_vector_string_calculator_options.proto"],
cc_deps = ["//mediapipe/framework:calculator_cc_proto"],
visibility = ["//visibility:public"],
deps = [":tensor_to_vector_string_calculator_options_proto"],
)
mediapipe_cc_proto_library( mediapipe_cc_proto_library(
name = "unpack_media_sequence_calculator_cc_proto", name = "unpack_media_sequence_calculator_cc_proto",
srcs = ["unpack_media_sequence_calculator.proto"], srcs = ["unpack_media_sequence_calculator.proto"],
@ -572,9 +587,21 @@ cc_library(
"//mediapipe/framework/port:ret_check", "//mediapipe/framework/port:ret_check",
] + select({ ] + select({
"//conditions:default": [ "//conditions:default": [
"//mediapipe/framework/port:file_helpers",
], ],
}), "//mediapipe:android": [],
}) + select(
{
"//conditions:default": [
],
},
) + select(
{
"//conditions:default": [
],
"//mediapipe:android": [
],
},
),
alwayslink = 1, alwayslink = 1,
) )
@ -694,6 +721,26 @@ cc_library(
alwayslink = 1, alwayslink = 1,
) )
cc_library(
name = "tensor_to_vector_string_calculator",
srcs = ["tensor_to_vector_string_calculator.cc"],
visibility = ["//visibility:public"],
deps = [
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/port:status",
"//mediapipe/framework/port:ret_check",
":tensor_to_vector_string_calculator_options_cc_proto",
] + select({
"//conditions:default": [
"@org_tensorflow//tensorflow/core:framework",
],
"//mediapipe:android": [
"@org_tensorflow//tensorflow/core:portable_tensorflow_lib_lite",
],
}),
alwayslink = 1,
)
cc_library( cc_library(
name = "unpack_media_sequence_calculator", name = "unpack_media_sequence_calculator",
srcs = ["unpack_media_sequence_calculator.cc"], srcs = ["unpack_media_sequence_calculator.cc"],
@ -864,6 +911,7 @@ cc_test(
"//mediapipe/calculators/tensorflow:pack_media_sequence_calculator_cc_proto", "//mediapipe/calculators/tensorflow:pack_media_sequence_calculator_cc_proto",
"//mediapipe/framework:calculator_framework", "//mediapipe/framework:calculator_framework",
"//mediapipe/framework:calculator_runner", "//mediapipe/framework:calculator_runner",
"//mediapipe/framework:timestamp",
"//mediapipe/framework/formats:detection_cc_proto", "//mediapipe/framework/formats:detection_cc_proto",
"//mediapipe/framework/formats:image_frame", "//mediapipe/framework/formats:image_frame",
"//mediapipe/framework/formats:image_frame_opencv", "//mediapipe/framework/formats:image_frame_opencv",
@ -1058,6 +1106,20 @@ cc_test(
], ],
) )
cc_test(
name = "tensor_to_vector_string_calculator_test",
srcs = ["tensor_to_vector_string_calculator_test.cc"],
deps = [
":tensor_to_vector_string_calculator",
":tensor_to_vector_string_calculator_options_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework:calculator_runner",
"//mediapipe/framework/port:gtest_main",
"@org_tensorflow//tensorflow/core:framework",
"@org_tensorflow//tensorflow/core:protos_all_cc",
],
)
cc_test( cc_test(
name = "unpack_media_sequence_calculator_test", name = "unpack_media_sequence_calculator_test",
srcs = ["unpack_media_sequence_calculator_test.cc"], srcs = ["unpack_media_sequence_calculator_test.cc"],


@ -37,6 +37,7 @@ const char kSequenceExampleTag[] = "SEQUENCE_EXAMPLE";
const char kImageTag[] = "IMAGE"; const char kImageTag[] = "IMAGE";
const char kFloatContextFeaturePrefixTag[] = "FLOAT_CONTEXT_FEATURE_"; const char kFloatContextFeaturePrefixTag[] = "FLOAT_CONTEXT_FEATURE_";
const char kFloatFeaturePrefixTag[] = "FLOAT_FEATURE_"; const char kFloatFeaturePrefixTag[] = "FLOAT_FEATURE_";
const char kBytesFeaturePrefixTag[] = "BYTES_FEATURE_";
const char kForwardFlowEncodedTag[] = "FORWARD_FLOW_ENCODED"; const char kForwardFlowEncodedTag[] = "FORWARD_FLOW_ENCODED";
const char kBBoxTag[] = "BBOX"; const char kBBoxTag[] = "BBOX";
const char kKeypointsTag[] = "KEYPOINTS"; const char kKeypointsTag[] = "KEYPOINTS";
@ -153,6 +154,9 @@ class PackMediaSequenceCalculator : public CalculatorBase {
if (absl::StartsWith(tag, kFloatFeaturePrefixTag)) { if (absl::StartsWith(tag, kFloatFeaturePrefixTag)) {
cc->Inputs().Tag(tag).Set<std::vector<float>>(); cc->Inputs().Tag(tag).Set<std::vector<float>>();
} }
if (absl::StartsWith(tag, kBytesFeaturePrefixTag)) {
cc->Inputs().Tag(tag).Set<std::vector<std::string>>();
}
} }
CHECK(cc->Outputs().HasTag(kSequenceExampleTag) || CHECK(cc->Outputs().HasTag(kSequenceExampleTag) ||
@ -231,6 +235,13 @@ class PackMediaSequenceCalculator : public CalculatorBase {
mpms::ClearFeatureFloats(key, sequence_.get()); mpms::ClearFeatureFloats(key, sequence_.get());
mpms::ClearFeatureTimestamp(key, sequence_.get()); mpms::ClearFeatureTimestamp(key, sequence_.get());
} }
if (absl::StartsWith(tag, kBytesFeaturePrefixTag)) {
std::string key = tag.substr(sizeof(kBytesFeaturePrefixTag) /
sizeof(*kBytesFeaturePrefixTag) -
1);
mpms::ClearFeatureBytes(key, sequence_.get());
mpms::ClearFeatureTimestamp(key, sequence_.get());
}
if (absl::StartsWith(tag, kKeypointsTag)) { if (absl::StartsWith(tag, kKeypointsTag)) {
std::string key = std::string key =
tag.substr(sizeof(kKeypointsTag) / sizeof(*kKeypointsTag) - 1); tag.substr(sizeof(kKeypointsTag) / sizeof(*kKeypointsTag) - 1);
@ -243,11 +254,6 @@ class PackMediaSequenceCalculator : public CalculatorBase {
} }
} }
if (cc->Outputs().HasTag(kSequenceExampleTag)) {
cc->Outputs()
.Tag(kSequenceExampleTag)
.SetNextTimestampBound(Timestamp::Max());
}
return absl::OkStatus(); return absl::OkStatus();
} }
@ -305,7 +311,9 @@ class PackMediaSequenceCalculator : public CalculatorBase {
if (cc->Outputs().HasTag(kSequenceExampleTag)) { if (cc->Outputs().HasTag(kSequenceExampleTag)) {
cc->Outputs() cc->Outputs()
.Tag(kSequenceExampleTag) .Tag(kSequenceExampleTag)
.Add(sequence_.release(), Timestamp::PostStream()); .Add(sequence_.release(), options.output_as_zero_timestamp()
? Timestamp(0ll)
: Timestamp::PostStream());
} }
sequence_.reset(); sequence_.reset();
@ -408,6 +416,17 @@ class PackMediaSequenceCalculator : public CalculatorBase {
cc->Inputs().Tag(tag).Get<std::vector<float>>(), cc->Inputs().Tag(tag).Get<std::vector<float>>(),
sequence_.get()); sequence_.get());
} }
if (absl::StartsWith(tag, kBytesFeaturePrefixTag) &&
!cc->Inputs().Tag(tag).IsEmpty()) {
std::string key = tag.substr(sizeof(kBytesFeaturePrefixTag) /
sizeof(*kBytesFeaturePrefixTag) -
1);
mpms::AddFeatureTimestamp(key, cc->InputTimestamp().Value(),
sequence_.get());
mpms::AddFeatureBytes(
key, cc->Inputs().Tag(tag).Get<std::vector<std::string>>(),
sequence_.get());
}
if (absl::StartsWith(tag, kBBoxTag) && !cc->Inputs().Tag(tag).IsEmpty()) { if (absl::StartsWith(tag, kBBoxTag) && !cc->Inputs().Tag(tag).IsEmpty()) {
std::string key = ""; std::string key = "";
if (tag != kBBoxTag) { if (tag != kBBoxTag) {

View File: mediapipe/calculators/tensorflow/pack_media_sequence_calculator.proto

@ -65,4 +65,7 @@ message PackMediaSequenceCalculatorOptions {
// If true, will return an error status if an output sequence would be too // If true, will return an error status if an output sequence would be too
// many bytes to serialize. // many bytes to serialize.
optional bool skip_large_sequences = 7 [default = true]; optional bool skip_large_sequences = 7 [default = true];
// If true, the SequenceExample is output at timestamp 0; otherwise it is
// output at Timestamp::PostStream().
optional bool output_as_zero_timestamp = 8 [default = false];
} }
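For context, a minimal graph node exercising the new flag might look like the sketch below. The stream names are illustrative, and the options extension is assumed to follow the usual MediaPipe `ext` pattern (as declared explicitly for TensorToVectorStringCalculatorOptions later in this change):

    node {
      calculator: "PackMediaSequenceCalculator"
      input_side_packet: "SEQUENCE_EXAMPLE:input_sequence"
      input_stream: "BYTES_FEATURE_CAPTION:caption_bytes"  # illustrative feature stream
      output_stream: "SEQUENCE_EXAMPLE:packed_sequence"
      options {
        [mediapipe.PackMediaSequenceCalculatorOptions.ext] {
          output_as_zero_timestamp: true  # emit at Timestamp(0) instead of PostStream
        }
      }
    }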

View File: mediapipe/calculators/tensorflow/pack_media_sequence_calculator_test.cc

@ -29,6 +29,7 @@
#include "mediapipe/framework/port/gtest.h" #include "mediapipe/framework/port/gtest.h"
#include "mediapipe/framework/port/opencv_imgcodecs_inc.h" #include "mediapipe/framework/port/opencv_imgcodecs_inc.h"
#include "mediapipe/framework/port/status_matchers.h" #include "mediapipe/framework/port/status_matchers.h"
#include "mediapipe/framework/timestamp.h"
#include "mediapipe/util/sequence/media_sequence.h" #include "mediapipe/util/sequence/media_sequence.h"
#include "tensorflow/core/example/example.pb.h" #include "tensorflow/core/example/example.pb.h"
#include "tensorflow/core/example/feature.pb.h" #include "tensorflow/core/example/feature.pb.h"
@ -39,12 +40,33 @@ namespace {
namespace tf = ::tensorflow; namespace tf = ::tensorflow;
namespace mpms = mediapipe::mediasequence; namespace mpms = mediapipe::mediasequence;
constexpr char kBboxTag[] = "BBOX";
constexpr char kEncodedMediaStartTimestampTag[] =
"ENCODED_MEDIA_START_TIMESTAMP";
constexpr char kEncodedMediaTag[] = "ENCODED_MEDIA";
constexpr char kClassSegmentationTag[] = "CLASS_SEGMENTATION";
constexpr char kKeypointsTestTag[] = "KEYPOINTS_TEST";
constexpr char kBboxPredictedTag[] = "BBOX_PREDICTED";
constexpr char kAudioOtherTag[] = "AUDIO_OTHER";
constexpr char kAudioTestTag[] = "AUDIO_TEST";
constexpr char kBytesFeatureOtherTag[] = "BYTES_FEATURE_OTHER";
constexpr char kBytesFeatureTestTag[] = "BYTES_FEATURE_TEST";
constexpr char kForwardFlowEncodedTag[] = "FORWARD_FLOW_ENCODED";
constexpr char kFloatContextFeatureOtherTag[] = "FLOAT_CONTEXT_FEATURE_OTHER";
constexpr char kFloatContextFeatureTestTag[] = "FLOAT_CONTEXT_FEATURE_TEST";
constexpr char kFloatFeatureOtherTag[] = "FLOAT_FEATURE_OTHER";
constexpr char kFloatFeatureTestTag[] = "FLOAT_FEATURE_TEST";
constexpr char kImagePrefixTag[] = "IMAGE_PREFIX";
constexpr char kSequenceExampleTag[] = "SEQUENCE_EXAMPLE";
constexpr char kImageTag[] = "IMAGE";
class PackMediaSequenceCalculatorTest : public ::testing::Test { class PackMediaSequenceCalculatorTest : public ::testing::Test {
protected: protected:
void SetUpCalculator(const std::vector<std::string>& input_streams, void SetUpCalculator(const std::vector<std::string>& input_streams,
const tf::Features& features, const tf::Features& features,
bool output_only_if_all_present, const bool output_only_if_all_present,
bool replace_instead_of_append) { const bool replace_instead_of_append,
const bool output_as_zero_timestamp = false) {
CalculatorGraphConfig::Node config; CalculatorGraphConfig::Node config;
config.set_calculator("PackMediaSequenceCalculator"); config.set_calculator("PackMediaSequenceCalculator");
config.add_input_side_packet("SEQUENCE_EXAMPLE:input_sequence"); config.add_input_side_packet("SEQUENCE_EXAMPLE:input_sequence");
@ -57,6 +79,7 @@ class PackMediaSequenceCalculatorTest : public ::testing::Test {
*options->mutable_context_feature_map() = features; *options->mutable_context_feature_map() = features;
options->set_output_only_if_all_present(output_only_if_all_present); options->set_output_only_if_all_present(output_only_if_all_present);
options->set_replace_data_instead_of_append(replace_instead_of_append); options->set_replace_data_instead_of_append(replace_instead_of_append);
options->set_output_as_zero_timestamp(output_as_zero_timestamp);
runner_ = ::absl::make_unique<CalculatorRunner>(config); runner_ = ::absl::make_unique<CalculatorRunner>(config);
} }
@ -80,17 +103,17 @@ TEST_F(PackMediaSequenceCalculatorTest, PacksTwoImages) {
for (int i = 0; i < num_images; ++i) { for (int i = 0; i < num_images; ++i) {
auto image_ptr = auto image_ptr =
::absl::make_unique<OpenCvImageEncoderCalculatorResults>(encoded_image); ::absl::make_unique<OpenCvImageEncoderCalculatorResults>(encoded_image);
runner_->MutableInputs()->Tag("IMAGE").packets.push_back( runner_->MutableInputs()->Tag(kImageTag).packets.push_back(
Adopt(image_ptr.release()).At(Timestamp(i))); Adopt(image_ptr.release()).At(Timestamp(i)));
} }
runner_->MutableSidePackets()->Tag("SEQUENCE_EXAMPLE") = runner_->MutableSidePackets()->Tag(kSequenceExampleTag) =
Adopt(input_sequence.release()); Adopt(input_sequence.release());
MP_ASSERT_OK(runner_->Run()); MP_ASSERT_OK(runner_->Run());
const std::vector<Packet>& output_packets = const std::vector<Packet>& output_packets =
runner_->Outputs().Tag("SEQUENCE_EXAMPLE").packets; runner_->Outputs().Tag(kSequenceExampleTag).packets;
ASSERT_EQ(1, output_packets.size()); ASSERT_EQ(1, output_packets.size());
const tf::SequenceExample& output_sequence = const tf::SequenceExample& output_sequence =
output_packets[0].Get<tf::SequenceExample>(); output_packets[0].Get<tf::SequenceExample>();
@ -124,17 +147,17 @@ TEST_F(PackMediaSequenceCalculatorTest, PacksTwoPrefixedImages) {
auto image_ptr = auto image_ptr =
::absl::make_unique<OpenCvImageEncoderCalculatorResults>(encoded_image); ::absl::make_unique<OpenCvImageEncoderCalculatorResults>(encoded_image);
runner_->MutableInputs() runner_->MutableInputs()
->Tag("IMAGE_PREFIX") ->Tag(kImagePrefixTag)
.packets.push_back(Adopt(image_ptr.release()).At(Timestamp(i))); .packets.push_back(Adopt(image_ptr.release()).At(Timestamp(i)));
} }
runner_->MutableSidePackets()->Tag("SEQUENCE_EXAMPLE") = runner_->MutableSidePackets()->Tag(kSequenceExampleTag) =
Adopt(input_sequence.release()); Adopt(input_sequence.release());
MP_ASSERT_OK(runner_->Run()); MP_ASSERT_OK(runner_->Run());
const std::vector<Packet>& output_packets = const std::vector<Packet>& output_packets =
runner_->Outputs().Tag("SEQUENCE_EXAMPLE").packets; runner_->Outputs().Tag(kSequenceExampleTag).packets;
ASSERT_EQ(1, output_packets.size()); ASSERT_EQ(1, output_packets.size());
const tf::SequenceExample& output_sequence = const tf::SequenceExample& output_sequence =
output_packets[0].Get<tf::SequenceExample>(); output_packets[0].Get<tf::SequenceExample>();
@ -158,21 +181,21 @@ TEST_F(PackMediaSequenceCalculatorTest, PacksTwoFloatLists) {
for (int i = 0; i < num_timesteps; ++i) { for (int i = 0; i < num_timesteps; ++i) {
auto vf_ptr = ::absl::make_unique<std::vector<float>>(2, 2 << i); auto vf_ptr = ::absl::make_unique<std::vector<float>>(2, 2 << i);
runner_->MutableInputs() runner_->MutableInputs()
->Tag("FLOAT_FEATURE_TEST") ->Tag(kFloatFeatureTestTag)
.packets.push_back(Adopt(vf_ptr.release()).At(Timestamp(i))); .packets.push_back(Adopt(vf_ptr.release()).At(Timestamp(i)));
vf_ptr = ::absl::make_unique<std::vector<float>>(2, 2 << i); vf_ptr = ::absl::make_unique<std::vector<float>>(2, 2 << i);
runner_->MutableInputs() runner_->MutableInputs()
->Tag("FLOAT_FEATURE_OTHER") ->Tag(kFloatFeatureOtherTag)
.packets.push_back(Adopt(vf_ptr.release()).At(Timestamp(i))); .packets.push_back(Adopt(vf_ptr.release()).At(Timestamp(i)));
} }
runner_->MutableSidePackets()->Tag("SEQUENCE_EXAMPLE") = runner_->MutableSidePackets()->Tag(kSequenceExampleTag) =
Adopt(input_sequence.release()); Adopt(input_sequence.release());
MP_ASSERT_OK(runner_->Run()); MP_ASSERT_OK(runner_->Run());
const std::vector<Packet>& output_packets = const std::vector<Packet>& output_packets =
runner_->Outputs().Tag("SEQUENCE_EXAMPLE").packets; runner_->Outputs().Tag(kSequenceExampleTag).packets;
ASSERT_EQ(1, output_packets.size()); ASSERT_EQ(1, output_packets.size());
const tf::SequenceExample& output_sequence = const tf::SequenceExample& output_sequence =
output_packets[0].Get<tf::SequenceExample>(); output_packets[0].Get<tf::SequenceExample>();
@ -194,20 +217,65 @@ TEST_F(PackMediaSequenceCalculatorTest, PacksTwoFloatLists) {
} }
} }
TEST_F(PackMediaSequenceCalculatorTest, PacksTwoContextFloatLists) { TEST_F(PackMediaSequenceCalculatorTest, PacksTwoBytesLists) {
SetUpCalculator( SetUpCalculator({"BYTES_FEATURE_TEST:test", "BYTES_FEATURE_OTHER:test2"}, {},
{"FLOAT_CONTEXT_FEATURE_TEST:test", "FLOAT_CONTEXT_FEATURE_OTHER:test2"}, false, true);
{}, false, true); auto input_sequence = ::absl::make_unique<tf::SequenceExample>();
auto input_sequence = absl::make_unique<tf::SequenceExample>();
auto vf_ptr = absl::make_unique<std::vector<float>>(2, 3); int num_timesteps = 2;
runner_->MutableInputs() for (int i = 0; i < num_timesteps; ++i) {
->Tag("FLOAT_CONTEXT_FEATURE_TEST") auto vs_ptr = ::absl::make_unique<std::vector<std::string>>(
.packets.push_back(Adopt(vf_ptr.release()).At(Timestamp::PostStream())); 2, absl::StrCat("foo", 2 << i));
vf_ptr = absl::make_unique<std::vector<float>>(2, 4); runner_->MutableInputs()
runner_->MutableInputs() ->Tag(kBytesFeatureTestTag)
->Tag("FLOAT_CONTEXT_FEATURE_OTHER") .packets.push_back(Adopt(vs_ptr.release()).At(Timestamp(i)));
.packets.push_back(Adopt(vf_ptr.release()).At(Timestamp::PostStream())); vs_ptr = ::absl::make_unique<std::vector<std::string>>(
2, absl::StrCat("bar", 2 << i));
runner_->MutableInputs()
->Tag(kBytesFeatureOtherTag)
.packets.push_back(Adopt(vs_ptr.release()).At(Timestamp(i)));
}
runner_->MutableSidePackets()->Tag(kSequenceExampleTag) =
Adopt(input_sequence.release());
MP_ASSERT_OK(runner_->Run());
const std::vector<Packet>& output_packets =
runner_->Outputs().Tag(kSequenceExampleTag).packets;
ASSERT_EQ(1, output_packets.size());
const tf::SequenceExample& output_sequence =
output_packets[0].Get<tf::SequenceExample>();
ASSERT_EQ(num_timesteps,
mpms::GetFeatureTimestampSize("TEST", output_sequence));
ASSERT_EQ(num_timesteps, mpms::GetFeatureBytesSize("TEST", output_sequence));
ASSERT_EQ(num_timesteps,
mpms::GetFeatureTimestampSize("OTHER", output_sequence));
ASSERT_EQ(num_timesteps, mpms::GetFeatureBytesSize("OTHER", output_sequence));
for (int i = 0; i < num_timesteps; ++i) {
ASSERT_EQ(i, mpms::GetFeatureTimestampAt("TEST", output_sequence, i));
ASSERT_THAT(mpms::GetFeatureBytesAt("TEST", output_sequence, i),
::testing::ElementsAreArray(
std::vector<std::string>(2, absl::StrCat("foo", 2 << i))));
ASSERT_EQ(i, mpms::GetFeatureTimestampAt("OTHER", output_sequence, i));
ASSERT_THAT(mpms::GetFeatureBytesAt("OTHER", output_sequence, i),
::testing::ElementsAreArray(
std::vector<std::string>(2, absl::StrCat("bar", 2 << i))));
}
}
TEST_F(PackMediaSequenceCalculatorTest, OutputAsZeroTimestamp) {
SetUpCalculator({"FLOAT_FEATURE_TEST:test"}, {}, false, true, true);
auto input_sequence = ::absl::make_unique<tf::SequenceExample>();
int num_timesteps = 2;
for (int i = 0; i < num_timesteps; ++i) {
auto vf_ptr = ::absl::make_unique<std::vector<float>>(2, 2 << i);
runner_->MutableInputs()
->Tag("FLOAT_FEATURE_TEST")
.packets.push_back(Adopt(vf_ptr.release()).At(Timestamp(i)));
}
runner_->MutableSidePackets()->Tag("SEQUENCE_EXAMPLE") = runner_->MutableSidePackets()->Tag("SEQUENCE_EXAMPLE") =
Adopt(input_sequence.release()); Adopt(input_sequence.release());
@ -217,6 +285,32 @@ TEST_F(PackMediaSequenceCalculatorTest, PacksTwoContextFloatLists) {
const std::vector<Packet>& output_packets = const std::vector<Packet>& output_packets =
runner_->Outputs().Tag("SEQUENCE_EXAMPLE").packets; runner_->Outputs().Tag("SEQUENCE_EXAMPLE").packets;
ASSERT_EQ(1, output_packets.size()); ASSERT_EQ(1, output_packets.size());
EXPECT_EQ(output_packets[0].Timestamp().Value(), 0ll);
}
TEST_F(PackMediaSequenceCalculatorTest, PacksTwoContextFloatLists) {
SetUpCalculator(
{"FLOAT_CONTEXT_FEATURE_TEST:test", "FLOAT_CONTEXT_FEATURE_OTHER:test2"},
{}, false, true);
auto input_sequence = absl::make_unique<tf::SequenceExample>();
auto vf_ptr = absl::make_unique<std::vector<float>>(2, 3);
runner_->MutableInputs()
->Tag(kFloatContextFeatureTestTag)
.packets.push_back(Adopt(vf_ptr.release()).At(Timestamp::PostStream()));
vf_ptr = absl::make_unique<std::vector<float>>(2, 4);
runner_->MutableInputs()
->Tag(kFloatContextFeatureOtherTag)
.packets.push_back(Adopt(vf_ptr.release()).At(Timestamp::PostStream()));
runner_->MutableSidePackets()->Tag(kSequenceExampleTag) =
Adopt(input_sequence.release());
MP_ASSERT_OK(runner_->Run());
const std::vector<Packet>& output_packets =
runner_->Outputs().Tag(kSequenceExampleTag).packets;
ASSERT_EQ(1, output_packets.size());
const tf::SequenceExample& output_sequence = const tf::SequenceExample& output_sequence =
output_packets[0].Get<tf::SequenceExample>(); output_packets[0].Get<tf::SequenceExample>();
@ -233,7 +327,7 @@ TEST_F(PackMediaSequenceCalculatorTest, PacksAdditionalContext) {
SetUpCalculator({"IMAGE:images"}, context, false, true); SetUpCalculator({"IMAGE:images"}, context, false, true);
auto input_sequence = ::absl::make_unique<tf::SequenceExample>(); auto input_sequence = ::absl::make_unique<tf::SequenceExample>();
runner_->MutableSidePackets()->Tag("SEQUENCE_EXAMPLE") = runner_->MutableSidePackets()->Tag(kSequenceExampleTag) =
Adopt(input_sequence.release()); Adopt(input_sequence.release());
cv::Mat image(2, 3, CV_8UC3, cv::Scalar(0, 0, 255)); cv::Mat image(2, 3, CV_8UC3, cv::Scalar(0, 0, 255));
std::vector<uchar> bytes; std::vector<uchar> bytes;
@ -242,13 +336,13 @@ TEST_F(PackMediaSequenceCalculatorTest, PacksAdditionalContext) {
encoded_image.set_encoded_image(bytes.data(), bytes.size()); encoded_image.set_encoded_image(bytes.data(), bytes.size());
auto image_ptr = auto image_ptr =
::absl::make_unique<OpenCvImageEncoderCalculatorResults>(encoded_image); ::absl::make_unique<OpenCvImageEncoderCalculatorResults>(encoded_image);
runner_->MutableInputs()->Tag("IMAGE").packets.push_back( runner_->MutableInputs()->Tag(kImageTag).packets.push_back(
Adopt(image_ptr.release()).At(Timestamp(0))); Adopt(image_ptr.release()).At(Timestamp(0)));
MP_ASSERT_OK(runner_->Run()); MP_ASSERT_OK(runner_->Run());
const std::vector<Packet>& output_packets = const std::vector<Packet>& output_packets =
runner_->Outputs().Tag("SEQUENCE_EXAMPLE").packets; runner_->Outputs().Tag(kSequenceExampleTag).packets;
ASSERT_EQ(1, output_packets.size()); ASSERT_EQ(1, output_packets.size());
const tf::SequenceExample& output_sequence = const tf::SequenceExample& output_sequence =
output_packets[0].Get<tf::SequenceExample>(); output_packets[0].Get<tf::SequenceExample>();
@ -281,17 +375,17 @@ TEST_F(PackMediaSequenceCalculatorTest, PacksTwoForwardFlowEncodeds) {
auto flow_ptr = auto flow_ptr =
::absl::make_unique<OpenCvImageEncoderCalculatorResults>(encoded_flow); ::absl::make_unique<OpenCvImageEncoderCalculatorResults>(encoded_flow);
runner_->MutableInputs() runner_->MutableInputs()
->Tag("FORWARD_FLOW_ENCODED") ->Tag(kForwardFlowEncodedTag)
.packets.push_back(Adopt(flow_ptr.release()).At(Timestamp(i))); .packets.push_back(Adopt(flow_ptr.release()).At(Timestamp(i)));
} }
runner_->MutableSidePackets()->Tag("SEQUENCE_EXAMPLE") = runner_->MutableSidePackets()->Tag(kSequenceExampleTag) =
Adopt(input_sequence.release()); Adopt(input_sequence.release());
MP_ASSERT_OK(runner_->Run()); MP_ASSERT_OK(runner_->Run());
const std::vector<Packet>& output_packets = const std::vector<Packet>& output_packets =
runner_->Outputs().Tag("SEQUENCE_EXAMPLE").packets; runner_->Outputs().Tag(kSequenceExampleTag).packets;
ASSERT_EQ(1, output_packets.size()); ASSERT_EQ(1, output_packets.size());
const tf::SequenceExample& output_sequence = const tf::SequenceExample& output_sequence =
output_packets[0].Get<tf::SequenceExample>(); output_packets[0].Get<tf::SequenceExample>();
@ -345,17 +439,17 @@ TEST_F(PackMediaSequenceCalculatorTest, PacksTwoBBoxDetections) {
detections->push_back(detection); detections->push_back(detection);
runner_->MutableInputs() runner_->MutableInputs()
->Tag("BBOX_PREDICTED") ->Tag(kBboxPredictedTag)
.packets.push_back(Adopt(detections.release()).At(Timestamp(i))); .packets.push_back(Adopt(detections.release()).At(Timestamp(i)));
} }
runner_->MutableSidePackets()->Tag("SEQUENCE_EXAMPLE") = runner_->MutableSidePackets()->Tag(kSequenceExampleTag) =
Adopt(input_sequence.release()); Adopt(input_sequence.release());
MP_ASSERT_OK(runner_->Run()); MP_ASSERT_OK(runner_->Run());
const std::vector<Packet>& output_packets = const std::vector<Packet>& output_packets =
runner_->Outputs().Tag("SEQUENCE_EXAMPLE").packets; runner_->Outputs().Tag(kSequenceExampleTag).packets;
ASSERT_EQ(1, output_packets.size()); ASSERT_EQ(1, output_packets.size());
const tf::SequenceExample& output_sequence = const tf::SequenceExample& output_sequence =
output_packets[0].Get<tf::SequenceExample>(); output_packets[0].Get<tf::SequenceExample>();
@ -424,11 +518,11 @@ TEST_F(PackMediaSequenceCalculatorTest, PacksBBoxWithoutImageDims) {
detections->push_back(detection); detections->push_back(detection);
runner_->MutableInputs() runner_->MutableInputs()
->Tag("BBOX_PREDICTED") ->Tag(kBboxPredictedTag)
.packets.push_back(Adopt(detections.release()).At(Timestamp(i))); .packets.push_back(Adopt(detections.release()).At(Timestamp(i)));
} }
runner_->MutableSidePackets()->Tag("SEQUENCE_EXAMPLE") = runner_->MutableSidePackets()->Tag(kSequenceExampleTag) =
Adopt(input_sequence.release()); Adopt(input_sequence.release());
auto status = runner_->Run(); auto status = runner_->Run();
@ -472,7 +566,7 @@ TEST_F(PackMediaSequenceCalculatorTest, PacksBBoxWithImages) {
detections->push_back(detection); detections->push_back(detection);
runner_->MutableInputs() runner_->MutableInputs()
->Tag("BBOX_PREDICTED") ->Tag(kBboxPredictedTag)
.packets.push_back(Adopt(detections.release()).At(Timestamp(i))); .packets.push_back(Adopt(detections.release()).At(Timestamp(i)));
} }
cv::Mat image(height, width, CV_8UC3, cv::Scalar(0, 0, 255)); cv::Mat image(height, width, CV_8UC3, cv::Scalar(0, 0, 255));
@ -487,16 +581,16 @@ TEST_F(PackMediaSequenceCalculatorTest, PacksBBoxWithImages) {
for (int i = 0; i < num_images; ++i) { for (int i = 0; i < num_images; ++i) {
auto image_ptr = auto image_ptr =
::absl::make_unique<OpenCvImageEncoderCalculatorResults>(encoded_image); ::absl::make_unique<OpenCvImageEncoderCalculatorResults>(encoded_image);
runner_->MutableInputs()->Tag("IMAGE").packets.push_back( runner_->MutableInputs()->Tag(kImageTag).packets.push_back(
Adopt(image_ptr.release()).At(Timestamp(i))); Adopt(image_ptr.release()).At(Timestamp(i)));
} }
runner_->MutableSidePackets()->Tag("SEQUENCE_EXAMPLE") = runner_->MutableSidePackets()->Tag(kSequenceExampleTag) =
Adopt(input_sequence.release()); Adopt(input_sequence.release());
MP_ASSERT_OK(runner_->Run()); MP_ASSERT_OK(runner_->Run());
const std::vector<Packet>& output_packets = const std::vector<Packet>& output_packets =
runner_->Outputs().Tag("SEQUENCE_EXAMPLE").packets; runner_->Outputs().Tag(kSequenceExampleTag).packets;
ASSERT_EQ(1, output_packets.size()); ASSERT_EQ(1, output_packets.size());
const tf::SequenceExample& output_sequence = const tf::SequenceExample& output_sequence =
output_packets[0].Get<tf::SequenceExample>(); output_packets[0].Get<tf::SequenceExample>();
@ -538,18 +632,18 @@ TEST_F(PackMediaSequenceCalculatorTest, PacksTwoKeypoints) {
absl::flat_hash_map<std::string, std::vector<std::pair<float, float>>> absl::flat_hash_map<std::string, std::vector<std::pair<float, float>>>
points = {{"HEAD", {{0.1, 0.2}, {0.3, 0.4}}}, {"TAIL", {{0.5, 0.6}}}}; points = {{"HEAD", {{0.1, 0.2}, {0.3, 0.4}}}, {"TAIL", {{0.5, 0.6}}}};
runner_->MutableInputs() runner_->MutableInputs()
->Tag("KEYPOINTS_TEST") ->Tag(kKeypointsTestTag)
.packets.push_back(PointToForeign(&points).At(Timestamp(0))); .packets.push_back(PointToForeign(&points).At(Timestamp(0)));
runner_->MutableInputs() runner_->MutableInputs()
->Tag("KEYPOINTS_TEST") ->Tag(kKeypointsTestTag)
.packets.push_back(PointToForeign(&points).At(Timestamp(1))); .packets.push_back(PointToForeign(&points).At(Timestamp(1)));
runner_->MutableSidePackets()->Tag("SEQUENCE_EXAMPLE") = runner_->MutableSidePackets()->Tag(kSequenceExampleTag) =
Adopt(input_sequence.release()); Adopt(input_sequence.release());
MP_ASSERT_OK(runner_->Run()); MP_ASSERT_OK(runner_->Run());
const std::vector<Packet>& output_packets = const std::vector<Packet>& output_packets =
runner_->Outputs().Tag("SEQUENCE_EXAMPLE").packets; runner_->Outputs().Tag(kSequenceExampleTag).packets;
ASSERT_EQ(1, output_packets.size()); ASSERT_EQ(1, output_packets.size());
const tf::SequenceExample& output_sequence = const tf::SequenceExample& output_sequence =
output_packets[0].Get<tf::SequenceExample>(); output_packets[0].Get<tf::SequenceExample>();
@ -589,17 +683,17 @@ TEST_F(PackMediaSequenceCalculatorTest, PacksTwoMaskDetections) {
detections->push_back(detection); detections->push_back(detection);
runner_->MutableInputs() runner_->MutableInputs()
->Tag("CLASS_SEGMENTATION") ->Tag(kClassSegmentationTag)
.packets.push_back(Adopt(detections.release()).At(Timestamp(i))); .packets.push_back(Adopt(detections.release()).At(Timestamp(i)));
} }
runner_->MutableSidePackets()->Tag("SEQUENCE_EXAMPLE") = runner_->MutableSidePackets()->Tag(kSequenceExampleTag) =
Adopt(input_sequence.release()); Adopt(input_sequence.release());
MP_ASSERT_OK(runner_->Run()); MP_ASSERT_OK(runner_->Run());
const std::vector<Packet>& output_packets = const std::vector<Packet>& output_packets =
runner_->Outputs().Tag("SEQUENCE_EXAMPLE").packets; runner_->Outputs().Tag(kSequenceExampleTag).packets;
ASSERT_EQ(1, output_packets.size()); ASSERT_EQ(1, output_packets.size());
const tf::SequenceExample& output_sequence = const tf::SequenceExample& output_sequence =
output_packets[0].Get<tf::SequenceExample>(); output_packets[0].Get<tf::SequenceExample>();
@ -638,17 +732,17 @@ TEST_F(PackMediaSequenceCalculatorTest, MissingStreamOK) {
auto flow_ptr = auto flow_ptr =
::absl::make_unique<OpenCvImageEncoderCalculatorResults>(encoded_flow); ::absl::make_unique<OpenCvImageEncoderCalculatorResults>(encoded_flow);
runner_->MutableInputs() runner_->MutableInputs()
->Tag("FORWARD_FLOW_ENCODED") ->Tag(kForwardFlowEncodedTag)
.packets.push_back(Adopt(flow_ptr.release()).At(Timestamp(i))); .packets.push_back(Adopt(flow_ptr.release()).At(Timestamp(i)));
} }
runner_->MutableSidePackets()->Tag("SEQUENCE_EXAMPLE") = runner_->MutableSidePackets()->Tag(kSequenceExampleTag) =
Adopt(input_sequence.release()); Adopt(input_sequence.release());
MP_ASSERT_OK(runner_->Run()); MP_ASSERT_OK(runner_->Run());
const std::vector<Packet>& output_packets = const std::vector<Packet>& output_packets =
runner_->Outputs().Tag("SEQUENCE_EXAMPLE").packets; runner_->Outputs().Tag(kSequenceExampleTag).packets;
ASSERT_EQ(1, output_packets.size()); ASSERT_EQ(1, output_packets.size());
const tf::SequenceExample& output_sequence = const tf::SequenceExample& output_sequence =
output_packets[0].Get<tf::SequenceExample>(); output_packets[0].Get<tf::SequenceExample>();
@ -684,11 +778,11 @@ TEST_F(PackMediaSequenceCalculatorTest, MissingStreamNotOK) {
auto flow_ptr = auto flow_ptr =
::absl::make_unique<OpenCvImageEncoderCalculatorResults>(encoded_flow); ::absl::make_unique<OpenCvImageEncoderCalculatorResults>(encoded_flow);
runner_->MutableInputs() runner_->MutableInputs()
->Tag("FORWARD_FLOW_ENCODED") ->Tag(kForwardFlowEncodedTag)
.packets.push_back(Adopt(flow_ptr.release()).At(Timestamp(i))); .packets.push_back(Adopt(flow_ptr.release()).At(Timestamp(i)));
} }
runner_->MutableSidePackets()->Tag("SEQUENCE_EXAMPLE") = runner_->MutableSidePackets()->Tag(kSequenceExampleTag) =
Adopt(input_sequence.release()); Adopt(input_sequence.release());
absl::Status status = runner_->Run(); absl::Status status = runner_->Run();
@ -705,13 +799,13 @@ TEST_F(PackMediaSequenceCalculatorTest, TestReplacingImages) {
mpms::AddImageTimestamp(1, input_sequence.get()); mpms::AddImageTimestamp(1, input_sequence.get());
mpms::AddImageTimestamp(2, input_sequence.get()); mpms::AddImageTimestamp(2, input_sequence.get());
runner_->MutableSidePackets()->Tag("SEQUENCE_EXAMPLE") = runner_->MutableSidePackets()->Tag(kSequenceExampleTag) =
Adopt(input_sequence.release()); Adopt(input_sequence.release());
MP_ASSERT_OK(runner_->Run()); MP_ASSERT_OK(runner_->Run());
const std::vector<Packet>& output_packets = const std::vector<Packet>& output_packets =
runner_->Outputs().Tag("SEQUENCE_EXAMPLE").packets; runner_->Outputs().Tag(kSequenceExampleTag).packets;
ASSERT_EQ(1, output_packets.size()); ASSERT_EQ(1, output_packets.size());
const tf::SequenceExample& output_sequence = const tf::SequenceExample& output_sequence =
output_packets[0].Get<tf::SequenceExample>(); output_packets[0].Get<tf::SequenceExample>();
@ -731,13 +825,13 @@ TEST_F(PackMediaSequenceCalculatorTest, TestReplacingFlowImages) {
mpms::AddForwardFlowTimestamp(1, input_sequence.get()); mpms::AddForwardFlowTimestamp(1, input_sequence.get());
mpms::AddForwardFlowTimestamp(2, input_sequence.get()); mpms::AddForwardFlowTimestamp(2, input_sequence.get());
runner_->MutableSidePackets()->Tag("SEQUENCE_EXAMPLE") = runner_->MutableSidePackets()->Tag(kSequenceExampleTag) =
Adopt(input_sequence.release()); Adopt(input_sequence.release());
MP_ASSERT_OK(runner_->Run()); MP_ASSERT_OK(runner_->Run());
const std::vector<Packet>& output_packets = const std::vector<Packet>& output_packets =
runner_->Outputs().Tag("SEQUENCE_EXAMPLE").packets; runner_->Outputs().Tag(kSequenceExampleTag).packets;
ASSERT_EQ(1, output_packets.size()); ASSERT_EQ(1, output_packets.size());
const tf::SequenceExample& output_sequence = const tf::SequenceExample& output_sequence =
output_packets[0].Get<tf::SequenceExample>(); output_packets[0].Get<tf::SequenceExample>();
@ -768,13 +862,52 @@ TEST_F(PackMediaSequenceCalculatorTest, TestReplacingFloatVectors) {
mpms::GetFeatureTimestampSize("OTHER", *input_sequence)); mpms::GetFeatureTimestampSize("OTHER", *input_sequence));
ASSERT_EQ(num_timesteps, ASSERT_EQ(num_timesteps,
mpms::GetFeatureFloatsSize("OTHER", *input_sequence)); mpms::GetFeatureFloatsSize("OTHER", *input_sequence));
runner_->MutableSidePackets()->Tag("SEQUENCE_EXAMPLE") = runner_->MutableSidePackets()->Tag(kSequenceExampleTag) =
Adopt(input_sequence.release()); Adopt(input_sequence.release());
MP_ASSERT_OK(runner_->Run()); MP_ASSERT_OK(runner_->Run());
const std::vector<Packet>& output_packets = const std::vector<Packet>& output_packets =
runner_->Outputs().Tag("SEQUENCE_EXAMPLE").packets; runner_->Outputs().Tag(kSequenceExampleTag).packets;
ASSERT_EQ(1, output_packets.size());
const tf::SequenceExample& output_sequence =
output_packets[0].Get<tf::SequenceExample>();
ASSERT_EQ(0, mpms::GetFeatureTimestampSize("TEST", output_sequence));
ASSERT_EQ(0, mpms::GetFeatureFloatsSize("TEST", output_sequence));
ASSERT_EQ(0, mpms::GetFeatureTimestampSize("OTHER", output_sequence));
ASSERT_EQ(0, mpms::GetFeatureFloatsSize("OTHER", output_sequence));
}
TEST_F(PackMediaSequenceCalculatorTest, TestReplacingBytesVectors) {
SetUpCalculator({"BYTES_FEATURE_TEST:test", "BYTES_FEATURE_OTHER:test2"}, {},
false, true);
auto input_sequence = ::absl::make_unique<tf::SequenceExample>();
int num_timesteps = 2;
for (int i = 0; i < num_timesteps; ++i) {
auto vs_ptr = ::absl::make_unique<std::vector<std::string>>(
2, absl::StrCat("foo", 2 << i));
mpms::AddFeatureBytes("TEST", *vs_ptr, input_sequence.get());
mpms::AddFeatureTimestamp("TEST", i, input_sequence.get());
vs_ptr = ::absl::make_unique<std::vector<std::string>>(
2, absl::StrCat("bar", 2 << i));
mpms::AddFeatureBytes("OTHER", *vs_ptr, input_sequence.get());
mpms::AddFeatureTimestamp("OTHER", i, input_sequence.get());
}
ASSERT_EQ(num_timesteps,
mpms::GetFeatureTimestampSize("TEST", *input_sequence));
ASSERT_EQ(num_timesteps, mpms::GetFeatureBytesSize("TEST", *input_sequence));
ASSERT_EQ(num_timesteps,
mpms::GetFeatureTimestampSize("OTHER", *input_sequence));
ASSERT_EQ(num_timesteps, mpms::GetFeatureBytesSize("OTHER", *input_sequence));
runner_->MutableSidePackets()->Tag(kSequenceExampleTag) =
Adopt(input_sequence.release());
MP_ASSERT_OK(runner_->Run());
const std::vector<Packet>& output_packets =
runner_->Outputs().Tag(kSequenceExampleTag).packets;
ASSERT_EQ(1, output_packets.size()); ASSERT_EQ(1, output_packets.size());
const tf::SequenceExample& output_sequence = const tf::SequenceExample& output_sequence =
output_packets[0].Get<tf::SequenceExample>(); output_packets[0].Get<tf::SequenceExample>();
@ -800,7 +933,7 @@ TEST_F(PackMediaSequenceCalculatorTest, TestReconcilingAnnotations) {
for (int i = 0; i < num_images; ++i) { for (int i = 0; i < num_images; ++i) {
auto image_ptr = auto image_ptr =
::absl::make_unique<OpenCvImageEncoderCalculatorResults>(encoded_image); ::absl::make_unique<OpenCvImageEncoderCalculatorResults>(encoded_image);
runner_->MutableInputs()->Tag("IMAGE").packets.push_back( runner_->MutableInputs()->Tag(kImageTag).packets.push_back(
Adopt(image_ptr.release()).At(Timestamp((i + 1) * 10))); Adopt(image_ptr.release()).At(Timestamp((i + 1) * 10)));
} }
@ -812,11 +945,11 @@ TEST_F(PackMediaSequenceCalculatorTest, TestReconcilingAnnotations) {
mpms::AddBBoxTimestamp("PREFIX", 9, input_sequence.get()); mpms::AddBBoxTimestamp("PREFIX", 9, input_sequence.get());
mpms::AddBBoxTimestamp("PREFIX", 22, input_sequence.get()); mpms::AddBBoxTimestamp("PREFIX", 22, input_sequence.get());
runner_->MutableSidePackets()->Tag("SEQUENCE_EXAMPLE") = runner_->MutableSidePackets()->Tag(kSequenceExampleTag) =
Adopt(input_sequence.release()); Adopt(input_sequence.release());
MP_ASSERT_OK(runner_->Run()); MP_ASSERT_OK(runner_->Run());
const std::vector<Packet>& output_packets = const std::vector<Packet>& output_packets =
runner_->Outputs().Tag("SEQUENCE_EXAMPLE").packets; runner_->Outputs().Tag(kSequenceExampleTag).packets;
ASSERT_EQ(1, output_packets.size()); ASSERT_EQ(1, output_packets.size());
const tf::SequenceExample& output_sequence = const tf::SequenceExample& output_sequence =
output_packets[0].Get<tf::SequenceExample>(); output_packets[0].Get<tf::SequenceExample>();
@ -853,7 +986,7 @@ TEST_F(PackMediaSequenceCalculatorTest, TestOverwritingAndReconciling) {
for (int i = 0; i < num_images; ++i) { for (int i = 0; i < num_images; ++i) {
auto image_ptr = auto image_ptr =
::absl::make_unique<OpenCvImageEncoderCalculatorResults>(encoded_image); ::absl::make_unique<OpenCvImageEncoderCalculatorResults>(encoded_image);
runner_->MutableInputs()->Tag("IMAGE").packets.push_back( runner_->MutableInputs()->Tag(kImageTag).packets.push_back(
Adopt(image_ptr.release()).At(Timestamp(i))); Adopt(image_ptr.release()).At(Timestamp(i)));
} }
@ -867,7 +1000,7 @@ TEST_F(PackMediaSequenceCalculatorTest, TestOverwritingAndReconciling) {
Location::CreateRelativeBBoxLocation(0, 0.5, 0.5, 0.5) Location::CreateRelativeBBoxLocation(0, 0.5, 0.5, 0.5)
.ConvertToProto(detection.mutable_location_data()); .ConvertToProto(detection.mutable_location_data());
detections->push_back(detection); detections->push_back(detection);
runner_->MutableInputs()->Tag("BBOX").packets.push_back( runner_->MutableInputs()->Tag(kBboxTag).packets.push_back(
Adopt(detections.release()).At(Timestamp(i))); Adopt(detections.release()).At(Timestamp(i)));
} }
@ -883,7 +1016,7 @@ TEST_F(PackMediaSequenceCalculatorTest, TestOverwritingAndReconciling) {
mpms::AddBBoxTrackIndex({-1}, input_sequence.get()); mpms::AddBBoxTrackIndex({-1}, input_sequence.get());
} }
runner_->MutableSidePackets()->Tag("SEQUENCE_EXAMPLE") = runner_->MutableSidePackets()->Tag(kSequenceExampleTag) =
Adopt(input_sequence.release()); Adopt(input_sequence.release());
// If all the previous values aren't cleared, this assert will fail. // If all the previous values aren't cleared, this assert will fail.
MP_ASSERT_OK(runner_->Run()); MP_ASSERT_OK(runner_->Run());
@ -899,11 +1032,11 @@ TEST_F(PackMediaSequenceCalculatorTest, TestTooLargeInputFailsSoftly) {
for (int i = 0; i < num_timesteps; ++i) { for (int i = 0; i < num_timesteps; ++i) {
auto vf_ptr = ::absl::make_unique<std::vector<float>>(1000000, i); auto vf_ptr = ::absl::make_unique<std::vector<float>>(1000000, i);
runner_->MutableInputs() runner_->MutableInputs()
->Tag("FLOAT_FEATURE_TEST") ->Tag(kFloatFeatureTestTag)
.packets.push_back(Adopt(vf_ptr.release()).At(Timestamp(i))); .packets.push_back(Adopt(vf_ptr.release()).At(Timestamp(i)));
} }
runner_->MutableSidePackets()->Tag("SEQUENCE_EXAMPLE") = runner_->MutableSidePackets()->Tag(kSequenceExampleTag) =
Adopt(input_sequence.release()); Adopt(input_sequence.release());
ASSERT_FALSE(runner_->Run().ok()); ASSERT_FALSE(runner_->Run().ok());
} }

View File: mediapipe/calculators/tensorflow/tensor_to_matrix_calculator_test.cc

@ -26,6 +26,8 @@ namespace mediapipe {
namespace tf = ::tensorflow; namespace tf = ::tensorflow;
namespace { namespace {
constexpr char kReferenceTag[] = "REFERENCE";
constexpr char kMatrix[] = "MATRIX"; constexpr char kMatrix[] = "MATRIX";
constexpr char kTensor[] = "TENSOR"; constexpr char kTensor[] = "TENSOR";
@ -68,7 +70,8 @@ class TensorToMatrixCalculatorTest : public ::testing::Test {
if (include_rate) { if (include_rate) {
header->set_packet_rate(1.0); header->set_packet_rate(1.0);
} }
runner_->MutableInputs()->Tag("REFERENCE").header = Adopt(header.release()); runner_->MutableInputs()->Tag(kReferenceTag).header =
Adopt(header.release());
} }
std::unique_ptr<CalculatorRunner> runner_; std::unique_ptr<CalculatorRunner> runner_;

View File: mediapipe/calculators/tensorflow/tensor_to_vector_string_calculator.cc

@ -0,0 +1,118 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Calculator converts from one-dimensional Tensor of DT_STRING to
// vector<std::string> OR from (batched) two-dimensional Tensor of DT_STRING to
// vector<vector<std::string>>.
#include "mediapipe/calculators/tensorflow/tensor_to_vector_string_calculator_options.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/types.h"
namespace mediapipe {
namespace tf = ::tensorflow;
class TensorToVectorStringCalculator : public CalculatorBase {
public:
static absl::Status GetContract(CalculatorContract* cc);
absl::Status Open(CalculatorContext* cc) override;
absl::Status Process(CalculatorContext* cc) override;
private:
TensorToVectorStringCalculatorOptions options_;
};
REGISTER_CALCULATOR(TensorToVectorStringCalculator);
absl::Status TensorToVectorStringCalculator::GetContract(
CalculatorContract* cc) {
// Start with only one input stream.
RET_CHECK_EQ(cc->Inputs().NumEntries(), 1)
<< "Only one input stream is supported.";
cc->Inputs().Index(0).Set<tf::Tensor>(
// Input Tensor
);
RET_CHECK_EQ(cc->Outputs().NumEntries(), 1)
<< "Only one output stream is supported.";
const auto& options = cc->Options<TensorToVectorStringCalculatorOptions>();
if (options.tensor_is_2d()) {
RET_CHECK(!options.flatten_nd());
cc->Outputs().Index(0).Set<std::vector<std::vector<std::string>>>(
/* "Output vector<vector<std::string>>." */);
} else {
cc->Outputs().Index(0).Set<std::vector<std::string>>(
// Output vector<std::string>.
);
}
return absl::OkStatus();
}
absl::Status TensorToVectorStringCalculator::Open(CalculatorContext* cc) {
options_ = cc->Options<TensorToVectorStringCalculatorOptions>();
// Inform mediapipe that this calculator produces an output at time t for
// each input received at time t (i.e. this calculator does not buffer
// inputs). This enables mediapipe to propagate time of arrival estimates in
// mediapipe graphs through this calculator.
cc->SetOffset(/*offset=*/0);
return absl::OkStatus();
}
absl::Status TensorToVectorStringCalculator::Process(CalculatorContext* cc) {
const tf::Tensor& input_tensor =
cc->Inputs().Index(0).Value().Get<tf::Tensor>();
RET_CHECK(tf::DT_STRING == input_tensor.dtype())
<< "expected DT_STRING input but got "
<< tensorflow::DataTypeString(input_tensor.dtype());
if (options_.tensor_is_2d()) {
RET_CHECK(2 == input_tensor.dims())
<< "Expected 2-dimensional Tensor, but the tensor shape is: "
<< input_tensor.shape().DebugString();
auto output = absl::make_unique<std::vector<std::vector<std::string>>>(
input_tensor.dim_size(0),
std::vector<std::string>(input_tensor.dim_size(1)));
for (int i = 0; i < input_tensor.dim_size(0); ++i) {
auto& instance_output = output->at(i);
const auto& slice =
input_tensor.Slice(i, i + 1).unaligned_flat<tensorflow::tstring>();
for (int j = 0; j < input_tensor.dim_size(1); ++j) {
instance_output.at(j) = slice(j);
}
}
cc->Outputs().Index(0).Add(output.release(), cc->InputTimestamp());
} else {
if (!options_.flatten_nd()) {
RET_CHECK(1 == input_tensor.dims())
<< "`flatten_nd` is not set. Expected 1-dimensional Tensor, but the "
<< "tensor shape is: " << input_tensor.shape().DebugString();
}
auto output =
absl::make_unique<std::vector<std::string>>(input_tensor.NumElements());
const auto& tensor_values = input_tensor.flat<tensorflow::tstring>();
for (int i = 0; i < input_tensor.NumElements(); ++i) {
output->at(i) = tensor_values(i);
}
cc->Outputs().Index(0).Add(output.release(), cc->InputTimestamp());
}
return absl::OkStatus();
}
} // namespace mediapipe

View File: mediapipe/calculators/tensorflow/tensor_to_vector_string_calculator_options.proto

@ -0,0 +1,33 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe;
import "mediapipe/framework/calculator.proto";
message TensorToVectorStringCalculatorOptions {
extend mediapipe.CalculatorOptions {
optional TensorToVectorStringCalculatorOptions ext = 386534187;
}
// If true, unpack a 2d tensor (matrix) into a vector<vector<string>>. If
// false, convert a 1d tensor (vector) into a vector<string>.
optional bool tensor_is_2d = 1 [default = false];
// If true, an N-D tensor will be flattened to a vector<string>. This is
// mutually exclusive with tensor_is_2d.
optional bool flatten_nd = 2 [default = false];
}
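As a usage sketch (stream names are illustrative; per the contract above, the calculator reads a single untagged DT_STRING tf::Tensor input and emits a single untagged vector output):

    node {
      calculator: "TensorToVectorStringCalculator"
      input_stream: "string_tensor"    # a DT_STRING tf::Tensor
      output_stream: "string_vectors"  # vector<vector<std::string>> when tensor_is_2d is set
      options {
        [mediapipe.TensorToVectorStringCalculatorOptions.ext] {
          tensor_is_2d: true  # unpack a batched 2-D string tensor row by row
        }
      }
    }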

Some files were not shown because too many files have changed in this diff.