Merge branch 'master' of https://github.com/google/mediapipe
commit 9a4a0fb7a9
@@ -45,7 +45,7 @@ Hair Segmentation
 [Object Detection](https://google.github.io/mediapipe/solutions/object_detection) | ✅ | ✅ | ✅ | | | ✅
 [Box Tracking](https://google.github.io/mediapipe/solutions/box_tracking) | ✅ | ✅ | ✅ | | |
 [Instant Motion Tracking](https://google.github.io/mediapipe/solutions/instant_motion_tracking) | ✅ | | | | |
-[Objectron](https://google.github.io/mediapipe/solutions/objectron) | ✅ | | ✅ | ✅ | |
+[Objectron](https://google.github.io/mediapipe/solutions/objectron) | ✅ | | ✅ | ✅ | ✅ |
 [KNIFT](https://google.github.io/mediapipe/solutions/knift) | ✅ | | | | |
 [AutoFlip](https://google.github.io/mediapipe/solutions/autoflip) | | | ✅ | | |
 [MediaSequence](https://google.github.io/mediapipe/solutions/media_sequence) | | | ✅ | | |
@@ -79,6 +79,13 @@ run code search using

 ## Publications

+*   [Bringing artworks to life with AR](https://developers.googleblog.com/2021/07/bringing-artworks-to-life-with-ar.html)
+    in Google Developers Blog
+*   [Prosthesis control via Mirru App using MediaPipe hand tracking](https://developers.googleblog.com/2021/05/control-your-mirru-prosthesis-with-mediapipe-hand-tracking.html)
+    in Google Developers Blog
+*   [SignAll SDK: Sign language interface using MediaPipe is now available for
+    developers](https://developers.googleblog.com/2021/04/signall-sdk-sign-language-interface-using-mediapipe-now-available.html)
+    in Google Developers Blog
 *   [MediaPipe Holistic - Simultaneous Face, Hand and Pose Prediction, on Device](https://ai.googleblog.com/2020/12/mediapipe-holistic-simultaneous-face.html)
     in Google AI Blog
 *   [Background Features in Google Meet, Powered by Web ML](https://ai.googleblog.com/2020/10/background-features-in-google-meet.html)
WORKSPACE (68 changes)
@@ -16,11 +16,11 @@ bazel_skylib_workspace()
 load("@bazel_skylib//lib:versions.bzl", "versions")
 versions.check(minimum_bazel_version = "3.7.2")

-# ABSL cpp library lts_2020_09_23
+# ABSL cpp library lts_2021_03_24, patch 2.
 http_archive(
     name = "com_google_absl",
     urls = [
-        "https://github.com/abseil/abseil-cpp/archive/20200923.tar.gz",
+        "https://github.com/abseil/abseil-cpp/archive/refs/tags/20210324.2.tar.gz",
     ],
     # Remove after https://github.com/abseil/abseil-cpp/issues/326 is solved.
     patches = [
@@ -29,8 +29,8 @@ http_archive(
     patch_args = [
         "-p1",
     ],
-    strip_prefix = "abseil-cpp-20200923",
-    sha256 = "b3744a4f7a249d5eaf2309daad597631ce77ea62e0fc6abffbab4b4c3dc0fc08"
+    strip_prefix = "abseil-cpp-20210324.2",
+    sha256 = "59b862f50e710277f8ede96f083a5bb8d7c9595376146838b9580be90374ee1f"
 )

 http_archive(
@@ -53,19 +53,12 @@ rules_foreign_cc_dependencies()
 all_content = """filegroup(name = "all", srcs = glob(["**"]), visibility = ["//visibility:public"])"""

 # GoogleTest/GoogleMock framework. Used by most unit-tests.
-# Last updated 2020-06-30.
+# Last updated 2021-07-02.
 http_archive(
     name = "com_google_googletest",
-    urls = ["https://github.com/google/googletest/archive/aee0f9d9b5b87796ee8a0ab26b7587ec30e8858e.zip"],
-    patches = [
-        # fix for https://github.com/google/googletest/issues/2817
-        "@//third_party:com_google_googletest_9d580ea80592189e6d44fa35bcf9cdea8bf620d6.diff"
-    ],
-    patch_args = [
-        "-p1",
-    ],
-    strip_prefix = "googletest-aee0f9d9b5b87796ee8a0ab26b7587ec30e8858e",
-    sha256 = "04a1751f94244307cebe695a69cc945f9387a80b0ef1af21394a490697c5c895",
+    urls = ["https://github.com/google/googletest/archive/4ec4cd23f486bf70efcc5d2caa40f24368f752e3.zip"],
+    strip_prefix = "googletest-4ec4cd23f486bf70efcc5d2caa40f24368f752e3",
+    sha256 = "de682ea824bfffba05b4e33b67431c247397d6175962534305136aa06f92e049",
 )

 # Google Benchmark library.
@@ -164,11 +157,11 @@ http_archive(
 http_archive(
     name = "pybind11",
     urls = [
-        "https://storage.googleapis.com/mirror.tensorflow.org/github.com/pybind/pybind11/archive/v2.4.3.tar.gz",
-        "https://github.com/pybind/pybind11/archive/v2.4.3.tar.gz",
+        "https://storage.googleapis.com/mirror.tensorflow.org/github.com/pybind/pybind11/archive/v2.7.1.tar.gz",
+        "https://github.com/pybind/pybind11/archive/v2.7.1.tar.gz",
     ],
-    sha256 = "1eed57bc6863190e35637290f97a20c81cfe4d9090ac0a24f3bbf08f265eb71d",
-    strip_prefix = "pybind11-2.4.3",
+    sha256 = "616d1c42e4cf14fa27b2a4ff759d7d7b33006fdc5ad8fd603bb2c22622f27020",
+    strip_prefix = "pybind11-2.7.1",
     build_file = "@pybind11_bazel//:pybind11.BUILD",
 )
@@ -338,7 +331,10 @@ load("@rules_jvm_external//:defs.bzl", "maven_install")
 maven_install(
     artifacts = [
         "androidx.concurrent:concurrent-futures:1.0.0-alpha03",
-        "androidx.lifecycle:lifecycle-common:2.2.0",
+        "androidx.lifecycle:lifecycle-common:2.3.1",
+        "androidx.activity:activity:1.2.2",
+        "androidx.exifinterface:exifinterface:1.3.3",
+        "androidx.fragment:fragment:1.3.4",
         "androidx.annotation:annotation:aar:1.1.0",
         "androidx.appcompat:appcompat:aar:1.1.0-rc01",
         "androidx.camera:camera-core:1.0.0-beta10",
@@ -353,9 +349,12 @@ maven_install(
         "com.google.android.material:material:aar:1.0.0-rc01",
         "com.google.auto.value:auto-value:1.8.1",
         "com.google.auto.value:auto-value-annotations:1.8.1",
-        "com.google.code.findbugs:jsr305:3.0.2",
-        "com.google.flogger:flogger-system-backend:0.3.1",
-        "com.google.flogger:flogger:0.3.1",
+        "com.google.code.findbugs:jsr305:latest.release",
+        "com.google.android.datatransport:transport-api:3.0.0",
+        "com.google.android.datatransport:transport-backend-cct:3.1.0",
+        "com.google.android.datatransport:transport-runtime:3.1.0",
+        "com.google.flogger:flogger-system-backend:0.6",
+        "com.google.flogger:flogger:0.6",
         "com.google.guava:guava:27.0.1-android",
         "com.google.guava:listenablefuture:1.0",
         "junit:junit:4.12",
@@ -383,9 +382,9 @@ http_archive(
 )

 # Tensorflow repo should always go after the other external dependencies.
-# 2021-06-07
-_TENSORFLOW_GIT_COMMIT = "700533808e6016dc458bb2eeecfca4babfc482ec"
-_TENSORFLOW_SHA256 = "b6edd7f4039bfc19f3e77594ecff558ba620091d0dc48181484b3d9085026126"
+# 2021-07-29
+_TENSORFLOW_GIT_COMMIT = "52a2905cbc21034766c08041933053178c5d10e3"
+_TENSORFLOW_SHA256 = "06d4691bcdb700f3275fa0971a1585221c2b9f3dffe867963be565a6643d7f56"
 http_archive(
     name = "org_tensorflow",
     urls = [
@@ -394,6 +393,8 @@ http_archive(
     patches = [
        "@//third_party:org_tensorflow_compatibility_fixes.diff",
        "@//third_party:org_tensorflow_objc_cxx17.diff",
+       # Diff is generated with a script, don't update it manually.
+       "@//third_party:org_tensorflow_custom_ops.diff",
     ],
     patch_args = [
         "-p1",
@@ -406,3 +407,18 @@ load("@org_tensorflow//tensorflow:workspace3.bzl", "tf_workspace3")
 tf_workspace3()
 load("@org_tensorflow//tensorflow:workspace2.bzl", "tf_workspace2")
 tf_workspace2()
+
+# Edge TPU
+http_archive(
+    name = "libedgetpu",
+    sha256 = "14d5527a943a25bc648c28a9961f954f70ba4d79c0a9ca5ae226e1831d72fe80",
+    strip_prefix = "libedgetpu-3164995622300286ef2bb14d7fdc2792dae045b7",
+    urls = [
+        "https://github.com/google-coral/libedgetpu/archive/3164995622300286ef2bb14d7fdc2792dae045b7.tar.gz"
+    ],
+)
+load("@libedgetpu//:workspace.bzl", "libedgetpu_dependencies")
+libedgetpu_dependencies()
+
+load("@coral_crosstool//:configure.bzl", "cc_crosstool")
+cc_crosstool(name = "crosstool")
@@ -16,12 +16,14 @@ nav_order: 1

 Please follow instructions below to build Android example apps in the supported
 MediaPipe [solutions](../solutions/solutions.md). To learn more about these
-example apps, start from [Hello World! on Android](./hello_world_android.md). To
-incorporate MediaPipe into an existing Android Studio project, see these
-[instructions](./android_archive_library.md) that use Android Archive (AAR) and
-Gradle.
+example apps, start from [Hello World! on Android](./hello_world_android.md).

-## Building Android example apps
+To incorporate MediaPipe into Android Studio projects, see these
+[instructions](./android_solutions.md) to use the MediaPipe Android Solution
+APIs (currently in alpha) that are now available in
+[Google's Maven Repository](https://maven.google.com/web/index.html?#com.google.mediapipe).
+
+## Building Android example apps with Bazel

 ### Prerequisite
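For quick reference, pulling the Solution APIs from Google's Maven Repository amounts to a couple of Gradle lines; this is only a sketch, with the artifact coordinates taken from the new android_solutions.md page added by this commit:

```
dependencies {
    // Foundation of all MediaPipe Solution APIs.
    implementation 'com.google.mediapipe:solution-core:latest.release'
    // Plus one artifact per solution, e.g. the Hands solution.
    implementation 'com.google.mediapipe:hands:latest.release'
}
```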
@@ -51,16 +53,6 @@ $YOUR_INTENDED_API_LEVEL` in android_ndk_repository() and/or
 android_sdk_repository() in the
 [`WORKSPACE`](https://github.com/google/mediapipe/blob/master/WORKSPACE) file.

-Please verify all the necessary packages are installed.
-
-*   Android SDK Platform API Level 28 or 29
-*   Android SDK Build-Tools 28 or 29
-*   Android SDK Platform-Tools 28 or 29
-*   Android SDK Tools 26.1.1
-*   Android NDK 19c or above
-
-### Option 1: Build with Bazel in Command Line
-
 Tip: You can run this
 [script](https://github.com/google/mediapipe/blob/master/build_android_examples.sh)
 to build (and install) all MediaPipe Android example apps.
@@ -84,108 +76,3 @@ to build (and install) all MediaPipe Android example apps.
 ```bash
 adb install bazel-bin/mediapipe/examples/android/src/java/com/google/mediapipe/apps/handtrackinggpu/handtrackinggpu.apk
 ```
-
-### Option 2: Build with Bazel in Android Studio
-
-The MediaPipe project can be imported into Android Studio using the Bazel
-plugins. This allows the MediaPipe examples to be built and modified in Android
-Studio.
-
-To incorporate MediaPipe into an existing Android Studio project, see these
-[instructions](./android_archive_library.md) that use Android Archive (AAR) and
-Gradle.
-
-The steps below use Android Studio 3.5 to build and install a MediaPipe example
-app:
-
-1.  Install and launch Android Studio 3.5.
-
-2.  Select `Configure` -> `SDK Manager` -> `SDK Platforms`.
-
-    *   Verify that Android SDK Platform API Level 28 or 29 is installed.
-    *   Take note of the Android SDK Location, e.g.,
-        `/usr/local/home/Android/Sdk`.
-
-3.  Select `Configure` -> `SDK Manager` -> `SDK Tools`.
-
-    *   Verify that Android SDK Build-Tools 28 or 29 is installed.
-    *   Verify that Android SDK Platform-Tools 28 or 29 is installed.
-    *   Verify that Android SDK Tools 26.1.1 is installed.
-    *   Verify that Android NDK 19c or above is installed.
-    *   Take note of the Android NDK Location, e.g.,
-        `/usr/local/home/Android/Sdk/ndk-bundle` or
-        `/usr/local/home/Android/Sdk/ndk/20.0.5594570`.
-
-4.  Set environment variables `$ANDROID_HOME` and `$ANDROID_NDK_HOME` to point
-    to the installed SDK and NDK.
-
-    ```bash
-    export ANDROID_HOME=/usr/local/home/Android/Sdk
-
-    # If the NDK libraries are installed by a previous version of Android Studio, do
-    export ANDROID_NDK_HOME=/usr/local/home/Android/Sdk/ndk-bundle
-    # If the NDK libraries are installed by Android Studio 3.5, do
-    export ANDROID_NDK_HOME=/usr/local/home/Android/Sdk/ndk/<version number>
-    ```
-
-5.  Select `Configure` -> `Plugins` to install `Bazel`.
-
-6.  On Linux, select `File` -> `Settings` -> `Bazel settings`. On macos, select
-    `Android Studio` -> `Preferences` -> `Bazel settings`. Then, modify `Bazel
-    binary location` to be the same as the output of `$ which bazel`.
-
-7.  Select `Import Bazel Project`.
-
-    *   Select `Workspace`: `/path/to/mediapipe` and select `Next`.
-    *   Select `Generate from BUILD file`: `/path/to/mediapipe/BUILD` and select
-        `Next`.
-    *   Modify `Project View` to be the following and select `Finish`.
-
-    ```
-    directories:
-      # read project settings, e.g., .bazelrc
-      .
-      -mediapipe/objc
-      -mediapipe/examples/ios
-
-    targets:
-      //mediapipe/examples/android/...:all
-      //mediapipe/java/...:all
-
-    android_sdk_platform: android-29
-
-    sync_flags:
-      --host_crosstool_top=@bazel_tools//tools/cpp:toolchain
-    ```
-
-8.  Select `Bazel` -> `Sync` -> `Sync project with Build files`.
-
-    Note: Even after doing step 4, if you still see the error: `"no such package
-    '@androidsdk//': Either the path attribute of android_sdk_repository or the
-    ANDROID_HOME environment variable must be set."`, please modify the
-    [`WORKSPACE`](https://github.com/google/mediapipe/blob/master/WORKSPACE)
-    file to point to your SDK and NDK library locations, as below:
-
-    ```
-    android_sdk_repository(
-        name = "androidsdk",
-        path = "/path/to/android/sdk"
-    )
-
-    android_ndk_repository(
-        name = "androidndk",
-        path = "/path/to/android/ndk"
-    )
-    ```
-
-9.  Connect an Android device to the workstation.
-
-10. Select `Run...` -> `Edit Configurations...`.
-
-    *   Select `Templates` -> `Bazel Command`.
-    *   Enter Target Expression:
-        `//mediapipe/examples/android/src/java/com/google/mediapipe/apps/handtrackinggpu:handtrackinggpu`
-    *   Enter Bazel command: `mobile-install`.
-    *   Enter Bazel flags: `-c opt --config=android_arm64`.
-    *   Press the `[+]` button to add the new configuration.
-    *   Select `Run` to run the example app on the connected Android device.
@@ -3,7 +3,7 @@ layout: default
 title: MediaPipe Android Archive
 parent: MediaPipe on Android
 grand_parent: Getting Started
-nav_order: 2
+nav_order: 3
 ---

 # MediaPipe Android Archive
@@ -113,9 +113,9 @@ each project.
     androidTestImplementation 'androidx.test.ext:junit:1.1.0'
     androidTestImplementation 'androidx.test.espresso:espresso-core:3.1.1'
     // MediaPipe deps
-    implementation 'com.google.flogger:flogger:0.3.1'
-    implementation 'com.google.flogger:flogger-system-backend:0.3.1'
-    implementation 'com.google.code.findbugs:jsr305:3.0.2'
+    implementation 'com.google.flogger:flogger:latest.release'
+    implementation 'com.google.flogger:flogger-system-backend:latest.release'
+    implementation 'com.google.code.findbugs:jsr305:latest.release'
     implementation 'com.google.guava:guava:27.0.1-android'
     implementation 'com.google.protobuf:protobuf-java:3.11.4'
     // CameraX core library
docs/getting_started/android_solutions.md (new file, 131 lines)
@@ -0,0 +1,131 @@
---
layout: default
title: MediaPipe Android Solutions
parent: MediaPipe on Android
grand_parent: Getting Started
nav_order: 2
---

# MediaPipe Android Solutions
{: .no_toc }

1.  TOC
{:toc}
---

MediaPipe Android Solution APIs (currently in alpha) are available in:

*   [MediaPipe Face Detection](../solutions/face_detection#android-solution-api)
*   [MediaPipe Face Mesh](../solutions/face_mesh#android-solution-api)
*   [MediaPipe Hands](../solutions/hands#android-solution-api)

## Incorporation in Android Studio

Prebuilt packages of Android Solution APIs can be found in
[Google's Maven Repository](https://maven.google.com/web/index.html?#com.google.mediapipe).
To incorporate them into an Android Studio project, add the following into the
project's Gradle dependencies:

```
dependencies {
    // MediaPipe solution-core is the foundation of any MediaPipe Solutions.
    implementation 'com.google.mediapipe:solution-core:latest.release'
    // Optional: MediaPipe Face Detection Solution.
    implementation 'com.google.mediapipe:facedetection:latest.release'
    // Optional: MediaPipe Face Mesh Solution.
    implementation 'com.google.mediapipe:facemesh:latest.release'
    // Optional: MediaPipe Hands Solution.
    implementation 'com.google.mediapipe:hands:latest.release'
}
```

If you need further customization, instead of using the prebuilt maven packages
consider building a MediaPipe Android Archive library locally from source by
following these [instructions](./android_archive_library.md).

## Building solution example apps

Detailed usage examples of the Android Solution APIs can be found in the
[source code](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/solutions)
of the solution example apps.

To build these apps:

1.  Open Android Studio Arctic Fox on Linux, macOS, or Windows.

2.  Import mediapipe/examples/android/solutions directory into Android Studio.

    ![](../images/import_mp_android_studio_project.png)

3.  For Windows users, run `create_win_symlinks.bat` as administrator to create
    res directory symlinks.

    ![](../images/run_create_win_symlinks.png)

4.  Select "File" -> "Sync Project with Gradle Files" to sync project.

5.  Run solution example app in Android Studio.

    ![](../images/run_android_solution_app.png)

6.  (Optional) Run solutions on CPU.

    MediaPipe solution example apps run the pipeline and model inference on GPU
    by default. If needed, for example to run the apps on Android Emulator, set
    the `RUN_ON_GPU` boolean variable to `false` in the app's
    `MainActivity.java` to run the pipeline and model inference on CPU.
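A minimal sketch of the toggle described in step 6; only the `RUN_ON_GPU` name comes from the text above, the surrounding class and comments are illustrative:

```java
public class MainActivity extends AppCompatActivity {
  // Run the pipeline and the model inference on GPU by default;
  // set to false to run on CPU, e.g. when using the Android Emulator.
  private static final boolean RUN_ON_GPU = false;
  // ... rest of the solution example app's activity ...
}
```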
## MediaPipe Solution APIs Terms of Service

Last modified: November 12, 2021

Use of MediaPipe Solution APIs is subject to the
[Google APIs Terms of Service](https://developers.google.com/terms),
[Google API Services User Data Policy](https://developers.google.com/terms/api-services-user-data-policy),
and the terms below. Please check back from time to time as these terms and
policies are occasionally updated.

**Privacy**

When you use MediaPipe Solution APIs, processing of the input data (e.g. images,
video, text) fully happens on-device, and **MediaPipe does not send that input
data to Google servers**. As a result, you can use our APIs for processing data
that should not leave the device.

MediaPipe Android Solution APIs will contact Google servers from time to time in
order to receive things like bug fixes, updated models, and hardware accelerator
compatibility information. MediaPipe Android Solution APIs also send metrics
about the performance and utilization of the APIs in your app to Google. Google
uses this metrics data to measure performance, API usage, debug, maintain and
improve the APIs, and detect misuse or abuse, as further described in our
[Privacy Policy](https://policies.google.com/privacy).

**You are responsible for obtaining informed consent from your app users about
Google’s processing of MediaPipe metrics data as required by applicable law.**

Data we collect may include the following, across all MediaPipe Android Solution
APIs:

-   Device information (such as manufacturer, model, OS version and build) and
    available ML hardware accelerators (GPU and DSP). Used for diagnostics and
    usage analytics.

-   App identification information (package name / bundle id, app version). Used
    for diagnostics and usage analytics.

-   API configuration (such as image format, resolution, and MediaPipe version
    used). Used for diagnostics and usage analytics.

-   Event type (such as initialize, download model, update, run, and detection).
    Used for diagnostics and usage analytics.

-   Error codes. Used for diagnostics.

-   Performance metrics. Used for diagnostics.

-   Per-installation identifiers that do not uniquely identify a user or
    physical device. Used for operation of remote configuration and usage
    analytics.

-   Network request sender IP addresses. Used for remote configuration
    diagnostics. Collected IP addresses are retained temporarily.
@@ -103,7 +103,7 @@ monotonically increasing timestamps. By convention, realtime calculators and
 graphs use the recording time or the presentation time as the timestamp for each
 packet, with each timestamp representing microseconds since
 `Jan/1/1970:00:00:00`. This allows packets from various sources to be processed
-in a gloablly consistent order.
+in a globally consistent order.

 Normally for offline processing, every input packet is processed and processing
 continues as long as necessary. For online processing, it is often necessary to
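As an illustration of the timestamp convention described in this hunk, a packet is stamped with a microsecond timestamp before it is sent into a graph. This is a minimal sketch using the standard MediaPipe C++ API; the stream name and payload are hypothetical:

```cpp
#include <cstdint>
#include <string>

#include "mediapipe/framework/calculator_framework.h"

// Assume `graph` is an initialized and started mediapipe::CalculatorGraph
// with an input stream named "input_video".
absl::Status SendFrame(mediapipe::CalculatorGraph& graph, int64_t capture_time_us) {
  mediapipe::Packet packet =
      mediapipe::MakePacket<std::string>("frame payload")
          .At(mediapipe::Timestamp(capture_time_us));  // microseconds since the epoch
  // Packets on a stream must arrive with monotonically increasing timestamps.
  return graph.AddPacketToInputStream("input_video", packet);
}
```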
@@ -31,8 +31,8 @@ stream on an Android device.

 ## Setup

-1.  Install MediaPipe on your system, see [MediaPipe installation guide] for
-    details.
+1.  Install MediaPipe on your system, see
+    [MediaPipe installation guide](./install.md) for details.
 2.  Install Android Development SDK and Android NDK. See how to do so also in
     [MediaPipe installation guide].
 3.  Enable [developer options] on your Android device.
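After this setup, building and installing an example app follows the usual Bazel pattern. The commands below are illustrative only; the exact target for this tutorial's app is given later in the document and may differ:

```bash
bazel build -c opt --config=android_arm64 \
    mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:helloworld
adb install bazel-bin/mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic/helloworld.apk
```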
@@ -770,7 +770,6 @@ If you ran into any issues, please see the full code of the tutorial
 [`ExternalTextureConverter`]:https://github.com/google/mediapipe/tree/master/mediapipe/java/com/google/mediapipe/components/ExternalTextureConverter.java
 [`FrameLayout`]:https://developer.android.com/reference/android/widget/FrameLayout
 [`FrameProcessor`]:https://github.com/google/mediapipe/tree/master/mediapipe/java/com/google/mediapipe/components/FrameProcessor.java
-[MediaPipe installation guide]:./install.md
 [`PermissionHelper`]: https://github.com/google/mediapipe/tree/master/mediapipe/java/com/google/mediapipe/components/PermissionHelper.java
 [`SurfaceHolder.Callback`]:https://developer.android.com/reference/android/view/SurfaceHolder.Callback.html
 [`SurfaceView`]:https://developer.android.com/reference/android/view/SurfaceView
@@ -31,8 +31,8 @@ stream on an iOS device.

 ## Setup

-1.  Install MediaPipe on your system, see [MediaPipe installation guide] for
-    details.
+1.  Install MediaPipe on your system, see
+    [MediaPipe installation guide](./install.md) for details.
 2.  Setup your iOS device for development.
 3.  Setup [Bazel] on your system to build and deploy the iOS app.
@@ -113,6 +113,10 @@ bazel to build the iOS application. The content of the
 5.  `Main.storyboard` and `Launch.storyboard`
 6.  `Assets.xcassets` directory.

+Note: In newer versions of Xcode, you may see additional files `SceneDelegate.h`
+and `SceneDelegate.m`. Make sure to copy them too and add them to the `BUILD`
+file mentioned below.
+
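A hedged sketch of what that `BUILD` change could look like. The target shape mirrors the objc_library targets used by the MediaPipe iOS examples, but the target name and attribute list here are hypothetical and your copy may differ:

```python
# Hypothetical excerpt of the HelloWorld BUILD file.
objc_library(
    name = "HelloWorldAppLibrary",
    srcs = [
        "AppDelegate.m",
        "SceneDelegate.m",  # generated by newer Xcode versions
        "ViewController.m",
        "main.m",
    ],
    hdrs = [
        "AppDelegate.h",
        "SceneDelegate.h",  # generated by newer Xcode versions
        "ViewController.h",
    ],
)
```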
 Copy these files to a directory named `HelloWorld` to a location that can access
 the MediaPipe source code. For example, the source code of the application that
 we will build in this tutorial is located in
@@ -247,6 +251,12 @@ We need to get frames from the `_cameraSource` into our application
 `MPPInputSourceDelegate`. So our application `ViewController` can be a delegate
 of `_cameraSource`.

+Update the interface definition of `ViewController` accordingly:
+
+```
+@interface ViewController () <MPPInputSourceDelegate>
+```
+
 To handle camera setup and process incoming frames, we should use a queue
 different from the main queue. Add the following to the implementation block of
 the `ViewController`:
@@ -288,6 +298,12 @@ utility called `MPPLayerRenderer` to display images on the screen. This utility
 can be used to display `CVPixelBufferRef` objects, which is the type of the
 images provided by `MPPCameraInputSource` to its delegates.

+In `ViewController.m`, add the following import line:
+
+```
+#import "mediapipe/objc/MPPLayerRenderer.h"
+```
+
 To display images on the screen, we need to add a new `UIView` object called
 `_liveView` to the `ViewController`.
@@ -411,6 +427,12 @@ Objective-C++.

 ### Use the graph in `ViewController`

+In `ViewController.m`, add the following import line:
+
+```
+#import "mediapipe/objc/MPPGraph.h"
+```
+
 Declare a static constant with the name of the graph, the input stream and the
 output stream:
@@ -549,6 +571,12 @@ method to receive packets on this output stream and display them on the screen:
 }
 ```

+Update the interface definition of `ViewController` with `MPPGraphDelegate`:
+
+```
+@interface ViewController () <MPPGraphDelegate, MPPInputSourceDelegate>
+```
+
 And that is all! Build and run the app on your iOS device. You should see the
 results of running the edge detection graph on a live video feed. Congrats!
@@ -560,6 +588,5 @@ appropriate `BUILD` file dependencies for the edge detection graph.

 [Bazel]:https://bazel.build/
 [`edge_detection_mobile_gpu.pbtxt`]:https://github.com/google/mediapipe/tree/master/mediapipe/graphs/edge_detection/edge_detection_mobile_gpu.pbtxt
-[MediaPipe installation guide]:./install.md
-[common]:(https://github.com/google/mediapipe/tree/master/mediapipe/examples/ios/common)
-[helloworld]:(https://github.com/google/mediapipe/tree/master/mediapipe/examples/ios/helloworld)
+[common]:https://github.com/google/mediapipe/tree/master/mediapipe/examples/ios/common
+[helloworld]:https://github.com/google/mediapipe/tree/master/mediapipe/examples/ios/helloworld
@@ -43,104 +43,189 @@ install --user six`.

 3.  Install OpenCV and FFmpeg.

-    Option 1. Use package manager tool to install the pre-compiled OpenCV
-    libraries. FFmpeg will be installed via libopencv-video-dev.
+    **Option 1**. Use package manager tool to install the pre-compiled OpenCV
+    libraries. FFmpeg will be installed via `libopencv-video-dev`.

-    Note: Debian 9 and Ubuntu 16.04 provide OpenCV 2.4.9. You may want to take
-    option 2 or 3 to install OpenCV 3 or above.
+    OS                   | OpenCV
+    -------------------- | ------
+    Debian 9 (stretch)   | 2.4
+    Debian 10 (buster)   | 3.2
+    Debian 11 (bullseye) | 4.5
+    Ubuntu 16.04 LTS     | 2.4
+    Ubuntu 18.04 LTS     | 3.2
+    Ubuntu 20.04 LTS     | 4.2
+    Ubuntu 21.04         | 4.5

     ```bash
-    $ sudo apt-get install libopencv-core-dev libopencv-highgui-dev \
-                           libopencv-calib3d-dev libopencv-features2d-dev \
-                           libopencv-imgproc-dev libopencv-video-dev
+    $ sudo apt-get install -y \
+        libopencv-core-dev \
+        libopencv-highgui-dev \
+        libopencv-calib3d-dev \
+        libopencv-features2d-dev \
+        libopencv-imgproc-dev \
+        libopencv-video-dev
     ```

-    Debian 9 and Ubuntu 18.04 install the packages in
-    `/usr/lib/x86_64-linux-gnu`. MediaPipe's [`opencv_linux.BUILD`] and
-    [`ffmpeg_linux.BUILD`] are configured for this library path. Ubuntu 20.04
-    may install the OpenCV and FFmpeg packages in `/usr/local`, Please follow
-    the option 3 below to modify the [`WORKSPACE`], [`opencv_linux.BUILD`] and
-    [`ffmpeg_linux.BUILD`] files accordingly.
-
-    Moreover, for Nvidia Jetson and Raspberry Pi devices with ARM Ubuntu, the
-    library path needs to be modified like the following:
+    MediaPipe's [`opencv_linux.BUILD`] and [`WORKSPACE`] are already configured
+    for OpenCV 2/3 and should work correctly on any architecture:

     ```bash
-    sed -i "s/x86_64-linux-gnu/aarch64-linux-gnu/g" third_party/opencv_linux.BUILD
+    # WORKSPACE
+    new_local_repository(
+      name = "linux_opencv",
+      build_file = "@//third_party:opencv_linux.BUILD",
+      path = "/usr",
+    )
+
+    # opencv_linux.BUILD for OpenCV 2/3 installed from Debian package
+    cc_library(
+      name = "opencv",
+      linkopts = [
+        "-l:libopencv_core.so",
+        "-l:libopencv_calib3d.so",
+        "-l:libopencv_features2d.so",
+        "-l:libopencv_highgui.so",
+        "-l:libopencv_imgcodecs.so",
+        "-l:libopencv_imgproc.so",
+        "-l:libopencv_video.so",
+        "-l:libopencv_videoio.so",
+      ],
+    )
     ```

-    Option 2. Run [`setup_opencv.sh`] to automatically build OpenCV from source
-    and modify MediaPipe's OpenCV config.
+    For OpenCV 4 you need to modify [`opencv_linux.BUILD`] taking into account
+    current architecture:

-    Option 3. Follow OpenCV's
+    ```bash
+    # WORKSPACE
+    new_local_repository(
+      name = "linux_opencv",
+      build_file = "@//third_party:opencv_linux.BUILD",
+      path = "/usr",
+    )
+
+    # opencv_linux.BUILD for OpenCV 4 installed from Debian package
+    cc_library(
+      name = "opencv",
+      hdrs = glob([
+        # Uncomment according to your multiarch value (gcc -print-multiarch):
+        #  "include/aarch64-linux-gnu/opencv4/opencv2/cvconfig.h",
+        #  "include/arm-linux-gnueabihf/opencv4/opencv2/cvconfig.h",
+        #  "include/x86_64-linux-gnu/opencv4/opencv2/cvconfig.h",
+        "include/opencv4/opencv2/**/*.h*",
+      ]),
+      includes = [
+        # Uncomment according to your multiarch value (gcc -print-multiarch):
+        #  "include/aarch64-linux-gnu/opencv4/",
+        #  "include/arm-linux-gnueabihf/opencv4/",
+        #  "include/x86_64-linux-gnu/opencv4/",
+        "include/opencv4/",
+      ],
+      linkopts = [
+        "-l:libopencv_core.so",
+        "-l:libopencv_calib3d.so",
+        "-l:libopencv_features2d.so",
+        "-l:libopencv_highgui.so",
+        "-l:libopencv_imgcodecs.so",
+        "-l:libopencv_imgproc.so",
+        "-l:libopencv_video.so",
+        "-l:libopencv_videoio.so",
+      ],
+    )
+    ```
+
+    **Option 2**. Run [`setup_opencv.sh`] to automatically build OpenCV from
+    source and modify MediaPipe's OpenCV config. This option will do all steps
+    defined in Option 3 automatically.
+
+    **Option 3**. Follow OpenCV's
     [documentation](https://docs.opencv.org/3.4.6/d7/d9f/tutorial_linux_install.html)
     to manually build OpenCV from source code.

-    Note: You may need to modify [`WORKSPACE`], [`opencv_linux.BUILD`] and
-    [`ffmpeg_linux.BUILD`] to point MediaPipe to your own OpenCV and FFmpeg
-    libraries. For example if OpenCV and FFmpeg are both manually installed in
-    "/usr/local/", you will need to update: (1) the "linux_opencv" and
-    "linux_ffmpeg" new_local_repository rules in [`WORKSPACE`], (2) the "opencv"
-    cc_library rule in [`opencv_linux.BUILD`], and (3) the "libffmpeg"
-    cc_library rule in [`ffmpeg_linux.BUILD`]. These 3 changes are shown below:
+    You may need to modify [`WORKSPACE`] and [`opencv_linux.BUILD`] to point
+    MediaPipe to your own OpenCV libraries. Assume OpenCV would be installed to
+    `/usr/local/` which is recommended by default.
+
+    OpenCV 2/3 setup:

     ```bash
+    # WORKSPACE
     new_local_repository(
       name = "linux_opencv",
       build_file = "@//third_party:opencv_linux.BUILD",
       path = "/usr/local",
     )

+    # opencv_linux.BUILD for OpenCV 2/3 installed to /usr/local
+    cc_library(
+      name = "opencv",
+      linkopts = [
+        "-L/usr/local/lib",
+        "-l:libopencv_core.so",
+        "-l:libopencv_calib3d.so",
+        "-l:libopencv_features2d.so",
+        "-l:libopencv_highgui.so",
+        "-l:libopencv_imgcodecs.so",
+        "-l:libopencv_imgproc.so",
+        "-l:libopencv_video.so",
+        "-l:libopencv_videoio.so",
+      ],
+    )
+    ```
+
+    OpenCV 4 setup:
+
+    ```bash
+    # WORKSPACE
     new_local_repository(
-      name = "linux_ffmpeg",
-      build_file = "@//third_party:ffmpeg_linux.BUILD",
+      name = "linux_opencv",
+      build_file = "@//third_party:opencv_linux.BUILD",
       path = "/usr/local",
     )

+    # opencv_linux.BUILD for OpenCV 4 installed to /usr/local
     cc_library(
       name = "opencv",
-      srcs = glob(
-        [
-          "lib/libopencv_core.so",
-          "lib/libopencv_highgui.so",
-          "lib/libopencv_imgcodecs.so",
-          "lib/libopencv_imgproc.so",
-          "lib/libopencv_video.so",
-          "lib/libopencv_videoio.so",
-        ],
-      ),
       hdrs = glob([
-        # For OpenCV 3.x
-        "include/opencv2/**/*.h*",
-        # For OpenCV 4.x
-        # "include/opencv4/opencv2/**/*.h*",
+        "include/opencv4/opencv2/**/*.h*",
       ]),
       includes = [
-        # For OpenCV 3.x
-        "include/",
-        # For OpenCV 4.x
-        # "include/opencv4/",
+        "include/opencv4/",
       ],
-      linkstatic = 1,
-      visibility = ["//visibility:public"],
+      linkopts = [
+        "-L/usr/local/lib",
+        "-l:libopencv_core.so",
+        "-l:libopencv_calib3d.so",
+        "-l:libopencv_features2d.so",
+        "-l:libopencv_highgui.so",
+        "-l:libopencv_imgcodecs.so",
+        "-l:libopencv_imgproc.so",
+        "-l:libopencv_video.so",
+        "-l:libopencv_videoio.so",
+      ],
+    )
+    ```
+
+    Current FFmpeg setup is defined in [`ffmpeg_linux.BUILD`] and should work
+    for any architecture:
+
+    ```bash
+    # WORKSPACE
+    new_local_repository(
+      name = "linux_ffmpeg",
+      build_file = "@//third_party:ffmpeg_linux.BUILD",
+      path = "/usr"
     )

+    # ffmpeg_linux.BUILD for FFmpeg installed from Debian package
     cc_library(
       name = "libffmpeg",
-      srcs = glob(
-        [
-          "lib/libav*.so",
-        ],
-      ),
-      hdrs = glob(["include/libav*/*.h"]),
-      includes = ["include"],
       linkopts = [
-        "-lavcodec",
-        "-lavformat",
-        "-lavutil",
+        "-l:libavcodec.so",
+        "-l:libavformat.so",
+        "-l:libavutil.so",
       ],
-      linkstatic = 1,
-      visibility = ["//visibility:public"],
     )
     ```
@@ -711,7 +796,7 @@ This will use a Docker image that will isolate mediapipe's installation from the
 ```bash
 $ docker run -it --name mediapipe mediapipe:latest

-root@bca08b91ff63:/mediapipe# GLOG_logtostderr=1 bazel run --define MEDIAPIPE_DISABLE_GPU=1 mediapipe/examples/desktop/hello_world:hello_world
+root@bca08b91ff63:/mediapipe# GLOG_logtostderr=1 bazelisk run --define MEDIAPIPE_DISABLE_GPU=1 mediapipe/examples/desktop/hello_world:hello_world

 # Should print:
 # Hello World!
@@ -22,12 +22,23 @@ Solution | NPM Package | Example
 [Face Detection][Fd-pg] | [@mediapipe/face_detection][Fd-npm] | [mediapipe.dev/demo/face_detection][Fd-demo]
 [Hands][H-pg] | [@mediapipe/hands][H-npm] | [mediapipe.dev/demo/hands][H-demo]
 [Holistic][Ho-pg] | [@mediapipe/holistic][Ho-npm] | [mediapipe.dev/demo/holistic][Ho-demo]
+[Objectron][Ob-pg] | [@mediapipe/objectron][Ob-npm] | [mediapipe.dev/demo/objectron][Ob-demo]
 [Pose][P-pg] | [@mediapipe/pose][P-npm] | [mediapipe.dev/demo/pose][P-demo]
 [Selfie Segmentation][S-pg] | [@mediapipe/selfie_segmentation][S-npm] | [mediapipe.dev/demo/selfie_segmentation][S-demo]

 Click on a solution link above for more information, including API and code
 snippets.

+### Supported platforms:
+
+| Browser | Platform                | Notes                                  |
+| ------- | ----------------------- | -------------------------------------- |
+| Chrome  | Android / Windows / Mac | Pixel 4 and older unsupported. Fuchsia |
+|         |                         | unsupported.                           |
+| Chrome  | iOS                     | Camera unavailable in Chrome on iOS.   |
+| Safari  | iPad/iPhone/Mac         | iOS and Safari on iPad / iPhone /      |
+|         |                         | MacBook                                |
+
 The quickest way to get acclimated is to look at the examples above. Each demo
 has a link to a [CodePen][codepen] so that you can edit the code and try it
 yourself. We have included a number of utility packages to help you get started:
@@ -67,33 +78,24 @@ affecting your work, restrict your request to a `<minor>` number. e.g.,
 [F-pg]: ../solutions/face_mesh#javascript-solution-api
 [Fd-pg]: ../solutions/face_detection#javascript-solution-api
 [H-pg]: ../solutions/hands#javascript-solution-api
+[Ob-pg]: ../solutions/objectron#javascript-solution-api
 [P-pg]: ../solutions/pose#javascript-solution-api
 [S-pg]: ../solutions/selfie_segmentation#javascript-solution-api
 [Ho-npm]: https://www.npmjs.com/package/@mediapipe/holistic
 [F-npm]: https://www.npmjs.com/package/@mediapipe/face_mesh
 [Fd-npm]: https://www.npmjs.com/package/@mediapipe/face_detection
 [H-npm]: https://www.npmjs.com/package/@mediapipe/hands
+[Ob-npm]: https://www.npmjs.com/package/@mediapipe/objectron
 [P-npm]: https://www.npmjs.com/package/@mediapipe/pose
 [S-npm]: https://www.npmjs.com/package/@mediapipe/selfie_segmentation
 [draw-npm]: https://www.npmjs.com/package/@mediapipe/drawing_utils
 [cam-npm]: https://www.npmjs.com/package/@mediapipe/camera_utils
 [ctrl-npm]: https://www.npmjs.com/package/@mediapipe/control_utils
-[Ho-jsd]: https://www.jsdelivr.com/package/npm/@mediapipe/holistic
-[F-jsd]: https://www.jsdelivr.com/package/npm/@mediapipe/face_mesh
-[Fd-jsd]: https://www.jsdelivr.com/package/npm/@mediapipe/face_detection
-[H-jsd]: https://www.jsdelivr.com/package/npm/@mediapipe/hands
-[P-jsd]: https://www.jsdelivr.com/package/npm/@mediapipe/pose
-[P-jsd]: https://www.jsdelivr.com/package/npm/@mediapipe/selfie_segmentation
-[Ho-pen]: https://code.mediapipe.dev/codepen/holistic
-[F-pen]: https://code.mediapipe.dev/codepen/face_mesh
-[Fd-pen]: https://code.mediapipe.dev/codepen/face_detection
-[H-pen]: https://code.mediapipe.dev/codepen/hands
-[P-pen]: https://code.mediapipe.dev/codepen/pose
-[S-pen]: https://code.mediapipe.dev/codepen/selfie_segmentation
 [Ho-demo]: https://mediapipe.dev/demo/holistic
 [F-demo]: https://mediapipe.dev/demo/face_mesh
 [Fd-demo]: https://mediapipe.dev/demo/face_detection
 [H-demo]: https://mediapipe.dev/demo/hands
+[Ob-demo]: https://mediapipe.dev/demo/objectron
 [P-demo]: https://mediapipe.dev/demo/pose
 [S-demo]: https://mediapipe.dev/demo/selfie_segmentation
 [npm]: https://www.npmjs.com/package/@mediapipe
@@ -74,7 +74,7 @@ Mapping\[str, Packet\] | std::map<std::string, Packet> | create_st
 np.ndarray<br>(cv.mat and PIL.Image) | mp::ImageFrame | create_image_frame(<br> format=ImageFormat.SRGB,<br> data=mat) | get_image_frame(packet)
 np.ndarray | mp::Matrix | create_matrix(data) | get_matrix(packet)
 Google Proto Message | Google Proto Message | create_proto(proto) | get_proto(packet)
-List\[Proto\] | std::vector\<Proto\> | create_proto_vector(proto_list) | get_proto_list(packet)
+List\[Proto\] | std::vector\<Proto\> | n/a | get_proto_list(packet)

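A small usage sketch of the proto conversion row above, assuming the `packet_creator`/`packet_getter` modules listed in this table; the `Detection` proto is just an example payload:

```python
import mediapipe as mp
from mediapipe.framework.formats import detection_pb2

detection = detection_pb2.Detection()
packet = mp.packet_creator.create_proto(detection)   # Python proto -> Packet
restored = mp.packet_getter.get_proto(packet)        # Packet -> Python proto
# For std::vector<Proto> packets produced inside a graph, read them back with
# mp.packet_getter.get_proto_list(packet); there is no create_proto_vector().
```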
 It's not uncommon that users create custom C++ classes and send those into
 the graphs and calculators. To allow the custom classes to be used in Python
BIN  docs/images/attention_mesh_architecture.png (new file, 797 KiB)
BIN  docs/images/import_mp_android_studio_project.png (new file, 128 KiB)
BIN  docs/images/mobile/pose_segmentation.mp4 (new file)
BIN  (modified image, 56 KiB -> 77 KiB)
BIN  docs/images/run_android_solution_app.png (new file, 258 KiB)
BIN  docs/images/run_create_win_symlinks.png (new file, 51 KiB)
@@ -45,7 +45,7 @@ Hair Segmentation
 [Object Detection](https://google.github.io/mediapipe/solutions/object_detection) | ✅ | ✅ | ✅ | | | ✅
 [Box Tracking](https://google.github.io/mediapipe/solutions/box_tracking) | ✅ | ✅ | ✅ | | |
 [Instant Motion Tracking](https://google.github.io/mediapipe/solutions/instant_motion_tracking) | ✅ | | | | |
-[Objectron](https://google.github.io/mediapipe/solutions/objectron) | ✅ | | ✅ | ✅ | |
+[Objectron](https://google.github.io/mediapipe/solutions/objectron) | ✅ | | ✅ | ✅ | ✅ |
 [KNIFT](https://google.github.io/mediapipe/solutions/knift) | ✅ | | | | |
 [AutoFlip](https://google.github.io/mediapipe/solutions/autoflip) | | | ✅ | | |
 [MediaSequence](https://google.github.io/mediapipe/solutions/media_sequence) | | | ✅ | | |
@@ -79,6 +79,13 @@ run code search using

 ## Publications

+*   [Bringing artworks to life with AR](https://developers.googleblog.com/2021/07/bringing-artworks-to-life-with-ar.html)
+    in Google Developers Blog
+*   [Prosthesis control via Mirru App using MediaPipe hand tracking](https://developers.googleblog.com/2021/05/control-your-mirru-prosthesis-with-mediapipe-hand-tracking.html)
+    in Google Developers Blog
+*   [SignAll SDK: Sign language interface using MediaPipe is now available for
+    developers](https://developers.googleblog.com/2021/04/signall-sdk-sign-language-interface-using-mediapipe-now-available.html)
+    in Google Developers Blog
 *   [MediaPipe Holistic - Simultaneous Face, Hand and Pose Prediction, on Device](https://ai.googleblog.com/2020/12/mediapipe-holistic-simultaneous-face.html)
     in Google AI Blog
 *   [Background Features in Google Meet, Powered by Web ML](https://ai.googleblog.com/2020/10/background-features-in-google-meet.html)
@@ -121,12 +121,10 @@ with mp_face_detection.FaceDetection(
      # If loading a video, use 'break' instead of 'continue'.
      continue

    # To improve performance, optionally mark the image as not writeable to
    # pass by reference.
    image.flags.writeable = False
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = face_detection.process(image)

    # Draw the face detection annotations on the image.

@@ -135,7 +133,8 @@ with mp_face_detection.FaceDetection(
    if results.detections:
      for detection in results.detections:
        mp_drawing.draw_detection(image, detection)
    # Flip the image horizontally for a selfie-view display.
    cv2.imshow('MediaPipe Face Detection', cv2.flip(image, 1))
    if cv2.waitKey(5) & 0xFF == 27:
      break
cap.release()
@@ -200,7 +199,7 @@ const faceDetection = new FaceDetection({locateFile: (file) => {
  return `https://cdn.jsdelivr.net/npm/@mediapipe/face_detection@0.0/${file}`;
}});
faceDetection.setOptions({
  modelSelection: 0,
  minDetectionConfidence: 0.5
});
faceDetection.onResults(onResults);
@@ -216,6 +215,214 @@ camera.start();
</script>
```

### Android Solution API

Please first follow general
[instructions](../getting_started/android_solutions.md) to add MediaPipe Gradle
dependencies and try the Android Solution API in the companion
[example Android Studio project](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/solutions/facedetection),
and learn more in the usage example below.

Supported configuration options:

*   [staticImageMode](#static_image_mode)
*   [modelSelection](#model_selection)

#### Camera Input

```java
// For camera input and result rendering with OpenGL.
FaceDetectionOptions faceDetectionOptions =
    FaceDetectionOptions.builder()
        .setStaticImageMode(false)
        .setModelSelection(0).build();
FaceDetection faceDetection = new FaceDetection(this, faceDetectionOptions);
faceDetection.setErrorListener(
    (message, e) -> Log.e(TAG, "MediaPipe Face Detection error:" + message));

// Initializes a new CameraInput instance and connects it to MediaPipe Face Detection Solution.
CameraInput cameraInput = new CameraInput(this);
cameraInput.setNewFrameListener(
    textureFrame -> faceDetection.send(textureFrame));

// Initializes a new GlSurfaceView with a ResultGlRenderer<FaceDetectionResult> instance
// that provides the interfaces to run user-defined OpenGL rendering code.
// See mediapipe/examples/android/solutions/facedetection/src/main/java/com/google/mediapipe/examples/facedetection/FaceDetectionResultGlRenderer.java
// as an example.
SolutionGlSurfaceView<FaceDetectionResult> glSurfaceView =
    new SolutionGlSurfaceView<>(
        this, faceDetection.getGlContext(), faceDetection.getGlMajorVersion());
glSurfaceView.setSolutionResultRenderer(new FaceDetectionResultGlRenderer());
glSurfaceView.setRenderInputImage(true);
faceDetection.setResultListener(
    faceDetectionResult -> {
      if (faceDetectionResult.multiFaceDetections().isEmpty()) {
        return;
      }
      RelativeKeypoint noseTip =
          faceDetectionResult
              .multiFaceDetections()
              .get(0)
              .getLocationData()
              .getRelativeKeypoints(FaceKeypoint.NOSE_TIP);
      Log.i(
          TAG,
          String.format(
              "MediaPipe Face Detection nose tip normalized coordinates (value range: [0, 1]): x=%f, y=%f",
              noseTip.getX(), noseTip.getY()));
      // Request GL rendering.
      glSurfaceView.setRenderData(faceDetectionResult);
      glSurfaceView.requestRender();
    });

// The runnable to start camera after the GLSurfaceView is attached.
glSurfaceView.post(
    () ->
        cameraInput.start(
            this,
            faceDetection.getGlContext(),
            CameraInput.CameraFacing.FRONT,
            glSurfaceView.getWidth(),
            glSurfaceView.getHeight()));
```

#### Image Input

```java
// For reading images from gallery and drawing the output in an ImageView.
FaceDetectionOptions faceDetectionOptions =
    FaceDetectionOptions.builder()
        .setStaticImageMode(true)
        .setModelSelection(0).build();
FaceDetection faceDetection = new FaceDetection(this, faceDetectionOptions);

// Connects MediaPipe Face Detection Solution to the user-defined ImageView
// instance that allows users to have the custom drawing of the output landmarks
// on it. See mediapipe/examples/android/solutions/facedetection/src/main/java/com/google/mediapipe/examples/facedetection/FaceDetectionResultImageView.java
// as an example.
FaceDetectionResultImageView imageView = new FaceDetectionResultImageView(this);
faceDetection.setResultListener(
    faceDetectionResult -> {
      if (faceDetectionResult.multiFaceDetections().isEmpty()) {
        return;
      }
      int width = faceDetectionResult.inputBitmap().getWidth();
      int height = faceDetectionResult.inputBitmap().getHeight();
      RelativeKeypoint noseTip =
          faceDetectionResult
              .multiFaceDetections()
              .get(0)
              .getLocationData()
              .getRelativeKeypoints(FaceKeypoint.NOSE_TIP);
      Log.i(
          TAG,
          String.format(
              "MediaPipe Face Detection nose tip coordinates (pixel values): x=%f, y=%f",
              noseTip.getX() * width, noseTip.getY() * height));
      // Request canvas drawing.
      imageView.setFaceDetectionResult(faceDetectionResult);
      runOnUiThread(() -> imageView.update());
    });
faceDetection.setErrorListener(
    (message, e) -> Log.e(TAG, "MediaPipe Face Detection error:" + message));

// ActivityResultLauncher to get an image from the gallery as Bitmap.
ActivityResultLauncher<Intent> imageGetter =
    registerForActivityResult(
        new ActivityResultContracts.StartActivityForResult(),
        result -> {
          Intent resultIntent = result.getData();
          if (resultIntent != null && result.getResultCode() == RESULT_OK) {
            Bitmap bitmap = null;
            try {
              bitmap =
                  MediaStore.Images.Media.getBitmap(
                      this.getContentResolver(), resultIntent.getData());
              // Please also rotate the Bitmap based on its orientation.
            } catch (IOException e) {
              Log.e(TAG, "Bitmap reading error:" + e);
            }
            if (bitmap != null) {
              faceDetection.send(bitmap);
            }
          }
        });
Intent pickImageIntent = new Intent(Intent.ACTION_PICK);
pickImageIntent.setDataAndType(MediaStore.Images.Media.INTERNAL_CONTENT_URI, "image/*");
imageGetter.launch(pickImageIntent);
```

#### Video Input

```java
// For video input and result rendering with OpenGL.
FaceDetectionOptions faceDetectionOptions =
    FaceDetectionOptions.builder()
        .setStaticImageMode(false)
        .setModelSelection(0).build();
FaceDetection faceDetection = new FaceDetection(this, faceDetectionOptions);
faceDetection.setErrorListener(
    (message, e) -> Log.e(TAG, "MediaPipe Face Detection error:" + message));

// Initializes a new VideoInput instance and connects it to MediaPipe Face Detection Solution.
VideoInput videoInput = new VideoInput(this);
videoInput.setNewFrameListener(
    textureFrame -> faceDetection.send(textureFrame));

// Initializes a new GlSurfaceView with a ResultGlRenderer<FaceDetectionResult> instance
// that provides the interfaces to run user-defined OpenGL rendering code.
// See mediapipe/examples/android/solutions/facedetection/src/main/java/com/google/mediapipe/examples/facedetection/FaceDetectionResultGlRenderer.java
// as an example.
SolutionGlSurfaceView<FaceDetectionResult> glSurfaceView =
    new SolutionGlSurfaceView<>(
        this, faceDetection.getGlContext(), faceDetection.getGlMajorVersion());
glSurfaceView.setSolutionResultRenderer(new FaceDetectionResultGlRenderer());
glSurfaceView.setRenderInputImage(true);

faceDetection.setResultListener(
    faceDetectionResult -> {
      if (faceDetectionResult.multiFaceDetections().isEmpty()) {
        return;
      }
      RelativeKeypoint noseTip =
          faceDetectionResult
              .multiFaceDetections()
              .get(0)
              .getLocationData()
              .getRelativeKeypoints(FaceKeypoint.NOSE_TIP);
      Log.i(
          TAG,
          String.format(
              "MediaPipe Face Detection nose tip normalized coordinates (value range: [0, 1]): x=%f, y=%f",
              noseTip.getX(), noseTip.getY()));
      // Request GL rendering.
      glSurfaceView.setRenderData(faceDetectionResult);
      glSurfaceView.requestRender();
    });

ActivityResultLauncher<Intent> videoGetter =
    registerForActivityResult(
        new ActivityResultContracts.StartActivityForResult(),
        result -> {
          Intent resultIntent = result.getData();
          if (resultIntent != null) {
            if (result.getResultCode() == RESULT_OK) {
              glSurfaceView.post(
                  () ->
                      videoInput.start(
                          this,
                          resultIntent.getData(),
                          faceDetection.getGlContext(),
                          glSurfaceView.getWidth(),
                          glSurfaceView.getHeight()));
            }
          }
        });
Intent pickVideoIntent = new Intent(Intent.ACTION_PICK);
pickVideoIntent.setDataAndType(MediaStore.Video.Media.INTERNAL_CONTENT_URI, "video/*");
videoGetter.launch(pickVideoIntent);
```

## Example Apps

Please first see general instructions for
@@ -111,6 +111,23 @@ You can find more information about the face landmark model in this
:------------------------------------------------------------------------: |
*Fig 2. Face landmarks: the red box indicates the cropped area as input to the landmark model, the red dots represent the 468 landmarks in 3D, and the green lines connecting landmarks illustrate the contours around the eyes, eyebrows, lips and the entire face.* |

#### Attention Mesh Model

In addition to the [Face Landmark Model](#face-landmark-model) we provide
another model that applies
[attention](https://en.wikipedia.org/wiki/Attention_(machine_learning)) to
semantically meaningful face regions, and can therefore predict landmarks more
accurately around lips, eyes and irises, at the expense of more compute. It
enables applications like AR makeup and AR puppeteering.

The attention mesh model can be selected in the Solution APIs via the
[refine_landmarks](#refine_landmarks) option. You can also find more information
about the model in this [paper](https://arxiv.org/abs/2006.10962).

 |
:---------------------------------------------------------------------------: |
*Fig 3. Attention Mesh: Overview of model architecture.* |
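As a rough illustration of what the option changes in the Python Solution API, the minimal sketch below (illustrative only; it assumes a local image file `face.jpg` and the `mediapipe` and `opencv-python` packages) runs Face Mesh twice and prints the landmark count: without refinement the model returns 468 landmarks, while `refine_landmarks=True` returns 478, the additional points covering the irises.

```python
import cv2
import mediapipe as mp

mp_face_mesh = mp.solutions.face_mesh

# Illustrative input; replace with a real image that contains a face.
image = cv2.imread('face.jpg')

for refine in (False, True):
  with mp_face_mesh.FaceMesh(
      static_image_mode=True,
      max_num_faces=1,
      refine_landmarks=refine) as face_mesh:
    results = face_mesh.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    if not results.multi_face_landmarks:
      continue
    landmarks = results.multi_face_landmarks[0].landmark
    # Expect 468 landmarks without refinement, 478 with it.
    print(f'refine_landmarks={refine}: {len(landmarks)} landmarks')
```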
## Face Geometry Module

The [Face Landmark Model](#face-landmark-model) performs a single-camera face landmark

@@ -145,8 +162,8 @@ be set freely, however for better results it is advised to set them as close to
the *real physical camera parameters* as possible.

 |
:-------------------------------------------------------------------------------: |
*Fig 4. A visualization of multiple key elements in the Metric 3D space.* |

#### Canonical Face Model

@@ -210,7 +227,7 @@ The effect renderer is implemented as a MediaPipe

|  |
| :---------------------------------------------------------------------: |
| *Fig 5. An example of face effects rendered by the Face Geometry Effect Renderer.* |

## Solution APIs

@@ -234,6 +251,12 @@ unrelated, images. Default to `false`.

Maximum number of faces to detect. Default to `1`.

#### refine_landmarks

Whether to further refine the landmark coordinates around the eyes and lips, and
output additional landmarks around the irises by applying the
[Attention Mesh Model](#attention-mesh-model). Default to `false`.

#### min_detection_confidence

Minimum confidence value (`[0.0, 1.0]`) from the face detection model for the

@@ -271,6 +294,7 @@ Supported configuration options:

*   [static_image_mode](#static_image_mode)
*   [max_num_faces](#max_num_faces)
*   [refine_landmarks](#refine_landmarks)
*   [min_detection_confidence](#min_detection_confidence)
*   [min_tracking_confidence](#min_tracking_confidence)

@@ -278,6 +302,7 @@ Supported configuration options:
import cv2
import mediapipe as mp
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_face_mesh = mp.solutions.face_mesh

# For static images:

@@ -286,6 +311,7 @@ drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1)
with mp_face_mesh.FaceMesh(
    static_image_mode=True,
    max_num_faces=1,
    refine_landmarks=True,
    min_detection_confidence=0.5) as face_mesh:
  for idx, file in enumerate(IMAGE_FILES):
    image = cv2.imread(file)

@@ -301,15 +327,32 @@ with mp_face_mesh.FaceMesh(
      mp_drawing.draw_landmarks(
          image=annotated_image,
          landmark_list=face_landmarks,
          connections=mp_face_mesh.FACEMESH_TESSELATION,
          landmark_drawing_spec=None,
          connection_drawing_spec=mp_drawing_styles
          .get_default_face_mesh_tesselation_style())
      mp_drawing.draw_landmarks(
          image=annotated_image,
          landmark_list=face_landmarks,
          connections=mp_face_mesh.FACEMESH_CONTOURS,
          landmark_drawing_spec=None,
          connection_drawing_spec=mp_drawing_styles
          .get_default_face_mesh_contours_style())
      mp_drawing.draw_landmarks(
          image=annotated_image,
          landmark_list=face_landmarks,
          connections=mp_face_mesh.FACEMESH_IRISES,
          landmark_drawing_spec=None,
          connection_drawing_spec=mp_drawing_styles
          .get_default_face_mesh_iris_connections_style())
    cv2.imwrite('/tmp/annotated_image' + str(idx) + '.png', annotated_image)

# For webcam input:
drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1)
cap = cv2.VideoCapture(0)
with mp_face_mesh.FaceMesh(
    max_num_faces=1,
    refine_landmarks=True,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5) as face_mesh:
  while cap.isOpened():

@@ -319,12 +362,10 @@ with mp_face_mesh.FaceMesh(
      # If loading a video, use 'break' instead of 'continue'.
      continue

    # To improve performance, optionally mark the image as not writeable to
    # pass by reference.
    image.flags.writeable = False
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = face_mesh.process(image)

    # Draw the face mesh annotations on the image.

@@ -335,10 +376,26 @@ with mp_face_mesh.FaceMesh(
        mp_drawing.draw_landmarks(
            image=image,
            landmark_list=face_landmarks,
            connections=mp_face_mesh.FACEMESH_TESSELATION,
            landmark_drawing_spec=None,
            connection_drawing_spec=mp_drawing_styles
            .get_default_face_mesh_tesselation_style())
        mp_drawing.draw_landmarks(
            image=image,
            landmark_list=face_landmarks,
            connections=mp_face_mesh.FACEMESH_CONTOURS,
            landmark_drawing_spec=None,
            connection_drawing_spec=mp_drawing_styles
            .get_default_face_mesh_contours_style())
        mp_drawing.draw_landmarks(
            image=image,
            landmark_list=face_landmarks,
            connections=mp_face_mesh.FACEMESH_IRISES,
            landmark_drawing_spec=None,
            connection_drawing_spec=mp_drawing_styles
            .get_default_face_mesh_iris_connections_style())
    # Flip the image horizontally for a selfie-view display.
    cv2.imshow('MediaPipe Face Mesh', cv2.flip(image, 1))
    if cv2.waitKey(5) & 0xFF == 27:
      break
cap.release()

@@ -353,6 +410,7 @@ and the following usage example.
Supported configuration options:

*   [maxNumFaces](#max_num_faces)
*   [refineLandmarks](#refine_landmarks)
*   [minDetectionConfidence](#min_detection_confidence)
*   [minTrackingConfidence](#min_tracking_confidence)

@@ -393,8 +451,10 @@ function onResults(results) {
                     {color: '#C0C0C070', lineWidth: 1});
      drawConnectors(canvasCtx, landmarks, FACEMESH_RIGHT_EYE, {color: '#FF3030'});
      drawConnectors(canvasCtx, landmarks, FACEMESH_RIGHT_EYEBROW, {color: '#FF3030'});
      drawConnectors(canvasCtx, landmarks, FACEMESH_RIGHT_IRIS, {color: '#FF3030'});
      drawConnectors(canvasCtx, landmarks, FACEMESH_LEFT_EYE, {color: '#30FF30'});
      drawConnectors(canvasCtx, landmarks, FACEMESH_LEFT_EYEBROW, {color: '#30FF30'});
      drawConnectors(canvasCtx, landmarks, FACEMESH_LEFT_IRIS, {color: '#30FF30'});
      drawConnectors(canvasCtx, landmarks, FACEMESH_FACE_OVAL, {color: '#E0E0E0'});
      drawConnectors(canvasCtx, landmarks, FACEMESH_LIPS, {color: '#E0E0E0'});
    }

@@ -407,6 +467,7 @@ const faceMesh = new FaceMesh({locateFile: (file) => {
}});
faceMesh.setOptions({
  maxNumFaces: 1,
  refineLandmarks: true,
  minDetectionConfidence: 0.5,
  minTrackingConfidence: 0.5
});
@@ -423,6 +484,202 @@ camera.start();
</script>
```

### Android Solution API

Please first follow general
[instructions](../getting_started/android_solutions.md) to add MediaPipe Gradle
dependencies and try the Android Solution API in the companion
[example Android Studio project](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/solutions/facemesh),
and learn more in the usage example below.

Supported configuration options:

*   [staticImageMode](#static_image_mode)
*   [maxNumFaces](#max_num_faces)
*   [refineLandmarks](#refine_landmarks)
*   runOnGpu: Run the pipeline and the model inference on GPU or CPU.

#### Camera Input

```java
// For camera input and result rendering with OpenGL.
FaceMeshOptions faceMeshOptions =
    FaceMeshOptions.builder()
        .setStaticImageMode(false)
        .setRefineLandmarks(true)
        .setMaxNumFaces(1)
        .setRunOnGpu(true).build();
FaceMesh faceMesh = new FaceMesh(this, faceMeshOptions);
faceMesh.setErrorListener(
    (message, e) -> Log.e(TAG, "MediaPipe Face Mesh error:" + message));

// Initializes a new CameraInput instance and connects it to MediaPipe Face Mesh Solution.
CameraInput cameraInput = new CameraInput(this);
cameraInput.setNewFrameListener(
    textureFrame -> faceMesh.send(textureFrame));

// Initializes a new GlSurfaceView with a ResultGlRenderer<FaceMeshResult> instance
// that provides the interfaces to run user-defined OpenGL rendering code.
// See mediapipe/examples/android/solutions/facemesh/src/main/java/com/google/mediapipe/examples/facemesh/FaceMeshResultGlRenderer.java
// as an example.
SolutionGlSurfaceView<FaceMeshResult> glSurfaceView =
    new SolutionGlSurfaceView<>(
        this, faceMesh.getGlContext(), faceMesh.getGlMajorVersion());
glSurfaceView.setSolutionResultRenderer(new FaceMeshResultGlRenderer());
glSurfaceView.setRenderInputImage(true);

faceMesh.setResultListener(
    faceMeshResult -> {
      NormalizedLandmark noseLandmark =
          faceMeshResult.multiFaceLandmarks().get(0).getLandmarkList().get(1);
      Log.i(
          TAG,
          String.format(
              "MediaPipe Face Mesh nose normalized coordinates (value range: [0, 1]): x=%f, y=%f",
              noseLandmark.getX(), noseLandmark.getY()));
      // Request GL rendering.
      glSurfaceView.setRenderData(faceMeshResult);
      glSurfaceView.requestRender();
    });

// The runnable to start camera after the GLSurfaceView is attached.
glSurfaceView.post(
    () ->
        cameraInput.start(
            this,
            faceMesh.getGlContext(),
            CameraInput.CameraFacing.FRONT,
            glSurfaceView.getWidth(),
            glSurfaceView.getHeight()));
```

#### Image Input

```java
// For reading images from gallery and drawing the output in an ImageView.
FaceMeshOptions faceMeshOptions =
    FaceMeshOptions.builder()
        .setStaticImageMode(true)
        .setRefineLandmarks(true)
        .setMaxNumFaces(1)
        .setRunOnGpu(true).build();
FaceMesh faceMesh = new FaceMesh(this, faceMeshOptions);

// Connects MediaPipe Face Mesh Solution to the user-defined ImageView instance
// that allows users to have the custom drawing of the output landmarks on it.
// See mediapipe/examples/android/solutions/facemesh/src/main/java/com/google/mediapipe/examples/facemesh/FaceMeshResultImageView.java
// as an example.
FaceMeshResultImageView imageView = new FaceMeshResultImageView(this);
faceMesh.setResultListener(
    faceMeshResult -> {
      int width = faceMeshResult.inputBitmap().getWidth();
      int height = faceMeshResult.inputBitmap().getHeight();
      NormalizedLandmark noseLandmark =
          faceMeshResult.multiFaceLandmarks().get(0).getLandmarkList().get(1);
      Log.i(
          TAG,
          String.format(
              "MediaPipe Face Mesh nose coordinates (pixel values): x=%f, y=%f",
              noseLandmark.getX() * width, noseLandmark.getY() * height));
      // Request canvas drawing.
      imageView.setFaceMeshResult(faceMeshResult);
      runOnUiThread(() -> imageView.update());
    });
faceMesh.setErrorListener(
    (message, e) -> Log.e(TAG, "MediaPipe Face Mesh error:" + message));

// ActivityResultLauncher to get an image from the gallery as Bitmap.
ActivityResultLauncher<Intent> imageGetter =
    registerForActivityResult(
        new ActivityResultContracts.StartActivityForResult(),
        result -> {
          Intent resultIntent = result.getData();
          if (resultIntent != null && result.getResultCode() == RESULT_OK) {
            Bitmap bitmap = null;
            try {
              bitmap =
                  MediaStore.Images.Media.getBitmap(
                      this.getContentResolver(), resultIntent.getData());
              // Please also rotate the Bitmap based on its orientation.
            } catch (IOException e) {
              Log.e(TAG, "Bitmap reading error:" + e);
            }
            if (bitmap != null) {
              faceMesh.send(bitmap);
            }
          }
        });
Intent pickImageIntent = new Intent(Intent.ACTION_PICK);
pickImageIntent.setDataAndType(MediaStore.Images.Media.INTERNAL_CONTENT_URI, "image/*");
imageGetter.launch(pickImageIntent);
```

#### Video Input

```java
// For video input and result rendering with OpenGL.
FaceMeshOptions faceMeshOptions =
    FaceMeshOptions.builder()
        .setStaticImageMode(false)
        .setRefineLandmarks(true)
        .setMaxNumFaces(1)
        .setRunOnGpu(true).build();
FaceMesh faceMesh = new FaceMesh(this, faceMeshOptions);
faceMesh.setErrorListener(
    (message, e) -> Log.e(TAG, "MediaPipe Face Mesh error:" + message));

// Initializes a new VideoInput instance and connects it to MediaPipe Face Mesh Solution.
VideoInput videoInput = new VideoInput(this);
videoInput.setNewFrameListener(
    textureFrame -> faceMesh.send(textureFrame));

// Initializes a new GlSurfaceView with a ResultGlRenderer<FaceMeshResult> instance
// that provides the interfaces to run user-defined OpenGL rendering code.
// See mediapipe/examples/android/solutions/facemesh/src/main/java/com/google/mediapipe/examples/facemesh/FaceMeshResultGlRenderer.java
// as an example.
SolutionGlSurfaceView<FaceMeshResult> glSurfaceView =
    new SolutionGlSurfaceView<>(
        this, faceMesh.getGlContext(), faceMesh.getGlMajorVersion());
glSurfaceView.setSolutionResultRenderer(new FaceMeshResultGlRenderer());
glSurfaceView.setRenderInputImage(true);

faceMesh.setResultListener(
    faceMeshResult -> {
      NormalizedLandmark noseLandmark =
          faceMeshResult.multiFaceLandmarks().get(0).getLandmarkList().get(1);
      Log.i(
          TAG,
          String.format(
              "MediaPipe Face Mesh nose normalized coordinates (value range: [0, 1]): x=%f, y=%f",
              noseLandmark.getX(), noseLandmark.getY()));
      // Request GL rendering.
      glSurfaceView.setRenderData(faceMeshResult);
      glSurfaceView.requestRender();
    });

ActivityResultLauncher<Intent> videoGetter =
    registerForActivityResult(
        new ActivityResultContracts.StartActivityForResult(),
        result -> {
          Intent resultIntent = result.getData();
          if (resultIntent != null) {
            if (result.getResultCode() == RESULT_OK) {
              glSurfaceView.post(
                  () ->
                      videoInput.start(
                          this,
                          resultIntent.getData(),
                          faceMesh.getGlContext(),
                          glSurfaceView.getWidth(),
                          glSurfaceView.getHeight()));
            }
          }
        });
Intent pickVideoIntent = new Intent(Intent.ACTION_PICK);
pickVideoIntent.setDataAndType(MediaStore.Video.Media.INTERNAL_CONTENT_URI, "video/*");
videoGetter.launch(pickVideoIntent);
```

## Example Apps

Please first see general instructions for
@@ -91,8 +91,10 @@ To detect initial hand locations, we designed a
mobile real-time uses in a manner similar to the face detection model in
[MediaPipe Face Mesh](./face_mesh.md). Detecting hands is a decidedly complex
task: our
[lite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/palm_detection/palm_detection_lite.tflite)
and
[full model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/palm_detection/palm_detection_full.tflite)
have to work across a variety of hand sizes with a large scale span (~20x)
relative to the image frame and be able to detect occluded and self-occluded
hands. Whereas faces have high contrast patterns, e.g., in the eye and mouth
region, the lack of such features in hands makes it comparatively difficult to

@@ -120,7 +122,7 @@ just 86.22%.
### Hand Landmark Model

After the palm detection over the whole image our subsequent hand landmark
[model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/hand_landmark/hand_landmark_full.tflite)
performs precise keypoint localization of 21 3D hand-knuckle coordinates inside
the detected hand regions via regression, that is direct coordinate prediction.
The model learns a consistent internal hand pose representation and is robust

@@ -163,6 +165,11 @@ unrelated, images. Default to `false`.

Maximum number of hands to detect. Default to `2`.

#### model_complexity

Complexity of the hand landmark model: `0` or `1`. Landmark accuracy as well as
inference latency generally go up with the model complexity. Default to `1`.
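The latency/accuracy trade-off is easiest to judge on your own device. The sketch below is a rough, illustrative benchmark (not a definitive measurement); it assumes a webcam at index 0 and simply times `Hands.process()` for both settings. Absolute numbers vary widely across machines, and the landmark model only runs when a hand is actually detected, so keep a hand in view.

```python
import time
import cv2
import mediapipe as mp

mp_hands = mp.solutions.hands

def average_latency(model_complexity, num_frames=100):
  """Returns the mean per-frame processing time in milliseconds."""
  cap = cv2.VideoCapture(0)
  with mp_hands.Hands(model_complexity=model_complexity) as hands:
    total = 0.0
    processed = 0
    while processed < num_frames and cap.isOpened():
      success, image = cap.read()
      if not success:
        continue
      image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
      start = time.perf_counter()
      hands.process(image)
      total += time.perf_counter() - start
      processed += 1
  cap.release()
  return 1000 * total / max(processed, 1)

for complexity in (0, 1):
  print(f'model_complexity={complexity}: ~{average_latency(complexity):.1f} ms/frame')
```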
#### min_detection_confidence

Minimum confidence value (`[0.0, 1.0]`) from the hand detection model for the

@@ -190,6 +197,17 @@ of 21 hand landmarks and each landmark is composed of `x`, `y` and `z`. `x` and
and the smaller the value the closer the landmark is to the camera. The
magnitude of `z` uses roughly the same scale as `x`.

#### multi_hand_world_landmarks

Collection of detected/tracked hands, where each hand is represented as a list
of 21 hand landmarks in world coordinates. Each landmark consists of the
following:

*   `x`, `y` and `z`: Real-world 3D coordinates in meters with the origin at the
    hand's approximate geometric center.
*   `visibility`: Identical to that defined in the corresponding
    [multi_hand_landmarks](#multi_hand_landmarks).
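To make the difference between the two landmark collections concrete, here is a minimal sketch (assuming a local image file `hand.jpg`, a hypothetical name used only for illustration) that prints the wrist position from both `multi_hand_landmarks` (normalized image coordinates) and `multi_hand_world_landmarks` (metric coordinates).

```python
import cv2
import mediapipe as mp

mp_hands = mp.solutions.hands

# Illustrative input; replace with a real image that contains a hand.
image = cv2.imread('hand.jpg')

with mp_hands.Hands(static_image_mode=True, max_num_hands=1) as hands:
  results = hands.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
  if results.multi_hand_landmarks:
    # Image coordinates: x and y normalized to [0.0, 1.0], z relative to the wrist.
    wrist = results.multi_hand_landmarks[0].landmark[mp_hands.HandLandmark.WRIST]
    print(f'normalized wrist: x={wrist.x:.3f}, y={wrist.y:.3f}, z={wrist.z:.3f}')
  if results.multi_hand_world_landmarks:
    # World coordinates: meters, origin at the hand's approximate geometric center.
    wrist_world = results.multi_hand_world_landmarks[0].landmark[mp_hands.HandLandmark.WRIST]
    print(f'world wrist (m): x={wrist_world.x:.3f}, y={wrist_world.y:.3f}, z={wrist_world.z:.3f}')
```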
#### multi_handedness

Collection of handedness of the detected/tracked hands (i.e. is it a left or

@@ -212,6 +230,7 @@ Supported configuration options:

*   [static_image_mode](#static_image_mode)
*   [max_num_hands](#max_num_hands)
*   [model_complexity](#model_complexity)
*   [min_detection_confidence](#min_detection_confidence)
*   [min_tracking_confidence](#min_tracking_confidence)

@@ -219,6 +238,7 @@ Supported configuration options:
import cv2
import mediapipe as mp
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_hands = mp.solutions.hands

# For static images:

@@ -248,13 +268,24 @@ with mp_hands.Hands(
          f'{hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].y * image_height})'
      )
      mp_drawing.draw_landmarks(
          annotated_image,
          hand_landmarks,
          mp_hands.HAND_CONNECTIONS,
          mp_drawing_styles.get_default_hand_landmarks_style(),
          mp_drawing_styles.get_default_hand_connections_style())
    cv2.imwrite(
        '/tmp/annotated_image' + str(idx) + '.png', cv2.flip(annotated_image, 1))
    # Draw hand world landmarks.
    if not results.multi_hand_world_landmarks:
      continue
    for hand_world_landmarks in results.multi_hand_world_landmarks:
      mp_drawing.plot_landmarks(
          hand_world_landmarks, mp_hands.HAND_CONNECTIONS, azimuth=5)

# For webcam input:
cap = cv2.VideoCapture(0)
with mp_hands.Hands(
    model_complexity=0,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5) as hands:
  while cap.isOpened():

@@ -264,12 +295,10 @@ with mp_hands.Hands(
      # If loading a video, use 'break' instead of 'continue'.
      continue

    # To improve performance, optionally mark the image as not writeable to
    # pass by reference.
    image.flags.writeable = False
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = hands.process(image)

    # Draw the hand annotations on the image.

@@ -278,8 +307,13 @@ with mp_hands.Hands(
    if results.multi_hand_landmarks:
      for hand_landmarks in results.multi_hand_landmarks:
        mp_drawing.draw_landmarks(
            image,
            hand_landmarks,
            mp_hands.HAND_CONNECTIONS,
            mp_drawing_styles.get_default_hand_landmarks_style(),
            mp_drawing_styles.get_default_hand_connections_style())
    # Flip the image horizontally for a selfie-view display.
    cv2.imshow('MediaPipe Hands', cv2.flip(image, 1))
    if cv2.waitKey(5) & 0xFF == 27:
      break
cap.release()

@@ -294,6 +328,7 @@ and a [fun application], and the following usage example.
Supported configuration options:

*   [maxNumHands](#max_num_hands)
*   [modelComplexity](#model_complexity)
*   [minDetectionConfidence](#min_detection_confidence)
*   [minTrackingConfidence](#min_tracking_confidence)

@@ -343,6 +378,7 @@ const hands = new Hands({locateFile: (file) => {
}});
hands.setOptions({
  maxNumHands: 2,
  modelComplexity: 1,
  minDetectionConfidence: 0.5,
  minTrackingConfidence: 0.5
});
@@ -359,6 +395,207 @@ camera.start();
</script>
```

### Android Solution API

Please first follow general
[instructions](../getting_started/android_solutions.md) to add MediaPipe Gradle
dependencies and try the Android Solution API in the companion
[example Android Studio project](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/solutions/hands),
and learn more in the usage example below.

Supported configuration options:

*   [staticImageMode](#static_image_mode)
*   [maxNumHands](#max_num_hands)
*   runOnGpu: Run the pipeline and the model inference on GPU or CPU.

#### Camera Input

```java
// For camera input and result rendering with OpenGL.
HandsOptions handsOptions =
    HandsOptions.builder()
        .setStaticImageMode(false)
        .setMaxNumHands(2)
        .setRunOnGpu(true).build();
Hands hands = new Hands(this, handsOptions);
hands.setErrorListener(
    (message, e) -> Log.e(TAG, "MediaPipe Hands error:" + message));

// Initializes a new CameraInput instance and connects it to MediaPipe Hands Solution.
CameraInput cameraInput = new CameraInput(this);
cameraInput.setNewFrameListener(
    textureFrame -> hands.send(textureFrame));

// Initializes a new GlSurfaceView with a ResultGlRenderer<HandsResult> instance
// that provides the interfaces to run user-defined OpenGL rendering code.
// See mediapipe/examples/android/solutions/hands/src/main/java/com/google/mediapipe/examples/hands/HandsResultGlRenderer.java
// as an example.
SolutionGlSurfaceView<HandsResult> glSurfaceView =
    new SolutionGlSurfaceView<>(
        this, hands.getGlContext(), hands.getGlMajorVersion());
glSurfaceView.setSolutionResultRenderer(new HandsResultGlRenderer());
glSurfaceView.setRenderInputImage(true);

hands.setResultListener(
    handsResult -> {
      if (handsResult.multiHandLandmarks().isEmpty()) {
        return;
      }
      NormalizedLandmark wristLandmark =
          handsResult.multiHandLandmarks().get(0).getLandmarkList().get(HandLandmark.WRIST);
      Log.i(
          TAG,
          String.format(
              "MediaPipe Hand wrist normalized coordinates (value range: [0, 1]): x=%f, y=%f",
              wristLandmark.getX(), wristLandmark.getY()));
      // Request GL rendering.
      glSurfaceView.setRenderData(handsResult);
      glSurfaceView.requestRender();
    });

// The runnable to start camera after the GLSurfaceView is attached.
glSurfaceView.post(
    () ->
        cameraInput.start(
            this,
            hands.getGlContext(),
            CameraInput.CameraFacing.FRONT,
            glSurfaceView.getWidth(),
            glSurfaceView.getHeight()));
```

#### Image Input

```java
// For reading images from gallery and drawing the output in an ImageView.
HandsOptions handsOptions =
    HandsOptions.builder()
        .setStaticImageMode(true)
        .setMaxNumHands(2)
        .setRunOnGpu(true).build();
Hands hands = new Hands(this, handsOptions);

// Connects MediaPipe Hands Solution to the user-defined ImageView instance that
// allows users to have the custom drawing of the output landmarks on it.
// See mediapipe/examples/android/solutions/hands/src/main/java/com/google/mediapipe/examples/hands/HandsResultImageView.java
// as an example.
HandsResultImageView imageView = new HandsResultImageView(this);
hands.setResultListener(
    handsResult -> {
      if (handsResult.multiHandLandmarks().isEmpty()) {
        return;
      }
      int width = handsResult.inputBitmap().getWidth();
      int height = handsResult.inputBitmap().getHeight();
      NormalizedLandmark wristLandmark =
          handsResult.multiHandLandmarks().get(0).getLandmarkList().get(HandLandmark.WRIST);
      Log.i(
          TAG,
          String.format(
              "MediaPipe Hand wrist coordinates (pixel values): x=%f, y=%f",
              wristLandmark.getX() * width, wristLandmark.getY() * height));
      // Request canvas drawing.
      imageView.setHandsResult(handsResult);
      runOnUiThread(() -> imageView.update());
    });
hands.setErrorListener(
    (message, e) -> Log.e(TAG, "MediaPipe Hands error:" + message));

// ActivityResultLauncher to get an image from the gallery as Bitmap.
ActivityResultLauncher<Intent> imageGetter =
    registerForActivityResult(
        new ActivityResultContracts.StartActivityForResult(),
        result -> {
          Intent resultIntent = result.getData();
          if (resultIntent != null && result.getResultCode() == RESULT_OK) {
            Bitmap bitmap = null;
            try {
              bitmap =
                  MediaStore.Images.Media.getBitmap(
                      this.getContentResolver(), resultIntent.getData());
              // Please also rotate the Bitmap based on its orientation.
            } catch (IOException e) {
              Log.e(TAG, "Bitmap reading error:" + e);
            }
            if (bitmap != null) {
              hands.send(bitmap);
            }
          }
        });
Intent pickImageIntent = new Intent(Intent.ACTION_PICK);
pickImageIntent.setDataAndType(MediaStore.Images.Media.INTERNAL_CONTENT_URI, "image/*");
imageGetter.launch(pickImageIntent);
```

#### Video Input

```java
// For video input and result rendering with OpenGL.
HandsOptions handsOptions =
    HandsOptions.builder()
        .setStaticImageMode(false)
        .setMaxNumHands(2)
        .setRunOnGpu(true).build();
Hands hands = new Hands(this, handsOptions);
hands.setErrorListener(
    (message, e) -> Log.e(TAG, "MediaPipe Hands error:" + message));

// Initializes a new VideoInput instance and connects it to MediaPipe Hands Solution.
VideoInput videoInput = new VideoInput(this);
videoInput.setNewFrameListener(
    textureFrame -> hands.send(textureFrame));

// Initializes a new GlSurfaceView with a ResultGlRenderer<HandsResult> instance
// that provides the interfaces to run user-defined OpenGL rendering code.
// See mediapipe/examples/android/solutions/hands/src/main/java/com/google/mediapipe/examples/hands/HandsResultGlRenderer.java
// as an example.
SolutionGlSurfaceView<HandsResult> glSurfaceView =
    new SolutionGlSurfaceView<>(
        this, hands.getGlContext(), hands.getGlMajorVersion());
glSurfaceView.setSolutionResultRenderer(new HandsResultGlRenderer());
glSurfaceView.setRenderInputImage(true);

hands.setResultListener(
    handsResult -> {
      if (handsResult.multiHandLandmarks().isEmpty()) {
        return;
      }
      NormalizedLandmark wristLandmark =
          handsResult.multiHandLandmarks().get(0).getLandmarkList().get(HandLandmark.WRIST);
      Log.i(
          TAG,
          String.format(
              "MediaPipe Hand wrist normalized coordinates (value range: [0, 1]): x=%f, y=%f",
              wristLandmark.getX(), wristLandmark.getY()));
      // Request GL rendering.
      glSurfaceView.setRenderData(handsResult);
      glSurfaceView.requestRender();
    });

ActivityResultLauncher<Intent> videoGetter =
    registerForActivityResult(
        new ActivityResultContracts.StartActivityForResult(),
        result -> {
          Intent resultIntent = result.getData();
          if (resultIntent != null) {
            if (result.getResultCode() == RESULT_OK) {
              glSurfaceView.post(
                  () ->
                      videoInput.start(
                          this,
                          resultIntent.getData(),
                          hands.getGlContext(),
                          glSurfaceView.getWidth(),
                          glSurfaceView.getHeight()));
            }
          }
        });
Intent pickVideoIntent = new Intent(Intent.ACTION_PICK);
pickVideoIntent.setDataAndType(MediaStore.Video.Media.INTERNAL_CONTENT_URI, "video/*");
videoGetter.launch(pickVideoIntent);
```

## Example Apps

Please first see general instructions for
@@ -147,6 +147,23 @@ If set to `true`, the solution filters pose landmarks across different input
images to reduce jitter, but ignored if [static_image_mode](#static_image_mode)
is also set to `true`. Default to `true`.

#### enable_segmentation

If set to `true`, in addition to the pose, face and hand landmarks the solution
also generates the segmentation mask. Default to `false`.

#### smooth_segmentation

If set to `true`, the solution filters segmentation masks across different input
images to reduce jitter. Ignored if [enable_segmentation](#enable_segmentation)
is `false` or [static_image_mode](#static_image_mode) is `true`. Default to
`true`.

#### refine_face_landmarks

Whether to further refine the landmark coordinates around the eyes and lips, and
output additional landmarks around the irises. Default to `false`.

#### min_detection_confidence

Minimum confidence value (`[0.0, 1.0]`) from the person-detection model for the

@@ -207,6 +224,15 @@ the camera. The magnitude of `z` uses roughly the same scale as `x`.
A list of 21 hand landmarks on the right hand, in the same representation as
[left_hand_landmarks](#left_hand_landmarks).

#### segmentation_mask

The output segmentation mask, predicted only when
[enable_segmentation](#enable_segmentation) is set to `true`. The mask has the
same width and height as the input image, and contains values in `[0.0, 1.0]`
where `1.0` and `0.0` indicate high certainty of a "human" and "background"
pixel respectively. Please refer to the platform-specific usage examples below
for usage details.
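As a minimal sketch of how the mask can be consumed (assuming a local image file `person.jpg`, a hypothetical name; the 0.1 threshold mirrors the static-image example later on this page), the snippet below replaces everything outside the predicted person region with a flat background color.

```python
import cv2
import mediapipe as mp
import numpy as np

mp_holistic = mp.solutions.holistic

BG_COLOR = (192, 192, 192)  # gray background for the composited output
# Illustrative input; replace with a real image that contains a person.
image = cv2.imread('person.jpg')

with mp_holistic.Holistic(
    static_image_mode=True, enable_segmentation=True) as holistic:
  results = holistic.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
  if results.segmentation_mask is not None:
    # Treat mask values above 0.1 as "person"; tune the threshold as needed.
    condition = np.stack((results.segmentation_mask,) * 3, axis=-1) > 0.1
    bg_image = np.full(image.shape, BG_COLOR, dtype=np.uint8)
    output = np.where(condition, image, bg_image)
    cv2.imwrite('/tmp/segmented.png', output)
```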
### Python Solution API
|
### Python Solution API
|
||||||
|
|
||||||
Please first follow general [instructions](../getting_started/python.md) to
|
Please first follow general [instructions](../getting_started/python.md) to
|
||||||
|
@ -218,6 +244,9 @@ Supported configuration options:
|
||||||
* [static_image_mode](#static_image_mode)
|
* [static_image_mode](#static_image_mode)
|
||||||
* [model_complexity](#model_complexity)
|
* [model_complexity](#model_complexity)
|
||||||
* [smooth_landmarks](#smooth_landmarks)
|
* [smooth_landmarks](#smooth_landmarks)
|
||||||
|
* [enable_segmentation](#enable_segmentation)
|
||||||
|
* [smooth_segmentation](#smooth_segmentation)
|
||||||
|
* [refine_face_landmarks](#refine_face_landmarks)
|
||||||
* [min_detection_confidence](#min_detection_confidence)
|
* [min_detection_confidence](#min_detection_confidence)
|
||||||
* [min_tracking_confidence](#min_tracking_confidence)
|
* [min_tracking_confidence](#min_tracking_confidence)
|
||||||
|
|
||||||
|
@ -225,13 +254,16 @@ Supported configuration options:
|
||||||
import cv2
|
import cv2
|
||||||
import mediapipe as mp
|
import mediapipe as mp
|
||||||
mp_drawing = mp.solutions.drawing_utils
|
mp_drawing = mp.solutions.drawing_utils
|
||||||
|
mp_drawing_styles = mp.solutions.drawing_styles
|
||||||
mp_holistic = mp.solutions.holistic
|
mp_holistic = mp.solutions.holistic
|
||||||
|
|
||||||
# For static images:
|
# For static images:
|
||||||
IMAGE_FILES = []
|
IMAGE_FILES = []
|
||||||
with mp_holistic.Holistic(
|
with mp_holistic.Holistic(
|
||||||
static_image_mode=True,
|
static_image_mode=True,
|
||||||
model_complexity=2) as holistic:
|
model_complexity=2,
|
||||||
|
enable_segmentation=True,
|
||||||
|
refine_face_landmarks=True) as holistic:
|
||||||
for idx, file in enumerate(IMAGE_FILES):
|
for idx, file in enumerate(IMAGE_FILES):
|
||||||
image = cv2.imread(file)
|
image = cv2.imread(file)
|
||||||
image_height, image_width, _ = image.shape
|
image_height, image_width, _ = image.shape
|
||||||
|
@ -244,16 +276,29 @@ with mp_holistic.Holistic(
|
||||||
f'{results.pose_landmarks.landmark[mp_holistic.PoseLandmark.NOSE].x * image_width}, '
|
f'{results.pose_landmarks.landmark[mp_holistic.PoseLandmark.NOSE].x * image_width}, '
|
||||||
f'{results.pose_landmarks.landmark[mp_holistic.PoseLandmark.NOSE].y * image_height})'
|
f'{results.pose_landmarks.landmark[mp_holistic.PoseLandmark.NOSE].y * image_height})'
|
||||||
)
|
)
|
||||||
# Draw pose, left and right hands, and face landmarks on the image.
|
|
||||||
annotated_image = image.copy()
|
annotated_image = image.copy()
|
||||||
|
# Draw segmentation on the image.
|
||||||
|
# To improve segmentation around boundaries, consider applying a joint
|
||||||
|
# bilateral filter to "results.segmentation_mask" with "image".
|
||||||
|
condition = np.stack((results.segmentation_mask,) * 3, axis=-1) > 0.1
|
||||||
|
bg_image = np.zeros(image.shape, dtype=np.uint8)
|
||||||
|
bg_image[:] = BG_COLOR
|
||||||
|
annotated_image = np.where(condition, annotated_image, bg_image)
|
||||||
|
# Draw pose, left and right hands, and face landmarks on the image.
|
||||||
mp_drawing.draw_landmarks(
|
mp_drawing.draw_landmarks(
|
||||||
annotated_image, results.face_landmarks, mp_holistic.FACE_CONNECTIONS)
|
annotated_image,
|
||||||
|
results.face_landmarks,
|
||||||
|
mp_holistic.FACEMESH_TESSELATION,
|
||||||
|
landmark_drawing_spec=None,
|
||||||
|
connection_drawing_spec=mp_drawing_styles
|
||||||
|
.get_default_face_mesh_tesselation_style())
|
||||||
mp_drawing.draw_landmarks(
|
mp_drawing.draw_landmarks(
|
||||||
annotated_image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS)
|
annotated_image,
|
||||||
mp_drawing.draw_landmarks(
|
results.pose_landmarks,
|
||||||
annotated_image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS)
|
mp_holistic.POSE_CONNECTIONS,
|
||||||
mp_drawing.draw_landmarks(
|
landmark_drawing_spec=mp_drawing_styles.
|
||||||
annotated_image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS)
|
get_default_pose_landmarks_style())
|
||||||
cv2.imwrite('/tmp/annotated_image' + str(idx) + '.png', annotated_image)
|
cv2.imwrite('/tmp/annotated_image' + str(idx) + '.png', annotated_image)
|
||||||
# Plot pose world landmarks.
|
# Plot pose world landmarks.
|
||||||
mp_drawing.plot_landmarks(
|
mp_drawing.plot_landmarks(
|
||||||
|
@ -271,26 +316,30 @@ with mp_holistic.Holistic(
|
||||||
# If loading a video, use 'break' instead of 'continue'.
|
# If loading a video, use 'break' instead of 'continue'.
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Flip the image horizontally for a later selfie-view display, and convert
|
|
||||||
# the BGR image to RGB.
|
|
||||||
image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)
|
|
||||||
# To improve performance, optionally mark the image as not writeable to
|
# To improve performance, optionally mark the image as not writeable to
|
||||||
# pass by reference.
|
# pass by reference.
|
||||||
image.flags.writeable = False
|
image.flags.writeable = False
|
||||||
|
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
|
||||||
results = holistic.process(image)
|
results = holistic.process(image)
|
||||||
|
|
||||||
# Draw landmark annotation on the image.
|
# Draw landmark annotation on the image.
|
||||||
image.flags.writeable = True
|
image.flags.writeable = True
|
||||||
image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
|
image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
|
||||||
mp_drawing.draw_landmarks(
|
mp_drawing.draw_landmarks(
|
||||||
image, results.face_landmarks, mp_holistic.FACE_CONNECTIONS)
|
image,
|
||||||
|
results.face_landmarks,
|
||||||
|
mp_holistic.FACEMESH_CONTOURS,
|
||||||
|
landmark_drawing_spec=None,
|
||||||
|
connection_drawing_spec=mp_drawing_styles
|
||||||
|
.get_default_face_mesh_contours_style())
|
||||||
mp_drawing.draw_landmarks(
|
mp_drawing.draw_landmarks(
|
||||||
image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS)
|
image,
|
||||||
mp_drawing.draw_landmarks(
|
results.pose_landmarks,
|
||||||
image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS)
|
mp_holistic.POSE_CONNECTIONS,
|
||||||
mp_drawing.draw_landmarks(
|
landmark_drawing_spec=mp_drawing_styles
|
||||||
image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS)
|
.get_default_pose_landmarks_style())
|
||||||
cv2.imshow('MediaPipe Holistic', image)
|
# Flip the image horizontally for a selfie-view display.
|
||||||
|
cv2.imshow('MediaPipe Holistic', cv2.flip(image, 1))
|
||||||
if cv2.waitKey(5) & 0xFF == 27:
|
if cv2.waitKey(5) & 0xFF == 27:
|
||||||
break
|
break
|
||||||
cap.release()
|
cap.release()
|
||||||
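As the inline comment in the static-image example above suggests, the raw mask can be sharpened around boundaries with a joint bilateral filter before compositing. A rough sketch of that refinement, assuming `opencv-contrib-python` is installed (for the `cv2.ximgproc` module) and reusing the `image`/`results` names from the example; the helper name and filter parameters are ours:

```python
import cv2
import numpy as np

def refine_mask(image, mask, d=15, sigma_color=0.1, sigma_space=15):
  """Sketch: smooth a [0.0, 1.0] segmentation mask using the RGB frame as guide."""
  guide = image.astype(np.float32) / 255.0  # guide image as float32 in [0, 1]
  src = mask.astype(np.float32)             # raw segmentation mask
  # Joint (cross) bilateral filter from opencv-contrib's ximgproc module.
  refined = cv2.ximgproc.jointBilateralFilter(guide, src, d, sigma_color, sigma_space)
  return np.clip(refined, 0.0, 1.0)

# e.g. condition = np.stack((refine_mask(image, results.segmentation_mask),) * 3, axis=-1) > 0.1
```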
@@ -306,6 +355,9 @@ Supported configuration options:

* [modelComplexity](#model_complexity)
* [smoothLandmarks](#smooth_landmarks)
* [enableSegmentation](#enable_segmentation)
* [smoothSegmentation](#smooth_segmentation)
* [refineFaceLandmarks](#refineFaceLandmarks)
* [minDetectionConfidence](#min_detection_confidence)
* [minTrackingConfidence](#min_tracking_confidence)

@@ -338,8 +390,20 @@ const canvasCtx = canvasElement.getContext('2d');
function onResults(results) {
  canvasCtx.save();
  canvasCtx.clearRect(0, 0, canvasElement.width, canvasElement.height);
  canvasCtx.drawImage(results.segmentationMask, 0, 0,
                      canvasElement.width, canvasElement.height);

  // Only overwrite existing pixels.
  canvasCtx.globalCompositeOperation = 'source-in';
  canvasCtx.fillStyle = '#00FF00';
  canvasCtx.fillRect(0, 0, canvasElement.width, canvasElement.height);

  // Only overwrite missing pixels.
  canvasCtx.globalCompositeOperation = 'destination-atop';
  canvasCtx.drawImage(
      results.image, 0, 0, canvasElement.width, canvasElement.height);

  canvasCtx.globalCompositeOperation = 'source-over';
  drawConnectors(canvasCtx, results.poseLandmarks, POSE_CONNECTIONS,
                 {color: '#00FF00', lineWidth: 4});
  drawLandmarks(canvasCtx, results.poseLandmarks,

@@ -363,6 +427,9 @@ const holistic = new Holistic({locateFile: (file) => {
holistic.setOptions({
  modelComplexity: 1,
  smoothLandmarks: true,
  enableSegmentation: true,
  smoothSegmentation: true,
  refineFaceLandmarks: true,
  minDetectionConfidence: 0.5,
  minTrackingConfidence: 0.5
});
@@ -41,7 +41,10 @@ one over the other.

* Face landmark model:
  [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_landmark/face_landmark.tflite),
  [TF.js model](https://tfhub.dev/mediapipe/facemesh/1)
* Face landmark model w/ attention (aka Attention Mesh):
  [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_landmark/face_landmark_with_attention.tflite)
* [Model card](https://mediapipe.page.link/facemesh-mc),
  [Model card (w/ attention)](https://mediapipe.page.link/attentionmesh-mc)

### [Iris](https://google.github.io/mediapipe/solutions/iris)

@@ -52,13 +55,14 @@ one over the other.
### [Hands](https://google.github.io/mediapipe/solutions/hands)

* Palm detection model:
  [TFLite model (lite)](https://github.com/google/mediapipe/tree/master/mediapipe/modules/palm_detection/palm_detection_lite.tflite),
  [TFLite model (full)](https://github.com/google/mediapipe/tree/master/mediapipe/modules/palm_detection/palm_detection_full.tflite),
  [TF.js model](https://tfhub.dev/mediapipe/handdetector/1)
* Hand landmark model:
  [TFLite model (lite)](https://github.com/google/mediapipe/tree/master/mediapipe/modules/hand_landmark/hand_landmark_lite.tflite),
  [TFLite model (full)](https://github.com/google/mediapipe/tree/master/mediapipe/modules/hand_landmark/hand_landmark_full.tflite),
  [TF.js model](https://tfhub.dev/mediapipe/handskeleton/1)
* [Model card](https://mediapipe.page.link/handmc)

### [Pose](https://google.github.io/mediapipe/solutions/pose)
@@ -224,29 +224,33 @@ where object detection simply runs on every image. Default to `0.99`.

#### model_name

Name of the model to use for predicting 3D bounding box landmarks. Currently
supports `{'Shoe', 'Chair', 'Cup', 'Camera'}`. Default to `Shoe`.
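For instance, switching the solution to chair detection is only a constructor argument away; a minimal sketch of the Python Solution API (the option values here are illustrative):

```python
import mediapipe as mp

mp_objectron = mp.solutions.objectron
# Track up to 5 chairs instead of the default shoe model.
with mp_objectron.Objectron(model_name='Chair', max_num_objects=5) as objectron:
  pass  # feed RGB frames to objectron.process(...) as in the examples below
```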
#### focal_length

By default, the camera focal length is defined in [NDC space](#ndc-space), i.e.,
`(fx, fy)`. Default to `(1.0, 1.0)`. To specify focal length in
[pixel space](#pixel-space) instead, i.e., `(fx_pixel, fy_pixel)`, users should
provide [`image_size`](#image_size) = `(image_width, image_height)` to enable
conversions inside the API. For further details about NDC and pixel space,
please see [Coordinate Systems](#coordinate-systems).

#### principal_point

By default, the camera principal point is defined in [NDC space](#ndc-space), i.e.,
`(px, py)`. Default to `(0.0, 0.0)`. To specify principal point in
[pixel space](#pixel-space), i.e., `(px_pixel, py_pixel)`, users should provide
[`image_size`](#image_size) = `(image_width, image_height)` to enable
conversions inside the API. For further details about NDC and pixel space,
please see [Coordinate Systems](#coordinate-systems).

#### image_size

**Specify only when [`focal_length`](#focal_length) and
[`principal_point`](#principal_point) are specified in pixel space.**

Size of the input image, i.e., `(image_width, image_height)`.
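If the calibration is only available in pixels, the conversion the API performs internally can also be done by hand. A small sketch following the pixel-to-NDC relations described in the Coordinate Systems section (the helper name and example numbers are ours):

```python
def intrinsics_pixel_to_ndc(fx_pixel, fy_pixel, px_pixel, py_pixel,
                            image_width, image_height):
  """Convert pixel-space camera intrinsics to NDC-space (fx, fy) and (px, py)."""
  fx = fx_pixel * 2.0 / image_width
  fy = fy_pixel * 2.0 / image_height
  px = -px_pixel * 2.0 / image_width + 1.0
  py = -py_pixel * 2.0 / image_height + 1.0
  return (fx, fy), (px, py)

# e.g. focal_length, principal_point = intrinsics_pixel_to_ndc(600, 600, 320, 240, 640, 480)
```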
### Output

@@ -334,11 +338,10 @@ with mp_objectron.Objectron(static_image_mode=False,
      # If loading a video, use 'break' instead of 'continue'.
      continue

    # To improve performance, optionally mark the image as not writeable to
    # pass by reference.
    image.flags.writeable = False
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = objectron.process(image)

    # Draw the box landmarks on the image.

@@ -350,12 +353,96 @@ with mp_objectron.Objectron(static_image_mode=False,
          image, detected_object.landmarks_2d, mp_objectron.BOX_CONNECTIONS)
      mp_drawing.draw_axis(image, detected_object.rotation,
                           detected_object.translation)
    # Flip the image horizontally for a selfie-view display.
    cv2.imshow('MediaPipe Objectron', cv2.flip(image, 1))
    if cv2.waitKey(5) & 0xFF == 27:
      break
cap.release()
```
## JavaScript Solution API

Please first see general [introduction](../getting_started/javascript.md) on
MediaPipe in JavaScript, then learn more in the companion [web demo](#resources)
and the following usage example.

Supported configuration options:

* [staticImageMode](#static_image_mode)
* [maxNumObjects](#max_num_objects)
* [minDetectionConfidence](#min_detection_confidence)
* [minTrackingConfidence](#min_tracking_confidence)
* [modelName](#model_name)
* [focalLength](#focal_length)
* [principalPoint](#principal_point)
* [imageSize](#image_size)

```html
<!DOCTYPE html>
<html>
<head>
  <meta charset="utf-8">
  <script src="https://cdn.jsdelivr.net/npm/@mediapipe/camera_utils/camera_utils.js" crossorigin="anonymous"></script>
  <script src="https://cdn.jsdelivr.net/npm/@mediapipe/control_utils/control_utils.js" crossorigin="anonymous"></script>
  <script src="https://cdn.jsdelivr.net/npm/@mediapipe/drawing_utils/control_utils_3d.js" crossorigin="anonymous"></script>
  <script src="https://cdn.jsdelivr.net/npm/@mediapipe/drawing_utils/drawing_utils.js" crossorigin="anonymous"></script>
  <script src="https://cdn.jsdelivr.net/npm/@mediapipe/objectron/objectron.js" crossorigin="anonymous"></script>
</head>

<body>
  <div class="container">
    <video class="input_video"></video>
    <canvas class="output_canvas" width="1280px" height="720px"></canvas>
  </div>
</body>
</html>
```

```javascript
<script type="module">
const videoElement = document.getElementsByClassName('input_video')[0];
const canvasElement = document.getElementsByClassName('output_canvas')[0];
const canvasCtx = canvasElement.getContext('2d');

function onResults(results) {
  canvasCtx.save();
  canvasCtx.drawImage(
      results.image, 0, 0, canvasElement.width, canvasElement.height);
  if (!!results.objectDetections) {
    for (const detectedObject of results.objectDetections) {
      // Reformat keypoint information as landmarks, for easy drawing.
      const landmarks: mpObjectron.Point2D[] =
          detectedObject.keypoints.map(x => x.point2d);
      // Draw bounding box.
      drawingUtils.drawConnectors(canvasCtx, landmarks,
          mpObjectron.BOX_CONNECTIONS, {color: '#FF0000'});
      // Draw centroid.
      drawingUtils.drawLandmarks(canvasCtx, [landmarks[0]], {color: '#FFFFFF'});
    }
  }
  canvasCtx.restore();
}

const objectron = new Objectron({locateFile: (file) => {
  return `https://cdn.jsdelivr.net/npm/@mediapipe/objectron/${file}`;
}});
objectron.setOptions({
  modelName: 'Chair',
  maxNumObjects: 3,
});
objectron.onResults(onResults);

const camera = new Camera(videoElement, {
  onFrame: async () => {
    await objectron.send({image: videoElement});
  },
  width: 1280,
  height: 720
});
camera.start();
</script>
```
## Example Apps

Please first see general instructions for

@@ -442,7 +529,7 @@ Example app bounding boxes are rendered with [GlAnimationOverlayCalculator](http
> ```
> and then run
>
> ```bash
> bazel run -c opt mediapipe/graphs/object_detection_3d/obj_parser:ObjParser -- input_dir=[INTERMEDIATE_OUTPUT_DIR] output_dir=[OUTPUT_DIR]
> ```
> INPUT_DIR should be the folder with initial asset .obj files to be processed,

@@ -561,11 +648,15 @@ py = -py_pixel * 2.0 / image_height + 1.0
  [Announcing the Objectron Dataset](https://ai.googleblog.com/2020/11/announcing-objectron-dataset.html)
* Google AI Blog:
  [Real-Time 3D Object Detection on Mobile Devices with MediaPipe](https://ai.googleblog.com/2020/03/real-time-3d-object-detection-on-mobile.html)
* Paper: [Objectron: A Large Scale Dataset of Object-Centric Videos in the
  Wild with Pose Annotations](https://arxiv.org/abs/2012.09988), to appear in
  CVPR 2021
* Paper: [MobilePose: Real-Time Pose Estimation for Unseen Objects with Weak
  Shape Supervision](https://arxiv.org/abs/2003.03522)
* Paper:
  [Instant 3D Object Tracking with Applications in Augmented Reality](https://drive.google.com/open?id=1O_zHmlgXIzAdKljp20U_JUkEHOGG52R8)
  ([presentation](https://www.youtube.com/watch?v=9ndF1AIo7h0)), Fourth
  Workshop on Computer Vision for AR/VR, CVPR 2020
* [Models and model cards](./models.md#objectron)
* [Web demo](https://code.mediapipe.dev/codepen/objectron)
* [Python Colab](https://mediapipe.page.link/objectron_py_colab)
@@ -30,7 +30,8 @@ overlay of digital content and information on top of the physical world in
augmented reality.

MediaPipe Pose is an ML solution for high-fidelity body pose tracking, inferring
33 3D landmarks and a background segmentation mask on the whole body from RGB
video frames utilizing our
[BlazePose](https://ai.googleblog.com/2020/08/on-device-real-time-body-pose-tracking.html)
research that also powers the
[ML Kit Pose Detection API](https://developers.google.com/ml-kit/vision/pose-detection).

@@ -49,11 +50,11 @@ The solution utilizes a two-step detector-tracker ML pipeline, proven to be
effective in our [MediaPipe Hands](./hands.md) and
[MediaPipe Face Mesh](./face_mesh.md) solutions. Using a detector, the pipeline
first locates the person/pose region-of-interest (ROI) within the frame. The
tracker subsequently predicts the pose landmarks and segmentation mask within
the ROI using the ROI-cropped frame as input. Note that for video use cases the
detector is invoked only as needed, i.e., for the very first frame and when the
tracker could no longer identify body pose presence in the previous frame. For
other frames the pipeline simply derives the ROI from the previous frame’s pose
landmarks.

The pipeline is implemented as a MediaPipe

@@ -87,11 +88,11 @@ from [COCO topology](https://cocodataset.org/#keypoints-2020).

Method | Yoga <br/> [`mAP`] | Yoga <br/> [`PCK@0.2`] | Dance <br/> [`mAP`] | Dance <br/> [`PCK@0.2`] | HIIT <br/> [`mAP`] | HIIT <br/> [`PCK@0.2`]
------ | -----------------: | ---------------------: | ------------------: | ----------------------: | -----------------: | ---------------------:
BlazePose GHUM Heavy | 68.1 | **96.4** | 73.0 | **97.2** | 74.0 | **97.5**
BlazePose GHUM Full | 62.6 | **95.5** | 67.4 | **96.3** | 68.0 | **95.7**
BlazePose GHUM Lite | 45.0 | **90.2** | 53.6 | **92.5** | 53.8 | **93.5**
[AlphaPose ResNet50](https://github.com/MVIG-SJTU/AlphaPose) | 63.4 | **96.0** | 57.8 | **95.5** | 63.4 | **96.0**
[Apple Vision](https://developer.apple.com/documentation/vision/detecting_human_body_poses_in_images) | 32.8 | **82.7** | 36.4 | **91.4** | 44.5 | **88.6**

 |
:--------------------------------------------------------------------------: |

@@ -101,10 +102,10 @@ We designed our models specifically for live perception use cases, so all of
them work in real-time on the majority of modern devices.

Method | Latency <br/> Pixel 3 [TFLite GPU](https://www.tensorflow.org/lite/performance/gpu_advanced) | Latency <br/> MacBook Pro (15-inch 2017)
-------------------- | -------------------------------------------------------------------------------------------: | ---------------------------------------:
BlazePose GHUM Heavy | 53 ms | 38 ms
BlazePose GHUM Full | 25 ms | 27 ms
BlazePose GHUM Lite | 20 ms | 25 ms

## Models

@@ -124,21 +125,24 @@ hip midpoints.
:----------------------------------------------------------------------------------------------------: |
*Fig 3. Vitruvian man aligned via two virtual keypoints predicted by BlazePose detector in addition to the face bounding box.* |

### Pose Landmark Model (BlazePose [GHUM](https://github.com/google-research/google-research/tree/master/ghum) 3D)

The landmark model in MediaPipe Pose predicts the location of 33 pose landmarks
(see figure below).

 |
:----------------------------------------------------------------------------------------------: |
*Fig 4. 33 pose landmarks.* |

Optionally, MediaPipe Pose can predict a full-body
[segmentation mask](#segmentation_mask) represented as a two-class segmentation
(human or background).

Please find more detail in the
[BlazePose Google AI Blog](https://ai.googleblog.com/2020/08/on-device-real-time-body-pose-tracking.html),
this [paper](https://arxiv.org/abs/2006.10204),
[the model card](./models.md#pose) and the [Output](#output) section below.
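Since the landmark topology is exposed as an enum in the Python Solution API, the 33 landmark names of Fig 4 can be listed directly; a quick sketch for reference (print statements are illustrative):

```python
import mediapipe as mp

mp_pose = mp.solutions.pose
# PoseLandmark enumerates the 33 landmarks (NOSE, LEFT_EYE_INNER, ...).
print(len(mp_pose.PoseLandmark))  # expected: 33
for landmark in mp_pose.PoseLandmark:
  print(landmark.value, landmark.name)
```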
## Solution APIs

### Cross-platform Configuration Options

@@ -167,6 +171,18 @@ If set to `true`, the solution filters pose landmarks across different input
images to reduce jitter, but ignored if [static_image_mode](#static_image_mode)
is also set to `true`. Default to `true`.

#### enable_segmentation

If set to `true`, in addition to the pose landmarks the solution also generates
the segmentation mask. Default to `false`.

#### smooth_segmentation

If set to `true`, the solution filters segmentation masks across different input
images to reduce jitter. Ignored if [enable_segmentation](#enable_segmentation)
is `false` or [static_image_mode](#static_image_mode) is `true`. Default to
`true`.

#### min_detection_confidence

Minimum confidence value (`[0.0, 1.0]`) from the person-detection model for the

@@ -211,6 +227,19 @@ the following:
* `visibility`: Identical to that defined in the corresponding
  [pose_landmarks](#pose_landmarks).

#### segmentation_mask

The output segmentation mask, predicted only when
[enable_segmentation](#enable_segmentation) is set to `true`. The mask has the
same width and height as the input image, and contains values in `[0.0, 1.0]`
where `1.0` and `0.0` indicate high certainty of a "human" and "background"
pixel respectively. Please refer to the platform-specific usage examples below
for usage details.

*Fig 6. Example of MediaPipe Pose segmentation mask.* |
:---------------------------------------------------: |
<video autoplay muted loop preload style="height: auto; width: 480px"><source src="../images/mobile/pose_segmentation.mp4" type="video/mp4"></video> |
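As a quick illustration of the mask format described above, the fraction of the frame classified as "human" can be read straight off the array; a minimal sketch (the helper name and the 0.5 cutoff are ours):

```python
import numpy as np

def human_coverage(segmentation_mask, threshold=0.5):
  """Fraction of pixels the mask classifies as "human" at the given cutoff."""
  return float(np.mean(segmentation_mask > threshold))

# With results = pose.process(image) and enable_segmentation=True:
# print(f'{human_coverage(results.segmentation_mask):.1%} of the frame is "human"')
```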
### Python Solution API

Please first follow general [instructions](../getting_started/python.md) to

@@ -222,6 +251,8 @@ Supported configuration options:
* [static_image_mode](#static_image_mode)
* [model_complexity](#model_complexity)
* [smooth_landmarks](#smooth_landmarks)
* [enable_segmentation](#enable_segmentation)
* [smooth_segmentation](#smooth_segmentation)
* [min_detection_confidence](#min_detection_confidence)
* [min_tracking_confidence](#min_tracking_confidence)

@@ -229,13 +260,16 @@ Supported configuration options:
import cv2
import mediapipe as mp
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_pose = mp.solutions.pose

# For static images:
IMAGE_FILES = []
BG_COLOR = (192, 192, 192)  # gray
with mp_pose.Pose(
    static_image_mode=True,
    model_complexity=2,
    enable_segmentation=True,
    min_detection_confidence=0.5) as pose:
  for idx, file in enumerate(IMAGE_FILES):
    image = cv2.imread(file)

@@ -247,13 +281,24 @@ with mp_pose.Pose(
      continue
    print(
        f'Nose coordinates: ('
        f'{results.pose_landmarks.landmark[mp_pose.PoseLandmark.NOSE].x * image_width}, '
        f'{results.pose_landmarks.landmark[mp_pose.PoseLandmark.NOSE].y * image_height})'
    )
    annotated_image = image.copy()
    # Draw segmentation on the image.
    # To improve segmentation around boundaries, consider applying a joint
    # bilateral filter to "results.segmentation_mask" with "image".
    condition = np.stack((results.segmentation_mask,) * 3, axis=-1) > 0.1
    bg_image = np.zeros(image.shape, dtype=np.uint8)
    bg_image[:] = BG_COLOR
    annotated_image = np.where(condition, annotated_image, bg_image)
    # Draw pose landmarks on the image.
    mp_drawing.draw_landmarks(
        annotated_image,
        results.pose_landmarks,
        mp_pose.POSE_CONNECTIONS,
        landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style())
    cv2.imwrite('/tmp/annotated_image' + str(idx) + '.png', annotated_image)
    # Plot pose world landmarks.
    mp_drawing.plot_landmarks(

@@ -271,20 +316,22 @@ with mp_pose.Pose(
      # If loading a video, use 'break' instead of 'continue'.
      continue

    # To improve performance, optionally mark the image as not writeable to
    # pass by reference.
    image.flags.writeable = False
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = pose.process(image)

    # Draw the pose annotation on the image.
    image.flags.writeable = True
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    mp_drawing.draw_landmarks(
        image,
        results.pose_landmarks,
        mp_pose.POSE_CONNECTIONS,
        landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style())
    # Flip the image horizontally for a selfie-view display.
    cv2.imshow('MediaPipe Pose', cv2.flip(image, 1))
    if cv2.waitKey(5) & 0xFF == 27:
      break
cap.release()
@@ -300,6 +347,8 @@ Supported configuration options:

* [modelComplexity](#model_complexity)
* [smoothLandmarks](#smooth_landmarks)
* [enableSegmentation](#enable_segmentation)
* [smoothSegmentation](#smooth_segmentation)
* [minDetectionConfidence](#min_detection_confidence)
* [minTrackingConfidence](#min_tracking_confidence)

@@ -319,6 +368,7 @@ Supported configuration options:
  <div class="container">
    <video class="input_video"></video>
    <canvas class="output_canvas" width="1280px" height="720px"></canvas>
    <div class="landmark-grid-container"></div>
  </div>
</body>
</html>

@@ -340,8 +390,20 @@ function onResults(results) {

  canvasCtx.save();
  canvasCtx.clearRect(0, 0, canvasElement.width, canvasElement.height);
  canvasCtx.drawImage(results.segmentationMask, 0, 0,
                      canvasElement.width, canvasElement.height);

  // Only overwrite existing pixels.
  canvasCtx.globalCompositeOperation = 'source-in';
  canvasCtx.fillStyle = '#00FF00';
  canvasCtx.fillRect(0, 0, canvasElement.width, canvasElement.height);

  // Only overwrite missing pixels.
  canvasCtx.globalCompositeOperation = 'destination-atop';
  canvasCtx.drawImage(
      results.image, 0, 0, canvasElement.width, canvasElement.height);

  canvasCtx.globalCompositeOperation = 'source-over';
  drawConnectors(canvasCtx, results.poseLandmarks, POSE_CONNECTIONS,
                 {color: '#00FF00', lineWidth: 4});
  drawLandmarks(canvasCtx, results.poseLandmarks,

@@ -357,6 +419,8 @@ const pose = new Pose({locateFile: (file) => {
pose.setOptions({
  modelComplexity: 1,
  smoothLandmarks: true,
  enableSegmentation: true,
  smoothSegmentation: true,
  minDetectionConfidence: 0.5,
  minTrackingConfidence: 0.5
});

@@ -422,6 +486,7 @@ on how to build MediaPipe examples.
  [BlazePose: On-device Real-time Body Pose Tracking](https://arxiv.org/abs/2006.10204)
  ([presentation](https://youtu.be/YPpUOTRn5tA))
* [Models and model cards](./models.md#pose)
* [GHUM & GHUML: Generative 3D Human Shape and Articulated Pose Models](https://github.com/google-research/google-research/tree/master/ghum)
* [Web demo](https://code.mediapipe.dev/codepen/pose)
* [Python Colab](https://mediapipe.page.link/pose_py_colab)
@@ -96,6 +96,7 @@ Supported configuration options:
```python
import cv2
import mediapipe as mp
import numpy as np
mp_drawing = mp.solutions.drawing_utils
mp_selfie_segmentation = mp.solutions.selfie_segmentation

@@ -261,7 +262,7 @@ to visualize its associated subgraphs, please see
  [(or download prebuilt ARM64 APK)](https://drive.google.com/file/d/1DoeyGzMmWUsjfVgZfGGecrn7GKzYcEAo/view?usp=sharing)
  [`mediapipe/examples/android/src/java/com/google/mediapipe/apps/selfiesegmentationgpu:selfiesegmentationgpu`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/selfiesegmentationgpu/BUILD)
* iOS target:
  [`mediapipe/examples/ios/selfiesegmentationgpu:SelfieSegmentationGpuApp`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/ios/selfiesegmentationgpu/BUILD)

### Desktop
@@ -13,6 +13,9 @@ has_toc: false
{:toc}
---

MediaPipe offers open source cross-platform, customizable ML solutions for live
and streaming media.

<!-- []() in the first cell is needed to preserve table formatting in GitHub Pages. -->
<!-- Whenever this table is updated, paste a copy to ../external_index.md. -->

@@ -29,7 +32,7 @@ has_toc: false
[Object Detection](https://google.github.io/mediapipe/solutions/object_detection) | ✅ | ✅ | ✅ | | | ✅
[Box Tracking](https://google.github.io/mediapipe/solutions/box_tracking) | ✅ | ✅ | ✅ | | |
[Instant Motion Tracking](https://google.github.io/mediapipe/solutions/instant_motion_tracking) | ✅ | | | | |
[Objectron](https://google.github.io/mediapipe/solutions/objectron) | ✅ | | ✅ | ✅ | ✅ |
[KNIFT](https://google.github.io/mediapipe/solutions/knift) | ✅ | | | | |
[AutoFlip](https://google.github.io/mediapipe/solutions/autoflip) | | | ✅ | | |
[MediaSequence](https://google.github.io/mediapipe/solutions/media_sequence) | | | ✅ | | |
@@ -140,6 +140,16 @@ mediapipe_proto_library(
    ],
)

mediapipe_proto_library(
    name = "graph_profile_calculator_proto",
    srcs = ["graph_profile_calculator.proto"],
    visibility = ["//visibility:public"],
    deps = [
        "//mediapipe/framework:calculator_options_proto",
        "//mediapipe/framework:calculator_proto",
    ],
)

cc_library(
    name = "add_header_calculator",
    srcs = ["add_header_calculator.cc"],

@@ -521,9 +531,13 @@ cc_test(
        ":split_vector_calculator",
        "//mediapipe/framework:calculator_framework",
        "//mediapipe/framework:calculator_runner",
        "//mediapipe/framework/api2:node",
        "//mediapipe/framework/api2:port",
        "//mediapipe/framework/port:gtest_main",
        "//mediapipe/framework/port:parse_text_proto",
        "//mediapipe/framework/port:status",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/types:optional",
    ],
)

@@ -1200,3 +1214,45 @@ cc_test(
        "@com_google_absl//absl/strings",
    ],
)

cc_library(
    name = "graph_profile_calculator",
    srcs = ["graph_profile_calculator.cc"],
    visibility = ["//visibility:public"],
    deps = [
        ":graph_profile_calculator_cc_proto",
        "//mediapipe/framework:calculator_framework",
        "//mediapipe/framework:calculator_profile_cc_proto",
        "//mediapipe/framework/api2:node",
        "//mediapipe/framework/api2:packet",
        "//mediapipe/framework/api2:port",
        "//mediapipe/framework/port:ret_check",
        "//mediapipe/framework/port:status",
    ],
    alwayslink = 1,
)

cc_test(
    name = "graph_profile_calculator_test",
    srcs = ["graph_profile_calculator_test.cc"],
    deps = [
        ":graph_profile_calculator",
        "//mediapipe/framework:calculator_cc_proto",
        "//mediapipe/framework:calculator_framework",
        "//mediapipe/framework:calculator_profile_cc_proto",
        "//mediapipe/framework:test_calculators",
        "//mediapipe/framework/deps:clock",
        "//mediapipe/framework/deps:message_matchers",
        "//mediapipe/framework/port:core_proto",
        "//mediapipe/framework/port:gtest_main",
        "//mediapipe/framework/port:integral_types",
        "//mediapipe/framework/port:logging",
        "//mediapipe/framework/port:parse_text_proto",
        "//mediapipe/framework/port:threadpool",
        "//mediapipe/framework/tool:simulation_clock_executor",
        "//mediapipe/framework/tool:sink",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/time",
    ],
)
@@ -24,6 +24,9 @@

namespace mediapipe {

constexpr char kDataTag[] = "DATA";
constexpr char kHeaderTag[] = "HEADER";

class AddHeaderCalculatorTest : public ::testing::Test {};

TEST_F(AddHeaderCalculatorTest, HeaderStream) {

@@ -36,11 +39,11 @@ TEST_F(AddHeaderCalculatorTest, HeaderStream) {
  CalculatorRunner runner(node);

  // Set header and add 5 packets.
  runner.MutableInputs()->Tag(kHeaderTag).header =
      Adopt(new std::string("my_header"));
  for (int i = 0; i < 5; ++i) {
    Packet packet = Adopt(new int(i)).At(Timestamp(i * 1000));
    runner.MutableInputs()->Tag(kDataTag).packets.push_back(packet);
  }

  // Run calculator.

@@ -85,13 +88,14 @@ TEST_F(AddHeaderCalculatorTest, NoPacketsOnHeaderStream) {
  CalculatorRunner runner(node);

  // Set header and add 5 packets.
  runner.MutableInputs()->Tag(kHeaderTag).header =
      Adopt(new std::string("my_header"));
  runner.MutableInputs()
      ->Tag(kHeaderTag)
      .packets.push_back(Adopt(new std::string("not allowed")));
  for (int i = 0; i < 5; ++i) {
    Packet packet = Adopt(new int(i)).At(Timestamp(i * 1000));
    runner.MutableInputs()->Tag(kDataTag).packets.push_back(packet);
  }

  // Run calculator.

@@ -108,11 +112,11 @@ TEST_F(AddHeaderCalculatorTest, InputSidePacket) {
  CalculatorRunner runner(node);

  // Set header and add 5 packets.
  runner.MutableSidePackets()->Tag(kHeaderTag) =
      Adopt(new std::string("my_header"));
  for (int i = 0; i < 5; ++i) {
    Packet packet = Adopt(new int(i)).At(Timestamp(i * 1000));
    runner.MutableInputs()->Tag(kDataTag).packets.push_back(packet);
  }

  // Run calculator.

@@ -143,13 +147,13 @@ TEST_F(AddHeaderCalculatorTest, UsingBothSideInputAndStream) {
  CalculatorRunner runner(node);

  // Set both headers and add 5 packets.
  runner.MutableSidePackets()->Tag(kHeaderTag) =
      Adopt(new std::string("my_header"));
  runner.MutableSidePackets()->Tag(kHeaderTag) =
      Adopt(new std::string("my_header"));
  for (int i = 0; i < 5; ++i) {
    Packet packet = Adopt(new int(i)).At(Timestamp(i * 1000));
    runner.MutableInputs()->Tag(kDataTag).packets.push_back(packet);
  }

  // Run should fail because header can only be provided one way.
@@ -42,4 +42,13 @@ REGISTER_CALCULATOR(BeginLoopDetectionCalculator);
typedef BeginLoopCalculator<std::vector<Matrix>> BeginLoopMatrixCalculator;
REGISTER_CALCULATOR(BeginLoopMatrixCalculator);

// A calculator to process std::vector<std::vector<Matrix>>.
typedef BeginLoopCalculator<std::vector<std::vector<Matrix>>>
    BeginLoopMatrixVectorCalculator;
REGISTER_CALCULATOR(BeginLoopMatrixVectorCalculator);

// A calculator to process std::vector<uint64_t>.
typedef BeginLoopCalculator<std::vector<uint64_t>> BeginLoopUint64tCalculator;
REGISTER_CALCULATOR(BeginLoopUint64tCalculator);

}  // namespace mediapipe
@ -19,6 +19,13 @@
|
||||||
|
|
||||||
namespace mediapipe {
|
namespace mediapipe {
|
||||||
|
|
||||||
|
constexpr char kIncrementTag[] = "INCREMENT";
|
||||||
|
constexpr char kInitialValueTag[] = "INITIAL_VALUE";
|
||||||
|
constexpr char kBatchSizeTag[] = "BATCH_SIZE";
|
||||||
|
constexpr char kErrorCountTag[] = "ERROR_COUNT";
|
||||||
|
constexpr char kMaxCountTag[] = "MAX_COUNT";
|
||||||
|
constexpr char kErrorOnOpenTag[] = "ERROR_ON_OPEN";
|
||||||
|
|
||||||
// Source calculator that produces MAX_COUNT*BATCH_SIZE int packets of
|
// Source calculator that produces MAX_COUNT*BATCH_SIZE int packets of
|
||||||
// sequential numbers from INITIAL_VALUE (default 0) with a common
|
// sequential numbers from INITIAL_VALUE (default 0) with a common
|
||||||
// difference of INCREMENT (default 1) between successive numbers (with
|
// difference of INCREMENT (default 1) between successive numbers (with
|
||||||
|
@ -33,53 +40,53 @@ class CountingSourceCalculator : public CalculatorBase {
|
||||||
static absl::Status GetContract(CalculatorContract* cc) {
|
static absl::Status GetContract(CalculatorContract* cc) {
|
||||||
cc->Outputs().Index(0).Set<int>();
|
cc->Outputs().Index(0).Set<int>();
|
||||||
|
|
||||||
if (cc->InputSidePackets().HasTag("ERROR_ON_OPEN")) {
|
if (cc->InputSidePackets().HasTag(kErrorOnOpenTag)) {
|
||||||
cc->InputSidePackets().Tag("ERROR_ON_OPEN").Set<bool>();
|
cc->InputSidePackets().Tag(kErrorOnOpenTag).Set<bool>();
|
||||||
}
|
}
|
||||||
|
|
||||||
RET_CHECK(cc->InputSidePackets().HasTag("MAX_COUNT") ||
|
    RET_CHECK(cc->InputSidePackets().HasTag(kMaxCountTag) ||
              cc->InputSidePackets().HasTag(kErrorCountTag));
    if (cc->InputSidePackets().HasTag(kMaxCountTag)) {
      cc->InputSidePackets().Tag(kMaxCountTag).Set<int>();
    }
    if (cc->InputSidePackets().HasTag(kErrorCountTag)) {
      cc->InputSidePackets().Tag(kErrorCountTag).Set<int>();
    }

    if (cc->InputSidePackets().HasTag(kBatchSizeTag)) {
      cc->InputSidePackets().Tag(kBatchSizeTag).Set<int>();
    }
    if (cc->InputSidePackets().HasTag(kInitialValueTag)) {
      cc->InputSidePackets().Tag(kInitialValueTag).Set<int>();
    }
    if (cc->InputSidePackets().HasTag(kIncrementTag)) {
      cc->InputSidePackets().Tag(kIncrementTag).Set<int>();
    }
    return absl::OkStatus();
  }

  absl::Status Open(CalculatorContext* cc) override {
    if (cc->InputSidePackets().HasTag(kErrorOnOpenTag) &&
        cc->InputSidePackets().Tag(kErrorOnOpenTag).Get<bool>()) {
      return absl::NotFoundError("expected error");
    }
    if (cc->InputSidePackets().HasTag(kErrorCountTag)) {
      error_count_ = cc->InputSidePackets().Tag(kErrorCountTag).Get<int>();
      RET_CHECK_LE(0, error_count_);
    }
    if (cc->InputSidePackets().HasTag(kMaxCountTag)) {
      max_count_ = cc->InputSidePackets().Tag(kMaxCountTag).Get<int>();
      RET_CHECK_LE(0, max_count_);
    }
    if (cc->InputSidePackets().HasTag(kBatchSizeTag)) {
      batch_size_ = cc->InputSidePackets().Tag(kBatchSizeTag).Get<int>();
      RET_CHECK_LT(0, batch_size_);
    }
    if (cc->InputSidePackets().HasTag(kInitialValueTag)) {
      counter_ = cc->InputSidePackets().Tag(kInitialValueTag).Get<int>();
    }
    if (cc->InputSidePackets().HasTag(kIncrementTag)) {
      increment_ = cc->InputSidePackets().Tag(kIncrementTag).Get<int>();
      RET_CHECK_LT(0, increment_);
    }
    RET_CHECK(error_count_ >= 0 || max_count_ >= 0);
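The side packets declared above (MAX_COUNT, ERROR_COUNT, BATCH_SIZE, INITIAL_VALUE, INCREMENT, ERROR_ON_OPEN) are supplied when the graph run starts. The sketch below shows one way to do that; the registration name "CountingSourceCalculator" and the side-packet names are assumptions, not taken from this hunk.

// Minimal sketch, assuming the calculator above is registered as
// "CountingSourceCalculator" (name assumed).
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/parse_text_proto.h"

absl::Status RunCountingGraph() {
  mediapipe::CalculatorGraphConfig config =
      mediapipe::ParseTextProtoOrDie<mediapipe::CalculatorGraphConfig>(R"pb(
        node {
          calculator: "CountingSourceCalculator"  # assumed name
          input_side_packet: "MAX_COUNT:max_count"
          input_side_packet: "INCREMENT:increment"
          output_stream: "count"
        }
      )pb");
  mediapipe::CalculatorGraph graph;
  MP_RETURN_IF_ERROR(graph.Initialize(config));
  // Side packets are keyed by the names used above, not by the tags.
  MP_RETURN_IF_ERROR(
      graph.StartRun({{"max_count", mediapipe::MakePacket<int>(10)},
                      {"increment", mediapipe::MakePacket<int>(2)}}));
  return graph.WaitUntilDone();
}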
@@ -35,11 +35,14 @@
// }
namespace mediapipe {

constexpr char kFloatVectorTag[] = "FLOAT_VECTOR";
constexpr char kEncodedTag[] = "ENCODED";

class DequantizeByteArrayCalculator : public CalculatorBase {
 public:
  static absl::Status GetContract(CalculatorContract* cc) {
    cc->Inputs().Tag(kEncodedTag).Set<std::string>();
    cc->Outputs().Tag(kFloatVectorTag).Set<std::vector<float>>();
    return absl::OkStatus();
  }

@@ -66,7 +69,7 @@ class DequantizeByteArrayCalculator : public CalculatorBase {

  absl::Status Process(CalculatorContext* cc) final {
    const std::string& encoded =
        cc->Inputs().Tag(kEncodedTag).Value().Get<std::string>();
    std::vector<float> float_vector;
    float_vector.reserve(encoded.length());
    for (int i = 0; i < encoded.length(); ++i) {
@@ -74,7 +77,7 @@ class DequantizeByteArrayCalculator : public CalculatorBase {
          static_cast<unsigned char>(encoded.at(i)) * scalar_ + bias_);
    }
    cc->Outputs()
        .Tag(kFloatVectorTag)
        .AddPacket(MakePacket<std::vector<float>>(float_vector)
                       .At(cc->InputTimestamp()));
    return absl::OkStatus();
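The Process() loop above maps each encoded byte b to b * scalar_ + bias_. The members scalar_ and bias_ are initialized outside this hunk, so the standalone sketch below derives them from an assumed [min, max] quantization range; it only illustrates the arithmetic, not the calculator's exact option handling.

// Sketch of the dequantization arithmetic, assuming bytes were produced by
// mapping [min_value, max_value] onto [0, 255].
#include <string>
#include <vector>

std::vector<float> DequantizeBytes(const std::string& encoded, float min_value,
                                   float max_value) {
  const float scalar = (max_value - min_value) / 255.0f;  // assumed convention
  const float bias = min_value;
  std::vector<float> values;
  values.reserve(encoded.size());
  for (char c : encoded) {
    values.push_back(static_cast<unsigned char>(c) * scalar + bias);
  }
  return values;
}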
@@ -25,6 +25,9 @@

namespace mediapipe {

constexpr char kFloatVectorTag[] = "FLOAT_VECTOR";
constexpr char kEncodedTag[] = "ENCODED";

TEST(QuantizeFloatVectorCalculatorTest, WrongConfig) {
  CalculatorGraphConfig::Node node_config =
      ParseTextProtoOrDie<CalculatorGraphConfig::Node>(R"pb(
@@ -39,7 +42,9 @@ TEST(QuantizeFloatVectorCalculatorTest, WrongConfig) {
  )pb");
  CalculatorRunner runner(node_config);
  std::string empty_string;
  runner.MutableInputs()
      ->Tag(kEncodedTag)
      .packets.push_back(
          MakePacket<std::string>(empty_string).At(Timestamp(0)));
  auto status = runner.Run();
  EXPECT_FALSE(status.ok());
@@ -64,7 +69,9 @@ TEST(QuantizeFloatVectorCalculatorTest, WrongConfig2) {
  )pb");
  CalculatorRunner runner(node_config);
  std::string empty_string;
  runner.MutableInputs()
      ->Tag(kEncodedTag)
      .packets.push_back(
          MakePacket<std::string>(empty_string).At(Timestamp(0)));
  auto status = runner.Run();
  EXPECT_FALSE(status.ok());
@@ -89,7 +96,9 @@ TEST(QuantizeFloatVectorCalculatorTest, WrongConfig3) {
  )pb");
  CalculatorRunner runner(node_config);
  std::string empty_string;
  runner.MutableInputs()
      ->Tag(kEncodedTag)
      .packets.push_back(
          MakePacket<std::string>(empty_string).At(Timestamp(0)));
  auto status = runner.Run();
  EXPECT_FALSE(status.ok());
@@ -114,14 +123,16 @@ TEST(DequantizeByteArrayCalculatorTest, TestDequantization) {
  )pb");
  CalculatorRunner runner(node_config);
  unsigned char input[4] = {0x7F, 0xFF, 0x00, 0x01};
  runner.MutableInputs()
      ->Tag(kEncodedTag)
      .packets.push_back(
          MakePacket<std::string>(
              std::string(reinterpret_cast<char const*>(input), 4))
              .At(Timestamp(0)));
  auto status = runner.Run();
  MP_ASSERT_OK(runner.Run());
  const std::vector<Packet>& outputs =
      runner.Outputs().Tag(kFloatVectorTag).packets;
  EXPECT_EQ(1, outputs.size());
  const std::vector<float>& result = outputs[0].Get<std::vector<float>>();
  ASSERT_FALSE(result.empty());
@@ -24,6 +24,11 @@

namespace mediapipe {

constexpr char kFinishedTag[] = "FINISHED";
constexpr char kAllowTag[] = "ALLOW";
constexpr char kMaxInFlightTag[] = "MAX_IN_FLIGHT";
constexpr char kOptionsTag[] = "OPTIONS";

// FlowLimiterCalculator is used to limit the number of frames in flight
// by dropping input frames when necessary.
//
@@ -69,16 +74,19 @@ class FlowLimiterCalculator : public CalculatorBase {
 public:
  static absl::Status GetContract(CalculatorContract* cc) {
    auto& side_inputs = cc->InputSidePackets();
    side_inputs.Tag(kOptionsTag).Set<FlowLimiterCalculatorOptions>().Optional();
    cc->Inputs()
        .Tag(kOptionsTag)
        .Set<FlowLimiterCalculatorOptions>()
        .Optional();
    RET_CHECK_GE(cc->Inputs().NumEntries(""), 1);
    for (int i = 0; i < cc->Inputs().NumEntries(""); ++i) {
      cc->Inputs().Get("", i).SetAny();
      cc->Outputs().Get("", i).SetSameAs(&(cc->Inputs().Get("", i)));
    }
    cc->Inputs().Get("FINISHED", 0).SetAny();
    cc->InputSidePackets().Tag(kMaxInFlightTag).Set<int>().Optional();
    cc->Outputs().Tag(kAllowTag).Set<bool>().Optional();
    cc->SetInputStreamHandler("ImmediateInputStreamHandler");
    cc->SetProcessTimestampBounds(true);
    return absl::OkStatus();
@@ -87,9 +95,9 @@ class FlowLimiterCalculator : public CalculatorBase {
  absl::Status Open(CalculatorContext* cc) final {
    options_ = cc->Options<FlowLimiterCalculatorOptions>();
    options_ = tool::RetrieveOptions(options_, cc->InputSidePackets());
    if (cc->InputSidePackets().HasTag(kMaxInFlightTag)) {
      options_.set_max_in_flight(
          cc->InputSidePackets().Tag(kMaxInFlightTag).Get<int>());
    }
    input_queues_.resize(cc->Inputs().NumEntries(""));
    RET_CHECK_OK(CopyInputHeadersToOutputs(cc->Inputs(), &(cc->Outputs())));
@@ -104,8 +112,8 @@ class FlowLimiterCalculator : public CalculatorBase {

  // Outputs a packet indicating whether a frame was sent or dropped.
  void SendAllow(bool allow, Timestamp ts, CalculatorContext* cc) {
    if (cc->Outputs().HasTag(kAllowTag)) {
      cc->Outputs().Tag(kAllowTag).AddPacket(MakePacket<bool>(allow).At(ts));
    }
  }

@@ -155,7 +163,7 @@ class FlowLimiterCalculator : public CalculatorBase {
    options_ = tool::RetrieveOptions(options_, cc->Inputs());

    // Process the FINISHED input stream.
    Packet finished_packet = cc->Inputs().Tag(kFinishedTag).Value();
    if (finished_packet.Timestamp() == cc->InputTimestamp()) {
      while (!frames_in_flight_.empty() &&
             frames_in_flight_.front() <= finished_packet.Timestamp()) {
@@ -210,8 +218,8 @@ class FlowLimiterCalculator : public CalculatorBase {
      Timestamp bound =
          cc->Inputs().Get("", 0).Value().Timestamp().NextAllowedInStream();
      SetNextTimestampBound(bound, &cc->Outputs().Get("", 0));
      if (cc->Outputs().HasTag(kAllowTag)) {
        SetNextTimestampBound(bound, &cc->Outputs().Tag(kAllowTag));
      }
    }
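For context, the fragment below sketches how FlowLimiterCalculator is typically wired: the FINISHED input is fed by a back edge from the end of the throttled sub-graph, and MAX_IN_FLIGHT can be overridden per run. The stream names and the downstream calculator are illustrative assumptions, not part of this change.

// Illustrative graph fragment (stream names are assumptions):
mediapipe::CalculatorGraphConfig config =
    mediapipe::ParseTextProtoOrDie<mediapipe::CalculatorGraphConfig>(R"pb(
      input_stream: "input_frames"
      node {
        calculator: "FlowLimiterCalculator"
        input_stream: "input_frames"
        input_stream: "FINISHED:detections"
        input_stream_info: { tag_index: "FINISHED" back_edge: true }
        input_side_packet: "MAX_IN_FLIGHT:max_in_flight"
        output_stream: "throttled_frames"
        output_stream: "ALLOW:allowed"
      }
      node {
        calculator: "SlowProcessingCalculator"  # placeholder for real work
        input_stream: "throttled_frames"
        output_stream: "detections"
      }
    )pb");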
@@ -30,7 +30,7 @@ message FlowLimiterCalculatorOptions {
  optional int32 max_in_flight = 1 [default = 1];

  // The maximum number of frames queued waiting for processing.
  // The default value limits to 0 frames awaiting processing.
  optional int32 max_in_queue = 2 [default = 0];

  // The maximum time in microseconds to wait for a frame to finish processing.
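Since max_in_flight can also be supplied as the MAX_IN_FLIGHT side packet shown earlier, a per-run override is possible without changing the graph config. A minimal sketch, assuming the side packet was named "max_in_flight" in the config:

// Sketch: override max_in_flight for this run via the side packet.
MP_RETURN_IF_ERROR(graph.StartRun(
    {{"max_in_flight", mediapipe::MakePacket<int>(2)}}));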
@@ -36,6 +36,13 @@
namespace mediapipe {

namespace {

constexpr char kDropTimestampsTag[] = "DROP_TIMESTAMPS";
constexpr char kClockTag[] = "CLOCK";
constexpr char kWarmupTimeTag[] = "WARMUP_TIME";
constexpr char kSleepTimeTag[] = "SLEEP_TIME";
constexpr char kPacketTag[] = "PACKET";

// A simple Semaphore for synchronizing test threads.
class AtomicSemaphore {
 public:
@@ -204,17 +211,17 @@ TEST_F(FlowLimiterCalculatorSemaphoreTest, FramesDropped) {
class SleepCalculator : public CalculatorBase {
 public:
  static absl::Status GetContract(CalculatorContract* cc) {
    cc->Inputs().Tag(kPacketTag).SetAny();
    cc->Outputs().Tag(kPacketTag).SetSameAs(&cc->Inputs().Tag(kPacketTag));
    cc->InputSidePackets().Tag(kSleepTimeTag).Set<int64>();
    cc->InputSidePackets().Tag(kWarmupTimeTag).Set<int64>();
    cc->InputSidePackets().Tag(kClockTag).Set<mediapipe::Clock*>();
    cc->SetTimestampOffset(0);
    return absl::OkStatus();
  }

  absl::Status Open(CalculatorContext* cc) final {
    clock_ = cc->InputSidePackets().Tag(kClockTag).Get<mediapipe::Clock*>();
    return absl::OkStatus();
  }

@@ -222,10 +229,12 @@ class SleepCalculator : public CalculatorBase {
    ++packet_count;
    absl::Duration sleep_time = absl::Microseconds(
        packet_count == 1
            ? cc->InputSidePackets().Tag(kWarmupTimeTag).Get<int64>()
            : cc->InputSidePackets().Tag(kSleepTimeTag).Get<int64>());
    clock_->Sleep(sleep_time);
    cc->Outputs()
        .Tag(kPacketTag)
        .AddPacket(cc->Inputs().Tag(kPacketTag).Value());
    return absl::OkStatus();
  }

@@ -240,24 +249,27 @@ REGISTER_CALCULATOR(SleepCalculator);
class DropCalculator : public CalculatorBase {
 public:
  static absl::Status GetContract(CalculatorContract* cc) {
    cc->Inputs().Tag(kPacketTag).SetAny();
    cc->Outputs().Tag(kPacketTag).SetSameAs(&cc->Inputs().Tag(kPacketTag));
    cc->InputSidePackets().Tag(kDropTimestampsTag).Set<bool>();
    cc->SetProcessTimestampBounds(true);
    return absl::OkStatus();
  }

  absl::Status Process(CalculatorContext* cc) final {
    if (!cc->Inputs().Tag(kPacketTag).Value().IsEmpty()) {
      ++packet_count;
    }
    bool drop = (packet_count == 3);
    if (!drop && !cc->Inputs().Tag(kPacketTag).Value().IsEmpty()) {
      cc->Outputs()
          .Tag(kPacketTag)
          .AddPacket(cc->Inputs().Tag(kPacketTag).Value());
    }
    if (!drop || !cc->InputSidePackets().Tag(kDropTimestampsTag).Get<bool>()) {
      cc->Outputs()
          .Tag(kPacketTag)
          .SetNextTimestampBound(cc->InputTimestamp().NextAllowedInStream());
    }
    return absl::OkStatus();
  }
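SleepCalculator and DropCalculator above are test helpers that simulate slow and lossy downstream stages. The fragment below is a guess at how such a test graph could be assembled around FlowLimiterCalculator; the actual test config is not shown in this hunk, so every stream and side-packet name here is an assumption.

// Hypothetical test graph wiring (not taken from the change):
mediapipe::CalculatorGraphConfig test_config =
    mediapipe::ParseTextProtoOrDie<mediapipe::CalculatorGraphConfig>(R"pb(
      input_stream: "in"
      node {
        calculator: "FlowLimiterCalculator"
        input_stream: "in"
        input_stream: "FINISHED:out"
        input_stream_info: { tag_index: "FINISHED" back_edge: true }
        output_stream: "in_sampled"
      }
      node {
        calculator: "SleepCalculator"
        input_stream: "PACKET:in_sampled"
        output_stream: "PACKET:out"
        input_side_packet: "SLEEP_TIME:sleep_time"
        input_side_packet: "WARMUP_TIME:warmup_time"
        input_side_packet: "CLOCK:clock"
      }
    )pb");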
@@ -21,6 +21,11 @@
namespace mediapipe {

namespace {

constexpr char kStateChangeTag[] = "STATE_CHANGE";
constexpr char kDisallowTag[] = "DISALLOW";
constexpr char kAllowTag[] = "ALLOW";

enum GateState {
  GATE_UNINITIALIZED,
  GATE_ALLOW,
@@ -59,8 +64,9 @@ std::string ToString(GateState state) {
// ALLOW or DISALLOW can also be specified as an input side packet. The rules
// for evaluation remain the same as above.
//
// ALLOW/DISALLOW inputs must be specified either using input stream or via
// input side packet but not both. If neither is specified, the behavior is then
// determined by the "allow" field in the calculator options.
//
// Intended to be used with the default input stream handler, which synchronizes
// all data input streams with the ALLOW/DISALLOW control input stream.
@@ -83,30 +89,33 @@ class GateCalculator : public CalculatorBase {
  GateCalculator() {}

  static absl::Status CheckAndInitAllowDisallowInputs(CalculatorContract* cc) {
    bool input_via_side_packet = cc->InputSidePackets().HasTag(kAllowTag) ||
                                 cc->InputSidePackets().HasTag(kDisallowTag);
    bool input_via_stream =
        cc->Inputs().HasTag(kAllowTag) || cc->Inputs().HasTag(kDisallowTag);

    // Only one of input_side_packet or input_stream may specify
    // ALLOW/DISALLOW input.
    if (input_via_side_packet) {
      RET_CHECK(!input_via_stream);
      RET_CHECK(cc->InputSidePackets().HasTag(kAllowTag) ^
                cc->InputSidePackets().HasTag(kDisallowTag));

      if (cc->InputSidePackets().HasTag(kAllowTag)) {
        cc->InputSidePackets().Tag(kAllowTag).Set<bool>().Optional();
      } else {
        cc->InputSidePackets().Tag(kDisallowTag).Set<bool>().Optional();
      }
    }
    if (input_via_stream) {
      RET_CHECK(!input_via_side_packet);
      RET_CHECK(cc->Inputs().HasTag(kAllowTag) ^
                cc->Inputs().HasTag(kDisallowTag));

      if (cc->Inputs().HasTag(kAllowTag)) {
        cc->Inputs().Tag(kAllowTag).Set<bool>();
      } else {
        cc->Inputs().Tag(kDisallowTag).Set<bool>();
      }
    }
    return absl::OkStatus();
@@ -125,23 +134,22 @@ class GateCalculator : public CalculatorBase {
      cc->Outputs().Get("", i).SetSameAs(&cc->Inputs().Get("", i));
    }

    if (cc->Outputs().HasTag(kStateChangeTag)) {
      cc->Outputs().Tag(kStateChangeTag).Set<bool>();
    }

    return absl::OkStatus();
  }

  absl::Status Open(CalculatorContext* cc) final {
    if (cc->InputSidePackets().HasTag(kAllowTag)) {
      use_side_packet_for_allow_disallow_ = true;
      allow_by_side_packet_decision_ =
          cc->InputSidePackets().Tag(kAllowTag).Get<bool>();
    } else if (cc->InputSidePackets().HasTag(kDisallowTag)) {
      use_side_packet_for_allow_disallow_ = true;
      allow_by_side_packet_decision_ =
          !cc->InputSidePackets().Tag(kDisallowTag).Get<bool>();
    }

    cc->SetOffset(TimestampDiff(0));
@@ -152,26 +160,34 @@ class GateCalculator : public CalculatorBase {
    const auto& options = cc->Options<::mediapipe::GateCalculatorOptions>();
    empty_packets_as_allow_ = options.empty_packets_as_allow();

    if (!use_side_packet_for_allow_disallow_ &&
        !cc->Inputs().HasTag(kAllowTag) && !cc->Inputs().HasTag(kDisallowTag)) {
      use_option_for_allow_disallow_ = true;
      allow_by_option_decision_ = options.allow();
    }

    return absl::OkStatus();
  }

  absl::Status Process(CalculatorContext* cc) final {
    bool allow = empty_packets_as_allow_;
    if (use_option_for_allow_disallow_) {
      allow = allow_by_option_decision_;
    } else if (use_side_packet_for_allow_disallow_) {
      allow = allow_by_side_packet_decision_;
    } else {
      if (cc->Inputs().HasTag(kAllowTag) &&
          !cc->Inputs().Tag(kAllowTag).IsEmpty()) {
        allow = cc->Inputs().Tag(kAllowTag).Get<bool>();
      }
      if (cc->Inputs().HasTag(kDisallowTag) &&
          !cc->Inputs().Tag(kDisallowTag).IsEmpty()) {
        allow = !cc->Inputs().Tag(kDisallowTag).Get<bool>();
      }
    }
    const GateState new_gate_state = allow ? GATE_ALLOW : GATE_DISALLOW;

    if (cc->Outputs().HasTag(kStateChangeTag)) {
      if (last_gate_state_ != GATE_UNINITIALIZED &&
          last_gate_state_ != new_gate_state) {
        VLOG(2) << "State transition in " << cc->NodeName() << " @ "
@@ -179,7 +195,7 @@ class GateCalculator : public CalculatorBase {
                << ToString(last_gate_state_) << " to "
                << ToString(new_gate_state);
        cc->Outputs()
            .Tag(kStateChangeTag)
            .AddPacket(MakePacket<bool>(allow).At(cc->InputTimestamp()));
      }
    }
@@ -211,8 +227,10 @@ class GateCalculator : public CalculatorBase {
  GateState last_gate_state_ = GATE_UNINITIALIZED;
  int num_data_streams_;
  bool empty_packets_as_allow_;
  bool use_side_packet_for_allow_disallow_ = false;
  bool allow_by_side_packet_decision_;
  bool use_option_for_allow_disallow_ = false;
  bool allow_by_option_decision_;
};
REGISTER_CALCULATOR(GateCalculator);
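As the comment above describes, the gate decision can come from an ALLOW or DISALLOW stream, a side packet, or the new "allow" option. The fragment below sketches the stream-driven form; the stream names are illustrative assumptions.

// Illustrative graph fragment: gating a stream with a DISALLOW control input.
mediapipe::CalculatorGraphConfig gate_config =
    mediapipe::ParseTextProtoOrDie<mediapipe::CalculatorGraphConfig>(R"pb(
      input_stream: "frames"
      input_stream: "disallow_signal"
      node {
        calculator: "GateCalculator"
        input_stream: "frames"
        input_stream: "DISALLOW:disallow_signal"
        output_stream: "gated_frames"
        output_stream: "STATE_CHANGE:gate_state_change"
      }
    )pb");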
@@ -29,4 +29,8 @@ message GateCalculatorOptions {
  // disallowing the corresponding packets in the data input streams. Setting
  // this option to true inverts that, allowing the data packets to go through.
  optional bool empty_packets_as_allow = 1;

  // Whether to allow or disallow the input streams to pass when no
  // ALLOW/DISALLOW input or side input is specified.
  optional bool allow = 2 [default = false];
}
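A node that relies on the new "allow" option instead of a control input looks like the snippet below; it mirrors the configuration used by the tests added later in this change.

// Options-only gating (no ALLOW/DISALLOW stream or side packet):
mediapipe::CalculatorGraphConfig::Node gate_node =
    mediapipe::ParseTextProtoOrDie<mediapipe::CalculatorGraphConfig::Node>(R"pb(
      calculator: "GateCalculator"
      input_stream: "test_input"
      output_stream: "test_output"
      options: {
        [mediapipe.GateCalculatorOptions.ext] { allow: true }
      }
    )pb");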
@@ -22,6 +22,9 @@ namespace mediapipe {

namespace {

constexpr char kDisallowTag[] = "DISALLOW";
constexpr char kAllowTag[] = "ALLOW";

class GateCalculatorTest : public ::testing::Test {
 protected:
  // Helper to run a graph and return status.
@@ -110,6 +113,68 @@ TEST_F(GateCalculatorTest, InvalidInputs) {
  )")));
}

TEST_F(GateCalculatorTest, AllowByALLOWOptionToTrue) {
  SetRunner(R"(
        calculator: "GateCalculator"
        input_stream: "test_input"
        output_stream: "test_output"
        options: {
          [mediapipe.GateCalculatorOptions.ext] {
            allow: true
          }
        }
  )");

  constexpr int64 kTimestampValue0 = 42;
  RunTimeStep(kTimestampValue0, true);
  constexpr int64 kTimestampValue1 = 43;
  RunTimeStep(kTimestampValue1, false);

  const std::vector<Packet>& output = runner()->Outputs().Get("", 0).packets;
  ASSERT_EQ(2, output.size());
  EXPECT_EQ(kTimestampValue0, output[0].Timestamp().Value());
  EXPECT_EQ(kTimestampValue1, output[1].Timestamp().Value());
  EXPECT_EQ(true, output[0].Get<bool>());
  EXPECT_EQ(false, output[1].Get<bool>());
}

TEST_F(GateCalculatorTest, DisallowByALLOWOptionSetToFalse) {
  SetRunner(R"(
        calculator: "GateCalculator"
        input_stream: "test_input"
        output_stream: "test_output"
        options: {
          [mediapipe.GateCalculatorOptions.ext] {
            allow: false
          }
        }
  )");

  constexpr int64 kTimestampValue0 = 42;
  RunTimeStep(kTimestampValue0, true);
  constexpr int64 kTimestampValue1 = 43;
  RunTimeStep(kTimestampValue1, false);

  const std::vector<Packet>& output = runner()->Outputs().Get("", 0).packets;
  ASSERT_EQ(0, output.size());
}

TEST_F(GateCalculatorTest, DisallowByALLOWOptionNotSet) {
  SetRunner(R"(
        calculator: "GateCalculator"
        input_stream: "test_input"
        output_stream: "test_output"
  )");

  constexpr int64 kTimestampValue0 = 42;
  RunTimeStep(kTimestampValue0, true);
  constexpr int64 kTimestampValue1 = 43;
  RunTimeStep(kTimestampValue1, false);

  const std::vector<Packet>& output = runner()->Outputs().Get("", 0).packets;
  ASSERT_EQ(0, output.size());
}

TEST_F(GateCalculatorTest, AllowByALLOWSidePacketSetToTrue) {
  SetRunner(R"(
        calculator: "GateCalculator"
@@ -117,7 +182,7 @@ TEST_F(GateCalculatorTest, AllowByALLOWSidePacketSetToTrue) {
        input_stream: "test_input"
        output_stream: "test_output"
  )");
  runner()->MutableSidePackets()->Tag(kAllowTag) = Adopt(new bool(true));

  constexpr int64 kTimestampValue0 = 42;
  RunTimeStep(kTimestampValue0, true);
@@ -139,7 +204,7 @@ TEST_F(GateCalculatorTest, AllowByDisallowSidePacketSetToFalse) {
        input_stream: "test_input"
        output_stream: "test_output"
  )");
  runner()->MutableSidePackets()->Tag(kDisallowTag) = Adopt(new bool(false));

  constexpr int64 kTimestampValue0 = 42;
  RunTimeStep(kTimestampValue0, true);
@@ -161,7 +226,7 @@ TEST_F(GateCalculatorTest, DisallowByALLOWSidePacketSetToFalse) {
        input_stream: "test_input"
        output_stream: "test_output"
  )");
  runner()->MutableSidePackets()->Tag(kAllowTag) = Adopt(new bool(false));

  constexpr int64 kTimestampValue0 = 42;
  RunTimeStep(kTimestampValue0, true);
@@ -179,7 +244,7 @@ TEST_F(GateCalculatorTest, DisallowByDISALLOWSidePacketSetToTrue) {
        input_stream: "test_input"
        output_stream: "test_output"
  )");
  runner()->MutableSidePackets()->Tag(kDisallowTag) = Adopt(new bool(true));

  constexpr int64 kTimestampValue0 = 42;
  RunTimeStep(kTimestampValue0, true);
mediapipe/calculators/core/graph_profile_calculator.cc (new file, 70 lines)
@@ -0,0 +1,70 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <memory>

#include "mediapipe/calculators/core/graph_profile_calculator.pb.h"
#include "mediapipe/framework/api2/node.h"
#include "mediapipe/framework/api2/packet.h"
#include "mediapipe/framework/api2/port.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_profile.pb.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"

namespace mediapipe {
namespace api2 {

// This calculator periodically copies the GraphProfile from
// mediapipe::GraphProfiler::CaptureProfile to the "PROFILE" output stream.
//
// Example config:
// node {
//   calculator: "GraphProfileCalculator"
//   output_stream: "FRAME:any_frame"
//   output_stream: "PROFILE:graph_profile"
// }
//
class GraphProfileCalculator : public Node {
 public:
  static constexpr Input<AnyType>::Multiple kFrameIn{"FRAME"};
  static constexpr Output<GraphProfile> kProfileOut{"PROFILE"};

  MEDIAPIPE_NODE_CONTRACT(kFrameIn, kProfileOut);

  static absl::Status UpdateContract(CalculatorContract* cc) {
    return absl::OkStatus();
  }

  absl::Status Process(CalculatorContext* cc) final {
    auto options = cc->Options<::mediapipe::GraphProfileCalculatorOptions>();

    if (prev_profile_ts_ == Timestamp::Unset() ||
        cc->InputTimestamp() - prev_profile_ts_ >= options.profile_interval()) {
      prev_profile_ts_ = cc->InputTimestamp();
      GraphProfile result;
      MP_RETURN_IF_ERROR(cc->GetProfilingContext()->CaptureProfile(&result));
      kProfileOut(cc).Send(result);
    }
    return absl::OkStatus();
  }

 private:
  Timestamp prev_profile_ts_;
};

MEDIAPIPE_REGISTER_NODE(GraphProfileCalculator);

}  // namespace api2
}  // namespace mediapipe

mediapipe/calculators/core/graph_profile_calculator.proto (new file, 30 lines)
@@ -0,0 +1,30 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto2";

package mediapipe;

import "mediapipe/framework/calculator.proto";

option objc_class_prefix = "MediaPipe";

message GraphProfileCalculatorOptions {
  extend mediapipe.CalculatorOptions {
    optional GraphProfileCalculatorOptions ext = 367481815;
  }

  // The interval in microseconds between successive reported GraphProfiles.
  optional int64 profile_interval = 1 [default = 1000000];
}

mediapipe/calculators/core/graph_profile_calculator_test.cc (new file, 211 lines)
@@ -0,0 +1,211 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <memory>
#include <string>
#include <vector>

#include "absl/status/status.h"
#include "absl/strings/str_cat.h"
#include "absl/time/time.h"
#include "mediapipe/framework/calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_profile.pb.h"
#include "mediapipe/framework/deps/clock.h"
#include "mediapipe/framework/deps/message_matchers.h"
#include "mediapipe/framework/port/gmock.h"
#include "mediapipe/framework/port/gtest.h"
#include "mediapipe/framework/port/integral_types.h"
#include "mediapipe/framework/port/logging.h"
#include "mediapipe/framework/port/parse_text_proto.h"
#include "mediapipe/framework/port/proto_ns.h"
#include "mediapipe/framework/port/status_matchers.h"
#include "mediapipe/framework/port/threadpool.h"
#include "mediapipe/framework/tool/simulation_clock_executor.h"

// Tests for GraphProfileCalculator.
using testing::ElementsAre;

namespace mediapipe {
namespace {

constexpr char kClockTag[] = "CLOCK";

using mediapipe::Clock;

// A Calculator with a fixed Process call latency.
class SleepCalculator : public CalculatorBase {
 public:
  static absl::Status GetContract(CalculatorContract* cc) {
    cc->InputSidePackets().Tag(kClockTag).Set<std::shared_ptr<Clock>>();
    cc->Inputs().Index(0).SetAny();
    cc->Outputs().Index(0).SetSameAs(&cc->Inputs().Index(0));
    cc->SetTimestampOffset(TimestampDiff(0));
    return absl::OkStatus();
  }
  absl::Status Open(CalculatorContext* cc) final {
    clock_ =
        cc->InputSidePackets().Tag(kClockTag).Get<std::shared_ptr<Clock>>();
    return absl::OkStatus();
  }

  absl::Status Process(CalculatorContext* cc) final {
    clock_->Sleep(absl::Milliseconds(5));
    cc->Outputs().Index(0).AddPacket(cc->Inputs().Index(0).Value());
    return absl::OkStatus();
  }
  std::shared_ptr<::mediapipe::Clock> clock_ = nullptr;
};
REGISTER_CALCULATOR(SleepCalculator);

// Tests showing GraphProfileCalculator reporting GraphProfile output packets.
class GraphProfileCalculatorTest : public ::testing::Test {
 protected:
  void SetUpProfileGraph() {
    ASSERT_TRUE(proto_ns::TextFormat::ParseFromString(R"(
      input_stream: "input_packets_0"
      node {
        calculator: 'SleepCalculator'
        input_side_packet: 'CLOCK:sync_clock'
        input_stream: 'input_packets_0'
        output_stream: 'output_packets_1'
      }
      node {
        calculator: "GraphProfileCalculator"
        options: {
          [mediapipe.GraphProfileCalculatorOptions.ext]: {
            profile_interval: 25000
          }
        }
        input_stream: "FRAME:output_packets_1"
        output_stream: "PROFILE:output_packets_0"
      }
    )",
                                                      &graph_config_));
  }

  static Packet PacketAt(int64 ts) {
    return Adopt(new int64(999)).At(Timestamp(ts));
  }
  static Packet None() { return Packet().At(Timestamp::OneOverPostStream()); }
  static bool IsNone(const Packet& packet) {
    return packet.Timestamp() == Timestamp::OneOverPostStream();
  }
  // Return the values of the timestamps of a vector of Packets.
  static std::vector<int64> TimestampValues(
      const std::vector<Packet>& packets) {
    std::vector<int64> result;
    for (const Packet& p : packets) {
      result.push_back(p.Timestamp().Value());
    }
    return result;
  }

  // Runs a CalculatorGraph with a series of packet sets.
  // Returns a vector of packets from each graph output stream.
  void RunGraph(const std::vector<std::vector<Packet>>& input_sets,
                std::vector<Packet>* output_packets) {
    // Register output packet observers.
    tool::AddVectorSink("output_packets_0", &graph_config_, output_packets);

    // Start running the graph.
    std::shared_ptr<SimulationClockExecutor> executor(
        new SimulationClockExecutor(3 /*num_threads*/));
    CalculatorGraph graph;
    MP_ASSERT_OK(graph.SetExecutor("", executor));
    graph.profiler()->SetClock(executor->GetClock());
    MP_ASSERT_OK(graph.Initialize(graph_config_));
    executor->GetClock()->ThreadStart();
    MP_ASSERT_OK(graph.StartRun({
        {"sync_clock",
         Adopt(new std::shared_ptr<::mediapipe::Clock>(executor->GetClock()))},
    }));

    // Send each packet to the graph in the specified order.
    for (int t = 0; t < input_sets.size(); t++) {
      const std::vector<Packet>& input_set = input_sets[t];
      for (int i = 0; i < input_set.size(); i++) {
        const Packet& packet = input_set[i];
        if (!IsNone(packet)) {
          MP_EXPECT_OK(graph.AddPacketToInputStream(
              absl::StrCat("input_packets_", i), packet));
        }
        executor->GetClock()->Sleep(absl::Milliseconds(10));
      }
    }
    MP_ASSERT_OK(graph.CloseAllInputStreams());
    executor->GetClock()->Sleep(absl::Milliseconds(100));
    executor->GetClock()->ThreadFinish();
    MP_ASSERT_OK(graph.WaitUntilDone());
  }

  CalculatorGraphConfig graph_config_;
};

TEST_F(GraphProfileCalculatorTest, GraphProfile) {
  SetUpProfileGraph();
  auto profiler_config = graph_config_.mutable_profiler_config();
  profiler_config->set_enable_profiler(true);
  profiler_config->set_trace_enabled(false);
  profiler_config->set_trace_log_disabled(true);
  profiler_config->set_enable_stream_latency(true);
  profiler_config->set_calculator_filter(".*Calculator");

  // Run the graph with a series of packet sets.
  std::vector<std::vector<Packet>> input_sets = {
      {PacketAt(10000)},  //
      {PacketAt(20000)},  //
      {PacketAt(30000)},  //
      {PacketAt(40000)},
  };
  std::vector<Packet> output_packets;
  RunGraph(input_sets, &output_packets);

  // Validate the output packets.
  EXPECT_THAT(TimestampValues(output_packets),  //
              ElementsAre(10000, 40000));

  GraphProfile expected_profile =
      mediapipe::ParseTextProtoOrDie<GraphProfile>(R"pb(
        calculator_profiles {
          name: "GraphProfileCalculator"
          open_runtime: 0
          process_runtime { total: 0 count: 3 }
          process_input_latency { total: 15000 count: 3 }
          process_output_latency { total: 15000 count: 3 }
          input_stream_profiles {
            name: "output_packets_1"
            back_edge: false
            latency { total: 0 count: 3 }
          }
        }
        calculator_profiles {
          name: "SleepCalculator"
          open_runtime: 0
          process_runtime { total: 15000 count: 3 }
          process_input_latency { total: 0 count: 3 }
          process_output_latency { total: 15000 count: 3 }
          input_stream_profiles {
            name: "input_packets_0"
            back_edge: false
            latency { total: 0 count: 3 }
          }
        })pb");

  EXPECT_THAT(output_packets[1].Get<GraphProfile>(),
              mediapipe::EqualsProto(expected_profile));
}

}  // namespace
}  // namespace mediapipe
@@ -29,6 +29,9 @@
namespace mediapipe {
namespace {

constexpr char kMinuendTag[] = "MINUEND";
constexpr char kSubtrahendTag[] = "SUBTRAHEND";

// A 3x4 Matrix of random integers in [0,1000).
const char kMatrixText[] =
    "rows: 3\n"
@@ -104,12 +107,13 @@ TEST(MatrixSubtractCalculatorTest, SubtractFromInput) {
  CalculatorRunner runner(node_config);
  Matrix* side_matrix = new Matrix();
  MatrixFromTextProto(kMatrixText, side_matrix);
  runner.MutableSidePackets()->Tag(kSubtrahendTag) = Adopt(side_matrix);

  Matrix* input_matrix = new Matrix();
  MatrixFromTextProto(kMatrixText2, input_matrix);
  runner.MutableInputs()
      ->Tag(kMinuendTag)
      .packets.push_back(Adopt(input_matrix).At(Timestamp(0)));

  MP_ASSERT_OK(runner.Run());
  EXPECT_EQ(1, runner.Outputs().Index(0).packets.size());
@@ -133,12 +137,12 @@ TEST(MatrixSubtractCalculatorTest, SubtractFromSideMatrix) {
  CalculatorRunner runner(node_config);
  Matrix* side_matrix = new Matrix();
  MatrixFromTextProto(kMatrixText, side_matrix);
  runner.MutableSidePackets()->Tag(kMinuendTag) = Adopt(side_matrix);

  Matrix* input_matrix = new Matrix();
  MatrixFromTextProto(kMatrixText2, input_matrix);
  runner.MutableInputs()
      ->Tag(kSubtrahendTag)
      .packets.push_back(Adopt(input_matrix).At(Timestamp(0)));

  MP_ASSERT_OK(runner.Run());
@@ -14,7 +14,11 @@

#include <memory>

#include "absl/status/status.h"
#include "absl/types/optional.h"
#include "mediapipe/calculators/core/split_vector_calculator.h"
#include "mediapipe/framework/api2/node.h"
#include "mediapipe/framework/api2/port.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_runner.h"
#include "mediapipe/framework/port/gtest.h"
@@ -301,4 +305,99 @@ TEST(MuxCalculatorTest, DiscardSkippedInputs_MuxInputStreamHandler) {
}

}  // namespace

class PassThroughAndTsBoundUpdateNode : public mediapipe::api2::Node {
 public:
  static constexpr mediapipe::api2::Input<int> kInValue{"VALUE"};
  static constexpr mediapipe::api2::Output<int> kOutValue{"VALUE"};
  static constexpr mediapipe::api2::Output<int> kOutTsBoundUpdate{
      "TS_BOUND_UPDATE"};
  MEDIAPIPE_NODE_CONTRACT(kInValue, kOutValue, kOutTsBoundUpdate);

  absl::Status Process(CalculatorContext* cc) override {
    kOutValue(cc).Send(kInValue(cc));
    kOutTsBoundUpdate(cc).SetNextTimestampBound(
        cc->InputTimestamp().NextAllowedInStream());
    return absl::OkStatus();
  }
};
MEDIAPIPE_REGISTER_NODE(PassThroughAndTsBoundUpdateNode);

class ToOptionalNode : public mediapipe::api2::Node {
 public:
  static constexpr mediapipe::api2::Input<int> kTick{"TICK"};
  static constexpr mediapipe::api2::Input<int> kInValue{"VALUE"};
  static constexpr mediapipe::api2::Output<absl::optional<int>> kOutValue{
      "OUTPUT"};
  MEDIAPIPE_NODE_CONTRACT(kTick, kInValue, kOutValue);

  absl::Status Process(CalculatorContext* cc) override {
    if (kInValue(cc).IsEmpty()) {
      kOutValue(cc).Send(absl::nullopt);
    } else {
      kOutValue(cc).Send({kInValue(cc).Get()});
    }
    return absl::OkStatus();
  }
};
MEDIAPIPE_REGISTER_NODE(ToOptionalNode);

namespace {

TEST(MuxCalculatorTest, HandleTimestampBoundUpdates) {
  CalculatorGraphConfig config =
      mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(
          R"pb(
            input_stream: "select"
            node {
              calculator: "PassThroughAndTsBoundUpdateNode"
              input_stream: "VALUE:select"
              output_stream: "VALUE:select_ps"
              output_stream: "TS_BOUND_UPDATE:ts_bound_update"
            }
            node {
              calculator: "MuxCalculator"
              input_stream: "INPUT:0:select_ps"
              input_stream: "INPUT:1:ts_bound_update"
              input_stream: "SELECT:select"
              output_stream: "OUTPUT:select_or_ts_bound_update"
            }
            node {
              calculator: "ToOptionalNode"
              input_stream: "TICK:select"
              input_stream: "VALUE:select_or_ts_bound_update"
              output_stream: "OUTPUT:output"
            }
          )pb");
  std::vector<Packet> output_packets;
  tool::AddVectorSink("output", &config, &output_packets);

  CalculatorGraph graph;
  MP_ASSERT_OK(graph.Initialize(config));
  MP_ASSERT_OK(graph.StartRun({}));

  auto send_value_fn = [&](int value, Timestamp ts) -> absl::Status {
    MP_RETURN_IF_ERROR(
        graph.AddPacketToInputStream("select", MakePacket<int>(value).At(ts)));
    return graph.WaitUntilIdle();
  };

  MP_ASSERT_OK(send_value_fn(0, Timestamp(1)));
  ASSERT_EQ(output_packets.size(), 1);
  EXPECT_EQ(output_packets[0].Get<absl::optional<int>>(), 0);

  MP_ASSERT_OK(send_value_fn(1, Timestamp(2)));
  ASSERT_EQ(output_packets.size(), 2);
  EXPECT_EQ(output_packets[1].Get<absl::optional<int>>(), absl::nullopt);

  MP_ASSERT_OK(send_value_fn(0, Timestamp(3)));
  ASSERT_EQ(output_packets.size(), 3);
  EXPECT_EQ(output_packets[2].Get<absl::optional<int>>(), 0);

  MP_ASSERT_OK(graph.CloseAllInputStreams());
  MP_ASSERT_OK(graph.WaitUntilDone());
}

}  // namespace

}  // namespace mediapipe
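To restate the behavior the new test exercises: the SELECT packet chooses which INPUT:<index> stream MuxCalculator forwards, and when the selected stream only advances its timestamp bound (no packet), the OUTPUT stream's bound advances instead, which ToOptionalNode turns into absl::nullopt. The lines below are an illustrative driver for that graph, not code from the change.

// Sketch: selecting INPUT:1, which carries only a bound update at ts = 2.
MP_ASSERT_OK(graph.AddPacketToInputStream(
    "select", mediapipe::MakePacket<int>(1).At(mediapipe::Timestamp(2))));
MP_ASSERT_OK(graph.WaitUntilIdle());
// No packet arrives on INPUT:1 at this timestamp, so the observed output is
// absl::nullopt rather than a forwarded value.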
@@ -60,7 +60,10 @@ class PacketClonerCalculator : public CalculatorBase {
    const auto calculator_options =
        cc->Options<mediapipe::PacketClonerCalculatorOptions>();
    output_only_when_all_inputs_received_ =
        calculator_options.output_only_when_all_inputs_received() ||
        calculator_options.output_packets_only_when_all_inputs_received();
    output_empty_packets_before_all_inputs_received_ =
        calculator_options.output_packets_only_when_all_inputs_received();

    // Parse input streams.
    tick_signal_index_ = cc->Inputs().NumEntries() - 1;
@@ -88,6 +91,9 @@ class PacketClonerCalculator : public CalculatorBase {
      // Return if one of the input is null.
      for (int i = 0; i < tick_signal_index_; ++i) {
        if (current_[i].IsEmpty()) {
          if (output_empty_packets_before_all_inputs_received_) {
            SetAllNextTimestampBounds(cc);
          }
          return absl::OkStatus();
        }
      }
@@ -107,9 +113,17 @@ class PacketClonerCalculator : public CalculatorBase {
  }

 private:
  void SetAllNextTimestampBounds(CalculatorContext* cc) {
    for (int j = 0; j < tick_signal_index_; ++j) {
      cc->Outputs().Index(j).SetNextTimestampBound(
          cc->InputTimestamp().NextAllowedInStream());
    }
  }

  std::vector<Packet> current_;
  int tick_signal_index_;
  bool output_only_when_all_inputs_received_;
  bool output_empty_packets_before_all_inputs_received_;
};

REGISTER_CALCULATOR(PacketClonerCalculator);
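PacketClonerCalculator treats its last input stream as the tick signal and re-emits the most recent packet from every other input at each tick timestamp. The fragment below sketches a typical node; the stream names are assumptions, and the options block uses the new field added in this change.

// Illustrative node: clone slow metadata onto every camera frame timestamp.
mediapipe::CalculatorGraphConfig::Node cloner_node =
    mediapipe::ParseTextProtoOrDie<mediapipe::CalculatorGraphConfig::Node>(R"pb(
      calculator: "PacketClonerCalculator"
      input_stream: "slow_metadata"
      input_stream: "camera_frames"  # last input acts as the TICK signal
      output_stream: "cloned_metadata"
      options: {
        [mediapipe.PacketClonerCalculatorOptions.ext] {
          output_packets_only_when_all_inputs_received: true
        }
      }
    )pb");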
@@ -28,4 +28,9 @@ message PacketClonerCalculatorOptions {
  // When true, this calculator will drop received TICK packets if any input
  // stream hasn't received a packet yet.
  optional bool output_only_when_all_inputs_received = 1 [default = false];

  // Similar to the above, but also transmits empty packets (timestamp bound
  // updates) for all output streams before all inputs have been received.
  optional bool output_packets_only_when_all_inputs_received = 2
      [default = false];
}
|
|
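For reference, a minimal sketch of a node config that turns the new flag on. The stream names and the single data/tick stream layout are illustrative assumptions; only the option field itself comes from the proto above.

  // Hypothetical test-style usage (stream names are made up for illustration).
  CalculatorGraphConfig::Node node =
      ParseTextProtoOrDie<CalculatorGraphConfig::Node>(R"pb(
        calculator: "PacketClonerCalculator"
        input_stream: "frame"
        input_stream: "tick"
        output_stream: "cloned_frame"
        options {
          [mediapipe.PacketClonerCalculatorOptions.ext] {
            output_packets_only_when_all_inputs_received: true
          }
        }
      )pb");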
@@ -17,6 +17,9 @@
 namespace mediapipe {

+constexpr char kPresenceTag[] = "PRESENCE";
+constexpr char kPacketTag[] = "PACKET";
+
 // For each non empty input packet, emits a single output packet containing a
 // boolean value "true", "false" in response to empty packets (a.k.a. timestamp
 // bound updates) This can be used to "flag" the presence of an arbitrary packet
@@ -58,8 +61,8 @@ namespace mediapipe {
 class PacketPresenceCalculator : public CalculatorBase {
  public:
   static absl::Status GetContract(CalculatorContract* cc) {
-    cc->Inputs().Tag("PACKET").SetAny();
-    cc->Outputs().Tag("PRESENCE").Set<bool>();
+    cc->Inputs().Tag(kPacketTag).SetAny();
+    cc->Outputs().Tag(kPresenceTag).Set<bool>();
     // Process() function is invoked in response to input stream timestamp
     // bound updates.
     cc->SetProcessTimestampBounds(true);
@@ -73,8 +76,8 @@ class PacketPresenceCalculator : public CalculatorBase {
   absl::Status Process(CalculatorContext* cc) final {
     cc->Outputs()
-        .Tag("PRESENCE")
-        .AddPacket(MakePacket<bool>(!cc->Inputs().Tag("PACKET").IsEmpty())
+        .Tag(kPresenceTag)
+        .AddPacket(MakePacket<bool>(!cc->Inputs().Tag(kPacketTag).IsEmpty())
                        .At(cc->InputTimestamp()));
     return absl::OkStatus();
   }
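A minimal usage sketch for context. The stream names are illustrative assumptions; only the PACKET/PRESENCE tags are defined by the calculator above.

  // Hypothetical node: emits one bool per timestamp telling whether the
  // "landmarks" stream carried a packet at that timestamp.
  CalculatorGraphConfig::Node node =
      ParseTextProtoOrDie<CalculatorGraphConfig::Node>(R"pb(
        calculator: "PacketPresenceCalculator"
        input_stream: "PACKET:landmarks"
        output_stream: "PRESENCE:landmarks_present"
      )pb");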
@@ -39,6 +39,11 @@ namespace mediapipe {
 REGISTER_CALCULATOR(PacketResamplerCalculator);
 namespace {

+constexpr char kSeedTag[] = "SEED";
+constexpr char kVideoHeaderTag[] = "VIDEO_HEADER";
+constexpr char kOptionsTag[] = "OPTIONS";
+
 // Returns a TimestampDiff (assuming microseconds) corresponding to the
 // given time in seconds.
 TimestampDiff TimestampDiffFromSeconds(double seconds) {
@@ -50,16 +55,16 @@ TimestampDiff TimestampDiffFromSeconds(double seconds) {
 absl::Status PacketResamplerCalculator::GetContract(CalculatorContract* cc) {
   const auto& resampler_options =
       cc->Options<PacketResamplerCalculatorOptions>();
-  if (cc->InputSidePackets().HasTag("OPTIONS")) {
-    cc->InputSidePackets().Tag("OPTIONS").Set<CalculatorOptions>();
+  if (cc->InputSidePackets().HasTag(kOptionsTag)) {
+    cc->InputSidePackets().Tag(kOptionsTag).Set<CalculatorOptions>();
   }
   CollectionItemId input_data_id = cc->Inputs().GetId("DATA", 0);
   if (!input_data_id.IsValid()) {
     input_data_id = cc->Inputs().GetId("", 0);
   }
   cc->Inputs().Get(input_data_id).SetAny();
-  if (cc->Inputs().HasTag("VIDEO_HEADER")) {
-    cc->Inputs().Tag("VIDEO_HEADER").Set<VideoHeader>();
+  if (cc->Inputs().HasTag(kVideoHeaderTag)) {
+    cc->Inputs().Tag(kVideoHeaderTag).Set<VideoHeader>();
   }

   CollectionItemId output_data_id = cc->Outputs().GetId("DATA", 0);
@@ -67,15 +72,15 @@ absl::Status PacketResamplerCalculator::GetContract(CalculatorContract* cc) {
     output_data_id = cc->Outputs().GetId("", 0);
   }
   cc->Outputs().Get(output_data_id).SetSameAs(&cc->Inputs().Get(input_data_id));
-  if (cc->Outputs().HasTag("VIDEO_HEADER")) {
-    cc->Outputs().Tag("VIDEO_HEADER").Set<VideoHeader>();
+  if (cc->Outputs().HasTag(kVideoHeaderTag)) {
+    cc->Outputs().Tag(kVideoHeaderTag).Set<VideoHeader>();
   }

   if (resampler_options.jitter() != 0.0) {
     RET_CHECK_GT(resampler_options.jitter(), 0.0);
     RET_CHECK_LE(resampler_options.jitter(), 1.0);
-    RET_CHECK(cc->InputSidePackets().HasTag("SEED"));
-    cc->InputSidePackets().Tag("SEED").Set<std::string>();
+    RET_CHECK(cc->InputSidePackets().HasTag(kSeedTag));
+    cc->InputSidePackets().Tag(kSeedTag).Set<std::string>();
   }
   return absl::OkStatus();
 }
@@ -143,9 +148,9 @@ absl::Status PacketResamplerCalculator::Open(CalculatorContext* cc) {
 absl::Status PacketResamplerCalculator::Process(CalculatorContext* cc) {
   if (cc->InputTimestamp() == Timestamp::PreStream() &&
-      cc->Inputs().UsesTags() && cc->Inputs().HasTag("VIDEO_HEADER") &&
-      !cc->Inputs().Tag("VIDEO_HEADER").IsEmpty()) {
-    video_header_ = cc->Inputs().Tag("VIDEO_HEADER").Get<VideoHeader>();
+      cc->Inputs().UsesTags() && cc->Inputs().HasTag(kVideoHeaderTag) &&
+      !cc->Inputs().Tag(kVideoHeaderTag).IsEmpty()) {
+    video_header_ = cc->Inputs().Tag(kVideoHeaderTag).Get<VideoHeader>();
     video_header_.frame_rate = frame_rate_;
     if (cc->Inputs().Get(input_data_id_).IsEmpty()) {
       return absl::OkStatus();
@@ -234,7 +239,7 @@ absl::Status LegacyJitterWithReflectionStrategy::Open(CalculatorContext* cc) {
         "ignored, because we are adding jitter.";
   }

-  const auto& seed = cc->InputSidePackets().Tag("SEED").Get<std::string>();
+  const auto& seed = cc->InputSidePackets().Tag(kSeedTag).Get<std::string>();
   random_ = CreateSecureRandom(seed);
   if (random_ == nullptr) {
     return absl::InvalidArgumentError(
@@ -357,7 +362,7 @@ absl::Status ReproducibleJitterWithReflectionStrategy::Open(
         "ignored, because we are adding jitter.";
   }

-  const auto& seed = cc->InputSidePackets().Tag("SEED").Get<std::string>();
+  const auto& seed = cc->InputSidePackets().Tag(kSeedTag).Get<std::string>();
   random_ = CreateSecureRandom(seed);
   if (random_ == nullptr) {
     return absl::InvalidArgumentError(
@@ -504,7 +509,7 @@ absl::Status JitterWithoutReflectionStrategy::Open(CalculatorContext* cc) {
         "ignored, because we are adding jitter.";
   }

-  const auto& seed = cc->InputSidePackets().Tag("SEED").Get<std::string>();
+  const auto& seed = cc->InputSidePackets().Tag(kSeedTag).Get<std::string>();
   random_ = CreateSecureRandom(seed);
   if (random_ == nullptr) {
     return absl::InvalidArgumentError(
@@ -635,9 +640,9 @@ absl::Status NoJitterStrategy::Process(CalculatorContext* cc) {
           base_timestamp_ +
           TimestampDiffFromSeconds(first_index / calculator_->frame_rate_);
   }
-  if (cc->Outputs().UsesTags() && cc->Outputs().HasTag("VIDEO_HEADER")) {
+  if (cc->Outputs().UsesTags() && cc->Outputs().HasTag(kVideoHeaderTag)) {
     cc->Outputs()
-        .Tag("VIDEO_HEADER")
+        .Tag(kVideoHeaderTag)
         .Add(new VideoHeader(calculator_->video_header_),
              Timestamp::PreStream());
   }
@@ -32,6 +32,12 @@ namespace mediapipe {
 using ::testing::ElementsAre;
 namespace {

+constexpr char kOptionsTag[] = "OPTIONS";
+constexpr char kSeedTag[] = "SEED";
+constexpr char kVideoHeaderTag[] = "VIDEO_HEADER";
+constexpr char kDataTag[] = "DATA";
+
 // A simple version of CalculatorRunner with built-in convenience
 // methods for setting inputs from a vector and checking outputs
 // against expected outputs (both timestamps and contents).
@@ -464,7 +470,7 @@ TEST(PacketResamplerCalculatorTest, SetVideoHeader) {
   )pb"));

   for (const int64 ts : {0, 5000, 10010, 15001, 19990}) {
-    runner.MutableInputs()->Tag("DATA").packets.push_back(
+    runner.MutableInputs()->Tag(kDataTag).packets.push_back(
         Adopt(new std::string(absl::StrCat("Frame #", ts))).At(Timestamp(ts)));
   }
   VideoHeader video_header_in;
@@ -474,16 +480,16 @@ TEST(PacketResamplerCalculatorTest, SetVideoHeader) {
   video_header_in.duration = 1.0;
   video_header_in.format = ImageFormat::SRGB;
   runner.MutableInputs()
-      ->Tag("VIDEO_HEADER")
+      ->Tag(kVideoHeaderTag)
       .packets.push_back(
           Adopt(new VideoHeader(video_header_in)).At(Timestamp::PreStream()));
   MP_ASSERT_OK(runner.Run());

-  ASSERT_EQ(1, runner.Outputs().Tag("VIDEO_HEADER").packets.size());
+  ASSERT_EQ(1, runner.Outputs().Tag(kVideoHeaderTag).packets.size());
   EXPECT_EQ(Timestamp::PreStream(),
-            runner.Outputs().Tag("VIDEO_HEADER").packets[0].Timestamp());
+            runner.Outputs().Tag(kVideoHeaderTag).packets[0].Timestamp());
   const VideoHeader& video_header_out =
-      runner.Outputs().Tag("VIDEO_HEADER").packets[0].Get<VideoHeader>();
+      runner.Outputs().Tag(kVideoHeaderTag).packets[0].Get<VideoHeader>();
   EXPECT_EQ(video_header_in.width, video_header_out.width);
   EXPECT_EQ(video_header_in.height, video_header_out.height);
   EXPECT_DOUBLE_EQ(50.0, video_header_out.frame_rate);
@@ -725,7 +731,7 @@ TEST(PacketResamplerCalculatorTest, OptionsSidePacket) {
        [mediapipe.PacketResamplerCalculatorOptions.ext] {
          frame_rate: 30
        })pb"));
-  runner.MutableSidePackets()->Tag("OPTIONS") = Adopt(options);
+  runner.MutableSidePackets()->Tag(kOptionsTag) = Adopt(options);
   runner.SetInput({-222, 15000, 32000, 49999, 150000});
   MP_ASSERT_OK(runner.Run());
   EXPECT_EQ(6, runner.Outputs().Index(0).packets.size());
@@ -740,7 +746,7 @@ TEST(PacketResamplerCalculatorTest, OptionsSidePacket) {
          frame_rate: 30
          base_timestamp: 0
        })pb"));
-  runner.MutableSidePackets()->Tag("OPTIONS") = Adopt(options);
+  runner.MutableSidePackets()->Tag(kOptionsTag) = Adopt(options);

   runner.SetInput({-222, 15000, 32000, 49999, 150000});
   MP_ASSERT_OK(runner.Run());
@@ -217,6 +217,7 @@ absl::Status PacketThinnerCalculator::Open(CalculatorContext* cc) {
     header->format = video_header.format;
     header->width = video_header.width;
     header->height = video_header.height;
+    header->duration = video_header.duration;
     header->frame_rate = new_frame_rate;
     cc->Outputs().Index(0).SetHeader(Adopt(header.release()));
   } else {
@@ -29,6 +29,8 @@
 namespace mediapipe {
 namespace {

+constexpr char kPeriodTag[] = "PERIOD";
+
 // A simple version of CalculatorRunner with built-in convenience methods for
 // setting inputs from a vector and checking outputs against a vector of
 // expected outputs.
@@ -121,7 +123,7 @@ TEST(PacketThinnerCalculatorTest, ASyncUniformStreamThinningTestBySidePacket) {
   SimpleRunner runner(node);
   runner.SetInput({2, 4, 6, 8, 10, 12, 14});
-  runner.MutableSidePackets()->Tag("PERIOD") = MakePacket<int64>(5);
+  runner.MutableSidePackets()->Tag(kPeriodTag) = MakePacket<int64>(5);
   MP_ASSERT_OK(runner.Run());

   const std::vector<int64> expected_timestamps = {2, 8, 14};
@@ -160,7 +162,7 @@ TEST(PacketThinnerCalculatorTest, SyncUniformStreamThinningTestBySidePacket1) {
   SimpleRunner runner(node);
   runner.SetInput({2, 4, 6, 8, 10, 12, 14});
-  runner.MutableSidePackets()->Tag("PERIOD") = MakePacket<int64>(5);
+  runner.MutableSidePackets()->Tag(kPeriodTag) = MakePacket<int64>(5);
   MP_ASSERT_OK(runner.Run());

   const std::vector<int64> expected_timestamps = {2, 6, 10, 14};
@@ -39,6 +39,8 @@ using ::testing::Pair;
 using ::testing::Value;
 namespace {

+constexpr char kDisallowTag[] = "DISALLOW";
+
 // Returns the timestamp values for a vector of Packets.
 // TODO: puth this kind of test util in a common place.
 std::vector<int64> TimestampValues(const std::vector<Packet>& packets) {
@@ -702,14 +704,14 @@ class DroppingGateCalculator : public CalculatorBase {
  public:
   static absl::Status GetContract(CalculatorContract* cc) {
     cc->Inputs().Index(0).SetAny();
-    cc->Inputs().Tag("DISALLOW").Set<bool>();
+    cc->Inputs().Tag(kDisallowTag).Set<bool>();
     cc->Outputs().Index(0).SetSameAs(&cc->Inputs().Index(0));
     return absl::OkStatus();
   }

   absl::Status Process(CalculatorContext* cc) final {
     if (!cc->Inputs().Index(0).IsEmpty() &&
-        !cc->Inputs().Tag("DISALLOW").Get<bool>()) {
+        !cc->Inputs().Tag(kDisallowTag).Get<bool>()) {
       cc->Outputs().Index(0).AddPacket(cc->Inputs().Index(0).Value());
     }
     return absl::OkStatus();
@@ -41,11 +41,14 @@
 // }
 namespace mediapipe {

+constexpr char kEncodedTag[] = "ENCODED";
+constexpr char kFloatVectorTag[] = "FLOAT_VECTOR";
+
 class QuantizeFloatVectorCalculator : public CalculatorBase {
  public:
   static absl::Status GetContract(CalculatorContract* cc) {
-    cc->Inputs().Tag("FLOAT_VECTOR").Set<std::vector<float>>();
-    cc->Outputs().Tag("ENCODED").Set<std::string>();
+    cc->Inputs().Tag(kFloatVectorTag).Set<std::vector<float>>();
+    cc->Outputs().Tag(kEncodedTag).Set<std::string>();
     return absl::OkStatus();
   }
@@ -70,7 +73,7 @@ class QuantizeFloatVectorCalculator : public CalculatorBase {
   absl::Status Process(CalculatorContext* cc) final {
     const std::vector<float>& float_vector =
-        cc->Inputs().Tag("FLOAT_VECTOR").Value().Get<std::vector<float>>();
+        cc->Inputs().Tag(kFloatVectorTag).Value().Get<std::vector<float>>();
     int feature_size = float_vector.size();
     std::string encoded_features;
     encoded_features.reserve(feature_size);
@@ -86,7 +89,9 @@ class QuantizeFloatVectorCalculator : public CalculatorBase {
           (old_value - min_quantized_value_) * (255.0 / range_));
       encoded_features += encoded;
     }
-    cc->Outputs().Tag("ENCODED").AddPacket(
+    cc->Outputs()
+        .Tag(kEncodedTag)
+        .AddPacket(
         MakePacket<std::string>(encoded_features).At(cc->InputTimestamp()));
     return absl::OkStatus();
   }
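To make the quantization arithmetic above concrete: assuming min_quantized_value_ = -64 and range_ = 128 (values chosen here for illustration, consistent with the test vectors below), each element is mapped through (value - min) * (255.0 / range) before the narrowing cast, so -64.0 maps to 0.0, 0.0 to 127.5 and 64.0 to 255.0. A standalone sketch of just that mapping:

  // Sketch of the per-element mapping; the real calculator additionally
  // saturates values outside [min, min + range] (see TestSaturation below).
  float QuantizeToByteScale(float value, float min_value, float range) {
    return (value - min_value) * (255.0f / range);  // still a float here
  }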
@@ -25,6 +25,9 @@
 namespace mediapipe {

+constexpr char kEncodedTag[] = "ENCODED";
+constexpr char kFloatVectorTag[] = "FLOAT_VECTOR";
+
 TEST(QuantizeFloatVectorCalculatorTest, WrongConfig) {
   CalculatorGraphConfig::Node node_config =
       ParseTextProtoOrDie<CalculatorGraphConfig::Node>(R"pb(
@@ -40,7 +43,7 @@ TEST(QuantizeFloatVectorCalculatorTest, WrongConfig) {
   CalculatorRunner runner(node_config);
   std::vector<float> empty_vector;
   runner.MutableInputs()
-      ->Tag("FLOAT_VECTOR")
+      ->Tag(kFloatVectorTag)
       .packets.push_back(
           MakePacket<std::vector<float>>(empty_vector).At(Timestamp(0)));
   auto status = runner.Run();
@@ -67,7 +70,7 @@ TEST(QuantizeFloatVectorCalculatorTest, WrongConfig2) {
   CalculatorRunner runner(node_config);
   std::vector<float> empty_vector;
   runner.MutableInputs()
-      ->Tag("FLOAT_VECTOR")
+      ->Tag(kFloatVectorTag)
       .packets.push_back(
           MakePacket<std::vector<float>>(empty_vector).At(Timestamp(0)));
   auto status = runner.Run();
@@ -94,7 +97,7 @@ TEST(QuantizeFloatVectorCalculatorTest, WrongConfig3) {
   CalculatorRunner runner(node_config);
   std::vector<float> empty_vector;
   runner.MutableInputs()
-      ->Tag("FLOAT_VECTOR")
+      ->Tag(kFloatVectorTag)
       .packets.push_back(
           MakePacket<std::vector<float>>(empty_vector).At(Timestamp(0)));
   auto status = runner.Run();
@@ -121,11 +124,12 @@ TEST(QuantizeFloatVectorCalculatorTest, TestEmptyVector) {
   CalculatorRunner runner(node_config);
   std::vector<float> empty_vector;
   runner.MutableInputs()
-      ->Tag("FLOAT_VECTOR")
+      ->Tag(kFloatVectorTag)
       .packets.push_back(
           MakePacket<std::vector<float>>(empty_vector).At(Timestamp(0)));
   MP_ASSERT_OK(runner.Run());
-  const std::vector<Packet>& outputs = runner.Outputs().Tag("ENCODED").packets;
+  const std::vector<Packet>& outputs =
+      runner.Outputs().Tag(kEncodedTag).packets;
   EXPECT_EQ(1, outputs.size());
   EXPECT_TRUE(outputs[0].Get<std::string>().empty());
   EXPECT_EQ(Timestamp(0), outputs[0].Timestamp());
@@ -147,11 +151,12 @@ TEST(QuantizeFloatVectorCalculatorTest, TestNonEmptyVector) {
   CalculatorRunner runner(node_config);
   std::vector<float> vector = {0.0f, -64.0f, 64.0f, -32.0f, 32.0f};
   runner.MutableInputs()
-      ->Tag("FLOAT_VECTOR")
+      ->Tag(kFloatVectorTag)
       .packets.push_back(
           MakePacket<std::vector<float>>(vector).At(Timestamp(0)));
   MP_ASSERT_OK(runner.Run());
-  const std::vector<Packet>& outputs = runner.Outputs().Tag("ENCODED").packets;
+  const std::vector<Packet>& outputs =
+      runner.Outputs().Tag(kEncodedTag).packets;
   EXPECT_EQ(1, outputs.size());
   const std::string& result = outputs[0].Get<std::string>();
   ASSERT_FALSE(result.empty());
@@ -185,11 +190,12 @@ TEST(QuantizeFloatVectorCalculatorTest, TestSaturation) {
   CalculatorRunner runner(node_config);
   std::vector<float> vector = {-65.0f, 65.0f};
   runner.MutableInputs()
-      ->Tag("FLOAT_VECTOR")
+      ->Tag(kFloatVectorTag)
       .packets.push_back(
           MakePacket<std::vector<float>>(vector).At(Timestamp(0)));
   MP_ASSERT_OK(runner.Run());
-  const std::vector<Packet>& outputs = runner.Outputs().Tag("ENCODED").packets;
+  const std::vector<Packet>& outputs =
+      runner.Outputs().Tag(kEncodedTag).packets;
   EXPECT_EQ(1, outputs.size());
   const std::string& result = outputs[0].Get<std::string>();
   ASSERT_FALSE(result.empty());
@@ -23,6 +23,9 @@
 namespace mediapipe {

+constexpr char kAllowTag[] = "ALLOW";
+constexpr char kMaxInFlightTag[] = "MAX_IN_FLIGHT";
+
 // RealTimeFlowLimiterCalculator is used to limit the number of pipelined
 // processing operations in a section of the graph.
 //
@@ -86,11 +89,11 @@ class RealTimeFlowLimiterCalculator : public CalculatorBase {
       cc->Outputs().Get("", i).SetSameAs(&(cc->Inputs().Get("", i)));
     }
     cc->Inputs().Get("FINISHED", 0).SetAny();
-    if (cc->InputSidePackets().HasTag("MAX_IN_FLIGHT")) {
-      cc->InputSidePackets().Tag("MAX_IN_FLIGHT").Set<int>();
+    if (cc->InputSidePackets().HasTag(kMaxInFlightTag)) {
+      cc->InputSidePackets().Tag(kMaxInFlightTag).Set<int>();
     }
-    if (cc->Outputs().HasTag("ALLOW")) {
-      cc->Outputs().Tag("ALLOW").Set<bool>();
+    if (cc->Outputs().HasTag(kAllowTag)) {
+      cc->Outputs().Tag(kAllowTag).Set<bool>();
     }

     cc->SetInputStreamHandler("ImmediateInputStreamHandler");
@@ -101,8 +104,8 @@ class RealTimeFlowLimiterCalculator : public CalculatorBase {
   absl::Status Open(CalculatorContext* cc) final {
     finished_id_ = cc->Inputs().GetId("FINISHED", 0);
     max_in_flight_ = 1;
-    if (cc->InputSidePackets().HasTag("MAX_IN_FLIGHT")) {
-      max_in_flight_ = cc->InputSidePackets().Tag("MAX_IN_FLIGHT").Get<int>();
+    if (cc->InputSidePackets().HasTag(kMaxInFlightTag)) {
+      max_in_flight_ = cc->InputSidePackets().Tag(kMaxInFlightTag).Get<int>();
     }
     RET_CHECK_GE(max_in_flight_, 1);
     num_in_flight_ = 0;
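For orientation, a minimal sketch of how such a limiter is typically wired into a graph. The stream names, the back-edge annotation, and the side packet are illustrative assumptions; only the FINISHED/ALLOW/MAX_IN_FLIGHT tags come from the code above.

  // Hypothetical node: at most MAX_IN_FLIGHT frames are let through until a
  // FINISHED packet comes back for each of them.
  CalculatorGraphConfig::Node node =
      ParseTextProtoOrDie<CalculatorGraphConfig::Node>(R"pb(
        calculator: "RealTimeFlowLimiterCalculator"
        input_stream: "frame"
        input_stream: "FINISHED:detections"
        input_stream_info: { tag_index: "FINISHED" back_edge: true }
        input_side_packet: "MAX_IN_FLIGHT:max_in_flight"
        output_stream: "throttled_frame"
        output_stream: "ALLOW:frame_allowed"
      )pb");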
@@ -33,6 +33,9 @@
 namespace mediapipe {

 namespace {

+constexpr char kFinishedTag[] = "FINISHED";
+
 // A simple Semaphore for synchronizing test threads.
 class AtomicSemaphore {
  public:
@@ -112,7 +115,7 @@ TEST(RealTimeFlowLimiterCalculator, BasicTest) {
     Timestamp timestamp =
         Timestamp((i + 1) * Timestamp::kTimestampUnitsPerSecond);
     runner.MutableInputs()
-        ->Tag("FINISHED")
+        ->Tag(kFinishedTag)
         .packets.push_back(MakePacket<bool>(true).At(timestamp));
   }
@@ -22,6 +22,8 @@ namespace mediapipe {
 namespace {

+constexpr char kPacketOffsetTag[] = "PACKET_OFFSET";
+
 // Adds packets containing integers equal to their original timestamp.
 void AddPackets(CalculatorRunner* runner) {
   for (int i = 0; i < 10; ++i) {
@@ -111,7 +113,7 @@ TEST(SequenceShiftCalculatorTest, SidePacketOffset) {
   CalculatorRunner runner(node);
   AddPackets(&runner);
-  runner.MutableSidePackets()->Tag("PACKET_OFFSET") = Adopt(new int(-2));
+  runner.MutableSidePackets()->Tag(kPacketOffsetTag) = Adopt(new int(-2));
   MP_ASSERT_OK(runner.Run());
   const std::vector<Packet>& input_packets =
       runner.MutableInputs()->Index(0).packets;
@@ -80,4 +80,7 @@ typedef SplitVectorCalculator<mediapipe::ClassificationList, false>
     SplitClassificationListVectorCalculator;
 REGISTER_CALCULATOR(SplitClassificationListVectorCalculator);

+typedef SplitVectorCalculator<uint64_t, false> SplitUint64tVectorCalculator;
+REGISTER_CALCULATOR(SplitUint64tVectorCalculator);
+
 }  // namespace mediapipe
@@ -661,3 +661,138 @@ cc_test(
         "//mediapipe/framework/port:parse_text_proto",
     ],
 )
+
+cc_library(
+    name = "affine_transformation",
+    hdrs = ["affine_transformation.h"],
+    deps = ["@com_google_absl//absl/status:statusor"],
+)
+
+cc_library(
+    name = "affine_transformation_runner_gl",
+    srcs = ["affine_transformation_runner_gl.cc"],
+    hdrs = ["affine_transformation_runner_gl.h"],
+    deps = [
+        ":affine_transformation",
+        "//mediapipe/framework:calculator_framework",
+        "//mediapipe/framework/port:ret_check",
+        "//mediapipe/gpu:gl_calculator_helper",
+        "//mediapipe/gpu:gl_simple_shaders",
+        "//mediapipe/gpu:gpu_buffer",
+        "//mediapipe/gpu:gpu_origin_cc_proto",
+        "//mediapipe/gpu:shader_util",
+        "@com_google_absl//absl/memory",
+        "@com_google_absl//absl/status",
+        "@com_google_absl//absl/status:statusor",
+        "@eigen_archive//:eigen3",
+    ],
+)
+
+cc_library(
+    name = "affine_transformation_runner_opencv",
+    srcs = ["affine_transformation_runner_opencv.cc"],
+    hdrs = ["affine_transformation_runner_opencv.h"],
+    deps = [
+        ":affine_transformation",
+        "//mediapipe/framework:calculator_framework",
+        "//mediapipe/framework/formats:image_frame",
+        "//mediapipe/framework/formats:image_frame_opencv",
+        "//mediapipe/framework/port:opencv_core",
+        "//mediapipe/framework/port:opencv_imgproc",
+        "//mediapipe/framework/port:ret_check",
+        "@com_google_absl//absl/memory",
+        "@com_google_absl//absl/status:statusor",
+        "@eigen_archive//:eigen3",
+    ],
+)
+
+mediapipe_proto_library(
+    name = "warp_affine_calculator_proto",
+    srcs = ["warp_affine_calculator.proto"],
+    visibility = ["//visibility:public"],
+    deps = [
+        "//mediapipe/framework:calculator_options_proto",
+        "//mediapipe/framework:calculator_proto",
+        "//mediapipe/gpu:gpu_origin_proto",
+    ],
+)
+
+cc_library(
+    name = "warp_affine_calculator",
+    srcs = ["warp_affine_calculator.cc"],
+    hdrs = ["warp_affine_calculator.h"],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":affine_transformation",
+        ":affine_transformation_runner_opencv",
+        ":warp_affine_calculator_cc_proto",
+        "@com_google_absl//absl/status",
+        "@com_google_absl//absl/status:statusor",
+        "//mediapipe/framework:calculator_framework",
+        "//mediapipe/framework/api2:node",
+        "//mediapipe/framework/api2:port",
+        "//mediapipe/framework/formats:image",
+        "//mediapipe/framework/formats:image_frame",
+        "//mediapipe/framework/port:ret_check",
+        "//mediapipe/framework/port:status",
+    ] + select({
+        "//mediapipe/gpu:disable_gpu": [],
+        "//conditions:default": [
+            "//mediapipe/gpu:gl_calculator_helper",
+            "//mediapipe/gpu:gpu_buffer",
+            ":affine_transformation_runner_gl",
+        ],
+    }),
+    alwayslink = 1,
+)
+
+cc_test(
+    name = "warp_affine_calculator_test",
+    srcs = ["warp_affine_calculator_test.cc"],
+    data = [
+        "//mediapipe/calculators/tensor:testdata/image_to_tensor/input.jpg",
+        "//mediapipe/calculators/tensor:testdata/image_to_tensor/large_sub_rect.png",
+        "//mediapipe/calculators/tensor:testdata/image_to_tensor/large_sub_rect_border_zero.png",
+        "//mediapipe/calculators/tensor:testdata/image_to_tensor/large_sub_rect_keep_aspect.png",
+        "//mediapipe/calculators/tensor:testdata/image_to_tensor/large_sub_rect_keep_aspect_border_zero.png",
+        "//mediapipe/calculators/tensor:testdata/image_to_tensor/large_sub_rect_keep_aspect_with_rotation.png",
+        "//mediapipe/calculators/tensor:testdata/image_to_tensor/large_sub_rect_keep_aspect_with_rotation_border_zero.png",
+        "//mediapipe/calculators/tensor:testdata/image_to_tensor/medium_sub_rect_keep_aspect.png",
+        "//mediapipe/calculators/tensor:testdata/image_to_tensor/medium_sub_rect_keep_aspect_border_zero.png",
+        "//mediapipe/calculators/tensor:testdata/image_to_tensor/medium_sub_rect_keep_aspect_with_rotation.png",
+        "//mediapipe/calculators/tensor:testdata/image_to_tensor/medium_sub_rect_keep_aspect_with_rotation_border_zero.png",
+        "//mediapipe/calculators/tensor:testdata/image_to_tensor/medium_sub_rect_with_rotation.png",
+        "//mediapipe/calculators/tensor:testdata/image_to_tensor/medium_sub_rect_with_rotation_border_zero.png",
+        "//mediapipe/calculators/tensor:testdata/image_to_tensor/noop_except_range.png",
+    ],
+    tags = ["desktop_only_test"],
+    deps = [
+        ":affine_transformation",
+        ":warp_affine_calculator",
+        "//mediapipe/calculators/image:image_transformation_calculator",
+        "//mediapipe/calculators/tensor:image_to_tensor_converter",
+        "//mediapipe/calculators/tensor:image_to_tensor_utils",
+        "//mediapipe/calculators/util:from_image_calculator",
+        "//mediapipe/calculators/util:to_image_calculator",
+        "//mediapipe/framework:calculator_framework",
+        "//mediapipe/framework:calculator_runner",
+        "//mediapipe/framework/deps:file_path",
+        "//mediapipe/framework/formats:image",
+        "//mediapipe/framework/formats:image_format_cc_proto",
+        "//mediapipe/framework/formats:image_frame",
+        "//mediapipe/framework/formats:image_frame_opencv",
+        "//mediapipe/framework/formats:rect_cc_proto",
+        "//mediapipe/framework/formats:tensor",
+        "//mediapipe/framework/port:gtest_main",
+        "//mediapipe/framework/port:integral_types",
+        "//mediapipe/framework/port:opencv_core",
+        "//mediapipe/framework/port:opencv_imgcodecs",
+        "//mediapipe/framework/port:opencv_imgproc",
+        "//mediapipe/framework/port:parse_text_proto",
+        "//mediapipe/gpu:gpu_buffer_to_image_frame_calculator",
+        "//mediapipe/gpu:image_frame_to_gpu_buffer_calculator",
+        "@com_google_absl//absl/flags:flag",
+        "@com_google_absl//absl/memory",
+        "@com_google_absl//absl/strings",
+    ],
+)
mediapipe/calculators/image/affine_transformation.h (new file, 55 lines)
@@ -0,0 +1,55 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef MEDIAPIPE_CALCULATORS_IMAGE_AFFINE_TRANSFORMATION_H_
#define MEDIAPIPE_CALCULATORS_IMAGE_AFFINE_TRANSFORMATION_H_

#include <array>

#include "absl/status/statusor.h"

namespace mediapipe {

class AffineTransformation {
 public:
  // Pixel extrapolation method.
  // When converting image to tensor it may happen that tensor needs to read
  // pixels outside image boundaries. Border mode helps to specify how such
  // pixels will be calculated.
  enum class BorderMode { kZero, kReplicate };

  struct Size {
    int width;
    int height;
  };

  template <typename InputT, typename OutputT>
  class Runner {
   public:
    virtual ~Runner() = default;

    // Transforms input into output using @matrix as following:
    //   output(x, y) = input(matrix[0] * x + matrix[1] * y + matrix[3],
    //                        matrix[4] * x + matrix[5] * y + matrix[7])
    // where x and y ranges are defined by @output_size.
    virtual absl::StatusOr<OutputT> Run(const InputT& input,
                                        const std::array<float, 16>& matrix,
                                        const Size& output_size,
                                        BorderMode border_mode) = 0;
  };
};

}  // namespace mediapipe

#endif  // MEDIAPIPE_CALCULATORS_IMAGE_AFFINE_TRANSFORMATION_H_
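To make the matrix convention in Runner::Run() concrete, here is a small sketch (not part of this commit) of a row-major 4x4 matrix in that layout; the concrete values are made up, and the sub-rectangle interpretation assumes coordinates normalized to [0, 1].

// Samples the input at x_in = 0.5 * x_out + 0.25 and y_in = 0.5 * y_out + 0.25,
// i.e. (under normalized coordinates) it reads the centered half-size sub-rect.
constexpr std::array<float, 16> kCenterHalfSizeMatrix = {
    0.5f, 0.0f, 0.0f, 0.25f,  // matrix[0..3]: x_in = m0 * x + m1 * y + m3
    0.0f, 0.5f, 0.0f, 0.25f,  // matrix[4..7]: y_in = m4 * x + m5 * y + m7
    0.0f, 0.0f, 1.0f, 0.0f,
    0.0f, 0.0f, 0.0f, 1.0f};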
mediapipe/calculators/image/affine_transformation_runner_gl.cc (new file, 354 lines)
@@ -0,0 +1,354 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "mediapipe/calculators/image/affine_transformation_runner_gl.h"

#include <memory>
#include <optional>

#include "Eigen/Core"
#include "Eigen/Geometry"
#include "Eigen/LU"
#include "absl/memory/memory.h"
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "mediapipe/calculators/image/affine_transformation.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/gpu/gl_calculator_helper.h"
#include "mediapipe/gpu/gl_simple_shaders.h"
#include "mediapipe/gpu/gpu_buffer.h"
#include "mediapipe/gpu/gpu_origin.pb.h"
#include "mediapipe/gpu/shader_util.h"

namespace mediapipe {

namespace {

using mediapipe::GlCalculatorHelper;
using mediapipe::GlhCreateProgram;
using mediapipe::GlTexture;
using mediapipe::GpuBuffer;
using mediapipe::GpuOrigin;

bool IsMatrixVerticalFlipNeeded(GpuOrigin::Mode gpu_origin) {
  switch (gpu_origin) {
    case GpuOrigin::DEFAULT:
    case GpuOrigin::CONVENTIONAL:
#ifdef __APPLE__
      return false;
#else
      return true;
#endif  // __APPLE__
    case GpuOrigin::TOP_LEFT:
      return false;
  }
}

#ifdef __APPLE__
#define GL_CLAMP_TO_BORDER_MAY_BE_SUPPORTED 0
#else
#define GL_CLAMP_TO_BORDER_MAY_BE_SUPPORTED 1
#endif  // __APPLE__

bool IsGlClampToBorderSupported(const mediapipe::GlContext& gl_context) {
  return gl_context.gl_major_version() > 3 ||
         (gl_context.gl_major_version() == 3 &&
          gl_context.gl_minor_version() >= 2);
}

constexpr int kAttribVertex = 0;
constexpr int kAttribTexturePosition = 1;
constexpr int kNumAttributes = 2;

class GlTextureWarpAffineRunner
    : public AffineTransformation::Runner<GpuBuffer,
                                          std::unique_ptr<GpuBuffer>> {
 public:
  GlTextureWarpAffineRunner(std::shared_ptr<GlCalculatorHelper> gl_helper,
                            GpuOrigin::Mode gpu_origin)
      : gl_helper_(gl_helper), gpu_origin_(gpu_origin) {}
  absl::Status Init() {
    return gl_helper_->RunInGlContext([this]() -> absl::Status {
      const GLint attr_location[kNumAttributes] = {
          kAttribVertex,
          kAttribTexturePosition,
      };
      const GLchar* attr_name[kNumAttributes] = {
          "position",
          "texture_coordinate",
      };

      constexpr GLchar kVertShader[] = R"(
        in vec4 position;
        in mediump vec4 texture_coordinate;
        out mediump vec2 sample_coordinate;
        uniform mat4 transform_matrix;

        void main() {
          gl_Position = position;
          vec4 tc = transform_matrix * texture_coordinate;
          sample_coordinate = tc.xy;
        }
      )";

      constexpr GLchar kFragShader[] = R"(
        DEFAULT_PRECISION(mediump, float)
        in vec2 sample_coordinate;
        uniform sampler2D input_texture;

      #ifdef GL_ES
        #define fragColor gl_FragColor
      #else
        out vec4 fragColor;
      #endif  // defined(GL_ES);

        void main() {
          vec4 color = texture2D(input_texture, sample_coordinate);
      #ifdef CUSTOM_ZERO_BORDER_MODE
          float out_of_bounds =
              float(sample_coordinate.x < 0.0 || sample_coordinate.x > 1.0 ||
                    sample_coordinate.y < 0.0 || sample_coordinate.y > 1.0);
          color = mix(color, vec4(0.0, 0.0, 0.0, 0.0), out_of_bounds);
      #endif  // defined(CUSTOM_ZERO_BORDER_MODE)
          fragColor = color;
        }
      )";

      // Create program and set parameters.
      auto create_fn = [&](const std::string& vs,
                           const std::string& fs) -> absl::StatusOr<Program> {
        GLuint program = 0;
        GlhCreateProgram(vs.c_str(), fs.c_str(), kNumAttributes, &attr_name[0],
                         attr_location, &program);

        RET_CHECK(program) << "Problem initializing warp affine program.";
        glUseProgram(program);
        glUniform1i(glGetUniformLocation(program, "input_texture"), 1);
        GLint matrix_id = glGetUniformLocation(program, "transform_matrix");
        return Program{.id = program, .matrix_id = matrix_id};
      };

      const std::string vert_src =
          absl::StrCat(mediapipe::kMediaPipeVertexShaderPreamble, kVertShader);

      const std::string frag_src = absl::StrCat(
          mediapipe::kMediaPipeFragmentShaderPreamble, kFragShader);

      ASSIGN_OR_RETURN(program_, create_fn(vert_src, frag_src));

      auto create_custom_zero_fn = [&]() -> absl::StatusOr<Program> {
        std::string custom_zero_border_mode_def = R"(
          #define CUSTOM_ZERO_BORDER_MODE
        )";
        const std::string frag_custom_zero_src =
            absl::StrCat(mediapipe::kMediaPipeFragmentShaderPreamble,
                         custom_zero_border_mode_def, kFragShader);
        return create_fn(vert_src, frag_custom_zero_src);
      };
#if GL_CLAMP_TO_BORDER_MAY_BE_SUPPORTED
      if (!IsGlClampToBorderSupported(gl_helper_->GetGlContext())) {
        ASSIGN_OR_RETURN(program_custom_zero_, create_custom_zero_fn());
      }
#else
      ASSIGN_OR_RETURN(program_custom_zero_, create_custom_zero_fn());
#endif  // GL_CLAMP_TO_BORDER_MAY_BE_SUPPORTED

      glGenFramebuffers(1, &framebuffer_);

      // vertex storage
      glGenBuffers(2, vbo_);
      glGenVertexArrays(1, &vao_);

      // vbo 0
      glBindBuffer(GL_ARRAY_BUFFER, vbo_[0]);
      glBufferData(GL_ARRAY_BUFFER, sizeof(mediapipe::kBasicSquareVertices),
                   mediapipe::kBasicSquareVertices, GL_STATIC_DRAW);

      // vbo 1
      glBindBuffer(GL_ARRAY_BUFFER, vbo_[1]);
      glBufferData(GL_ARRAY_BUFFER, sizeof(mediapipe::kBasicTextureVertices),
                   mediapipe::kBasicTextureVertices, GL_STATIC_DRAW);

      glBindBuffer(GL_ARRAY_BUFFER, 0);

      return absl::OkStatus();
    });
  }

  absl::StatusOr<std::unique_ptr<GpuBuffer>> Run(
      const GpuBuffer& input, const std::array<float, 16>& matrix,
      const AffineTransformation::Size& size,
      AffineTransformation::BorderMode border_mode) override {
    std::unique_ptr<GpuBuffer> gpu_buffer;
    MP_RETURN_IF_ERROR(
        gl_helper_->RunInGlContext([this, &input, &matrix, &size, &border_mode,
                                    &gpu_buffer]() -> absl::Status {
          auto input_texture = gl_helper_->CreateSourceTexture(input);
          auto output_texture = gl_helper_->CreateDestinationTexture(
              size.width, size.height, input.format());

          MP_RETURN_IF_ERROR(
              RunInternal(input_texture, matrix, border_mode, &output_texture));
          gpu_buffer = output_texture.GetFrame<GpuBuffer>();
          return absl::OkStatus();
        }));

    return gpu_buffer;
  }

  absl::Status RunInternal(const GlTexture& texture,
                           const std::array<float, 16>& matrix,
                           AffineTransformation::BorderMode border_mode,
                           GlTexture* output) {
    glDisable(GL_DEPTH_TEST);
    glBindFramebuffer(GL_FRAMEBUFFER, framebuffer_);
    glViewport(0, 0, output->width(), output->height());

    glActiveTexture(GL_TEXTURE0);
    glBindTexture(GL_TEXTURE_2D, output->name());
    glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
                           output->name(), 0);

    glActiveTexture(GL_TEXTURE1);
    glBindTexture(texture.target(), texture.name());

    // a) Filtering.
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);

    // b) Clamping.
    std::optional<Program> program = program_;
    switch (border_mode) {
      case AffineTransformation::BorderMode::kReplicate: {
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
        break;
      }
      case AffineTransformation::BorderMode::kZero: {
#if GL_CLAMP_TO_BORDER_MAY_BE_SUPPORTED
        if (program_custom_zero_) {
          program = program_custom_zero_;
        } else {
          glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_BORDER);
          glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_BORDER);
          glTexParameterfv(GL_TEXTURE_2D, GL_TEXTURE_BORDER_COLOR,
                           std::array<float, 4>{0.0f, 0.0f, 0.0f, 0.0f}.data());
        }
#else
        RET_CHECK(program_custom_zero_)
            << "Program must have been initialized.";
        program = program_custom_zero_;
#endif  // GL_CLAMP_TO_BORDER_MAY_BE_SUPPORTED
        break;
      }
    }
    glUseProgram(program->id);

    Eigen::Matrix<float, 4, 4, Eigen::RowMajor> eigen_mat(matrix.data());
    if (IsMatrixVerticalFlipNeeded(gpu_origin_)) {
      // @matrix describes affine transformation in terms of TOP LEFT origin, so
      // in some cases/on some platforms an extra flipping should be done before
      // and after.
      const Eigen::Matrix<float, 4, 4, Eigen::RowMajor> flip_y(
          {{1.0f, 0.0f, 0.0f, 0.0f},
           {0.0f, -1.0f, 0.0f, 1.0f},
           {0.0f, 0.0f, 1.0f, 0.0f},
           {0.0f, 0.0f, 0.0f, 1.0f}});
      eigen_mat = flip_y * eigen_mat * flip_y;
    }

    // If GL context is ES2, then GL_FALSE must be used for 'transpose'
    // GLboolean in glUniformMatrix4fv, or else INVALID_VALUE error is reported.
    // Hence, transposing the matrix and always passing transposed.
    eigen_mat.transposeInPlace();
    glUniformMatrix4fv(program->matrix_id, 1, GL_FALSE, eigen_mat.data());

    // vao
    glBindVertexArray(vao_);

    // vbo 0
    glBindBuffer(GL_ARRAY_BUFFER, vbo_[0]);
    glEnableVertexAttribArray(kAttribVertex);
    glVertexAttribPointer(kAttribVertex, 2, GL_FLOAT, 0, 0, nullptr);

    // vbo 1
    glBindBuffer(GL_ARRAY_BUFFER, vbo_[1]);
    glEnableVertexAttribArray(kAttribTexturePosition);
    glVertexAttribPointer(kAttribTexturePosition, 2, GL_FLOAT, 0, 0, nullptr);

    // draw
    glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);

    // Resetting to MediaPipe texture param defaults.
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);

    glDisableVertexAttribArray(kAttribVertex);
    glDisableVertexAttribArray(kAttribTexturePosition);
    glBindBuffer(GL_ARRAY_BUFFER, 0);
    glBindVertexArray(0);

    glActiveTexture(GL_TEXTURE1);
    glBindTexture(GL_TEXTURE_2D, 0);
    glActiveTexture(GL_TEXTURE0);
    glBindTexture(GL_TEXTURE_2D, 0);

    return absl::OkStatus();
  }

  ~GlTextureWarpAffineRunner() override {
    gl_helper_->RunInGlContext([this]() {
      // Release OpenGL resources.
      if (framebuffer_ != 0) glDeleteFramebuffers(1, &framebuffer_);
      if (program_.id != 0) glDeleteProgram(program_.id);
      if (program_custom_zero_ && program_custom_zero_->id != 0) {
        glDeleteProgram(program_custom_zero_->id);
      }
      if (vao_ != 0) glDeleteVertexArrays(1, &vao_);
      glDeleteBuffers(2, vbo_);
    });
  }

 private:
  struct Program {
    GLuint id;
    GLint matrix_id;
  };
  std::shared_ptr<GlCalculatorHelper> gl_helper_;
  GpuOrigin::Mode gpu_origin_;
  GLuint vao_ = 0;
  GLuint vbo_[2] = {0, 0};
  Program program_;
  std::optional<Program> program_custom_zero_;
  GLuint framebuffer_ = 0;
};

#undef GL_CLAMP_TO_BORDER_MAY_BE_SUPPORTED

}  // namespace

absl::StatusOr<std::unique_ptr<
    AffineTransformation::Runner<GpuBuffer, std::unique_ptr<GpuBuffer>>>>
CreateAffineTransformationGlRunner(
    std::shared_ptr<GlCalculatorHelper> gl_helper, GpuOrigin::Mode gpu_origin) {
  auto runner =
      absl::make_unique<GlTextureWarpAffineRunner>(gl_helper, gpu_origin);
  MP_RETURN_IF_ERROR(runner->Init());
  return runner;
}

}  // namespace mediapipe
mediapipe/calculators/image/affine_transformation_runner_gl.h (new file, 36 lines)
@@ -0,0 +1,36 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef MEDIAPIPE_CALCULATORS_IMAGE_AFFINE_TRANSFORMATION_RUNNER_GL_H_
#define MEDIAPIPE_CALCULATORS_IMAGE_AFFINE_TRANSFORMATION_RUNNER_GL_H_

#include <memory>

#include "absl/status/statusor.h"
#include "mediapipe/calculators/image/affine_transformation.h"
#include "mediapipe/gpu/gl_calculator_helper.h"
#include "mediapipe/gpu/gpu_buffer.h"
#include "mediapipe/gpu/gpu_origin.pb.h"

namespace mediapipe {

absl::StatusOr<std::unique_ptr<AffineTransformation::Runner<
    mediapipe::GpuBuffer, std::unique_ptr<mediapipe::GpuBuffer>>>>
CreateAffineTransformationGlRunner(
    std::shared_ptr<mediapipe::GlCalculatorHelper> gl_helper,
    mediapipe::GpuOrigin::Mode gpu_origin);

}  // namespace mediapipe

#endif  // MEDIAPIPE_CALCULATORS_IMAGE_AFFINE_TRANSFORMATION_RUNNER_GL_H_
@@ -0,0 +1,160 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "mediapipe/calculators/image/affine_transformation_runner_opencv.h"

#include <memory>

#include "absl/memory/memory.h"
#include "absl/status/statusor.h"
#include "mediapipe/calculators/image/affine_transformation.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/image_frame_opencv.h"
#include "mediapipe/framework/port/opencv_core_inc.h"
#include "mediapipe/framework/port/opencv_imgproc_inc.h"
#include "mediapipe/framework/port/ret_check.h"

namespace mediapipe {

namespace {

cv::BorderTypes GetBorderModeForOpenCv(
    AffineTransformation::BorderMode border_mode) {
  switch (border_mode) {
    case AffineTransformation::BorderMode::kZero:
      return cv::BORDER_CONSTANT;
    case AffineTransformation::BorderMode::kReplicate:
      return cv::BORDER_REPLICATE;
  }
}

class OpenCvRunner
    : public AffineTransformation::Runner<ImageFrame, ImageFrame> {
 public:
  absl::StatusOr<ImageFrame> Run(
      const ImageFrame& input, const std::array<float, 16>& matrix,
      const AffineTransformation::Size& size,
      AffineTransformation::BorderMode border_mode) override {
    // OpenCV warpAffine works in absolute coordinates, so the transform (which
    // accepts and produces relative coordinates) should be adjusted to first
    // normalize coordinates and then scale them.
    // clang-format off
    cv::Matx44f normalize_dst_coordinate({
        1.0f / size.width, 0.0f,               0.0f, 0.0f,
        0.0f,              1.0f / size.height, 0.0f, 0.0f,
        0.0f,              0.0f,               1.0f, 0.0f,
        0.0f,              0.0f,               0.0f, 1.0f});
    cv::Matx44f scale_src_coordinate({
        1.0f * input.Width(), 0.0f,                  0.0f, 0.0f,
        0.0f,                 1.0f * input.Height(), 0.0f, 0.0f,
        0.0f,                 0.0f,                  1.0f, 0.0f,
        0.0f,                 0.0f,                  0.0f, 1.0f});
    // clang-format on
    cv::Matx44f adjust_dst_coordinate;
    cv::Matx44f adjust_src_coordinate;
    // TODO: update to always use accurate implementation.
    constexpr bool kOpenCvCompatibility = true;
    if (kOpenCvCompatibility) {
      adjust_dst_coordinate = normalize_dst_coordinate;
      adjust_src_coordinate = scale_src_coordinate;
    } else {
      // To do an accurate affine image transformation and make "on-cpu" and
      // "on-gpu" calculations aligned - extra offset is required to select
      // correct pixels.
      //
      // Each destination pixel corresponds to some pixels region from source
      // image. (In case of downscaling there can be more than one pixel.) The
      // offset for x and y is calculated in such a way that the pixel in the
      // middle of the region is selected.
      //
      // For simplicity's sake, let's consider downscaling from 100x50 to 10x10
      // without a rotation:
      // 1. Each destination pixel corresponds to a 10x5 region
      //    X range: [0, .. , 9]
      //    Y range: [0, .. , 4]
      // 2. Considering we have __discrete__ pixels, the center of the region is
      //    between (4, 2) and (5, 2) pixels, let's assume it's a "pixel"
      //    (4.5, 2).
      // 3. When using the above as an offset for every pixel select while
      //    downscaling, resulting pixels are:
      //      (4.5, 2), (14.5, 2), .. , (94.5, 2)
      //      (4.5, 7), (14.5, 7), .. , (94.5, 7)
      //      ..
      //      (4.5, 47), (14.5, 47), .., (94.5, 47)
      //    instead of:
      //      (0, 0), (10, 0), .. , (90, 0)
      //      (0, 5), (10, 5), .. , (90, 5)
      //      ..
      //      (0, 45), (10, 45), .., (90, 45)
      //    The latter looks shifted.
      //
      // Offsets are needed, so that __discrete__ pixel at (0, 0) corresponds to
      // the same pixel as would __non discrete__ pixel at (0.5, 0.5). Hence,
      // transformation matrix should shift coordinates by (0.5, 0.5) as the
      // very first step.
      //
      // Due to the above shift, transformed coordinates would be valid for
      // float coordinates where pixel (0, 0) spans [0.0, 1.0) x [0.0, 1.0).
      // To make it valid for __discrete__ pixels, transformation matrix should
      // shift coordinate by (-0.5f, -0.5f) as the very last step. (E.g. if we
      // get (0.5f, 0.5f), then it's (0, 0) __discrete__ pixel.)
      // clang-format off
      cv::Matx44f shift_dst({1.0f, 0.0f, 0.0f, 0.5f,
                             0.0f, 1.0f, 0.0f, 0.5f,
                             0.0f, 0.0f, 1.0f, 0.0f,
                             0.0f, 0.0f, 0.0f, 1.0f});
      cv::Matx44f shift_src({1.0f, 0.0f, 0.0f, -0.5f,
                             0.0f, 1.0f, 0.0f, -0.5f,
                             0.0f, 0.0f, 1.0f, 0.0f,
                             0.0f, 0.0f, 0.0f, 1.0f});
      // clang-format on
      adjust_dst_coordinate = normalize_dst_coordinate * shift_dst;
      adjust_src_coordinate = shift_src * scale_src_coordinate;
    }

    cv::Matx44f transform(matrix.data());
    cv::Matx44f transform_absolute =
        adjust_src_coordinate * transform * adjust_dst_coordinate;

    cv::Mat in_mat = formats::MatView(&input);

    cv::Mat cv_affine_transform(2, 3, CV_32F);
    cv_affine_transform.at<float>(0, 0) = transform_absolute.val[0];
    cv_affine_transform.at<float>(0, 1) = transform_absolute.val[1];
    cv_affine_transform.at<float>(0, 2) = transform_absolute.val[3];
    cv_affine_transform.at<float>(1, 0) = transform_absolute.val[4];
    cv_affine_transform.at<float>(1, 1) = transform_absolute.val[5];
    cv_affine_transform.at<float>(1, 2) = transform_absolute.val[7];

    ImageFrame out_image(input.Format(), size.width, size.height);
    cv::Mat out_mat = formats::MatView(&out_image);

    cv::warpAffine(in_mat, out_mat, cv_affine_transform,
                   cv::Size(out_mat.cols, out_mat.rows),
                   /*flags=*/cv::INTER_LINEAR | cv::WARP_INVERSE_MAP,
                   GetBorderModeForOpenCv(border_mode));

    return out_image;
  }
};

}  // namespace

absl::StatusOr<
    std::unique_ptr<AffineTransformation::Runner<ImageFrame, ImageFrame>>>
CreateAffineTransformationOpenCvRunner() {
  return absl::make_unique<OpenCvRunner>();
}

}  // namespace mediapipe
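For reference, the matrix algebra in OpenCvRunner::Run above amounts to the following; this is only a restatement of the kOpenCvCompatibility branch, with w_in, h_in, w_out, h_out standing for input.Width(), input.Height(), size.width and size.height.

\[
M = S_{\mathrm{src}} \, T \, N_{\mathrm{dst}}, \qquad
N_{\mathrm{dst}} = \operatorname{diag}\!\Big(\tfrac{1}{w_{\mathrm{out}}}, \tfrac{1}{h_{\mathrm{out}}}, 1, 1\Big), \qquad
S_{\mathrm{src}} = \operatorname{diag}\big(w_{\mathrm{in}}, h_{\mathrm{in}}, 1, 1\big)
\]
\[
A = \begin{pmatrix} M_{00} & M_{01} & M_{03} \\ M_{10} & M_{11} & M_{13} \end{pmatrix}
\]

Here T is the relative-coordinate MATRIX input, M is transform_absolute, and A is the 2x3 matrix handed to cv::warpAffine together with WARP_INVERSE_MAP, so A maps output pixels back into the input image; the z column of M is dropped because the warp is two-dimensional.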
@@ -0,0 +1,32 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef MEDIAPIPE_CALCULATORS_IMAGE_AFFINE_TRANSFORMATION_RUNNER_OPENCV_H_
#define MEDIAPIPE_CALCULATORS_IMAGE_AFFINE_TRANSFORMATION_RUNNER_OPENCV_H_

#include <memory>

#include "absl/status/statusor.h"
#include "mediapipe/calculators/image/affine_transformation.h"
#include "mediapipe/framework/formats/image_frame.h"

namespace mediapipe {

absl::StatusOr<
    std::unique_ptr<AffineTransformation::Runner<ImageFrame, ImageFrame>>>
CreateAffineTransformationOpenCvRunner();

}  // namespace mediapipe

#endif  // MEDIAPIPE_CALCULATORS_IMAGE_AFFINE_TRANSFORMATION_RUNNER_OPENCV_H_
@@ -240,7 +240,7 @@ absl::Status BilateralFilterCalculator::RenderCpu(CalculatorContext* cc) {
   auto input_mat = mediapipe::formats::MatView(&input_frame);
 
   // Only 1 or 3 channel images supported by OpenCV.
-  if ((input_mat.channels() == 1 || input_mat.channels() == 3)) {
+  if (!(input_mat.channels() == 1 || input_mat.channels() == 3)) {
     return absl::InternalError(
         "CPU filtering supports only 1 or 3 channel input images.");
   }
@@ -36,7 +36,7 @@ using GpuBuffer = mediapipe::GpuBuffer;
 // stored on the target storage (CPU vs GPU) specified in the calculator option.
 //
 // The clone shares ownership of the input pixel data on the existing storage.
-// If the target storage is diffrent from the existing one, then the data is
+// If the target storage is different from the existing one, then the data is
 // further copied there.
 //
 // Example usage:
@@ -480,8 +480,7 @@ RectSpec ImageCroppingCalculator::GetCropSpecs(const CalculatorContext* cc,
   if (cc->Inputs().HasTag(kRectTag)) {
     const auto& rect = cc->Inputs().Tag(kRectTag).Get<Rect>();
     // Only use the rect if it is valid.
-    if (rect.width() > 0 && rect.height() > 0 && rect.x_center() >= 0 &&
-        rect.y_center() >= 0) {
+    if (rect.width() > 0 && rect.height() > 0) {
       x_center = rect.x_center();
       y_center = rect.y_center();
       crop_width = rect.width();
@@ -102,6 +102,10 @@ mediapipe::ScaleMode_Mode ParseScaleMode(
 //   IMAGE: ImageFrame representing the input image.
 //   IMAGE_GPU: GpuBuffer representing the input image.
 //
+//   OUTPUT_DIMENSIONS (optional): The output width and height in pixels as
+//   pair<int, int>. If set, it will override corresponding field in calculator
+//   options and input side packet.
+//
 //   ROTATION_DEGREES (optional): The counterclockwise rotation angle in
 //   degrees. This allows different rotation angles for different frames. It has
 //   to be a multiple of 90 degrees. If provided, it overrides the
@@ -221,6 +225,10 @@ absl::Status ImageTransformationCalculator::GetContract(
   }
 #endif  // !MEDIAPIPE_DISABLE_GPU
 
+  if (cc->Inputs().HasTag("OUTPUT_DIMENSIONS")) {
+    cc->Inputs().Tag("OUTPUT_DIMENSIONS").Set<std::pair<int, int>>();
+  }
+
   if (cc->Inputs().HasTag("ROTATION_DEGREES")) {
     cc->Inputs().Tag("ROTATION_DEGREES").Set<int>();
   }
@@ -329,6 +337,16 @@ absl::Status ImageTransformationCalculator::Process(CalculatorContext* cc) {
       !cc->Inputs().Tag("FLIP_VERTICALLY").IsEmpty()) {
     flip_vertically_ = cc->Inputs().Tag("FLIP_VERTICALLY").Get<bool>();
   }
+  if (cc->Inputs().HasTag("OUTPUT_DIMENSIONS")) {
+    if (cc->Inputs().Tag("OUTPUT_DIMENSIONS").IsEmpty()) {
+      return absl::OkStatus();
+    } else {
+      const auto& image_size =
+          cc->Inputs().Tag("OUTPUT_DIMENSIONS").Get<std::pair<int, int>>();
+      output_width_ = image_size.first;
+      output_height_ = image_size.second;
+    }
+  }
 
   if (use_gpu_) {
 #if !MEDIAPIPE_DISABLE_GPU
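A minimal sketch of how application code could drive the new OUTPUT_DIMENSIONS input stream per frame; the graph-level stream name "output_dimensions" and the helper itself are illustrative assumptions, not part of this change. The packet type must match the std::pair<int, int> contract registered in GetContract above.

// Hypothetical app-side helper: pushes a per-frame output size into a graph
// whose ImageTransformationCalculator node is wired as
// input_stream: "OUTPUT_DIMENSIONS:output_dimensions" (assumed wiring).
#include <utility>
#include "mediapipe/framework/calculator_framework.h"

absl::Status SetOutputDimensions(mediapipe::CalculatorGraph& graph, int width,
                                 int height, mediapipe::Timestamp ts) {
  return graph.AddPacketToInputStream(
      "output_dimensions",
      mediapipe::MakePacket<std::pair<int, int>>(std::make_pair(width, height))
          .At(ts));
}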
@@ -491,6 +509,14 @@ absl::Status ImageTransformationCalculator::RenderGpu(CalculatorContext* cc) {
   ComputeOutputDimensions(input_width, input_height, &output_width,
                           &output_height);
 
+  if (scale_mode_ == mediapipe::ScaleMode_Mode_FILL_AND_CROP) {
+    const float scale =
+        std::min(static_cast<float>(output_width_) / input_width,
+                 static_cast<float>(output_height_) / input_height);
+    output_width = std::round(input_width * scale);
+    output_height = std::round(input_height * scale);
+  }
+
   if (cc->Outputs().HasTag("LETTERBOX_PADDING")) {
     auto padding = absl::make_unique<std::array<float, 4>>();
     ComputeOutputLetterboxPadding(input_width, input_height, output_width,
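In the FILL_AND_CROP branch added above, the GPU output is resized by the smaller of the two requested-to-input ratios:

\[
s = \min\!\Big(\frac{w_{\mathrm{req}}}{w_{\mathrm{in}}}, \frac{h_{\mathrm{req}}}{h_{\mathrm{in}}}\Big), \qquad
w_{\mathrm{out}} = \mathrm{round}(s\,w_{\mathrm{in}}), \qquad
h_{\mathrm{out}} = \mathrm{round}(s\,h_{\mathrm{in}})
\]

For example (illustrative numbers, not from this change), a 1920x1080 input with a requested 720x720 output gives s = min(0.375, 0.667) = 0.375, i.e. a 720x405 output.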
@@ -262,6 +262,7 @@ absl::Status ScaleImageCalculator::InitializeFrameInfo(CalculatorContext* cc) {
       scale_image::FindOutputDimensions(crop_width_, crop_height_,         //
                                         options_.target_width(),           //
                                         options_.target_height(),          //
+                                        options_.target_max_area(),        //
                                         options_.preserve_aspect_ratio(),  //
                                         options_.scale_to_multiple_of(),   //
                                         &output_width_, &output_height_));
@@ -28,6 +28,11 @@ message ScaleImageCalculatorOptions {
   optional int32 target_width = 1;
   optional int32 target_height = 2;
 
+  // If set, then automatically calculates a target_width and target_height that
+  // has an area below the target max area. Aspect ratio preservation cannot be
+  // disabled.
+  optional int32 target_max_area = 15;
+
   // If true, the image is scaled up or down proportionally so that it
   // fits inside the box represented by target_width and target_height.
   // Otherwise it is scaled to fit target_width and target_height
@@ -92,12 +92,21 @@ absl::Status FindOutputDimensions(int input_width,   //
                                   int input_height,  //
                                   int target_width,  //
                                   int target_height,  //
+                                  int target_max_area,  //
                                   bool preserve_aspect_ratio,  //
                                   int scale_to_multiple_of,  //
                                   int* output_width, int* output_height) {
   CHECK(output_width);
   CHECK(output_height);
 
+  if (target_max_area > 0 && input_width * input_height > target_max_area) {
+    preserve_aspect_ratio = true;
+    target_height = static_cast<int>(sqrt(static_cast<double>(target_max_area) /
+                                          (static_cast<double>(input_width) /
+                                           static_cast<double>(input_height))));
+    target_width = -1;  // Resize width to preserve aspect ratio.
+  }
+
   if (preserve_aspect_ratio) {
     RET_CHECK(scale_to_multiple_of == 2)
         << "FindOutputDimensions always outputs width and height that are "
@@ -164,5 +173,17 @@ absl::Status FindOutputDimensions(int input_width,   //
         << "Unable to set output dimensions based on target dimensions.";
   }
 
+absl::Status FindOutputDimensions(int input_width,   //
+                                  int input_height,  //
+                                  int target_width,  //
+                                  int target_height,  //
+                                  bool preserve_aspect_ratio,  //
+                                  int scale_to_multiple_of,  //
+                                  int* output_width, int* output_height) {
+  return FindOutputDimensions(
+      input_width, input_height, target_width, target_height, -1,
+      preserve_aspect_ratio, scale_to_multiple_of, output_width, output_height);
+}
+
 }  // namespace scale_image
 }  // namespace mediapipe
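Restating the target_max_area branch added above with the numbers later used in the FindOutputDimensionsMaxArea test (200x100 input, max area 9000):

\[
h_{\mathrm{target}} = \Big\lfloor \sqrt{\tfrac{A_{\max}}{w_{\mathrm{in}}/h_{\mathrm{in}}}} \Big\rfloor
= \big\lfloor \sqrt{9000/2} \big\rfloor = 67, \qquad w_{\mathrm{target}} = -1
\]

The width is then derived from the preserved 2:1 aspect ratio (roughly 134x67 before the scale_to_multiple_of rounding), which keeps the output area at or below 9000.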
@@ -34,15 +34,25 @@ absl::Status FindCropDimensions(int input_width, int input_height,  //
                                 int* crop_width, int* crop_height,  //
                                 int* col_start, int* row_start);
 
-// Given an input width and height, a target width and height, whether to
-// preserve the aspect ratio, and whether to round-down to the multiple of a
-// given number nearest to the targets, determine the output width and height.
-// If target_width or target_height is non-positive, then they will be set to
-// the input_width and input_height respectively. If scale_to_multiple_of is
-// less than 1, it will be treated like 1. The output_width and
-// output_height will be reduced as necessary to preserve_aspect_ratio if the
-// option is specified. If preserving the aspect ratio is desired, you must set
-// scale_to_multiple_of to 2.
+// Given an input width and height, a target width and height or max area,
+// whether to preserve the aspect ratio, and whether to round-down to the
+// multiple of a given number nearest to the targets, determine the output width
+// and height. If target_width or target_height is non-positive, then they will
+// be set to the input_width and input_height respectively. If target_area is
+// non-positive, then it will be ignored. If scale_to_multiple_of is less than
+// 1, it will be treated like 1. The output_width and output_height will be
+// reduced as necessary to preserve_aspect_ratio if the option is specified. If
+// preserving the aspect ratio is desired, you must set scale_to_multiple_of
+// to 2.
+absl::Status FindOutputDimensions(int input_width, int input_height,  //
+                                  int target_width,
+                                  int target_height,  //
+                                  int target_max_area,  //
+                                  bool preserve_aspect_ratio,  //
+                                  int scale_to_multiple_of,  //
+                                  int* output_width, int* output_height);
+
+// Backwards compatible helper.
 absl::Status FindOutputDimensions(int input_width, int input_height,  //
                                   int target_width,
                                   int target_height,  //
@@ -79,49 +79,49 @@ TEST(ScaleImageUtilsTest, FindOutputDimensionsPreserveRatio) {
   int output_width;
   int output_height;
   // Not scale.
-  MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, -1, true, 2, &output_width,
-                                    &output_height));
+  MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, -1, -1, true, 2,
+                                    &output_width, &output_height));
   EXPECT_EQ(200, output_width);
   EXPECT_EQ(100, output_height);
   // Not scale with odd input size.
-  MP_ASSERT_OK(FindOutputDimensions(201, 101, -1, -1, false, 1, &output_width,
-                                    &output_height));
+  MP_ASSERT_OK(FindOutputDimensions(201, 101, -1, -1, -1, false, 1,
+                                    &output_width, &output_height));
   EXPECT_EQ(201, output_width);
   EXPECT_EQ(101, output_height);
   // Scale down by 1/2.
-  MP_ASSERT_OK(FindOutputDimensions(200, 100, 100, -1, true, 2, &output_width,
-                                    &output_height));
+  MP_ASSERT_OK(FindOutputDimensions(200, 100, 100, -1, -1, true, 2,
+                                    &output_width, &output_height));
   EXPECT_EQ(100, output_width);
   EXPECT_EQ(50, output_height);
   // Scale up, doubling dimensions.
-  MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, 200, true, 2, &output_width,
-                                    &output_height));
+  MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, 200, -1, true, 2,
+                                    &output_width, &output_height));
   EXPECT_EQ(400, output_width);
   EXPECT_EQ(200, output_height);
   // Fits a 2:1 image into a 150 x 150 box. Output dimensions are always
   // visible by 2.
-  MP_ASSERT_OK(FindOutputDimensions(200, 100, 150, 150, true, 2, &output_width,
-                                    &output_height));
+  MP_ASSERT_OK(FindOutputDimensions(200, 100, 150, 150, -1, true, 2,
+                                    &output_width, &output_height));
   EXPECT_EQ(150, output_width);
   EXPECT_EQ(74, output_height);
   // Fits a 2:1 image into a 400 x 50 box.
-  MP_ASSERT_OK(FindOutputDimensions(200, 100, 400, 50, true, 2, &output_width,
-                                    &output_height));
+  MP_ASSERT_OK(FindOutputDimensions(200, 100, 400, 50, -1, true, 2,
+                                    &output_width, &output_height));
   EXPECT_EQ(100, output_width);
   EXPECT_EQ(50, output_height);
   // Scale to multiple number with odd targe size.
-  MP_ASSERT_OK(FindOutputDimensions(200, 100, 101, -1, true, 2, &output_width,
-                                    &output_height));
+  MP_ASSERT_OK(FindOutputDimensions(200, 100, 101, -1, -1, true, 2,
+                                    &output_width, &output_height));
   EXPECT_EQ(100, output_width);
   EXPECT_EQ(50, output_height);
   // Scale to multiple number with odd targe size.
-  MP_ASSERT_OK(FindOutputDimensions(200, 100, 101, -1, true, 2, &output_width,
-                                    &output_height));
+  MP_ASSERT_OK(FindOutputDimensions(200, 100, 101, -1, -1, true, 2,
+                                    &output_width, &output_height));
   EXPECT_EQ(100, output_width);
   EXPECT_EQ(50, output_height);
   // Scale to odd size.
-  MP_ASSERT_OK(FindOutputDimensions(200, 100, 151, 101, false, 1, &output_width,
-                                    &output_height));
+  MP_ASSERT_OK(FindOutputDimensions(200, 100, 151, 101, -1, false, 1,
+                                    &output_width, &output_height));
   EXPECT_EQ(151, output_width);
   EXPECT_EQ(101, output_height);
 }
@@ -131,18 +131,18 @@ TEST(ScaleImageUtilsTest, FindOutputDimensionsNoAspectRatio) {
   int output_width;
   int output_height;
   // Scale width only.
-  MP_ASSERT_OK(FindOutputDimensions(200, 100, 100, -1, false, 2, &output_width,
-                                    &output_height));
+  MP_ASSERT_OK(FindOutputDimensions(200, 100, 100, -1, -1, false, 2,
+                                    &output_width, &output_height));
   EXPECT_EQ(100, output_width);
   EXPECT_EQ(100, output_height);
   // Scale height only.
-  MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, 200, false, 2, &output_width,
-                                    &output_height));
+  MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, 200, -1, false, 2,
+                                    &output_width, &output_height));
   EXPECT_EQ(200, output_width);
   EXPECT_EQ(200, output_height);
   // Scale both dimensions.
-  MP_ASSERT_OK(FindOutputDimensions(200, 100, 150, 200, false, 2, &output_width,
-                                    &output_height));
+  MP_ASSERT_OK(FindOutputDimensions(200, 100, 150, 200, -1, false, 2,
+                                    &output_width, &output_height));
   EXPECT_EQ(150, output_width);
   EXPECT_EQ(200, output_height);
 }
@@ -152,41 +152,78 @@ TEST(ScaleImageUtilsTest, FindOutputDimensionsDownScaleToMultipleOf) {
   int output_width;
   int output_height;
   // Set no targets, downscale to a multiple of 8.
-  MP_ASSERT_OK(FindOutputDimensions(100, 100, -1, -1, false, 8, &output_width,
-                                    &output_height));
+  MP_ASSERT_OK(FindOutputDimensions(100, 100, -1, -1, -1, false, 8,
+                                    &output_width, &output_height));
   EXPECT_EQ(96, output_width);
   EXPECT_EQ(96, output_height);
   // Set width target, downscale to a multiple of 8.
-  MP_ASSERT_OK(FindOutputDimensions(200, 100, 100, -1, false, 8, &output_width,
-                                    &output_height));
+  MP_ASSERT_OK(FindOutputDimensions(200, 100, 100, -1, -1, false, 8,
+                                    &output_width, &output_height));
   EXPECT_EQ(96, output_width);
   EXPECT_EQ(96, output_height);
   // Set height target, downscale to a multiple of 8.
-  MP_ASSERT_OK(FindOutputDimensions(201, 101, -1, 201, false, 8, &output_width,
-                                    &output_height));
+  MP_ASSERT_OK(FindOutputDimensions(201, 101, -1, 201, -1, false, 8,
+                                    &output_width, &output_height));
   EXPECT_EQ(200, output_width);
   EXPECT_EQ(200, output_height);
   // Set both targets, downscale to a multiple of 8.
-  MP_ASSERT_OK(FindOutputDimensions(200, 100, 150, 200, false, 8, &output_width,
-                                    &output_height));
+  MP_ASSERT_OK(FindOutputDimensions(200, 100, 150, 200, -1, false, 8,
+                                    &output_width, &output_height));
   EXPECT_EQ(144, output_width);
   EXPECT_EQ(200, output_height);
   // Doesn't throw error if keep aspect is true and downscale multiple is 2.
-  MP_ASSERT_OK(FindOutputDimensions(200, 100, 400, 200, true, 2, &output_width,
-                                    &output_height));
+  MP_ASSERT_OK(FindOutputDimensions(200, 100, 400, 200, -1, true, 2,
+                                    &output_width, &output_height));
   EXPECT_EQ(400, output_width);
   EXPECT_EQ(200, output_height);
   // Throws error if keep aspect is true, but downscale multiple is not 2.
-  ASSERT_THAT(FindOutputDimensions(200, 100, 400, 200, true, 4, &output_width,
-                                   &output_height),
+  ASSERT_THAT(FindOutputDimensions(200, 100, 400, 200, -1, true, 4,
+                                   &output_width, &output_height),
               testing::Not(testing::status::IsOk()));
   // Downscaling to multiple ignored if multiple is less than 2.
-  MP_ASSERT_OK(FindOutputDimensions(200, 100, 401, 201, false, 1, &output_width,
-                                    &output_height));
+  MP_ASSERT_OK(FindOutputDimensions(200, 100, 401, 201, -1, false, 1,
+                                    &output_width, &output_height));
   EXPECT_EQ(401, output_width);
   EXPECT_EQ(201, output_height);
 }
 
+// Tests scaling without keeping the aspect ratio fixed.
+TEST(ScaleImageUtilsTest, FindOutputDimensionsMaxArea) {
+  int output_width;
+  int output_height;
+  // Smaller area.
+  MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, -1, 9000, false, 2,
+                                    &output_width, &output_height));
+  EXPECT_NEAR(
+      200 / 100,
+      static_cast<double>(output_width) / static_cast<double>(output_height),
+      0.1f);
+  EXPECT_LE(output_width * output_height, 9000);
+  // Close to original area.
+  MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, -1, 19999, false, 2,
+                                    &output_width, &output_height));
+  EXPECT_NEAR(
+      200.0 / 100.0,
+      static_cast<double>(output_width) / static_cast<double>(output_height),
+      0.1f);
+  EXPECT_LE(output_width * output_height, 19999);
+  // Don't scale with larger area.
+  MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, -1, 20001, false, 2,
+                                    &output_width, &output_height));
+  EXPECT_EQ(200, output_width);
+  EXPECT_EQ(100, output_height);
+  // Don't scale with equal area.
+  MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, -1, 20000, false, 2,
+                                    &output_width, &output_height));
+  EXPECT_EQ(200, output_width);
+  EXPECT_EQ(100, output_height);
+  // Don't scale at all.
+  MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, -1, -1, false, 2,
+                                    &output_width, &output_height));
+  EXPECT_EQ(200, output_width);
+  EXPECT_EQ(100, output_height);
+}
+
 }  // namespace
 }  // namespace scale_image
 }  // namespace mediapipe
@@ -53,7 +53,7 @@ enum { ATTRIB_VERTEX, ATTRIB_TEXTURE_POSITION, NUM_ATTRIBUTES };
 // The alpha channel can be set to a single value, or come from an image mask.
 // If the input image has an alpha channel, it will be updated.
 // If the input image doesn't have an alpha channel, one will be added.
-// Adding alpha channel to a Grayscale (single channel) input is not suported.
+// Adding alpha channel to a Grayscale (single channel) input is not supported.
 //
 // Inputs:
 // One of the following two IMAGE tags:
211 mediapipe/calculators/image/warp_affine_calculator.cc Normal file
@@ -0,0 +1,211 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "mediapipe/calculators/image/warp_affine_calculator.h"

#include <array>
#include <cstdint>
#include <memory>

#include "mediapipe/calculators/image/affine_transformation.h"
#if !MEDIAPIPE_DISABLE_GPU
#include "mediapipe/calculators/image/affine_transformation_runner_gl.h"
#endif  // !MEDIAPIPE_DISABLE_GPU
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "mediapipe/calculators/image/affine_transformation_runner_opencv.h"
#include "mediapipe/calculators/image/warp_affine_calculator.pb.h"
#include "mediapipe/framework/api2/node.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/image.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/port/ret_check.h"
#if !MEDIAPIPE_DISABLE_GPU
#include "mediapipe/gpu/gl_calculator_helper.h"
#include "mediapipe/gpu/gpu_buffer.h"
#endif  // !MEDIAPIPE_DISABLE_GPU

namespace mediapipe {

namespace {

AffineTransformation::BorderMode GetBorderMode(
    mediapipe::WarpAffineCalculatorOptions::BorderMode border_mode) {
  switch (border_mode) {
    case mediapipe::WarpAffineCalculatorOptions::BORDER_ZERO:
      return AffineTransformation::BorderMode::kZero;
    case mediapipe::WarpAffineCalculatorOptions::BORDER_UNSPECIFIED:
    case mediapipe::WarpAffineCalculatorOptions::BORDER_REPLICATE:
      return AffineTransformation::BorderMode::kReplicate;
  }
}

template <typename ImageT>
class WarpAffineRunnerHolder {};

template <>
class WarpAffineRunnerHolder<ImageFrame> {
 public:
  using RunnerType = AffineTransformation::Runner<ImageFrame, ImageFrame>;
  absl::Status Open(CalculatorContext* cc) { return absl::OkStatus(); }
  absl::StatusOr<RunnerType*> GetRunner() {
    if (!runner_) {
      ASSIGN_OR_RETURN(runner_, CreateAffineTransformationOpenCvRunner());
    }
    return runner_.get();
  }

 private:
  std::unique_ptr<RunnerType> runner_;
};

#if !MEDIAPIPE_DISABLE_GPU
template <>
class WarpAffineRunnerHolder<mediapipe::GpuBuffer> {
 public:
  using RunnerType =
      AffineTransformation::Runner<mediapipe::GpuBuffer,
                                   std::unique_ptr<mediapipe::GpuBuffer>>;
  absl::Status Open(CalculatorContext* cc) {
    gpu_origin_ =
        cc->Options<mediapipe::WarpAffineCalculatorOptions>().gpu_origin();
    gl_helper_ = std::make_shared<mediapipe::GlCalculatorHelper>();
    return gl_helper_->Open(cc);
  }
  absl::StatusOr<RunnerType*> GetRunner() {
    if (!runner_) {
      ASSIGN_OR_RETURN(
          runner_, CreateAffineTransformationGlRunner(gl_helper_, gpu_origin_));
    }
    return runner_.get();
  }

 private:
  mediapipe::GpuOrigin::Mode gpu_origin_;
  std::shared_ptr<mediapipe::GlCalculatorHelper> gl_helper_;
  std::unique_ptr<RunnerType> runner_;
};
#endif  // !MEDIAPIPE_DISABLE_GPU

template <>
class WarpAffineRunnerHolder<mediapipe::Image> {
 public:
  absl::Status Open(CalculatorContext* cc) { return runner_.Open(cc); }
  absl::StatusOr<
      AffineTransformation::Runner<mediapipe::Image, mediapipe::Image>*>
  GetRunner() {
    return &runner_;
  }

 private:
  class Runner : public AffineTransformation::Runner<mediapipe::Image,
                                                     mediapipe::Image> {
   public:
    absl::Status Open(CalculatorContext* cc) {
      MP_RETURN_IF_ERROR(cpu_holder_.Open(cc));
#if !MEDIAPIPE_DISABLE_GPU
      MP_RETURN_IF_ERROR(gpu_holder_.Open(cc));
#endif  // !MEDIAPIPE_DISABLE_GPU
      return absl::OkStatus();
    }
    absl::StatusOr<mediapipe::Image> Run(
        const mediapipe::Image& input, const std::array<float, 16>& matrix,
        const AffineTransformation::Size& size,
        AffineTransformation::BorderMode border_mode) override {
      if (input.UsesGpu()) {
#if !MEDIAPIPE_DISABLE_GPU
        ASSIGN_OR_RETURN(auto* runner, gpu_holder_.GetRunner());
        ASSIGN_OR_RETURN(auto result, runner->Run(input.GetGpuBuffer(), matrix,
                                                  size, border_mode));
        return mediapipe::Image(*result);
#else
        return absl::UnavailableError("GPU support is disabled");
#endif  // !MEDIAPIPE_DISABLE_GPU
      }
      ASSIGN_OR_RETURN(auto* runner, cpu_holder_.GetRunner());
      const auto& frame_ptr = input.GetImageFrameSharedPtr();
      // Wrap image into image frame.
      const ImageFrame image_frame(frame_ptr->Format(), frame_ptr->Width(),
                                   frame_ptr->Height(), frame_ptr->WidthStep(),
                                   const_cast<uint8_t*>(frame_ptr->PixelData()),
                                   [](uint8* data) {});
      ASSIGN_OR_RETURN(auto result,
                       runner->Run(image_frame, matrix, size, border_mode));
      return mediapipe::Image(std::make_shared<ImageFrame>(std::move(result)));
    }

   private:
    WarpAffineRunnerHolder<ImageFrame> cpu_holder_;
#if !MEDIAPIPE_DISABLE_GPU
    WarpAffineRunnerHolder<mediapipe::GpuBuffer> gpu_holder_;
#endif  // !MEDIAPIPE_DISABLE_GPU
  };

  Runner runner_;
};

template <typename InterfaceT>
class WarpAffineCalculatorImpl : public mediapipe::api2::NodeImpl<InterfaceT> {
 public:
#if !MEDIAPIPE_DISABLE_GPU
  static absl::Status UpdateContract(CalculatorContract* cc) {
    if constexpr (std::is_same_v<InterfaceT, WarpAffineCalculatorGpu> ||
                  std::is_same_v<InterfaceT, WarpAffineCalculator>) {
      MP_RETURN_IF_ERROR(mediapipe::GlCalculatorHelper::UpdateContract(cc));
    }
    return absl::OkStatus();
  }
#endif  // !MEDIAPIPE_DISABLE_GPU

  absl::Status Open(CalculatorContext* cc) override { return holder_.Open(cc); }

  absl::Status Process(CalculatorContext* cc) override {
    if (InterfaceT::kInImage(cc).IsEmpty() ||
        InterfaceT::kMatrix(cc).IsEmpty() ||
        InterfaceT::kOutputSize(cc).IsEmpty()) {
      return absl::OkStatus();
    }
    const std::array<float, 16>& transform = *InterfaceT::kMatrix(cc);
    auto [out_width, out_height] = *InterfaceT::kOutputSize(cc);
    AffineTransformation::Size output_size;
    output_size.width = out_width;
    output_size.height = out_height;
    ASSIGN_OR_RETURN(auto* runner, holder_.GetRunner());
    ASSIGN_OR_RETURN(
        auto result,
        runner->Run(
            *InterfaceT::kInImage(cc), transform, output_size,
            GetBorderMode(cc->Options<mediapipe::WarpAffineCalculatorOptions>()
                              .border_mode())));
    InterfaceT::kOutImage(cc).Send(std::move(result));

    return absl::OkStatus();
  }

 private:
  WarpAffineRunnerHolder<typename decltype(InterfaceT::kInImage)::PayloadT>
      holder_;
};

}  // namespace

MEDIAPIPE_NODE_IMPLEMENTATION(
    WarpAffineCalculatorImpl<WarpAffineCalculatorCpu>);
#if !MEDIAPIPE_DISABLE_GPU
MEDIAPIPE_NODE_IMPLEMENTATION(
    WarpAffineCalculatorImpl<WarpAffineCalculatorGpu>);
#endif  // !MEDIAPIPE_DISABLE_GPU
MEDIAPIPE_NODE_IMPLEMENTATION(WarpAffineCalculatorImpl<WarpAffineCalculator>);

}  // namespace mediapipe
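The Image specialization above avoids copying CPU pixel data by wrapping the Image's underlying ImageFrame in a temporary non-owning view (note the no-op deleter). A minimal sketch of that pattern in isolation; WrapWithoutCopy and `source` are hypothetical names, not part of this file:

// Hypothetical helper: builds an ImageFrame view over existing pixel memory.
// The empty deleter means the wrapper never frees the pixels, so `source`
// must outlive the returned frame.
#include <cstdint>
#include "mediapipe/framework/formats/image_frame.h"

mediapipe::ImageFrame WrapWithoutCopy(const mediapipe::ImageFrame& source) {
  return mediapipe::ImageFrame(
      source.Format(), source.Width(), source.Height(), source.WidthStep(),
      const_cast<uint8_t*>(source.PixelData()),
      /*deleter=*/[](uint8_t*) {});
}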
94 mediapipe/calculators/image/warp_affine_calculator.h Normal file
@@ -0,0 +1,94 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef MEDIAPIPE_CALCULATORS_IMAGE_WARP_AFFINE_CALCULATOR_H_
#define MEDIAPIPE_CALCULATORS_IMAGE_WARP_AFFINE_CALCULATOR_H_

#include "mediapipe/framework/api2/node.h"
#include "mediapipe/framework/api2/port.h"
#include "mediapipe/framework/formats/image.h"
#include "mediapipe/framework/formats/image_frame.h"

#if !MEDIAPIPE_DISABLE_GPU
#include "mediapipe/gpu/gpu_buffer.h"
#endif  // !MEDIAPIPE_DISABLE_GPU

namespace mediapipe {

// Runs affine transformation.
//
// Input:
//   IMAGE - Image/ImageFrame/GpuBuffer
//
//   MATRIX - std::array<float, 16>
//     Used as following:
//       output(x, y) = input(matrix[0] * x + matrix[1] * y + matrix[3],
//                            matrix[4] * x + matrix[5] * y + matrix[7])
//     where x and y ranges are defined by @OUTPUT_SIZE.
//
//   OUTPUT_SIZE - std::pair<int, int>
//     Size of the output image.
//
// Output:
//   IMAGE - Image/ImageFrame/GpuBuffer
//
// Note:
//   - Output image type and format are the same as the input one.
//
// Usage example:
// node {
//   calculator: "WarpAffineCalculator(Cpu|Gpu)"
//   input_stream: "IMAGE:image"
//   input_stream: "MATRIX:matrix"
//   input_stream: "OUTPUT_SIZE:size"
//   output_stream: "IMAGE:transformed_image"
//   options: {
//     [mediapipe.WarpAffineCalculatorOptions.ext] {
//       border_mode: BORDER_ZERO
//     }
//   }
// }
template <typename ImageT>
class WarpAffineCalculatorIntf : public mediapipe::api2::NodeIntf {
 public:
  static constexpr mediapipe::api2::Input<ImageT> kInImage{"IMAGE"};
  static constexpr mediapipe::api2::Input<std::array<float, 16>> kMatrix{
      "MATRIX"};
  static constexpr mediapipe::api2::Input<std::pair<int, int>> kOutputSize{
      "OUTPUT_SIZE"};
  static constexpr mediapipe::api2::Output<ImageT> kOutImage{"IMAGE"};
};

class WarpAffineCalculatorCpu : public WarpAffineCalculatorIntf<ImageFrame> {
 public:
  MEDIAPIPE_NODE_INTERFACE(WarpAffineCalculatorCpu, kInImage, kMatrix,
                           kOutputSize, kOutImage);
};
#if !MEDIAPIPE_DISABLE_GPU
class WarpAffineCalculatorGpu
    : public WarpAffineCalculatorIntf<mediapipe::GpuBuffer> {
 public:
  MEDIAPIPE_NODE_INTERFACE(WarpAffineCalculatorGpu, kInImage, kMatrix,
                           kOutputSize, kOutImage);
};
#endif  // !MEDIAPIPE_DISABLE_GPU
class WarpAffineCalculator : public WarpAffineCalculatorIntf<mediapipe::Image> {
 public:
  MEDIAPIPE_NODE_INTERFACE(WarpAffineCalculator, kInImage, kMatrix, kOutputSize,
                           kOutImage);
};

}  // namespace mediapipe

#endif  // MEDIAPIPE_CALCULATORS_IMAGE_WARP_AFFINE_CALCULATOR_H_
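Per the comment block in this header, only six entries of the row-major 4x4 MATRIX are consumed; written out:

\[
\begin{pmatrix} x_{\mathrm{in}} \\ y_{\mathrm{in}} \end{pmatrix}
=
\begin{pmatrix} m_0 & m_1 & m_3 \\ m_4 & m_5 & m_7 \end{pmatrix}
\begin{pmatrix} x_{\mathrm{out}} \\ y_{\mathrm{out}} \\ 1 \end{pmatrix},
\qquad 0 \le x_{\mathrm{out}} < w_{\mathrm{out}},\; 0 \le y_{\mathrm{out}} < h_{\mathrm{out}}
\]

That is, the matrix maps output coordinates back into the input image (an inverse mapping), with the output range set by OUTPUT_SIZE.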
46 mediapipe/calculators/image/warp_affine_calculator.proto Normal file
@@ -0,0 +1,46 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto2";

package mediapipe;

import "mediapipe/framework/calculator.proto";
import "mediapipe/gpu/gpu_origin.proto";

message WarpAffineCalculatorOptions {
  extend CalculatorOptions {
    optional WarpAffineCalculatorOptions ext = 373693895;
  }

  // Pixel extrapolation methods. See @border_mode.
  enum BorderMode {
    BORDER_UNSPECIFIED = 0;
    BORDER_ZERO = 1;
    BORDER_REPLICATE = 2;
  }

  // Pixel extrapolation method.
  // When converting image to tensor it may happen that tensor needs to read
  // pixels outside image boundaries. Border mode helps to specify how such
  // pixels will be calculated.
  //
  // BORDER_REPLICATE is used by default.
  optional BorderMode border_mode = 1;

  // For CONVENTIONAL mode for OpenGL, input image starts at bottom and needs
  // to be flipped vertically as tensors are expected to start at top.
  // (DEFAULT or unset interpreted as CONVENTIONAL.)
  optional GpuOrigin.Mode gpu_origin = 2;
}
615 mediapipe/calculators/image/warp_affine_calculator_test.cc Normal file
@@ -0,0 +1,615 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <cmath>
#include <vector>

#include "absl/flags/flag.h"
#include "absl/memory/memory.h"
#include "absl/strings/substitute.h"
#include "mediapipe/calculators/image/affine_transformation.h"
#include "mediapipe/calculators/tensor/image_to_tensor_converter.h"
#include "mediapipe/calculators/tensor/image_to_tensor_utils.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_runner.h"
#include "mediapipe/framework/deps/file_path.h"
#include "mediapipe/framework/formats/image.h"
#include "mediapipe/framework/formats/image_format.pb.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/image_frame_opencv.h"
#include "mediapipe/framework/formats/rect.pb.h"
#include "mediapipe/framework/formats/tensor.h"
#include "mediapipe/framework/port/gtest.h"
#include "mediapipe/framework/port/integral_types.h"
#include "mediapipe/framework/port/opencv_core_inc.h"
#include "mediapipe/framework/port/opencv_imgcodecs_inc.h"
#include "mediapipe/framework/port/opencv_imgproc_inc.h"
#include "mediapipe/framework/port/parse_text_proto.h"
#include "mediapipe/framework/port/status_matchers.h"

namespace mediapipe {
namespace {

cv::Mat GetRgb(absl::string_view path) {
  cv::Mat bgr = cv::imread(file::JoinPath("./", path));
  cv::Mat rgb(bgr.rows, bgr.cols, CV_8UC3);
  int from_to[] = {0, 2, 1, 1, 2, 0};
  cv::mixChannels(&bgr, 1, &rgb, 1, from_to, 3);
  return rgb;
}

cv::Mat GetRgba(absl::string_view path) {
  cv::Mat bgr = cv::imread(file::JoinPath("./", path));
  cv::Mat rgba(bgr.rows, bgr.cols, CV_8UC4, cv::Scalar(0, 0, 0, 0));
  int from_to[] = {0, 2, 1, 1, 2, 0};
  cv::mixChannels(&bgr, 1, &bgr, 1, from_to, 3);
  return bgr;
}

// Test template.
// No processing/assertions should be done after the function is invoked.
void RunTest(const std::string& graph_text, const std::string& tag,
             const cv::Mat& input, cv::Mat expected_result,
             float similarity_threshold, std::array<float, 16> matrix,
             int out_width, int out_height,
             absl::optional<AffineTransformation::BorderMode> border_mode) {
  std::string border_mode_str;
  if (border_mode) {
    switch (*border_mode) {
      case AffineTransformation::BorderMode::kReplicate:
        border_mode_str = "border_mode: BORDER_REPLICATE";
        break;
      case AffineTransformation::BorderMode::kZero:
        border_mode_str = "border_mode: BORDER_ZERO";
        break;
    }
  }
  auto graph_config = mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(
      absl::Substitute(graph_text, /*$0=*/border_mode_str));

  std::vector<Packet> output_packets;
  tool::AddVectorSink("output_image", &graph_config, &output_packets);

  // Run the graph.
  CalculatorGraph graph;
  MP_ASSERT_OK(graph.Initialize(graph_config));
  MP_ASSERT_OK(graph.StartRun({}));

  ImageFrame input_image(
      input.channels() == 4 ? ImageFormat::SRGBA : ImageFormat::SRGB,
      input.cols, input.rows, input.step, input.data, [](uint8*) {});
  MP_ASSERT_OK(graph.AddPacketToInputStream(
      "input_image",
      MakePacket<ImageFrame>(std::move(input_image)).At(Timestamp(0))));
  MP_ASSERT_OK(graph.AddPacketToInputStream(
      "matrix",
      MakePacket<std::array<float, 16>>(std::move(matrix)).At(Timestamp(0))));
  MP_ASSERT_OK(graph.AddPacketToInputStream(
      "output_size", MakePacket<std::pair<int, int>>(
                         std::pair<int, int>(out_width, out_height))
                         .At(Timestamp(0))));

  MP_ASSERT_OK(graph.WaitUntilIdle());
  ASSERT_THAT(output_packets, testing::SizeIs(1));

  // Get and process results.
  const ImageFrame& out_frame = output_packets[0].Get<ImageFrame>();
  cv::Mat result = formats::MatView(&out_frame);
  double similarity =
      1.0 - cv::norm(result, expected_result, cv::NORM_RELATIVE | cv::NORM_L2);
  EXPECT_GE(similarity, similarity_threshold);

  // Fully close graph at end, otherwise calculator+tensors are destroyed
  // after calling WaitUntilDone().
  MP_ASSERT_OK(graph.CloseInputStream("input_image"));
  MP_ASSERT_OK(graph.CloseInputStream("matrix"));
  MP_ASSERT_OK(graph.CloseInputStream("output_size"));
  MP_ASSERT_OK(graph.WaitUntilDone());
}

enum class InputType { kImageFrame, kImage };

// Similarity is checked against OpenCV results always, and due to differences
// on how OpenCV and GL treats pixels there are two thresholds.
// TODO: update to have just one threshold when OpenCV
// implementation is updated.
struct SimilarityConfig {
  double threshold_on_cpu;
  double threshold_on_gpu;
};

void RunTest(cv::Mat input, cv::Mat expected_result,
             const SimilarityConfig& similarity, std::array<float, 16> matrix,
             int out_width, int out_height,
             absl::optional<AffineTransformation::BorderMode> border_mode) {
  RunTest(R"(
        input_stream: "input_image"
        input_stream: "output_size"
        input_stream: "matrix"
        node {
          calculator: "WarpAffineCalculatorCpu"
          input_stream: "IMAGE:input_image"
          input_stream: "MATRIX:matrix"
          input_stream: "OUTPUT_SIZE:output_size"
          output_stream: "IMAGE:output_image"
          options {
            [mediapipe.WarpAffineCalculatorOptions.ext] {
              $0 # border mode
            }
          }
        }
      )",
          "cpu", input, expected_result, similarity.threshold_on_cpu, matrix,
          out_width, out_height, border_mode);

  RunTest(R"(
        input_stream: "input_image"
        input_stream: "output_size"
        input_stream: "matrix"
        node {
          calculator: "ToImageCalculator"
          input_stream: "IMAGE_CPU:input_image"
          output_stream: "IMAGE:input_image_unified"
        }
        node {
          calculator: "WarpAffineCalculator"
          input_stream: "IMAGE:input_image_unified"
          input_stream: "MATRIX:matrix"
          input_stream: "OUTPUT_SIZE:output_size"
          output_stream: "IMAGE:output_image_unified"
          options {
            [mediapipe.WarpAffineCalculatorOptions.ext] {
              $0 # border mode
            }
          }
        }
        node {
          calculator: "FromImageCalculator"
          input_stream: "IMAGE:output_image_unified"
          output_stream: "IMAGE_CPU:output_image"
        }
      )",
          "cpu_image", input, expected_result, similarity.threshold_on_cpu,
          matrix, out_width, out_height, border_mode);

  RunTest(R"(
        input_stream: "input_image"
        input_stream: "output_size"
        input_stream: "matrix"
        node {
          calculator: "ImageFrameToGpuBufferCalculator"
          input_stream: "input_image"
          output_stream: "input_image_gpu"
        }
        node {
          calculator: "WarpAffineCalculatorGpu"
          input_stream: "IMAGE:input_image_gpu"
          input_stream: "MATRIX:matrix"
          input_stream: "OUTPUT_SIZE:output_size"
          output_stream: "IMAGE:output_image_gpu"
          options {
            [mediapipe.WarpAffineCalculatorOptions.ext] {
              $0 # border mode
              gpu_origin: TOP_LEFT
            }
          }
        }
        node {
          calculator: "GpuBufferToImageFrameCalculator"
          input_stream: "output_image_gpu"
          output_stream: "output_image"
        }
      )",
          "gpu", input, expected_result, similarity.threshold_on_gpu, matrix,
          out_width, out_height, border_mode);

  RunTest(R"(
        input_stream: "input_image"
        input_stream: "output_size"
        input_stream: "matrix"
        node {
          calculator: "ImageFrameToGpuBufferCalculator"
          input_stream: "input_image"
          output_stream: "input_image_gpu"
        }
        node {
          calculator: "ToImageCalculator"
          input_stream: "IMAGE_GPU:input_image_gpu"
          output_stream: "IMAGE:input_image_unified"
        }
        node {
          calculator: "WarpAffineCalculator"
          input_stream: "IMAGE:input_image_unified"
          input_stream: "MATRIX:matrix"
          input_stream: "OUTPUT_SIZE:output_size"
          output_stream: "IMAGE:output_image_unified"
          options {
            [mediapipe.WarpAffineCalculatorOptions.ext] {
              $0 # border mode
              gpu_origin: TOP_LEFT
            }
          }
        }
        node {
          calculator: "FromImageCalculator"
          input_stream: "IMAGE:output_image_unified"
          output_stream: "IMAGE_GPU:output_image_gpu"
        }
        node {
          calculator: "GpuBufferToImageFrameCalculator"
          input_stream: "output_image_gpu"
          output_stream: "output_image"
        }
      )",
          "gpu_image", input, expected_result, similarity.threshold_on_gpu,
          matrix, out_width, out_height, border_mode);
}

std::array<float, 16> GetMatrix(cv::Mat input, mediapipe::NormalizedRect roi,
                                bool keep_aspect_ratio, int out_width,
                                int out_height) {
  std::array<float, 16> transform_mat;
  mediapipe::RotatedRect roi_absolute =
      mediapipe::GetRoi(input.cols, input.rows, roi);
  mediapipe::PadRoi(out_width, out_height, keep_aspect_ratio, &roi_absolute)
      .IgnoreError();
  mediapipe::GetRotatedSubRectToRectTransformMatrix(
      roi_absolute, input.cols, input.rows,
      /*flip_horizontaly=*/false, &transform_mat);
|
||||||
|
return transform_mat;
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(WarpAffineCalculatorTest, MediumSubRectKeepAspect) {
|
||||||
|
mediapipe::NormalizedRect roi;
|
||||||
|
roi.set_x_center(0.65f);
|
||||||
|
roi.set_y_center(0.4f);
|
||||||
|
roi.set_width(0.5f);
|
||||||
|
roi.set_height(0.5f);
|
||||||
|
roi.set_rotation(0);
|
||||||
|
auto input = GetRgb(
|
||||||
|
"/mediapipe/calculators/"
|
||||||
|
"tensor/testdata/image_to_tensor/input.jpg");
|
||||||
|
auto expected_output = GetRgb(
|
||||||
|
"/mediapipe/calculators/"
|
||||||
|
"tensor/testdata/image_to_tensor/medium_sub_rect_keep_aspect.png");
|
||||||
|
int out_width = 256;
|
||||||
|
int out_height = 256;
|
||||||
|
bool keep_aspect_ratio = true;
|
||||||
|
std::optional<AffineTransformation::BorderMode> border_mode = {};
|
||||||
|
RunTest(input, expected_output,
|
||||||
|
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.82},
|
||||||
|
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
|
||||||
|
out_width, out_height, border_mode);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(WarpAffineCalculatorTest, MediumSubRectKeepAspectBorderZero) {
|
||||||
|
mediapipe::NormalizedRect roi;
|
||||||
|
roi.set_x_center(0.65f);
|
||||||
|
roi.set_y_center(0.4f);
|
||||||
|
roi.set_width(0.5f);
|
||||||
|
roi.set_height(0.5f);
|
||||||
|
roi.set_rotation(0);
|
||||||
|
auto input = GetRgb(
|
||||||
|
"/mediapipe/calculators/"
|
||||||
|
"tensor/testdata/image_to_tensor/input.jpg");
|
||||||
|
auto expected_output = GetRgb(
|
||||||
|
"/mediapipe/calculators/"
|
||||||
|
"tensor/testdata/image_to_tensor/"
|
||||||
|
"medium_sub_rect_keep_aspect_border_zero.png");
|
||||||
|
int out_width = 256;
|
||||||
|
int out_height = 256;
|
||||||
|
bool keep_aspect_ratio = true;
|
||||||
|
std::optional<AffineTransformation::BorderMode> border_mode =
|
||||||
|
AffineTransformation::BorderMode::kZero;
|
||||||
|
RunTest(input, expected_output,
|
||||||
|
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.81},
|
||||||
|
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
|
||||||
|
out_width, out_height, border_mode);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(WarpAffineCalculatorTest, MediumSubRectKeepAspectWithRotation) {
|
||||||
|
mediapipe::NormalizedRect roi;
|
||||||
|
roi.set_x_center(0.65f);
|
||||||
|
roi.set_y_center(0.4f);
|
||||||
|
roi.set_width(0.5f);
|
||||||
|
roi.set_height(0.5f);
|
||||||
|
roi.set_rotation(M_PI * 90.0f / 180.0f);
|
||||||
|
auto input = GetRgb(
|
||||||
|
"/mediapipe/calculators/"
|
||||||
|
"tensor/testdata/image_to_tensor/input.jpg");
|
||||||
|
auto expected_output = GetRgb(
|
||||||
|
"/mediapipe/calculators/"
|
||||||
|
"tensor/testdata/image_to_tensor/"
|
||||||
|
"medium_sub_rect_keep_aspect_with_rotation.png");
|
||||||
|
int out_width = 256;
|
||||||
|
int out_height = 256;
|
||||||
|
bool keep_aspect_ratio = true;
|
||||||
|
std::optional<AffineTransformation::BorderMode> border_mode =
|
||||||
|
AffineTransformation::BorderMode::kReplicate;
|
||||||
|
RunTest(input, expected_output,
|
||||||
|
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.77},
|
||||||
|
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
|
||||||
|
out_width, out_height, border_mode);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(WarpAffineCalculatorTest, MediumSubRectKeepAspectWithRotationBorderZero) {
|
||||||
|
mediapipe::NormalizedRect roi;
|
||||||
|
roi.set_x_center(0.65f);
|
||||||
|
roi.set_y_center(0.4f);
|
||||||
|
roi.set_width(0.5f);
|
||||||
|
roi.set_height(0.5f);
|
||||||
|
roi.set_rotation(M_PI * 90.0f / 180.0f);
|
||||||
|
auto input = GetRgb(
|
||||||
|
"/mediapipe/calculators/"
|
||||||
|
"tensor/testdata/image_to_tensor/input.jpg");
|
||||||
|
auto expected_output = GetRgb(
|
||||||
|
"/mediapipe/calculators/"
|
||||||
|
"tensor/testdata/image_to_tensor/"
|
||||||
|
"medium_sub_rect_keep_aspect_with_rotation_border_zero.png");
|
||||||
|
int out_width = 256;
|
||||||
|
int out_height = 256;
|
||||||
|
bool keep_aspect_ratio = true;
|
||||||
|
std::optional<AffineTransformation::BorderMode> border_mode =
|
||||||
|
AffineTransformation::BorderMode::kZero;
|
||||||
|
RunTest(input, expected_output,
|
||||||
|
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.75},
|
||||||
|
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
|
||||||
|
out_width, out_height, border_mode);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(WarpAffineCalculatorTest, MediumSubRectWithRotation) {
|
||||||
|
mediapipe::NormalizedRect roi;
|
||||||
|
roi.set_x_center(0.65f);
|
||||||
|
roi.set_y_center(0.4f);
|
||||||
|
roi.set_width(0.5f);
|
||||||
|
roi.set_height(0.5f);
|
||||||
|
roi.set_rotation(M_PI * -45.0f / 180.0f);
|
||||||
|
auto input = GetRgb(
|
||||||
|
"/mediapipe/calculators/"
|
||||||
|
"tensor/testdata/image_to_tensor/input.jpg");
|
||||||
|
auto expected_output = GetRgb(
|
||||||
|
"/mediapipe/calculators/"
|
||||||
|
"tensor/testdata/image_to_tensor/medium_sub_rect_with_rotation.png");
|
||||||
|
int out_width = 256;
|
||||||
|
int out_height = 256;
|
||||||
|
bool keep_aspect_ratio = false;
|
||||||
|
std::optional<AffineTransformation::BorderMode> border_mode =
|
||||||
|
AffineTransformation::BorderMode::kReplicate;
|
||||||
|
RunTest(input, expected_output,
|
||||||
|
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.81},
|
||||||
|
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
|
||||||
|
out_width, out_height, border_mode);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(WarpAffineCalculatorTest, MediumSubRectWithRotationBorderZero) {
|
||||||
|
mediapipe::NormalizedRect roi;
|
||||||
|
roi.set_x_center(0.65f);
|
||||||
|
roi.set_y_center(0.4f);
|
||||||
|
roi.set_width(0.5f);
|
||||||
|
roi.set_height(0.5f);
|
||||||
|
roi.set_rotation(M_PI * -45.0f / 180.0f);
|
||||||
|
auto input = GetRgb(
|
||||||
|
"/mediapipe/calculators/"
|
||||||
|
"tensor/testdata/image_to_tensor/input.jpg");
|
||||||
|
auto expected_output = GetRgb(
|
||||||
|
"/mediapipe/calculators/"
|
||||||
|
"tensor/testdata/image_to_tensor/"
|
||||||
|
"medium_sub_rect_with_rotation_border_zero.png");
|
||||||
|
int out_width = 256;
|
||||||
|
int out_height = 256;
|
||||||
|
bool keep_aspect_ratio = false;
|
||||||
|
std::optional<AffineTransformation::BorderMode> border_mode =
|
||||||
|
AffineTransformation::BorderMode::kZero;
|
||||||
|
RunTest(input, expected_output,
|
||||||
|
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.80},
|
||||||
|
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
|
||||||
|
out_width, out_height, border_mode);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(WarpAffineCalculatorTest, LargeSubRect) {
|
||||||
|
mediapipe::NormalizedRect roi;
|
||||||
|
roi.set_x_center(0.5f);
|
||||||
|
roi.set_y_center(0.5f);
|
||||||
|
roi.set_width(1.5f);
|
||||||
|
roi.set_height(1.1f);
|
||||||
|
roi.set_rotation(0);
|
||||||
|
auto input = GetRgb(
|
||||||
|
"/mediapipe/calculators/"
|
||||||
|
"tensor/testdata/image_to_tensor/input.jpg");
|
||||||
|
auto expected_output = GetRgb(
|
||||||
|
"/mediapipe/calculators/"
|
||||||
|
"tensor/testdata/image_to_tensor/large_sub_rect.png");
|
||||||
|
int out_width = 128;
|
||||||
|
int out_height = 128;
|
||||||
|
bool keep_aspect_ratio = false;
|
||||||
|
std::optional<AffineTransformation::BorderMode> border_mode =
|
||||||
|
AffineTransformation::BorderMode::kReplicate;
|
||||||
|
RunTest(input, expected_output,
|
||||||
|
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.95},
|
||||||
|
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
|
||||||
|
out_width, out_height, border_mode);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(WarpAffineCalculatorTest, LargeSubRectBorderZero) {
|
||||||
|
mediapipe::NormalizedRect roi;
|
||||||
|
roi.set_x_center(0.5f);
|
||||||
|
roi.set_y_center(0.5f);
|
||||||
|
roi.set_width(1.5f);
|
||||||
|
roi.set_height(1.1f);
|
||||||
|
roi.set_rotation(0);
|
||||||
|
auto input = GetRgb(
|
||||||
|
"/mediapipe/calculators/"
|
||||||
|
"tensor/testdata/image_to_tensor/input.jpg");
|
||||||
|
auto expected_output = GetRgb(
|
||||||
|
"/mediapipe/calculators/"
|
||||||
|
"tensor/testdata/image_to_tensor/large_sub_rect_border_zero.png");
|
||||||
|
int out_width = 128;
|
||||||
|
int out_height = 128;
|
||||||
|
bool keep_aspect_ratio = false;
|
||||||
|
std::optional<AffineTransformation::BorderMode> border_mode =
|
||||||
|
AffineTransformation::BorderMode::kZero;
|
||||||
|
RunTest(input, expected_output,
|
||||||
|
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.92},
|
||||||
|
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
|
||||||
|
out_width, out_height, border_mode);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(WarpAffineCalculatorTest, LargeSubRectKeepAspect) {
|
||||||
|
mediapipe::NormalizedRect roi;
|
||||||
|
roi.set_x_center(0.5f);
|
||||||
|
roi.set_y_center(0.5f);
|
||||||
|
roi.set_width(1.5f);
|
||||||
|
roi.set_height(1.1f);
|
||||||
|
roi.set_rotation(0);
|
||||||
|
auto input = GetRgb(
|
||||||
|
"/mediapipe/calculators/"
|
||||||
|
"tensor/testdata/image_to_tensor/input.jpg");
|
||||||
|
auto expected_output = GetRgb(
|
||||||
|
"/mediapipe/calculators/"
|
||||||
|
"tensor/testdata/image_to_tensor/large_sub_rect_keep_aspect.png");
|
||||||
|
int out_width = 128;
|
||||||
|
int out_height = 128;
|
||||||
|
bool keep_aspect_ratio = true;
|
||||||
|
std::optional<AffineTransformation::BorderMode> border_mode =
|
||||||
|
AffineTransformation::BorderMode::kReplicate;
|
||||||
|
RunTest(input, expected_output,
|
||||||
|
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.97},
|
||||||
|
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
|
||||||
|
out_width, out_height, border_mode);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(WarpAffineCalculatorTest, LargeSubRectKeepAspectBorderZero) {
|
||||||
|
mediapipe::NormalizedRect roi;
|
||||||
|
roi.set_x_center(0.5f);
|
||||||
|
roi.set_y_center(0.5f);
|
||||||
|
roi.set_width(1.5f);
|
||||||
|
roi.set_height(1.1f);
|
||||||
|
roi.set_rotation(0);
|
||||||
|
auto input = GetRgb(
|
||||||
|
"/mediapipe/calculators/"
|
||||||
|
"tensor/testdata/image_to_tensor/input.jpg");
|
||||||
|
auto expected_output = GetRgb(
|
||||||
|
"/mediapipe/calculators/"
|
||||||
|
"tensor/testdata/image_to_tensor/"
|
||||||
|
"large_sub_rect_keep_aspect_border_zero.png");
|
||||||
|
int out_width = 128;
|
||||||
|
int out_height = 128;
|
||||||
|
bool keep_aspect_ratio = true;
|
||||||
|
std::optional<AffineTransformation::BorderMode> border_mode =
|
||||||
|
AffineTransformation::BorderMode::kZero;
|
||||||
|
RunTest(input, expected_output,
|
||||||
|
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.97},
|
||||||
|
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
|
||||||
|
out_width, out_height, border_mode);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(WarpAffineCalculatorTest, LargeSubRectKeepAspectWithRotation) {
|
||||||
|
mediapipe::NormalizedRect roi;
|
||||||
|
roi.set_x_center(0.5f);
|
||||||
|
roi.set_y_center(0.5f);
|
||||||
|
roi.set_width(1.5f);
|
||||||
|
roi.set_height(1.1f);
|
||||||
|
roi.set_rotation(M_PI * -15.0f / 180.0f);
|
||||||
|
auto input = GetRgba(
|
||||||
|
"/mediapipe/calculators/"
|
||||||
|
"tensor/testdata/image_to_tensor/input.jpg");
|
||||||
|
auto expected_output = GetRgba(
|
||||||
|
"/mediapipe/calculators/"
|
||||||
|
"tensor/testdata/image_to_tensor/"
|
||||||
|
"large_sub_rect_keep_aspect_with_rotation.png");
|
||||||
|
int out_width = 128;
|
||||||
|
int out_height = 128;
|
||||||
|
bool keep_aspect_ratio = true;
|
||||||
|
std::optional<AffineTransformation::BorderMode> border_mode = {};
|
||||||
|
RunTest(input, expected_output,
|
||||||
|
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.91},
|
||||||
|
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
|
||||||
|
out_width, out_height, border_mode);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(WarpAffineCalculatorTest, LargeSubRectKeepAspectWithRotationBorderZero) {
|
||||||
|
mediapipe::NormalizedRect roi;
|
||||||
|
roi.set_x_center(0.5f);
|
||||||
|
roi.set_y_center(0.5f);
|
||||||
|
roi.set_width(1.5f);
|
||||||
|
roi.set_height(1.1f);
|
||||||
|
roi.set_rotation(M_PI * -15.0f / 180.0f);
|
||||||
|
auto input = GetRgba(
|
||||||
|
"/mediapipe/calculators/"
|
||||||
|
"tensor/testdata/image_to_tensor/input.jpg");
|
||||||
|
auto expected_output = GetRgba(
|
||||||
|
"/mediapipe/calculators/"
|
||||||
|
"tensor/testdata/image_to_tensor/"
|
||||||
|
"large_sub_rect_keep_aspect_with_rotation_border_zero.png");
|
||||||
|
int out_width = 128;
|
||||||
|
int out_height = 128;
|
||||||
|
bool keep_aspect_ratio = true;
|
||||||
|
std::optional<AffineTransformation::BorderMode> border_mode =
|
||||||
|
AffineTransformation::BorderMode::kZero;
|
||||||
|
RunTest(input, expected_output,
|
||||||
|
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.88},
|
||||||
|
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
|
||||||
|
out_width, out_height, border_mode);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(WarpAffineCalculatorTest, NoOp) {
|
||||||
|
mediapipe::NormalizedRect roi;
|
||||||
|
roi.set_x_center(0.5f);
|
||||||
|
roi.set_y_center(0.5f);
|
||||||
|
roi.set_width(1.0f);
|
||||||
|
roi.set_height(1.0f);
|
||||||
|
roi.set_rotation(0);
|
||||||
|
auto input = GetRgba(
|
||||||
|
"/mediapipe/calculators/"
|
||||||
|
"tensor/testdata/image_to_tensor/input.jpg");
|
||||||
|
auto expected_output = GetRgba(
|
||||||
|
"/mediapipe/calculators/"
|
||||||
|
"tensor/testdata/image_to_tensor/noop_except_range.png");
|
||||||
|
int out_width = 64;
|
||||||
|
int out_height = 128;
|
||||||
|
bool keep_aspect_ratio = true;
|
||||||
|
std::optional<AffineTransformation::BorderMode> border_mode =
|
||||||
|
AffineTransformation::BorderMode::kReplicate;
|
||||||
|
RunTest(input, expected_output,
|
||||||
|
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.99},
|
||||||
|
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
|
||||||
|
out_width, out_height, border_mode);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(WarpAffineCalculatorTest, NoOpBorderZero) {
|
||||||
|
mediapipe::NormalizedRect roi;
|
||||||
|
roi.set_x_center(0.5f);
|
||||||
|
roi.set_y_center(0.5f);
|
||||||
|
roi.set_width(1.0f);
|
||||||
|
roi.set_height(1.0f);
|
||||||
|
roi.set_rotation(0);
|
||||||
|
auto input = GetRgba(
|
||||||
|
"/mediapipe/calculators/"
|
||||||
|
"tensor/testdata/image_to_tensor/input.jpg");
|
||||||
|
auto expected_output = GetRgba(
|
||||||
|
"/mediapipe/calculators/"
|
||||||
|
"tensor/testdata/image_to_tensor/noop_except_range.png");
|
||||||
|
int out_width = 64;
|
||||||
|
int out_height = 128;
|
||||||
|
bool keep_aspect_ratio = true;
|
||||||
|
std::optional<AffineTransformation::BorderMode> border_mode =
|
||||||
|
AffineTransformation::BorderMode::kZero;
|
||||||
|
RunTest(input, expected_output,
|
||||||
|
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.99},
|
||||||
|
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
|
||||||
|
out_width, out_height, border_mode);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
} // namespace mediapipe
|
|
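The tests above all reduce to one scalar check: the warp result is accepted when its relative L2 distance from the OpenCV-produced golden image stays under a per-backend threshold. A small illustration of that metric, not part of the commit, assuming only OpenCV:

// Illustration only: the relative-L2 similarity the tests above assert on.
// "result" and "expected" are assumed to be same-size, same-type cv::Mats.
#include <opencv2/core.hpp>

double Similarity(const cv::Mat& result, const cv::Mat& expected) {
  // cv::NORM_RELATIVE | cv::NORM_L2 computes norm(result - expected) / norm(expected).
  const double relative_error =
      cv::norm(result, expected, cv::NORM_RELATIVE | cv::NORM_L2);
  return 1.0 - relative_error;  // 1.0 means the images match exactly.
}

A similarity of 0.99 on CPU therefore allows roughly a 1% relative pixel error, while the looser GPU thresholds absorb the OpenCV-vs-GL sampling differences noted in the comment above.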
@@ -26,6 +26,11 @@ licenses(["notice"])
 
 package(default_visibility = ["//visibility:private"])
 
+exports_files(
+    glob(["testdata/image_to_tensor/*"]),
+    visibility = ["//mediapipe/calculators/image:__subpackages__"],
+)
+
 selects.config_setting_group(
     name = "compute_shader_unavailable",
     match_any = [
@@ -351,6 +356,57 @@ cc_library(
     alwayslink = 1,
 )
 
+mediapipe_proto_library(
+    name = "landmarks_to_tensor_calculator_proto",
+    srcs = ["landmarks_to_tensor_calculator.proto"],
+    visibility = ["//visibility:public"],
+    deps = [
+        "//mediapipe/framework:calculator_options_proto",
+        "//mediapipe/framework:calculator_proto",
+    ],
+)
+
+cc_library(
+    name = "landmarks_to_tensor_calculator",
+    srcs = ["landmarks_to_tensor_calculator.cc"],
+    hdrs = ["landmarks_to_tensor_calculator.h"],
+    copts = select({
+        "//mediapipe:apple": [
+            "-x objective-c++",
+            "-fobjc-arc",  # enable reference-counting
+        ],
+        "//conditions:default": [],
+    }),
+    visibility = ["//visibility:public"],
+    deps = [
+        ":landmarks_to_tensor_calculator_cc_proto",
+        "//mediapipe/framework:calculator_framework",
+        "//mediapipe/framework/api2:node",
+        "//mediapipe/framework/formats:landmark_cc_proto",
+        "//mediapipe/framework/formats:tensor",
+        "//mediapipe/framework/port:ret_check",
+    ],
+    alwayslink = 1,
+)
+
+cc_test(
+    name = "landmarks_to_tensor_calculator_test",
+    srcs = ["landmarks_to_tensor_calculator_test.cc"],
+    deps = [
+        ":landmarks_to_tensor_calculator",
+        ":landmarks_to_tensor_calculator_cc_proto",
+        "//mediapipe/framework:calculator_cc_proto",
+        "//mediapipe/framework:calculator_framework",
+        "//mediapipe/framework:calculator_runner",
+        "//mediapipe/framework/formats:landmark_cc_proto",
+        "//mediapipe/framework/formats:tensor",
+        "//mediapipe/framework/port:gtest_main",
+        "//mediapipe/framework/port:parse_text_proto",
+        "@com_google_absl//absl/memory",
+        "@com_google_googletest//:gtest_main",
+    ],
+)
+
 mediapipe_proto_library(
     name = "tensors_to_floats_calculator_proto",
     srcs = ["tensors_to_floats_calculator.proto"],
@@ -87,9 +87,9 @@ using GpuBuffer = mediapipe::GpuBuffer;
 //   TENSORS - std::vector<Tensor>
 //     Vector containing a single Tensor populated with an extrated RGB image.
 //   MATRIX - std::array<float, 16> @Optional
-//     An std::array<float, 16> representing a 4x4 row-major-order matrix which
-//     can be used to map a point on the output tensor to a point on the input
-//     image.
+//     An std::array<float, 16> representing a 4x4 row-major-order matrix that
+//     maps a point on the input image to a point on the output tensor, and
+//     can be used to reverse the mapping by inverting the matrix.
 //   LETTERBOX_PADDING - std::array<float, 4> @Optional
 //     An std::array<float, 4> representing the letterbox padding from the 4
 //     sides ([left, top, right, bottom]) of the output image, normalized to
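Illustration (not part of the commit) of the reworded MATRIX contract: treating the 16 floats as a 4x4 row-major matrix and multiplying a homogeneous point maps input-image coordinates to output-tensor coordinates, and inverting the matrix gives the reverse mapping. The coordinate convention (pixels vs. normalized) is whatever the calculator emitted the matrix in.

// Sketch only: apply a 4x4 row-major matrix m to the point (x, y, 0, 1).
#include <array>

std::array<float, 2> MapPoint(const std::array<float, 16>& m, float x,
                              float y) {
  // Rows 0 and 1 of m produce the transformed x and y; z is taken as 0.
  const float out_x = m[0] * x + m[1] * y + m[3];
  const float out_y = m[4] * x + m[5] * y + m[7];
  return {out_x, out_y};
}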
@@ -33,7 +33,7 @@ class InferenceCalculatorSelectorImpl
   absl::StatusOr<CalculatorGraphConfig> GetConfig(
       const CalculatorGraphConfig::Node& subgraph_node) {
     const auto& options =
-        Subgraph::GetOptions<::mediapipe::InferenceCalculatorOptions>(
+        Subgraph::GetOptions<mediapipe::InferenceCalculatorOptions>(
             subgraph_node);
     std::vector<absl::string_view> impls;
     const bool should_use_gpu =
@@ -99,8 +99,11 @@ class InferenceCalculator : public NodeIntf {
       kSideInCustomOpResolver{"CUSTOM_OP_RESOLVER"};
   static constexpr SideInput<TfLiteModelPtr>::Optional kSideInModel{"MODEL"};
   static constexpr Output<std::vector<Tensor>> kOutTensors{"TENSORS"};
+  static constexpr SideInput<
+      mediapipe::InferenceCalculatorOptions::Delegate>::Optional kDelegate{
+      "DELEGATE"};
   MEDIAPIPE_NODE_CONTRACT(kInTensors, kSideInCustomOpResolver, kSideInModel,
-                          kOutTensors);
+                          kOutTensors, kDelegate);
 
  protected:
   using TfLiteDelegatePtr =
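A hedged sketch of how a client could feed the new optional DELEGATE input side packet; the side-packet name "delegate" and the graph wiring (input_side_packet: "DELEGATE:delegate" on the inference node) are illustrative assumptions, not something this diff defines.

// Sketch only: pass delegate settings at StartRun() time instead of baking
// them into the graph's node options.
#include <map>
#include <string>
#include <utility>

#include "mediapipe/calculators/tensor/inference_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"

absl::Status StartWithXnnpack(mediapipe::CalculatorGraph& graph) {
  mediapipe::InferenceCalculatorOptions::Delegate delegate;
  delegate.mutable_xnnpack()->set_num_threads(4);  // example value
  std::map<std::string, mediapipe::Packet> side_packets;
  side_packets["delegate"] =
      mediapipe::MakePacket<mediapipe::InferenceCalculatorOptions::Delegate>(
          std::move(delegate));
  return graph.StartRun(side_packets);
}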
@@ -18,6 +18,9 @@ package mediapipe;
 
 import "mediapipe/framework/calculator.proto";
 
+option java_package = "com.google.mediapipe.calculator.proto";
+option java_outer_classname = "InferenceCalculatorProto";
+
 // Full Example:
 //
 // node {
@@ -31,7 +34,6 @@ import "mediapipe/framework/calculator.proto";
 // }
 // }
 // }
-//
 message InferenceCalculatorOptions {
   extend mediapipe.CalculatorOptions {
     optional InferenceCalculatorOptions ext = 336783863;
@@ -66,10 +68,55 @@ message InferenceCalculatorOptions {
       // Load pre-compiled serialized binary cache to accelerate init process.
       // Only available for OpenCL delegate on Android.
      // Kernel caching will only be enabled if this path is set.
+      //
+      // NOTE: binary cache usage may be skipped if valid serialized model,
+      // specified by "serialized_model_dir", exists.
+      //
+      // TODO: update to cached_kernel_dir
       optional string cached_kernel_path = 2;
+
+      // A dir to load from and save to a pre-compiled serialized model used to
+      // accelerate init process.
+      //
+      // NOTE: available for OpenCL delegate on Android only when
+      // "use_advanced_gpu_api" is set to true and "model_token" is set
+      // properly.
+      //
+      // NOTE: serialized model takes precedence over binary cache
+      // specified by "cached_kernel_path", which still can be used if
+      // serialized model is invalid or missing.
+      optional string serialized_model_dir = 7;
+
+      // Unique token identifying the model. Used in conjunction with
+      // "serialized_model_dir". It is the caller's responsibility to ensure
+      // there is no clash of the tokens.
+      optional string model_token = 8;
+
+      // Encapsulated compilation/runtime tradeoffs.
+      enum InferenceUsage {
+        UNSPECIFIED = 0;
+
+        // InferenceRunner will be used only once. Therefore, it is important
+        // to minimize bootstrap time as well.
+        FAST_SINGLE_ANSWER = 1;
+
+        // Prefer maximizing the throughput. Same inference runner will be used
+        // repeatedly on different inputs.
+        SUSTAINED_SPEED = 2;
+      }
+      optional InferenceUsage usage = 5 [default = SUSTAINED_SPEED];
     }
 
     // Android only.
-    message Nnapi {}
+    message Nnapi {
+      // Directory to store compilation cache. If unspecified, NNAPI will not
+      // try caching the compilation.
+      optional string cache_dir = 1;
+      // Unique token identifying the model. It is the caller's responsibility
+      // to ensure there is no clash of the tokens. If unspecified, NNAPI will
+      // not try caching the compilation.
+      optional string model_token = 2;
+    }
     message Xnnpack {
       // Number of threads for XNNPACK delegate. (By default, calculator tries
       // to choose optimal number of threads depending on the device.)
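To make the new fields concrete, here is a hedged sketch of a node configuration that turns on the advanced GPU API together with the serialized-model cache; the model path, directories, and token are placeholders, and per the comments above a valid serialized model takes precedence over the binary kernel cache.

// Sketch only; values are placeholders, not defaults shipped by MediaPipe.
#include "mediapipe/framework/calculator.pb.h"
#include "mediapipe/framework/port/parse_text_proto.h"

mediapipe::CalculatorGraphConfig::Node ExampleGpuInferenceNode() {
  return mediapipe::ParseTextProtoOrDie<
      mediapipe::CalculatorGraphConfig::Node>(R"pb(
    calculator: "InferenceCalculator"
    input_stream: "TENSORS:input_tensors"
    output_stream: "TENSORS:output_tensors"
    options {
      [mediapipe.InferenceCalculatorOptions.ext] {
        model_path: "model.tflite"
        delegate {
          gpu {
            use_advanced_gpu_api: true
            usage: SUSTAINED_SPEED
            cached_kernel_path: "/data/local/tmp/"
            serialized_model_dir: "/data/local/tmp/"
            model_token: "my_model_v1"
          }
        }
      }
    }
  )pb");
}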
@@ -50,11 +50,13 @@ int GetXnnpackDefaultNumThreads() {
 // Returns number of threads to configure XNNPACK delegate with.
 // Returns user provided value if specified. Otherwise, tries to choose optimal
 // number of threads depending on the device.
-int GetXnnpackNumThreads(const mediapipe::InferenceCalculatorOptions& opts) {
+int GetXnnpackNumThreads(
+    const bool opts_has_delegate,
+    const mediapipe::InferenceCalculatorOptions::Delegate& opts_delegate) {
   static constexpr int kDefaultNumThreads = -1;
-  if (opts.has_delegate() && opts.delegate().has_xnnpack() &&
-      opts.delegate().xnnpack().num_threads() != kDefaultNumThreads) {
-    return opts.delegate().xnnpack().num_threads();
+  if (opts_has_delegate && opts_delegate.has_xnnpack() &&
+      opts_delegate.xnnpack().num_threads() != kDefaultNumThreads) {
+    return opts_delegate.xnnpack().num_threads();
   }
   return GetXnnpackDefaultNumThreads();
 }
@@ -73,6 +75,7 @@ class InferenceCalculatorCpuImpl
  private:
   absl::Status LoadModel(CalculatorContext* cc);
   absl::Status LoadDelegate(CalculatorContext* cc);
+  absl::Status LoadDelegateAndAllocateTensors(CalculatorContext* cc);
 
   // TfLite requires us to keep the model alive as long as the interpreter is.
   Packet<TfLiteModelPtr> model_packet_;
@@ -91,8 +94,7 @@ absl::Status InferenceCalculatorCpuImpl::UpdateContract(
 
 absl::Status InferenceCalculatorCpuImpl::Open(CalculatorContext* cc) {
   MP_RETURN_IF_ERROR(LoadModel(cc));
-  MP_RETURN_IF_ERROR(LoadDelegate(cc));
-  return absl::OkStatus();
+  return LoadDelegateAndAllocateTensors(cc);
 }
 
 absl::Status InferenceCalculatorCpuImpl::Process(CalculatorContext* cc) {
@@ -156,34 +158,61 @@ absl::Status InferenceCalculatorCpuImpl::LoadModel(CalculatorContext* cc) {
       cc->Options<mediapipe::InferenceCalculatorOptions>().cpu_num_thread());
 #endif  // __EMSCRIPTEN__
 
+  return absl::OkStatus();
+}
+
+absl::Status InferenceCalculatorCpuImpl::LoadDelegateAndAllocateTensors(
+    CalculatorContext* cc) {
+  MP_RETURN_IF_ERROR(LoadDelegate(cc));
+
+  // AllocateTensors() can be called only after ModifyGraphWithDelegate.
   RET_CHECK_EQ(interpreter_->AllocateTensors(), kTfLiteOk);
   // TODO: Support quantized tensors.
-  CHECK(interpreter_->tensor(interpreter_->inputs()[0])->quantization.type !=
-        kTfLiteAffineQuantization);
+  RET_CHECK_NE(
+      interpreter_->tensor(interpreter_->inputs()[0])->quantization.type,
+      kTfLiteAffineQuantization);
 
   return absl::OkStatus();
 }
 
 absl::Status InferenceCalculatorCpuImpl::LoadDelegate(CalculatorContext* cc) {
   const auto& calculator_opts =
       cc->Options<mediapipe::InferenceCalculatorOptions>();
-  if (calculator_opts.has_delegate() &&
-      calculator_opts.delegate().has_tflite()) {
+  auto opts_delegate = calculator_opts.delegate();
+  if (!kDelegate(cc).IsEmpty()) {
+    mediapipe::InferenceCalculatorOptions::Delegate input_side_packet_delegate =
+        kDelegate(cc).Get();
+    CHECK(input_side_packet_delegate.has_tflite() ||
+          input_side_packet_delegate.has_xnnpack() ||
+          input_side_packet_delegate.has_nnapi() ||
+          input_side_packet_delegate.delegate_case() ==
+              mediapipe::InferenceCalculatorOptions::Delegate::DELEGATE_NOT_SET)
+        << "inference_calculator_cpu only supports delegate input side packet "
+        << "for TFLite, XNNPack and Nnapi";
+    opts_delegate.MergeFrom(input_side_packet_delegate);
+  }
+  const bool opts_has_delegate =
+      calculator_opts.has_delegate() || !kDelegate(cc).IsEmpty();
+  if (opts_has_delegate && opts_delegate.has_tflite()) {
     // Default tflite inference requeqsted - no need to modify graph.
     return absl::OkStatus();
   }
 
 #if defined(MEDIAPIPE_ANDROID)
-  const bool nnapi_requested = calculator_opts.has_delegate()
-                                   ? calculator_opts.delegate().has_nnapi()
-                                   : calculator_opts.use_nnapi();
+  const bool nnapi_requested = opts_has_delegate ? opts_delegate.has_nnapi()
+                                                 : calculator_opts.use_nnapi();
   if (nnapi_requested) {
     // Attempt to use NNAPI.
     // If not supported, the default CPU delegate will be created and used.
     interpreter_->SetAllowFp16PrecisionForFp32(1);
-    delegate_ = TfLiteDelegatePtr(tflite::NnApiDelegate(), [](TfLiteDelegate*) {
-      // No need to free according to tflite::NnApiDelegate() documentation.
-    });
+    tflite::StatefulNnApiDelegate::Options options;
+    const auto& nnapi = opts_delegate.nnapi();
+    // Set up cache_dir and model_token for NNAPI compilation cache.
+    options.cache_dir =
+        nnapi.has_cache_dir() ? nnapi.cache_dir().c_str() : nullptr;
+    options.model_token =
+        nnapi.has_model_token() ? nnapi.model_token().c_str() : nullptr;
+    delegate_ = TfLiteDelegatePtr(new tflite::StatefulNnApiDelegate(options),
+                                  [](TfLiteDelegate*) {});
     RET_CHECK_EQ(interpreter_->ModifyGraphWithDelegate(delegate_.get()),
                  kTfLiteOk);
     return absl::OkStatus();
@@ -193,13 +222,13 @@ absl::Status InferenceCalculatorCpuImpl::LoadDelegate(CalculatorContext* cc) {
 #if defined(__EMSCRIPTEN__)
   const bool use_xnnpack = true;
 #else
-  const bool use_xnnpack = calculator_opts.has_delegate() &&
-                           calculator_opts.delegate().has_xnnpack();
+  const bool use_xnnpack = opts_has_delegate && opts_delegate.has_xnnpack();
 #endif  // defined(__EMSCRIPTEN__)
 
   if (use_xnnpack) {
     TfLiteXNNPackDelegateOptions xnnpack_opts{};
-    xnnpack_opts.num_threads = GetXnnpackNumThreads(calculator_opts);
+    xnnpack_opts.num_threads =
+        GetXnnpackNumThreads(opts_has_delegate, opts_delegate);
     delegate_ = TfLiteDelegatePtr(TfLiteXNNPackDelegateCreate(&xnnpack_opts),
                                   &TfLiteXNNPackDelegateDelete);
     RET_CHECK_EQ(interpreter_->ModifyGraphWithDelegate(delegate_.get()),
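For the NNAPI branch above, compilation caching only kicks in when both fields are set; a short sketch (directory and token are placeholders) of building such a Delegate, which can then be supplied either in the node options or through the DELEGATE side packet shown earlier:

// Sketch only: NNAPI delegate options with compilation caching enabled.
#include "mediapipe/calculators/tensor/inference_calculator.pb.h"

mediapipe::InferenceCalculatorOptions::Delegate NnapiDelegateWithCache() {
  mediapipe::InferenceCalculatorOptions::Delegate delegate;
  auto* nnapi = delegate.mutable_nnapi();
  nnapi->set_cache_dir("/data/local/tmp/nnapi_cache");  // placeholder path
  nnapi->set_model_token("my_model_v1");                // placeholder token
  return delegate;
}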
@@ -18,7 +18,9 @@
 #include <vector>
 
 #include "absl/memory/memory.h"
+#include "absl/status/status.h"
 #include "mediapipe/calculators/tensor/inference_calculator.h"
+#include "mediapipe/framework/deps/file_path.h"
 #include "mediapipe/util/tflite/config.h"
 
 #if MEDIAPIPE_TFLITE_GL_INFERENCE
@@ -48,10 +50,11 @@ class InferenceCalculatorGlImpl
   absl::Status Close(CalculatorContext* cc) override;
 
  private:
-  absl::Status ReadKernelsFromFile();
-  absl::Status WriteKernelsToFile();
+  absl::Status ReadGpuCaches();
+  absl::Status SaveGpuCaches();
   absl::Status LoadModel(CalculatorContext* cc);
   absl::Status LoadDelegate(CalculatorContext* cc);
+  absl::Status LoadDelegateAndAllocateTensors(CalculatorContext* cc);
   absl::Status InitTFLiteGPURunner(CalculatorContext* cc);
 
   // TfLite requires us to keep the model alive as long as the interpreter is.
@@ -65,6 +68,8 @@ class InferenceCalculatorGlImpl
   bool allow_precision_loss_ = false;
   mediapipe::InferenceCalculatorOptions::Delegate::Gpu::Api
       tflite_gpu_runner_api_;
+  mediapipe::InferenceCalculatorOptions::Delegate::Gpu::InferenceUsage
+      tflite_gpu_runner_usage_;
 #endif  // MEDIAPIPE_TFLITE_GL_INFERENCE
 
 #if MEDIAPIPE_TFLITE_GPU_SUPPORTED
@@ -78,6 +83,8 @@ class InferenceCalculatorGlImpl
 
   bool use_kernel_caching_ = false;
   std::string cached_kernel_filename_;
+  bool use_serialized_model_ = false;
+  std::string serialized_model_path_;
 };
 
 absl::Status InferenceCalculatorGlImpl::UpdateContract(CalculatorContract* cc) {
@@ -91,22 +98,43 @@ absl::Status InferenceCalculatorGlImpl::UpdateContract(CalculatorContract* cc) {
 
 absl::Status InferenceCalculatorGlImpl::Open(CalculatorContext* cc) {
   const auto& options = cc->Options<::mediapipe::InferenceCalculatorOptions>();
-  use_advanced_gpu_api_ = options.has_delegate() &&
-                          options.delegate().has_gpu() &&
-                          options.delegate().gpu().use_advanced_gpu_api();
-  allow_precision_loss_ = options.delegate().gpu().allow_precision_loss();
-  tflite_gpu_runner_api_ = options.delegate().gpu().api();
-  use_kernel_caching_ = use_advanced_gpu_api_ &&
-                        options.delegate().gpu().has_cached_kernel_path();
+  mediapipe::InferenceCalculatorOptions::Delegate delegate = options.delegate();
+  if (!kDelegate(cc).IsEmpty()) {
+    mediapipe::InferenceCalculatorOptions::Delegate input_side_packet_delegate =
+        kDelegate(cc).Get();
+    CHECK(input_side_packet_delegate.has_gpu() ||
+          input_side_packet_delegate.delegate_case() ==
+              mediapipe::InferenceCalculatorOptions::Delegate::DELEGATE_NOT_SET)
+        << "inference_calculator_gl only supports delegate input side packet "
+        << "for Gpu";
+    delegate.MergeFrom(input_side_packet_delegate);
+  }
+  const bool has_delegate = options.has_delegate() || !kDelegate(cc).IsEmpty();
+  use_advanced_gpu_api_ = has_delegate && delegate.has_gpu() &&
+                          delegate.gpu().use_advanced_gpu_api();
+  allow_precision_loss_ = delegate.gpu().allow_precision_loss();
+  tflite_gpu_runner_api_ = delegate.gpu().api();
+  tflite_gpu_runner_usage_ = delegate.gpu().usage();
+  use_kernel_caching_ =
+      use_advanced_gpu_api_ && delegate.gpu().has_cached_kernel_path();
+  use_serialized_model_ = use_advanced_gpu_api_ &&
+                          delegate.gpu().has_serialized_model_dir() &&
+                          delegate.gpu().has_model_token();
   use_gpu_delegate_ = !use_advanced_gpu_api_;
 
   if (use_kernel_caching_) {
 #ifdef MEDIAPIPE_ANDROID
-    cached_kernel_filename_ = options.delegate().gpu().cached_kernel_path() +
+    cached_kernel_filename_ = delegate.gpu().cached_kernel_path() +
                               mediapipe::File::Basename(options.model_path()) +
                               ".ker";
 #endif  // MEDIAPIPE_ANDROID
   }
+  if (use_serialized_model_) {
+#ifdef MEDIAPIPE_ANDROID
+    serialized_model_path_ = mediapipe::file::JoinPath(
+        delegate.gpu().serialized_model_dir(), delegate.gpu().model_token());
+#endif  // MEDIAPIPE_ANDROID
+  }
 
   // When use_advanced_gpu_api_, model loading is handled in InitTFLiteGPURunner
   // for everything.
@@ -115,9 +143,10 @@ absl::Status InferenceCalculatorGlImpl::Open(CalculatorContext* cc) {
   }
 
   MP_RETURN_IF_ERROR(gpu_helper_.Open(cc));
-  MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext([this,
-                                                 &cc]() -> ::mediapipe::Status {
-    return use_advanced_gpu_api_ ? InitTFLiteGPURunner(cc) : LoadDelegate(cc);
+  MP_RETURN_IF_ERROR(
+      gpu_helper_.RunInGlContext([this, &cc]() -> ::mediapipe::Status {
+        return use_advanced_gpu_api_ ? InitTFLiteGPURunner(cc)
+                                     : LoadDelegateAndAllocateTensors(cc);
   }));
   return absl::OkStatus();
 }
@@ -193,7 +222,7 @@ absl::Status InferenceCalculatorGlImpl::Process(CalculatorContext* cc) {
   return absl::OkStatus();
 }
 
-absl::Status InferenceCalculatorGlImpl::WriteKernelsToFile() {
+absl::Status InferenceCalculatorGlImpl::SaveGpuCaches() {
 #ifdef MEDIAPIPE_ANDROID
   if (use_kernel_caching_) {
     // Save kernel file.
@@ -203,12 +232,22 @@ absl::Status InferenceCalculatorGlImpl::WriteKernelsToFile() {
     MP_RETURN_IF_ERROR(
         mediapipe::file::SetContents(cached_kernel_filename_, cache_str));
   }
+  if (use_serialized_model_) {
+    // Save serialized model file.
+    ASSIGN_OR_RETURN(std::vector<uint8_t> serialized_model_vec,
+                     tflite_gpu_runner_->GetSerializedModel());
+    absl::string_view serialized_model(
+        reinterpret_cast<char*>(serialized_model_vec.data()),
+        serialized_model_vec.size());
+    MP_RETURN_IF_ERROR(
+        mediapipe::file::SetContents(serialized_model_path_, serialized_model));
+  }
 #endif  // MEDIAPIPE_ANDROID
   return absl::OkStatus();
 }
 
 absl::Status InferenceCalculatorGlImpl::Close(CalculatorContext* cc) {
-  MP_RETURN_IF_ERROR(WriteKernelsToFile());
+  MP_RETURN_IF_ERROR(SaveGpuCaches());
   if (use_gpu_delegate_) {
     MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext([this]() -> Status {
       gpu_buffers_in_.clear();
@@ -222,17 +261,24 @@ absl::Status InferenceCalculatorGlImpl::Close(CalculatorContext* cc) {
   return absl::OkStatus();
 }
 
-absl::Status InferenceCalculatorGlImpl::ReadKernelsFromFile() {
+absl::Status InferenceCalculatorGlImpl::ReadGpuCaches() {
 #ifdef MEDIAPIPE_ANDROID
-  if (use_kernel_caching_) {
+  if (use_kernel_caching_ && File::Exists(cached_kernel_filename_)) {
     // Load pre-compiled kernel file.
-    if (mediapipe::File::Exists(cached_kernel_filename_)) {
     std::string cache_str;
     MP_RETURN_IF_ERROR(
         mediapipe::file::GetContents(cached_kernel_filename_, &cache_str));
     std::vector<uint8_t> cache_vec(cache_str.begin(), cache_str.end());
     tflite_gpu_runner_->SetSerializedBinaryCache(std::move(cache_vec));
   }
+  if (use_serialized_model_ && File::Exists(serialized_model_path_)) {
+    // Load serialized model file.
+    std::string serialized_model_str;
+    MP_RETURN_IF_ERROR(
+        file::GetContents(serialized_model_path_, &serialized_model_str));
+    std::vector<uint8_t> serialized_model_vec(serialized_model_str.begin(),
+                                              serialized_model_str.end());
+    tflite_gpu_runner_->SetSerializedModel(std::move(serialized_model_vec));
+  }
 #endif  // MEDIAPIPE_ANDROID
   return absl::OkStatus();
@@ -253,9 +299,27 @@ absl::Status InferenceCalculatorGlImpl::InitTFLiteGPURunner(
           : tflite::gpu::InferencePriority::MAX_PRECISION;
   options.priority2 = tflite::gpu::InferencePriority::AUTO;
   options.priority3 = tflite::gpu::InferencePriority::AUTO;
+  switch (tflite_gpu_runner_usage_) {
+    case mediapipe::InferenceCalculatorOptions::Delegate::Gpu::
+        FAST_SINGLE_ANSWER: {
+      options.usage = tflite::gpu::InferenceUsage::FAST_SINGLE_ANSWER;
+      break;
+    }
+    case mediapipe::InferenceCalculatorOptions::Delegate::Gpu::
+        SUSTAINED_SPEED: {
-  options.usage = tflite::gpu::InferenceUsage::SUSTAINED_SPEED;
+      options.usage = tflite::gpu::InferenceUsage::SUSTAINED_SPEED;
+      break;
+    }
+    case mediapipe::InferenceCalculatorOptions::Delegate::Gpu::UNSPECIFIED: {
+      return absl::InternalError("inference usage need to be specified.");
+    }
+  }
   tflite_gpu_runner_ = std::make_unique<tflite::gpu::TFLiteGPURunner>(options);
   switch (tflite_gpu_runner_api_) {
+    case mediapipe::InferenceCalculatorOptions::Delegate::Gpu::ANY: {
+      // Do not need to force any specific API.
+      break;
+    }
     case mediapipe::InferenceCalculatorOptions::Delegate::Gpu::OPENGL: {
       tflite_gpu_runner_->ForceOpenGL();
       break;
@@ -264,10 +328,6 @@ absl::Status InferenceCalculatorGlImpl::InitTFLiteGPURunner(
       tflite_gpu_runner_->ForceOpenCL();
       break;
     }
-    case mediapipe::InferenceCalculatorOptions::Delegate::Gpu::ANY: {
-      // Do not need to force any specific API.
-      break;
-    }
   }
   MP_RETURN_IF_ERROR(tflite_gpu_runner_->InitializeWithModel(
       model, op_resolver, /*allow_quant_ops=*/true));
@@ -282,7 +342,7 @@ absl::Status InferenceCalculatorGlImpl::InitTFLiteGPURunner(
                            tflite_gpu_runner_->GetOutputShapes()[i].c};
   }
 
-  MP_RETURN_IF_ERROR(ReadKernelsFromFile());
+  MP_RETURN_IF_ERROR(ReadGpuCaches());
 
   MP_RETURN_IF_ERROR(tflite_gpu_runner_->Build());
 
@@ -306,11 +366,19 @@ absl::Status InferenceCalculatorGlImpl::LoadModel(CalculatorContext* cc) {
       cc->Options<mediapipe::InferenceCalculatorOptions>().cpu_num_thread());
 #endif  // __EMSCRIPTEN__
 
+  return absl::OkStatus();
+}
+
+absl::Status InferenceCalculatorGlImpl::LoadDelegateAndAllocateTensors(
+    CalculatorContext* cc) {
+  MP_RETURN_IF_ERROR(LoadDelegate(cc));
+
+  // AllocateTensors() can be called only after ModifyGraphWithDelegate.
   RET_CHECK_EQ(interpreter_->AllocateTensors(), kTfLiteOk);
   // TODO: Support quantized tensors.
-  CHECK(interpreter_->tensor(interpreter_->inputs()[0])->quantization.type !=
-        kTfLiteAffineQuantization);
+  RET_CHECK_NE(
+      interpreter_->tensor(interpreter_->inputs()[0])->quantization.type,
+      kTfLiteAffineQuantization);
 
   return absl::OkStatus();
 }
@@ -92,6 +92,7 @@ class InferenceCalculatorMetalImpl
  private:
   absl::Status LoadModel(CalculatorContext* cc);
   absl::Status LoadDelegate(CalculatorContext* cc);
+  absl::Status LoadDelegateAndAllocateTensors(CalculatorContext* cc);
 
   // TfLite requires us to keep the model alive as long as the interpreter is.
   Packet<TfLiteModelPtr> model_packet_;
@@ -130,8 +131,7 @@ absl::Status InferenceCalculatorMetalImpl::Open(CalculatorContext* cc) {
 
   gpu_helper_ = [[MPPMetalHelper alloc] initWithCalculatorContext:cc];
   RET_CHECK(gpu_helper_);
-  MP_RETURN_IF_ERROR(LoadDelegate(cc));
-  return absl::OkStatus();
+  return LoadDelegateAndAllocateTensors(cc);
 }
 
 absl::Status InferenceCalculatorMetalImpl::Process(CalculatorContext* cc) {
@@ -212,11 +212,19 @@ absl::Status InferenceCalculatorMetalImpl::LoadModel(CalculatorContext* cc) {
   interpreter_->SetNumThreads(
       cc->Options<mediapipe::InferenceCalculatorOptions>().cpu_num_thread());
 
+  return absl::OkStatus();
+}
+
+absl::Status InferenceCalculatorMetalImpl::LoadDelegateAndAllocateTensors(
+    CalculatorContext* cc) {
+  MP_RETURN_IF_ERROR(LoadDelegate(cc));
+
+  // AllocateTensors() can be called only after ModifyGraphWithDelegate.
   RET_CHECK_EQ(interpreter_->AllocateTensors(), kTfLiteOk);
   // TODO: Support quantized tensors.
-  CHECK(interpreter_->tensor(interpreter_->inputs()[0])->quantization.type !=
-        kTfLiteAffineQuantization);
+  RET_CHECK_NE(
+      interpreter_->tensor(interpreter_->inputs()[0])->quantization.type,
+      kTfLiteAffineQuantization);
 
   return absl::OkStatus();
 }
@@ -236,6 +244,7 @@ absl::Status InferenceCalculatorMetalImpl::LoadDelegate(CalculatorContext* cc) {
       TfLiteDelegatePtr(TFLGpuDelegateCreate(&options), &TFLGpuDelegateDelete);
   RET_CHECK_EQ(interpreter_->ModifyGraphWithDelegate(delegate_.get()),
                kTfLiteOk);
+
   id<MTLDevice> device = gpu_helper_.mtlDevice;
 
   // Get input image sizes.
101
mediapipe/calculators/tensor/landmarks_to_tensor_calculator.cc
Normal file
101
mediapipe/calculators/tensor/landmarks_to_tensor_calculator.cc
Normal file
|
@@ -0,0 +1,101 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "mediapipe/calculators/tensor/landmarks_to_tensor_calculator.h"

#include <memory>

#include "mediapipe/calculators/tensor/landmarks_to_tensor_calculator.pb.h"
#include "mediapipe/framework/api2/node.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/formats/tensor.h"
#include "mediapipe/framework/port/ret_check.h"

namespace mediapipe {
namespace api2 {

namespace {

float GetAttribute(
    const Landmark& landmark,
    const LandmarksToTensorCalculatorOptions::Attribute& attribute) {
  switch (attribute) {
    case LandmarksToTensorCalculatorOptions::X:
      return landmark.x();
    case LandmarksToTensorCalculatorOptions::Y:
      return landmark.y();
    case LandmarksToTensorCalculatorOptions::Z:
      return landmark.z();
    case LandmarksToTensorCalculatorOptions::VISIBILITY:
      return landmark.visibility();
    case LandmarksToTensorCalculatorOptions::PRESENCE:
      return landmark.presence();
  }
}

}  // namespace

class LandmarksToTensorCalculatorImpl
    : public NodeImpl<LandmarksToTensorCalculator> {
 public:
  absl::Status Open(CalculatorContext* cc) override {
    options_ = cc->Options<LandmarksToTensorCalculatorOptions>();
    RET_CHECK(options_.attributes_size() > 0)
        << "At least one attribute must be specified";
    return absl::OkStatus();
  }

  absl::Status Process(CalculatorContext* cc) override {
    if (kInLandmarkList(cc).IsEmpty()) {
      return absl::OkStatus();
    }

    // Get input landmarks.
    const auto& in_landmarks = *kInLandmarkList(cc);

    // Determine tensor shape.
    const int n_landmarks = in_landmarks.landmark_size();
    const int n_attributes = options_.attributes_size();
    auto tensor_shape = options_.flatten()
                            ? Tensor::Shape{1, n_landmarks * n_attributes}
                            : Tensor::Shape{1, n_landmarks, n_attributes};

    // Create empty tensor.
    Tensor tensor(Tensor::ElementType::kFloat32, tensor_shape);
    auto* buffer = tensor.GetCpuWriteView().buffer<float>();

    // Fill tensor with landmark attributes.
    for (int i = 0; i < n_landmarks; ++i) {
      for (int j = 0; j < n_attributes; ++j) {
        buffer[i * n_attributes + j] =
            GetAttribute(in_landmarks.landmark(i), options_.attributes(j));
      }
    }

    // Return vector with a single tensor.
    auto result = std::vector<Tensor>();
    result.push_back(std::move(tensor));
    kOutTensors(cc).Send(std::move(result));

    return absl::OkStatus();
  }

 private:
  LandmarksToTensorCalculatorOptions options_;
};
MEDIAPIPE_NODE_IMPLEMENTATION(LandmarksToTensorCalculatorImpl);

}  // namespace api2
}  // namespace mediapipe
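Editor's note: the calculator above writes attributes row-major, landmark by landmark (index `i * n_attributes + j`). Below is a minimal standalone C++ sketch of that same layout for readers who want to sanity-check it outside the calculator framework; the `LandmarkLike` struct and `Attribute` enum are illustrative stand-ins, not MediaPipe types.

```cpp
#include <cstdio>
#include <vector>

// Hypothetical stand-in for mediapipe::Landmark; only the fields used here.
struct LandmarkLike {
  float x, y, z, visibility, presence;
};

enum class Attribute { kX, kY, kZ, kVisibility, kPresence };

float GetAttribute(const LandmarkLike& lm, Attribute a) {
  switch (a) {
    case Attribute::kX: return lm.x;
    case Attribute::kY: return lm.y;
    case Attribute::kZ: return lm.z;
    case Attribute::kVisibility: return lm.visibility;
    case Attribute::kPresence: return lm.presence;
  }
  return 0.0f;
}

int main() {
  std::vector<LandmarkLike> landmarks = {{1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}};
  std::vector<Attribute> attrs = {Attribute::kX, Attribute::kY, Attribute::kZ};

  // Same row-major layout the calculator writes: index = i * n_attributes + j.
  std::vector<float> buffer(landmarks.size() * attrs.size());
  for (size_t i = 0; i < landmarks.size(); ++i) {
    for (size_t j = 0; j < attrs.size(); ++j) {
      buffer[i * attrs.size() + j] = GetAttribute(landmarks[i], attrs[j]);
    }
  }
  for (float v : buffer) std::printf("%.1f ", v);  // prints: 1 2 3 6 7 8
  std::printf("\n");
}
```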
@@ -0,0 +1,61 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef MEDIAPIPE_CALCULATORS_LANDMARKS_TO_TENSOR_CALCULATOR_H_
#define MEDIAPIPE_CALCULATORS_LANDMARKS_TO_TENSOR_CALCULATOR_H_

#include <memory>

#include "mediapipe/framework/api2/node.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/formats/tensor.h"

namespace mediapipe {
namespace api2 {

// A calculator for converting landmarks into a Tensor.
//
// Input:
//   LANDMARKS - LandmarkList
//     Landmarks to be converted into a Tensor.
//
// Output:
//   TENSORS - std::vector<Tensor>
//     Vector containing a single Tensor populated with landmark values.
//
// Example:
// node {
//   calculator: "LandmarksToTensorCalculator"
//   input_stream: "LANDMARKS:landmarks"
//   output_stream: "TENSORS:tensors"
//   options: {
//     [mediapipe.LandmarksToTensorCalculatorOptions.ext] {
//       attributes: [X, Y, Z, VISIBILITY, PRESENCE]
//       # flatten: true
//     }
//   }
// }
class LandmarksToTensorCalculator : public NodeIntf {
 public:
  static constexpr Input<LandmarkList>::Optional kInLandmarkList{"LANDMARKS"};
  static constexpr Output<std::vector<Tensor>> kOutTensors{"TENSORS"};
  MEDIAPIPE_NODE_INTERFACE(LandmarksToTensorCalculator, kInLandmarkList,
                           kOutTensors);
};

}  // namespace api2
}  // namespace mediapipe

#endif  // MEDIAPIPE_CALCULATORS_LANDMARKS_TO_TENSOR_CALCULATOR_H_
@@ -0,0 +1,44 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// The option proto for the LandmarksToTensorCalculator.

syntax = "proto2";

package mediapipe;

import "mediapipe/framework/calculator.proto";

message LandmarksToTensorCalculatorOptions {
  extend mediapipe.CalculatorOptions {
    optional LandmarksToTensorCalculatorOptions ext = 394810235;
  }

  enum Attribute {
    X = 0;
    Y = 1;
    Z = 2;
    VISIBILITY = 3;
    PRESENCE = 4;
  }

  // Subset and order of attributes as they should appear in the output Tensor.
  // Should contain at least one attribute.
  repeated Attribute attributes = 1;

  // Collapses all landmark attributes into a one dimensional tensor (i.e.
  // switches from (n_landmarks, n_attributes) to (n_landmarks * n_attributes)
  // representation).
  optional bool flatten = 2 [default = false];
}
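Editor's note, a concrete shape check for these options: with `attributes: [X, Y, Z, VISIBILITY, PRESENCE]` and, say, a 33-landmark pose topology (an illustrative count, not something this proto mandates), the calculator above produces a `[1, 33, 5]` tensor; with `flatten: true` the same data becomes `[1, 165]`, since 33 × 5 = 165, matching the `Tensor::Shape` branch in `Process()`.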
@@ -0,0 +1,155 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <vector>

#include "absl/memory/memory.h"
#include "mediapipe/calculators/tensor/landmarks_to_tensor_calculator.pb.h"
#include "mediapipe/framework/calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_runner.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/formats/tensor.h"
#include "mediapipe/framework/port/gmock.h"
#include "mediapipe/framework/port/gtest.h"
#include "mediapipe/framework/port/parse_text_proto.h"
#include "mediapipe/framework/port/status_matchers.h"

namespace mediapipe {
namespace {

using ::mediapipe::ParseTextProtoOrDie;
using Node = ::mediapipe::CalculatorGraphConfig::Node;

void RunLandmarks(mediapipe::CalculatorRunner* runner,
                  const LandmarkList& landmarks) {
  runner->MutableInputs()
      ->Tag("LANDMARKS")
      .packets.push_back(MakePacket<LandmarkList>(landmarks).At(Timestamp(0)));
  MP_ASSERT_OK(runner->Run());
}

const Tensor& GetOutputTensor(mediapipe::CalculatorRunner* runner) {
  const auto& output_packets = runner->Outputs().Tag("TENSORS").packets;
  EXPECT_EQ(output_packets.size(), 1);

  const auto& tensors = output_packets[0].Get<std::vector<Tensor>>();
  EXPECT_EQ(tensors.size(), 1);

  return tensors[0];
}

void ValidateTensor(const Tensor& tensor,
                    const std::vector<int>& expected_shape,
                    const std::vector<float>& expected_values) {
  EXPECT_EQ(tensor.shape().dims, expected_shape);
  EXPECT_EQ(tensor.shape().num_elements(), expected_values.size());

  auto* tensor_buffer = tensor.GetCpuReadView().buffer<float>();
  const std::vector<float> tensor_values(
      tensor_buffer, tensor_buffer + tensor.shape().num_elements());
  EXPECT_THAT(tensor_values, testing::ElementsAreArray(expected_values));
}

TEST(LandmarksToTensorCalculatorTest, AllAttributes) {
  mediapipe::CalculatorRunner runner(ParseTextProtoOrDie<Node>(R"pb(
    calculator: "LandmarksToTensorCalculator"
    input_stream: "LANDMARKS:landmarks"
    output_stream: "TENSORS:tensors"
    options: {
      [mediapipe.LandmarksToTensorCalculatorOptions.ext] {
        attributes: [ X, Y, Z, VISIBILITY, PRESENCE ]
      }
    }
  )pb"));

  LandmarkList landmarks;
  auto* landmark1 = landmarks.add_landmark();
  landmark1->set_x(1.0f);
  landmark1->set_y(2.0f);
  landmark1->set_z(3.0f);
  landmark1->set_visibility(4.0f);
  landmark1->set_presence(5.0f);
  auto* landmark2 = landmarks.add_landmark();
  landmark2->set_x(6.0f);
  landmark2->set_y(7.0f);
  landmark2->set_z(8.0f);
  landmark2->set_visibility(9.0f);
  landmark2->set_presence(10.0f);

  RunLandmarks(&runner, landmarks);
  const auto& tensor = GetOutputTensor(&runner);
  ValidateTensor(tensor, /*expected_shape=*/{1, 2, 5}, /*expected_values=*/
                 {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f});
}

TEST(LandmarksToTensorCalculatorTest, XYZAttributes) {
  mediapipe::CalculatorRunner runner(ParseTextProtoOrDie<Node>(R"pb(
    calculator: "LandmarksToTensorCalculator"
    input_stream: "LANDMARKS:landmarks"
    output_stream: "TENSORS:tensors"
    options: {
      [mediapipe.LandmarksToTensorCalculatorOptions.ext] {
        attributes: [ X, Y, Z ]
      }
    }
  )pb"));

  LandmarkList landmarks;
  auto* landmark1 = landmarks.add_landmark();
  landmark1->set_x(1.0f);
  landmark1->set_y(2.0f);
  landmark1->set_z(3.0f);
  auto* landmark2 = landmarks.add_landmark();
  landmark2->set_x(6.0f);
  landmark2->set_y(7.0f);
  landmark2->set_z(8.0f);

  RunLandmarks(&runner, landmarks);
  const auto& tensor = GetOutputTensor(&runner);
  ValidateTensor(tensor, /*expected_shape=*/{1, 2, 3}, /*expected_values=*/
                 {1.0f, 2.0f, 3.0f, 6.0f, 7.0f, 8.0f});
}

TEST(LandmarksToTensorCalculatorTest, XYZAttributes_Flatten) {
  mediapipe::CalculatorRunner runner(ParseTextProtoOrDie<Node>(R"pb(
    calculator: "LandmarksToTensorCalculator"
    input_stream: "LANDMARKS:landmarks"
    output_stream: "TENSORS:tensors"
    options: {
      [mediapipe.LandmarksToTensorCalculatorOptions.ext] {
        attributes: [ X, Y, Z ]
        flatten: true
      }
    }
  )pb"));

  LandmarkList landmarks;
  auto* landmark1 = landmarks.add_landmark();
  landmark1->set_x(1.0f);
  landmark1->set_y(2.0f);
  landmark1->set_z(3.0f);
  auto* landmark2 = landmarks.add_landmark();
  landmark2->set_x(6.0f);
  landmark2->set_y(7.0f);
  landmark2->set_z(8.0f);

  RunLandmarks(&runner, landmarks);
  const auto& tensor = GetOutputTensor(&runner);
  ValidateTensor(tensor, /*expected_shape=*/{1, 6}, /*expected_values=*/
                 {1.0f, 2.0f, 3.0f, 6.0f, 7.0f, 8.0f});
}

}  // namespace
}  // namespace mediapipe
@@ -517,8 +517,8 @@ absl::Status TensorConverterCalculator::InitGpu(CalculatorContext* cc) {
     uniform sampler2D frame;

     void main() {
-      $1 // flip
-      vec4 pixel = texture2D(frame, sample_coordinate);
+      vec2 coord = $1
+      vec4 pixel = texture2D(frame, coord);
       $2 // normalize [-1,1]
       fragColor.r = pixel.r; // r channel
       $3 // g & b channels
@@ -526,8 +526,9 @@ absl::Status TensorConverterCalculator::InitGpu(CalculatorContext* cc) {
     })",
       /*$0=*/single_channel ? "vec1" : "vec4",
       /*$1=*/
-      flip_vertically_ ? "sample_coordinate.y = 1.0 - sample_coordinate.y;"
-                       : "",
+      flip_vertically_
+          ? "vec2(sample_coordinate.x, 1.0 - sample_coordinate.y);"
+          : "sample_coordinate;",
       /*$2=*/output_range_.has_value()
           ? absl::Substitute("pixel = pixel * float($0) + float($1);",
                              (output_range_->second - output_range_->first),
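Editor's note on the hunk above: the generated fragment shader previously flipped by writing to `sample_coordinate`, an input varying, which is read-only in many GLSL profiles; after the change, the `$1` substitution produces an expression that initializes a local `coord` instead. A small sketch (plain C++ string handling, only to show the resulting GLSL text) of what the shader body now looks like for both flip settings:

```cpp
#include <iostream>
#include <string>

int main() {
  // What the calculator substitutes for $1 after this change (see diff above).
  const bool flip_vertically = true;
  const std::string coord_expr =
      flip_vertically ? "vec2(sample_coordinate.x, 1.0 - sample_coordinate.y);"
                      : "sample_coordinate;";

  // Resulting fragment-shader lines: the flip is now an expression feeding a
  // local vec2 rather than a write to the varying itself.
  std::cout << "  vec2 coord = " << coord_expr << "\n"
            << "  vec4 pixel = texture2D(frame, coord);\n";
}
```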
@@ -670,7 +670,8 @@ absl::Status TensorsToDetectionsCalculator::ConvertToDetections(
         detection_boxes[box_offset + 2], detection_boxes[box_offset + 3],
         detection_scores[i], detection_classes[i], options_.flip_vertically());
     const auto& bbox = detection.location_data().relative_bounding_box();
-    if (bbox.width() < 0 || bbox.height() < 0) {
+    if (bbox.width() < 0 || bbox.height() < 0 || std::isnan(bbox.width()) ||
+        std::isnan(bbox.height())) {
       // Decoded detection boxes could have negative values for width/height due
       // to model prediction. Filter out those boxes since some downstream
       // calculators may assume non-negative values. (b/171391719)
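Editor's note: the extra `std::isnan` checks matter because every ordered comparison involving NaN evaluates to false, so a NaN width or height would slip past the `< 0` test alone. A tiny self-contained illustration:

```cpp
#include <cmath>
#include <iostream>
#include <limits>

int main() {
  const float w = std::numeric_limits<float>::quiet_NaN();
  std::cout << std::boolalpha
            << (w < 0) << "\n"         // false: NaN is not "less than 0"
            << std::isnan(w) << "\n";  // true: only isnan catches it
}
```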
@@ -138,7 +138,6 @@ using ::tflite::gpu::gl::GlShader;
 //   }
 // }
 //
-// Currently only OpenGLES 3.1 and CPU backends supported.
 // TODO Refactor and add support for other backends/platforms.
 //
 class TensorsToSegmentationCalculator : public CalculatorBase {
@@ -88,6 +88,13 @@ proto_library(
     deps = ["//mediapipe/framework:calculator_proto"],
 )

+proto_library(
+    name = "tensor_to_vector_string_calculator_options_proto",
+    srcs = ["tensor_to_vector_string_calculator_options.proto"],
+    visibility = ["//visibility:public"],
+    deps = ["//mediapipe/framework:calculator_proto"],
+)
+
 proto_library(
     name = "unpack_media_sequence_calculator_proto",
     srcs = ["unpack_media_sequence_calculator.proto"],
@@ -257,6 +264,14 @@ mediapipe_cc_proto_library(
     deps = [":tensor_to_vector_float_calculator_options_proto"],
 )

+mediapipe_cc_proto_library(
+    name = "tensor_to_vector_string_calculator_options_cc_proto",
+    srcs = ["tensor_to_vector_string_calculator_options.proto"],
+    cc_deps = ["//mediapipe/framework:calculator_cc_proto"],
+    visibility = ["//visibility:public"],
+    deps = [":tensor_to_vector_string_calculator_options_proto"],
+)
+
 mediapipe_cc_proto_library(
     name = "unpack_media_sequence_calculator_cc_proto",
     srcs = ["unpack_media_sequence_calculator.proto"],
@@ -572,9 +587,21 @@ cc_library(
         "//mediapipe/framework/port:ret_check",
     ] + select({
         "//conditions:default": [
-            "//mediapipe/framework/port:file_helpers",
         ],
-    }),
+        "//mediapipe:android": [],
+    }) + select(
+        {
+            "//conditions:default": [
+            ],
+        },
+    ) + select(
+        {
+            "//conditions:default": [
+            ],
+            "//mediapipe:android": [
+            ],
+        },
+    ),
     alwayslink = 1,
 )
@@ -694,6 +721,26 @@ cc_library(
     alwayslink = 1,
 )

+cc_library(
+    name = "tensor_to_vector_string_calculator",
+    srcs = ["tensor_to_vector_string_calculator.cc"],
+    visibility = ["//visibility:public"],
+    deps = [
+        "//mediapipe/framework:calculator_framework",
+        "//mediapipe/framework/port:status",
+        "//mediapipe/framework/port:ret_check",
+        ":tensor_to_vector_string_calculator_options_cc_proto",
+    ] + select({
+        "//conditions:default": [
+            "@org_tensorflow//tensorflow/core:framework",
+        ],
+        "//mediapipe:android": [
+            "@org_tensorflow//tensorflow/core:portable_tensorflow_lib_lite",
+        ],
+    }),
+    alwayslink = 1,
+)
+
 cc_library(
     name = "unpack_media_sequence_calculator",
     srcs = ["unpack_media_sequence_calculator.cc"],
@@ -864,6 +911,7 @@ cc_test(
         "//mediapipe/calculators/tensorflow:pack_media_sequence_calculator_cc_proto",
         "//mediapipe/framework:calculator_framework",
         "//mediapipe/framework:calculator_runner",
+        "//mediapipe/framework:timestamp",
         "//mediapipe/framework/formats:detection_cc_proto",
         "//mediapipe/framework/formats:image_frame",
         "//mediapipe/framework/formats:image_frame_opencv",
@@ -1058,6 +1106,20 @@ cc_test(
     ],
 )

+cc_test(
+    name = "tensor_to_vector_string_calculator_test",
+    srcs = ["tensor_to_vector_string_calculator_test.cc"],
+    deps = [
+        ":tensor_to_vector_string_calculator",
+        ":tensor_to_vector_string_calculator_options_cc_proto",
+        "//mediapipe/framework:calculator_framework",
+        "//mediapipe/framework:calculator_runner",
+        "//mediapipe/framework/port:gtest_main",
+        "@org_tensorflow//tensorflow/core:framework",
+        "@org_tensorflow//tensorflow/core:protos_all_cc",
+    ],
+)
+
 cc_test(
     name = "unpack_media_sequence_calculator_test",
     srcs = ["unpack_media_sequence_calculator_test.cc"],
@@ -37,6 +37,7 @@ const char kSequenceExampleTag[] = "SEQUENCE_EXAMPLE";
 const char kImageTag[] = "IMAGE";
 const char kFloatContextFeaturePrefixTag[] = "FLOAT_CONTEXT_FEATURE_";
 const char kFloatFeaturePrefixTag[] = "FLOAT_FEATURE_";
+const char kBytesFeaturePrefixTag[] = "BYTES_FEATURE_";
 const char kForwardFlowEncodedTag[] = "FORWARD_FLOW_ENCODED";
 const char kBBoxTag[] = "BBOX";
 const char kKeypointsTag[] = "KEYPOINTS";
@@ -153,6 +154,9 @@ class PackMediaSequenceCalculator : public CalculatorBase {
       if (absl::StartsWith(tag, kFloatFeaturePrefixTag)) {
         cc->Inputs().Tag(tag).Set<std::vector<float>>();
       }
+      if (absl::StartsWith(tag, kBytesFeaturePrefixTag)) {
+        cc->Inputs().Tag(tag).Set<std::vector<std::string>>();
+      }
     }

     CHECK(cc->Outputs().HasTag(kSequenceExampleTag) ||
@@ -231,6 +235,13 @@ class PackMediaSequenceCalculator : public CalculatorBase {
         mpms::ClearFeatureFloats(key, sequence_.get());
         mpms::ClearFeatureTimestamp(key, sequence_.get());
       }
+      if (absl::StartsWith(tag, kBytesFeaturePrefixTag)) {
+        std::string key = tag.substr(sizeof(kBytesFeaturePrefixTag) /
+                                         sizeof(*kBytesFeaturePrefixTag) -
+                                     1);
+        mpms::ClearFeatureBytes(key, sequence_.get());
+        mpms::ClearFeatureTimestamp(key, sequence_.get());
+      }
       if (absl::StartsWith(tag, kKeypointsTag)) {
         std::string key =
             tag.substr(sizeof(kKeypointsTag) / sizeof(*kKeypointsTag) - 1);
@@ -243,11 +254,6 @@ class PackMediaSequenceCalculator : public CalculatorBase {
       }
     }

-    if (cc->Outputs().HasTag(kSequenceExampleTag)) {
-      cc->Outputs()
-          .Tag(kSequenceExampleTag)
-          .SetNextTimestampBound(Timestamp::Max());
-    }
     return absl::OkStatus();
   }
@@ -305,7 +311,9 @@ class PackMediaSequenceCalculator : public CalculatorBase {
     if (cc->Outputs().HasTag(kSequenceExampleTag)) {
       cc->Outputs()
           .Tag(kSequenceExampleTag)
-          .Add(sequence_.release(), Timestamp::PostStream());
+          .Add(sequence_.release(), options.output_as_zero_timestamp()
+                                        ? Timestamp(0ll)
+                                        : Timestamp::PostStream());
     }
     sequence_.reset();
@@ -408,6 +416,17 @@ class PackMediaSequenceCalculator : public CalculatorBase {
           cc->Inputs().Tag(tag).Get<std::vector<float>>(),
           sequence_.get());
       }
+      if (absl::StartsWith(tag, kBytesFeaturePrefixTag) &&
+          !cc->Inputs().Tag(tag).IsEmpty()) {
+        std::string key = tag.substr(sizeof(kBytesFeaturePrefixTag) /
+                                         sizeof(*kBytesFeaturePrefixTag) -
+                                     1);
+        mpms::AddFeatureTimestamp(key, cc->InputTimestamp().Value(),
+                                  sequence_.get());
+        mpms::AddFeatureBytes(
+            key, cc->Inputs().Tag(tag).Get<std::vector<std::string>>(),
+            sequence_.get());
+      }
       if (absl::StartsWith(tag, kBBoxTag) && !cc->Inputs().Tag(tag).IsEmpty()) {
         std::string key = "";
         if (tag != kBBoxTag) {
@@ -65,4 +65,7 @@ message PackMediaSequenceCalculatorOptions {
   // If true, will return an error status if an output sequence would be too
   // many bytes to serialize.
   optional bool skip_large_sequences = 7 [default = true];
+
+  // If true/false, outputs the SequenceExample at timestamp 0/PostStream.
+  optional bool output_as_zero_timestamp = 8 [default = false];
 }
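Editor's note: the `sizeof(kBytesFeaturePrefixTag) / sizeof(*kBytesFeaturePrefixTag) - 1` expression used in the hunks above is simply the length of the prefix literal (sizeof counts the trailing NUL), so `key` ends up being whatever follows `BYTES_FEATURE_` in the stream tag. A minimal standalone sketch of that key extraction, with an illustrative tag value:

```cpp
#include <iostream>
#include <string>

constexpr char kBytesFeaturePrefixTag[] = "BYTES_FEATURE_";

int main() {
  const std::string tag = "BYTES_FEATURE_TEST";  // hypothetical stream tag
  // sizeof includes the trailing '\0', so subtract 1 to get the prefix length.
  const std::string key = tag.substr(
      sizeof(kBytesFeaturePrefixTag) / sizeof(*kBytesFeaturePrefixTag) - 1);
  std::cout << key << "\n";  // prints "TEST"
}
```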
@@ -29,6 +29,7 @@
 #include "mediapipe/framework/port/gtest.h"
 #include "mediapipe/framework/port/opencv_imgcodecs_inc.h"
 #include "mediapipe/framework/port/status_matchers.h"
+#include "mediapipe/framework/timestamp.h"
 #include "mediapipe/util/sequence/media_sequence.h"
 #include "tensorflow/core/example/example.pb.h"
 #include "tensorflow/core/example/feature.pb.h"
@@ -39,12 +40,33 @@ namespace {
 namespace tf = ::tensorflow;
 namespace mpms = mediapipe::mediasequence;

+constexpr char kBboxTag[] = "BBOX";
+constexpr char kEncodedMediaStartTimestampTag[] =
+    "ENCODED_MEDIA_START_TIMESTAMP";
+constexpr char kEncodedMediaTag[] = "ENCODED_MEDIA";
+constexpr char kClassSegmentationTag[] = "CLASS_SEGMENTATION";
+constexpr char kKeypointsTestTag[] = "KEYPOINTS_TEST";
+constexpr char kBboxPredictedTag[] = "BBOX_PREDICTED";
+constexpr char kAudioOtherTag[] = "AUDIO_OTHER";
+constexpr char kAudioTestTag[] = "AUDIO_TEST";
+constexpr char kBytesFeatureOtherTag[] = "BYTES_FEATURE_OTHER";
+constexpr char kBytesFeatureTestTag[] = "BYTES_FEATURE_TEST";
+constexpr char kForwardFlowEncodedTag[] = "FORWARD_FLOW_ENCODED";
+constexpr char kFloatContextFeatureOtherTag[] = "FLOAT_CONTEXT_FEATURE_OTHER";
+constexpr char kFloatContextFeatureTestTag[] = "FLOAT_CONTEXT_FEATURE_TEST";
+constexpr char kFloatFeatureOtherTag[] = "FLOAT_FEATURE_OTHER";
+constexpr char kFloatFeatureTestTag[] = "FLOAT_FEATURE_TEST";
+constexpr char kImagePrefixTag[] = "IMAGE_PREFIX";
+constexpr char kSequenceExampleTag[] = "SEQUENCE_EXAMPLE";
+constexpr char kImageTag[] = "IMAGE";

 class PackMediaSequenceCalculatorTest : public ::testing::Test {
  protected:
  void SetUpCalculator(const std::vector<std::string>& input_streams,
                       const tf::Features& features,
-                      bool output_only_if_all_present,
-                      bool replace_instead_of_append) {
+                      const bool output_only_if_all_present,
+                      const bool replace_instead_of_append,
+                      const bool output_as_zero_timestamp = false) {
    CalculatorGraphConfig::Node config;
    config.set_calculator("PackMediaSequenceCalculator");
    config.add_input_side_packet("SEQUENCE_EXAMPLE:input_sequence");
@@ -57,6 +79,7 @@ class PackMediaSequenceCalculatorTest : public ::testing::Test {
    *options->mutable_context_feature_map() = features;
    options->set_output_only_if_all_present(output_only_if_all_present);
    options->set_replace_data_instead_of_append(replace_instead_of_append);
+   options->set_output_as_zero_timestamp(output_as_zero_timestamp);
    runner_ = ::absl::make_unique<CalculatorRunner>(config);
  }
@@ -80,17 +103,17 @@ TEST_F(PackMediaSequenceCalculatorTest, PacksTwoImages) {
-    runner_->MutableInputs()->Tag("IMAGE").packets.push_back(
+    runner_->MutableInputs()->Tag(kImageTag).packets.push_back(
         Adopt(image_ptr.release()).At(Timestamp(i)));
   }
-  runner_->MutableSidePackets()->Tag("SEQUENCE_EXAMPLE") =
+  runner_->MutableSidePackets()->Tag(kSequenceExampleTag) =
       Adopt(input_sequence.release());
   const std::vector<Packet>& output_packets =
-      runner_->Outputs().Tag("SEQUENCE_EXAMPLE").packets;
+      runner_->Outputs().Tag(kSequenceExampleTag).packets;
   ASSERT_EQ(1, output_packets.size());
@@ -124,17 +147,17 @@ TEST_F(PackMediaSequenceCalculatorTest, PacksTwoPrefixedImages) {
   runner_->MutableInputs()
-      ->Tag("IMAGE_PREFIX")
+      ->Tag(kImagePrefixTag)
       .packets.push_back(Adopt(image_ptr.release()).At(Timestamp(i)));
-  runner_->MutableSidePackets()->Tag("SEQUENCE_EXAMPLE") =
+  runner_->MutableSidePackets()->Tag(kSequenceExampleTag) =
       Adopt(input_sequence.release());
   const std::vector<Packet>& output_packets =
-      runner_->Outputs().Tag("SEQUENCE_EXAMPLE").packets;
+      runner_->Outputs().Tag(kSequenceExampleTag).packets;
@@ -158,21 +181,21 @@ TEST_F(PackMediaSequenceCalculatorTest, PacksTwoFloatLists) {
   runner_->MutableInputs()
-      ->Tag("FLOAT_FEATURE_TEST")
+      ->Tag(kFloatFeatureTestTag)
       .packets.push_back(Adopt(vf_ptr.release()).At(Timestamp(i)));
   runner_->MutableInputs()
-      ->Tag("FLOAT_FEATURE_OTHER")
+      ->Tag(kFloatFeatureOtherTag)
       .packets.push_back(Adopt(vf_ptr.release()).At(Timestamp(i)));
-  runner_->MutableSidePackets()->Tag("SEQUENCE_EXAMPLE") =
+  runner_->MutableSidePackets()->Tag(kSequenceExampleTag) =
       Adopt(input_sequence.release());
   const std::vector<Packet>& output_packets =
-      runner_->Outputs().Tag("SEQUENCE_EXAMPLE").packets;
+      runner_->Outputs().Tag(kSequenceExampleTag).packets;
@@ -194,20 +217,65 @@ TEST_F(PackMediaSequenceCalculatorTest, PacksTwoFloatLists) {
   }
 }

-TEST_F(PackMediaSequenceCalculatorTest, PacksTwoContextFloatLists) {
-  SetUpCalculator(
-      {"FLOAT_CONTEXT_FEATURE_TEST:test", "FLOAT_CONTEXT_FEATURE_OTHER:test2"},
-      {}, false, true);
-  auto input_sequence = absl::make_unique<tf::SequenceExample>();
-
-  auto vf_ptr = absl::make_unique<std::vector<float>>(2, 3);
-  runner_->MutableInputs()
-      ->Tag("FLOAT_CONTEXT_FEATURE_TEST")
-      .packets.push_back(Adopt(vf_ptr.release()).At(Timestamp::PostStream()));
-  vf_ptr = absl::make_unique<std::vector<float>>(2, 4);
-  runner_->MutableInputs()
-      ->Tag("FLOAT_CONTEXT_FEATURE_OTHER")
-      .packets.push_back(Adopt(vf_ptr.release()).At(Timestamp::PostStream()));
+TEST_F(PackMediaSequenceCalculatorTest, PacksTwoBytesLists) {
+  SetUpCalculator({"BYTES_FEATURE_TEST:test", "BYTES_FEATURE_OTHER:test2"}, {},
+                  false, true);
+  auto input_sequence = ::absl::make_unique<tf::SequenceExample>();
+
+  int num_timesteps = 2;
+  for (int i = 0; i < num_timesteps; ++i) {
+    auto vs_ptr = ::absl::make_unique<std::vector<std::string>>(
+        2, absl::StrCat("foo", 2 << i));
+    runner_->MutableInputs()
+        ->Tag(kBytesFeatureTestTag)
+        .packets.push_back(Adopt(vs_ptr.release()).At(Timestamp(i)));
+    vs_ptr = ::absl::make_unique<std::vector<std::string>>(
+        2, absl::StrCat("bar", 2 << i));
+    runner_->MutableInputs()
+        ->Tag(kBytesFeatureOtherTag)
+        .packets.push_back(Adopt(vs_ptr.release()).At(Timestamp(i)));
+  }
+
+  runner_->MutableSidePackets()->Tag(kSequenceExampleTag) =
+      Adopt(input_sequence.release());
+
+  MP_ASSERT_OK(runner_->Run());
+
+  const std::vector<Packet>& output_packets =
+      runner_->Outputs().Tag(kSequenceExampleTag).packets;
+  ASSERT_EQ(1, output_packets.size());
+  const tf::SequenceExample& output_sequence =
+      output_packets[0].Get<tf::SequenceExample>();
+
+  ASSERT_EQ(num_timesteps,
+            mpms::GetFeatureTimestampSize("TEST", output_sequence));
+  ASSERT_EQ(num_timesteps, mpms::GetFeatureBytesSize("TEST", output_sequence));
+  ASSERT_EQ(num_timesteps,
+            mpms::GetFeatureTimestampSize("OTHER", output_sequence));
+  ASSERT_EQ(num_timesteps, mpms::GetFeatureBytesSize("OTHER", output_sequence));
+  for (int i = 0; i < num_timesteps; ++i) {
+    ASSERT_EQ(i, mpms::GetFeatureTimestampAt("TEST", output_sequence, i));
+    ASSERT_THAT(mpms::GetFeatureBytesAt("TEST", output_sequence, i),
+                ::testing::ElementsAreArray(
+                    std::vector<std::string>(2, absl::StrCat("foo", 2 << i))));
+    ASSERT_EQ(i, mpms::GetFeatureTimestampAt("OTHER", output_sequence, i));
+    ASSERT_THAT(mpms::GetFeatureBytesAt("OTHER", output_sequence, i),
+                ::testing::ElementsAreArray(
+                    std::vector<std::string>(2, absl::StrCat("bar", 2 << i))));
+  }
+}
+
+TEST_F(PackMediaSequenceCalculatorTest, OutputAsZeroTimestamp) {
+  SetUpCalculator({"FLOAT_FEATURE_TEST:test"}, {}, false, true, true);
+  auto input_sequence = ::absl::make_unique<tf::SequenceExample>();
+
+  int num_timesteps = 2;
+  for (int i = 0; i < num_timesteps; ++i) {
+    auto vf_ptr = ::absl::make_unique<std::vector<float>>(2, 2 << i);
+    runner_->MutableInputs()
+        ->Tag("FLOAT_FEATURE_TEST")
+        .packets.push_back(Adopt(vf_ptr.release()).At(Timestamp(i)));
+  }

   runner_->MutableSidePackets()->Tag("SEQUENCE_EXAMPLE") =
       Adopt(input_sequence.release());
@@ -217,6 +285,32 @@ TEST_F(PackMediaSequenceCalculatorTest, PacksTwoContextFloatLists) {
   const std::vector<Packet>& output_packets =
       runner_->Outputs().Tag("SEQUENCE_EXAMPLE").packets;
   ASSERT_EQ(1, output_packets.size());
+  EXPECT_EQ(output_packets[0].Timestamp().Value(), 0ll);
+}
+
+TEST_F(PackMediaSequenceCalculatorTest, PacksTwoContextFloatLists) {
+  SetUpCalculator(
+      {"FLOAT_CONTEXT_FEATURE_TEST:test", "FLOAT_CONTEXT_FEATURE_OTHER:test2"},
+      {}, false, true);
+  auto input_sequence = absl::make_unique<tf::SequenceExample>();
+
+  auto vf_ptr = absl::make_unique<std::vector<float>>(2, 3);
+  runner_->MutableInputs()
+      ->Tag(kFloatContextFeatureTestTag)
+      .packets.push_back(Adopt(vf_ptr.release()).At(Timestamp::PostStream()));
+  vf_ptr = absl::make_unique<std::vector<float>>(2, 4);
+  runner_->MutableInputs()
+      ->Tag(kFloatContextFeatureOtherTag)
+      .packets.push_back(Adopt(vf_ptr.release()).At(Timestamp::PostStream()));
+
+  runner_->MutableSidePackets()->Tag(kSequenceExampleTag) =
+      Adopt(input_sequence.release());
+
+  MP_ASSERT_OK(runner_->Run());
+
+  const std::vector<Packet>& output_packets =
+      runner_->Outputs().Tag(kSequenceExampleTag).packets;
+  ASSERT_EQ(1, output_packets.size());
   const tf::SequenceExample& output_sequence =
       output_packets[0].Get<tf::SequenceExample>();
@@ -233,7 +327,7 @@ TEST_F(PackMediaSequenceCalculatorTest, PacksAdditionalContext) {
   SetUpCalculator({"IMAGE:images"}, context, false, true);

   auto input_sequence = ::absl::make_unique<tf::SequenceExample>();
-  runner_->MutableSidePackets()->Tag("SEQUENCE_EXAMPLE") =
+  runner_->MutableSidePackets()->Tag(kSequenceExampleTag) =
       Adopt(input_sequence.release());
   cv::Mat image(2, 3, CV_8UC3, cv::Scalar(0, 0, 255));
   std::vector<uchar> bytes;
@@ -242,13 +336,13 @@ TEST_F(PackMediaSequenceCalculatorTest, PacksAdditionalContext) {
   encoded_image.set_encoded_image(bytes.data(), bytes.size());
-  runner_->MutableInputs()->Tag("IMAGE").packets.push_back(
+  runner_->MutableInputs()->Tag(kImageTag).packets.push_back(
       Adopt(image_ptr.release()).At(Timestamp(0)));
   const std::vector<Packet>& output_packets =
-      runner_->Outputs().Tag("SEQUENCE_EXAMPLE").packets;
+      runner_->Outputs().Tag(kSequenceExampleTag).packets;
   ASSERT_EQ(1, output_packets.size());
@@ -281,17 +375,17 @@ TEST_F(PackMediaSequenceCalculatorTest, PacksTwoForwardFlowEncodeds) {
   runner_->MutableInputs()
-      ->Tag("FORWARD_FLOW_ENCODED")
+      ->Tag(kForwardFlowEncodedTag)
       .packets.push_back(Adopt(flow_ptr.release()).At(Timestamp(i)));
-  runner_->MutableSidePackets()->Tag("SEQUENCE_EXAMPLE") =
+  runner_->MutableSidePackets()->Tag(kSequenceExampleTag) =
       Adopt(input_sequence.release());
   const std::vector<Packet>& output_packets =
-      runner_->Outputs().Tag("SEQUENCE_EXAMPLE").packets;
+      runner_->Outputs().Tag(kSequenceExampleTag).packets;
@@ -345,17 +439,17 @@ TEST_F(PackMediaSequenceCalculatorTest, PacksTwoBBoxDetections) {
   runner_->MutableInputs()
-      ->Tag("BBOX_PREDICTED")
+      ->Tag(kBboxPredictedTag)
       .packets.push_back(Adopt(detections.release()).At(Timestamp(i)));
-  runner_->MutableSidePackets()->Tag("SEQUENCE_EXAMPLE") =
+  runner_->MutableSidePackets()->Tag(kSequenceExampleTag) =
       Adopt(input_sequence.release());
   const std::vector<Packet>& output_packets =
-      runner_->Outputs().Tag("SEQUENCE_EXAMPLE").packets;
+      runner_->Outputs().Tag(kSequenceExampleTag).packets;
@@ -424,11 +518,11 @@ TEST_F(PackMediaSequenceCalculatorTest, PacksBBoxWithoutImageDims) {
   runner_->MutableInputs()
-      ->Tag("BBOX_PREDICTED")
+      ->Tag(kBboxPredictedTag)
       .packets.push_back(Adopt(detections.release()).At(Timestamp(i)));
-  runner_->MutableSidePackets()->Tag("SEQUENCE_EXAMPLE") =
+  runner_->MutableSidePackets()->Tag(kSequenceExampleTag) =
      Adopt(input_sequence.release());
   auto status = runner_->Run();
@@ -472,7 +566,7 @@ TEST_F(PackMediaSequenceCalculatorTest, PacksBBoxWithImages) {
   runner_->MutableInputs()
-      ->Tag("BBOX_PREDICTED")
+      ->Tag(kBboxPredictedTag)
       .packets.push_back(Adopt(detections.release()).At(Timestamp(i)));
   cv::Mat image(height, width, CV_8UC3, cv::Scalar(0, 0, 255));
@@ -487,16 +581,16 @@ TEST_F(PackMediaSequenceCalculatorTest, PacksBBoxWithImages) {
-    runner_->MutableInputs()->Tag("IMAGE").packets.push_back(
+    runner_->MutableInputs()->Tag(kImageTag).packets.push_back(
         Adopt(image_ptr.release()).At(Timestamp(i)));
   }
-  runner_->MutableSidePackets()->Tag("SEQUENCE_EXAMPLE") =
+  runner_->MutableSidePackets()->Tag(kSequenceExampleTag) =
       Adopt(input_sequence.release());
   const std::vector<Packet>& output_packets =
-      runner_->Outputs().Tag("SEQUENCE_EXAMPLE").packets;
+      runner_->Outputs().Tag(kSequenceExampleTag).packets;
@@ -538,18 +632,18 @@ TEST_F(PackMediaSequenceCalculatorTest, PacksTwoKeypoints) {
   runner_->MutableInputs()
-      ->Tag("KEYPOINTS_TEST")
+      ->Tag(kKeypointsTestTag)
       .packets.push_back(PointToForeign(&points).At(Timestamp(0)));
   runner_->MutableInputs()
-      ->Tag("KEYPOINTS_TEST")
+      ->Tag(kKeypointsTestTag)
       .packets.push_back(PointToForeign(&points).At(Timestamp(1)));
-  runner_->MutableSidePackets()->Tag("SEQUENCE_EXAMPLE") =
+  runner_->MutableSidePackets()->Tag(kSequenceExampleTag) =
      Adopt(input_sequence.release());
   const std::vector<Packet>& output_packets =
-      runner_->Outputs().Tag("SEQUENCE_EXAMPLE").packets;
+      runner_->Outputs().Tag(kSequenceExampleTag).packets;
@@ -589,17 +683,17 @@ TEST_F(PackMediaSequenceCalculatorTest, PacksTwoMaskDetections) {
   runner_->MutableInputs()
-      ->Tag("CLASS_SEGMENTATION")
+      ->Tag(kClassSegmentationTag)
       .packets.push_back(Adopt(detections.release()).At(Timestamp(i)));
-  runner_->MutableSidePackets()->Tag("SEQUENCE_EXAMPLE") =
+  runner_->MutableSidePackets()->Tag(kSequenceExampleTag) =
      Adopt(input_sequence.release());
   const std::vector<Packet>& output_packets =
-      runner_->Outputs().Tag("SEQUENCE_EXAMPLE").packets;
+      runner_->Outputs().Tag(kSequenceExampleTag).packets;
@@ -638,17 +732,17 @@ TEST_F(PackMediaSequenceCalculatorTest, MissingStreamOK) {
   runner_->MutableInputs()
-      ->Tag("FORWARD_FLOW_ENCODED")
+      ->Tag(kForwardFlowEncodedTag)
       .packets.push_back(Adopt(flow_ptr.release()).At(Timestamp(i)));
-  runner_->MutableSidePackets()->Tag("SEQUENCE_EXAMPLE") =
+  runner_->MutableSidePackets()->Tag(kSequenceExampleTag) =
      Adopt(input_sequence.release());
   const std::vector<Packet>& output_packets =
-      runner_->Outputs().Tag("SEQUENCE_EXAMPLE").packets;
+      runner_->Outputs().Tag(kSequenceExampleTag).packets;
@@ -684,11 +778,11 @@ TEST_F(PackMediaSequenceCalculatorTest, MissingStreamNotOK) {
   runner_->MutableInputs()
-      ->Tag("FORWARD_FLOW_ENCODED")
+      ->Tag(kForwardFlowEncodedTag)
       .packets.push_back(Adopt(flow_ptr.release()).At(Timestamp(i)));
-  runner_->MutableSidePackets()->Tag("SEQUENCE_EXAMPLE") =
+  runner_->MutableSidePackets()->Tag(kSequenceExampleTag) =
      Adopt(input_sequence.release());
   absl::Status status = runner_->Run();
@@ -705,13 +799,13 @@ TEST_F(PackMediaSequenceCalculatorTest, TestReplacingImages) {
   mpms::AddImageTimestamp(1, input_sequence.get());
   mpms::AddImageTimestamp(2, input_sequence.get());
-  runner_->MutableSidePackets()->Tag("SEQUENCE_EXAMPLE") =
+  runner_->MutableSidePackets()->Tag(kSequenceExampleTag) =
      Adopt(input_sequence.release());
   const std::vector<Packet>& output_packets =
-      runner_->Outputs().Tag("SEQUENCE_EXAMPLE").packets;
+      runner_->Outputs().Tag(kSequenceExampleTag).packets;
@@ -731,13 +825,13 @@ TEST_F(PackMediaSequenceCalculatorTest, TestReplacingFlowImages) {
   mpms::AddForwardFlowTimestamp(1, input_sequence.get());
   mpms::AddForwardFlowTimestamp(2, input_sequence.get());
-  runner_->MutableSidePackets()->Tag("SEQUENCE_EXAMPLE") =
+  runner_->MutableSidePackets()->Tag(kSequenceExampleTag) =
      Adopt(input_sequence.release());
   const std::vector<Packet>& output_packets =
-      runner_->Outputs().Tag("SEQUENCE_EXAMPLE").packets;
+      runner_->Outputs().Tag(kSequenceExampleTag).packets;
@@ -768,13 +862,52 @@ TEST_F(PackMediaSequenceCalculatorTest, TestReplacingFloatVectors) {
   ASSERT_EQ(num_timesteps,
             mpms::GetFeatureFloatsSize("OTHER", *input_sequence));
-  runner_->MutableSidePackets()->Tag("SEQUENCE_EXAMPLE") =
+  runner_->MutableSidePackets()->Tag(kSequenceExampleTag) =
       Adopt(input_sequence.release());

   MP_ASSERT_OK(runner_->Run());

   const std::vector<Packet>& output_packets =
-      runner_->Outputs().Tag("SEQUENCE_EXAMPLE").packets;
+      runner_->Outputs().Tag(kSequenceExampleTag).packets;
+  ASSERT_EQ(1, output_packets.size());
+  const tf::SequenceExample& output_sequence =
+      output_packets[0].Get<tf::SequenceExample>();
+
+  ASSERT_EQ(0, mpms::GetFeatureTimestampSize("TEST", output_sequence));
+  ASSERT_EQ(0, mpms::GetFeatureFloatsSize("TEST", output_sequence));
+  ASSERT_EQ(0, mpms::GetFeatureTimestampSize("OTHER", output_sequence));
+  ASSERT_EQ(0, mpms::GetFeatureFloatsSize("OTHER", output_sequence));
+}
+
+TEST_F(PackMediaSequenceCalculatorTest, TestReplacingBytesVectors) {
+  SetUpCalculator({"BYTES_FEATURE_TEST:test", "BYTES_FEATURE_OTHER:test2"}, {},
+                  false, true);
+  auto input_sequence = ::absl::make_unique<tf::SequenceExample>();
+
+  int num_timesteps = 2;
+  for (int i = 0; i < num_timesteps; ++i) {
+    auto vs_ptr = ::absl::make_unique<std::vector<std::string>>(
+        2, absl::StrCat("foo", 2 << i));
+    mpms::AddFeatureBytes("TEST", *vs_ptr, input_sequence.get());
+    mpms::AddFeatureTimestamp("TEST", i, input_sequence.get());
+    vs_ptr = ::absl::make_unique<std::vector<std::string>>(
+        2, absl::StrCat("bar", 2 << i));
+    mpms::AddFeatureBytes("OTHER", *vs_ptr, input_sequence.get());
+    mpms::AddFeatureTimestamp("OTHER", i, input_sequence.get());
+  }
+  ASSERT_EQ(num_timesteps,
+            mpms::GetFeatureTimestampSize("TEST", *input_sequence));
+  ASSERT_EQ(num_timesteps, mpms::GetFeatureBytesSize("TEST", *input_sequence));
+  ASSERT_EQ(num_timesteps,
+            mpms::GetFeatureTimestampSize("OTHER", *input_sequence));
+  ASSERT_EQ(num_timesteps, mpms::GetFeatureBytesSize("OTHER", *input_sequence));
+  runner_->MutableSidePackets()->Tag(kSequenceExampleTag) =
+      Adopt(input_sequence.release());
+
+  MP_ASSERT_OK(runner_->Run());
+
+  const std::vector<Packet>& output_packets =
+      runner_->Outputs().Tag(kSequenceExampleTag).packets;
   ASSERT_EQ(1, output_packets.size());
   const tf::SequenceExample& output_sequence =
       output_packets[0].Get<tf::SequenceExample>();
@@ -800,7 +933,7 @@ TEST_F(PackMediaSequenceCalculatorTest, TestReconcilingAnnotations) {
-    runner_->MutableInputs()->Tag("IMAGE").packets.push_back(
+    runner_->MutableInputs()->Tag(kImageTag).packets.push_back(
         Adopt(image_ptr.release()).At(Timestamp((i + 1) * 10)));
   }
@@ -812,11 +945,11 @@ TEST_F(PackMediaSequenceCalculatorTest, TestReconcilingAnnotations) {
   mpms::AddBBoxTimestamp("PREFIX", 9, input_sequence.get());
   mpms::AddBBoxTimestamp("PREFIX", 22, input_sequence.get());
-  runner_->MutableSidePackets()->Tag("SEQUENCE_EXAMPLE") =
+  runner_->MutableSidePackets()->Tag(kSequenceExampleTag) =
      Adopt(input_sequence.release());
   MP_ASSERT_OK(runner_->Run());
   const std::vector<Packet>& output_packets =
-      runner_->Outputs().Tag("SEQUENCE_EXAMPLE").packets;
+      runner_->Outputs().Tag(kSequenceExampleTag).packets;
   ASSERT_EQ(1, output_packets.size());
ASSERT_EQ(1, output_packets.size());
|
||||||
const tf::SequenceExample& output_sequence =
|
const tf::SequenceExample& output_sequence =
|
||||||
output_packets[0].Get<tf::SequenceExample>();
|
output_packets[0].Get<tf::SequenceExample>();
|
||||||
|
@ -853,7 +986,7 @@ TEST_F(PackMediaSequenceCalculatorTest, TestOverwritingAndReconciling) {
|
||||||
for (int i = 0; i < num_images; ++i) {
|
for (int i = 0; i < num_images; ++i) {
|
||||||
auto image_ptr =
|
auto image_ptr =
|
||||||
::absl::make_unique<OpenCvImageEncoderCalculatorResults>(encoded_image);
|
::absl::make_unique<OpenCvImageEncoderCalculatorResults>(encoded_image);
|
||||||
runner_->MutableInputs()->Tag("IMAGE").packets.push_back(
|
runner_->MutableInputs()->Tag(kImageTag).packets.push_back(
|
||||||
Adopt(image_ptr.release()).At(Timestamp(i)));
|
Adopt(image_ptr.release()).At(Timestamp(i)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -867,7 +1000,7 @@ TEST_F(PackMediaSequenceCalculatorTest, TestOverwritingAndReconciling) {
|
||||||
Location::CreateRelativeBBoxLocation(0, 0.5, 0.5, 0.5)
|
Location::CreateRelativeBBoxLocation(0, 0.5, 0.5, 0.5)
|
||||||
.ConvertToProto(detection.mutable_location_data());
|
.ConvertToProto(detection.mutable_location_data());
|
||||||
detections->push_back(detection);
|
detections->push_back(detection);
|
||||||
runner_->MutableInputs()->Tag("BBOX").packets.push_back(
|
runner_->MutableInputs()->Tag(kBboxTag).packets.push_back(
|
||||||
Adopt(detections.release()).At(Timestamp(i)));
|
Adopt(detections.release()).At(Timestamp(i)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -883,7 +1016,7 @@ TEST_F(PackMediaSequenceCalculatorTest, TestOverwritingAndReconciling) {
|
||||||
mpms::AddBBoxTrackIndex({-1}, input_sequence.get());
|
mpms::AddBBoxTrackIndex({-1}, input_sequence.get());
|
||||||
}
|
}
|
||||||
|
|
||||||
runner_->MutableSidePackets()->Tag("SEQUENCE_EXAMPLE") =
|
runner_->MutableSidePackets()->Tag(kSequenceExampleTag) =
|
||||||
Adopt(input_sequence.release());
|
Adopt(input_sequence.release());
|
||||||
// If the all the previous values aren't cleared, this assert will fail.
|
// If the all the previous values aren't cleared, this assert will fail.
|
||||||
MP_ASSERT_OK(runner_->Run());
|
MP_ASSERT_OK(runner_->Run());
|
||||||
|
@ -899,11 +1032,11 @@ TEST_F(PackMediaSequenceCalculatorTest, TestTooLargeInputFailsSoftly) {
|
||||||
for (int i = 0; i < num_timesteps; ++i) {
|
for (int i = 0; i < num_timesteps; ++i) {
|
||||||
auto vf_ptr = ::absl::make_unique<std::vector<float>>(1000000, i);
|
auto vf_ptr = ::absl::make_unique<std::vector<float>>(1000000, i);
|
||||||
runner_->MutableInputs()
|
runner_->MutableInputs()
|
||||||
->Tag("FLOAT_FEATURE_TEST")
|
->Tag(kFloatFeatureTestTag)
|
||||||
.packets.push_back(Adopt(vf_ptr.release()).At(Timestamp(i)));
|
.packets.push_back(Adopt(vf_ptr.release()).At(Timestamp(i)));
|
||||||
}
|
}
|
||||||
|
|
||||||
runner_->MutableSidePackets()->Tag("SEQUENCE_EXAMPLE") =
|
runner_->MutableSidePackets()->Tag(kSequenceExampleTag) =
|
||||||
Adopt(input_sequence.release());
|
Adopt(input_sequence.release());
|
||||||
ASSERT_FALSE(runner_->Run().ok());
|
ASSERT_FALSE(runner_->Run().ok());
|
||||||
}
|
}
|
||||||
|
|
|
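The hunks above replace string-literal stream and side-packet tags in the PackMediaSequenceCalculator tests with named constants (kSequenceExampleTag, kImageTag, kBboxTag, kFloatFeatureTestTag). The constant definitions themselves fall outside this excerpt; a minimal sketch of what they presumably look like, with the values taken directly from the string literals being removed:

    // Hypothetical sketch: the real definitions live elsewhere in the test
    // file and are not shown in this diff.
    namespace {
    constexpr char kSequenceExampleTag[] = "SEQUENCE_EXAMPLE";
    constexpr char kImageTag[] = "IMAGE";
    constexpr char kBboxTag[] = "BBOX";
    constexpr char kFloatFeatureTestTag[] = "FLOAT_FEATURE_TEST";
    }  // namespace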
@@ -26,6 +26,8 @@ namespace mediapipe {
 namespace tf = ::tensorflow;
 namespace {

+constexpr char kReferenceTag[] = "REFERENCE";
+
 constexpr char kMatrix[] = "MATRIX";
 constexpr char kTensor[] = "TENSOR";

@@ -68,7 +70,8 @@ class TensorToMatrixCalculatorTest : public ::testing::Test {
     if (include_rate) {
       header->set_packet_rate(1.0);
     }
-    runner_->MutableInputs()->Tag("REFERENCE").header = Adopt(header.release());
+    runner_->MutableInputs()->Tag(kReferenceTag).header =
+        Adopt(header.release());
   }

   std::unique_ptr<CalculatorRunner> runner_;
@@ -0,0 +1,118 @@
+// Copyright 2019 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Calculator converts from one-dimensional Tensor of DT_STRING to
+// vector<std::string> OR from (batched) two-dimensional Tensor of DT_STRING to
+// vector<vector<std::string>.
+
+#include "mediapipe/calculators/tensorflow/tensor_to_vector_string_calculator_options.pb.h"
+#include "mediapipe/framework/calculator_framework.h"
+#include "mediapipe/framework/port/ret_check.h"
+#include "mediapipe/framework/port/status.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/types.h"
+
+namespace mediapipe {
+
+namespace tf = ::tensorflow;
+
+class TensorToVectorStringCalculator : public CalculatorBase {
+ public:
+  static absl::Status GetContract(CalculatorContract* cc);
+
+  absl::Status Open(CalculatorContext* cc) override;
+  absl::Status Process(CalculatorContext* cc) override;
+
+ private:
+  TensorToVectorStringCalculatorOptions options_;
+};
+REGISTER_CALCULATOR(TensorToVectorStringCalculator);
+
+absl::Status TensorToVectorStringCalculator::GetContract(
+    CalculatorContract* cc) {
+  // Start with only one input packet.
+  RET_CHECK_EQ(cc->Inputs().NumEntries(), 1)
+      << "Only one input stream is supported.";
+  cc->Inputs().Index(0).Set<tf::Tensor>(
+      // Input Tensor
+  );
+  RET_CHECK_EQ(cc->Outputs().NumEntries(), 1)
+      << "Only one output stream is supported.";
+  const auto& options = cc->Options<TensorToVectorStringCalculatorOptions>();
+  if (options.tensor_is_2d()) {
+    RET_CHECK(!options.flatten_nd());
+    cc->Outputs().Index(0).Set<std::vector<std::vector<std::string>>>(
+        /* "Output vector<vector<std::string>>." */);
+  } else {
+    cc->Outputs().Index(0).Set<std::vector<std::string>>(
+        // Output vector<std::string>.
+    );
+  }
+  return absl::OkStatus();
+}
+
+absl::Status TensorToVectorStringCalculator::Open(CalculatorContext* cc) {
+  options_ = cc->Options<TensorToVectorStringCalculatorOptions>();
+
+  // Inform mediapipe that this calculator produces an output at time t for
+  // each input received at time t (i.e. this calculator does not buffer
+  // inputs). This enables mediapipe to propagate time of arrival estimates in
+  // mediapipe graphs through this calculator.
+  cc->SetOffset(/*offset=*/0);
+
+  return absl::OkStatus();
+}
+
+absl::Status TensorToVectorStringCalculator::Process(CalculatorContext* cc) {
+  const tf::Tensor& input_tensor =
+      cc->Inputs().Index(0).Value().Get<tf::Tensor>();
+  RET_CHECK(tf::DT_STRING == input_tensor.dtype())
+      << "expected DT_STRING input but got "
+      << tensorflow::DataTypeString(input_tensor.dtype());
+
+  if (options_.tensor_is_2d()) {
+    RET_CHECK(2 == input_tensor.dims())
+        << "Expected 2-dimensional Tensor, but the tensor shape is: "
+        << input_tensor.shape().DebugString();
+    auto output = absl::make_unique<std::vector<std::vector<std::string>>>(
+        input_tensor.dim_size(0),
+        std::vector<std::string>(input_tensor.dim_size(1)));
+    for (int i = 0; i < input_tensor.dim_size(0); ++i) {
+      auto& instance_output = output->at(i);
+      const auto& slice =
+          input_tensor.Slice(i, i + 1).unaligned_flat<tensorflow::tstring>();
+      for (int j = 0; j < input_tensor.dim_size(1); ++j) {
+        instance_output.at(j) = slice(j);
+      }
+    }
+    cc->Outputs().Index(0).Add(output.release(), cc->InputTimestamp());
+  } else {
+    if (!options_.flatten_nd()) {
+      RET_CHECK(1 == input_tensor.dims())
+          << "`flatten_nd` is not set. Expected 1-dimensional Tensor, but the "
+          << "tensor shape is: " << input_tensor.shape().DebugString();
+    }
+    auto output =
+        absl::make_unique<std::vector<std::string>>(input_tensor.NumElements());
+    const auto& tensor_values = input_tensor.flat<tensorflow::tstring>();
+    for (int i = 0; i < input_tensor.NumElements(); ++i) {
+      output->at(i) = tensor_values(i);
+    }
+    cc->Outputs().Index(0).Add(output.release(), cc->InputTimestamp());
+  }
+
+  return absl::OkStatus();
+}
+
+}  // namespace mediapipe
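The new TensorToVectorStringCalculator above turns a DT_STRING tensor into a std::vector<std::string> (1-D, or any shape when flatten_nd is set) or a vector<vector<std::string>> (batched 2-D). A minimal usage sketch, assuming the CalculatorRunner conventions visible in the tests earlier in this diff; the stream names and the helper function are illustrative only, not part of the change:

    // Hypothetical sketch, not part of the diff: drive the calculator with a
    // CalculatorRunner and a 1-D DT_STRING tensor.
    #include "mediapipe/framework/calculator_framework.h"
    #include "mediapipe/framework/calculator_runner.h"
    #include "mediapipe/framework/port/parse_text_proto.h"
    #include "tensorflow/core/framework/tensor.h"

    namespace mediapipe {

    void RunOneDimensionalExample() {
      CalculatorRunner runner(ParseTextProtoOrDie<CalculatorGraphConfig::Node>(
          R"pb(
            calculator: "TensorToVectorStringCalculator"
            input_stream: "input_tensor"
            output_stream: "output_strings"
          )pb"));

      // A 1-D DT_STRING tensor with two elements.
      tensorflow::Tensor tensor(tensorflow::DT_STRING,
                                tensorflow::TensorShape({2}));
      tensor.vec<tensorflow::tstring>()(0) = "foo";
      tensor.vec<tensorflow::tstring>()(1) = "bar";

      runner.MutableInputs()->Index(0).packets.push_back(
          MakePacket<tensorflow::Tensor>(tensor).At(Timestamp(0)));

      if (runner.Run().ok()) {
        // With default options the single output packet holds {"foo", "bar"}.
        const auto& out =
            runner.Outputs().Index(0).packets[0].Get<std::vector<std::string>>();
        (void)out;
      }
    }

    }  // namespace mediapipe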
@@ -0,0 +1,33 @@
+// Copyright 2019 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto2";
+
+package mediapipe;
+
+import "mediapipe/framework/calculator.proto";
+
+message TensorToVectorStringCalculatorOptions {
+  extend mediapipe.CalculatorOptions {
+    optional TensorToVectorStringCalculatorOptions ext = 386534187;
+  }
+
+  // If true, unpack a 2d tensor (matrix) into a vector<vector<string>>. If
+  // false, convert a 1d tensor (vector) into a vector<string>.
+  optional bool tensor_is_2d = 1 [default = false];
+
+  // If true, an N-D tensor will be flattened to a vector<string>. This is
+  // exclusive with tensor_is_2d.
+  optional bool flatten_nd = 2 [default = false];
+}
Some files were not shown because too many files have changed in this diff.