Project import generated by Copybara.
GitOrigin-RevId: 1610e588e497817fae2d9a458093ab6a370e2972
parent b899d17f18
commit 710fb3de58

WORKSPACE (25 lines changed)
@@ -331,7 +331,9 @@ load("@rules_jvm_external//:defs.bzl", "maven_install")
 maven_install(
     artifacts = [
         "androidx.concurrent:concurrent-futures:1.0.0-alpha03",
-        "androidx.lifecycle:lifecycle-common:2.2.0",
+        "androidx.lifecycle:lifecycle-common:2.3.1",
+        "androidx.activity:activity:1.2.2",
+        "androidx.fragment:fragment:1.3.4",
         "androidx.annotation:annotation:aar:1.1.0",
         "androidx.appcompat:appcompat:aar:1.1.0-rc01",
         "androidx.camera:camera-core:1.0.0-beta10",
@@ -376,9 +378,9 @@ http_archive(
 )
 
 # Tensorflow repo should always go after the other external dependencies.
-# 2021-06-07
-_TENSORFLOW_GIT_COMMIT = "700533808e6016dc458bb2eeecfca4babfc482ec"
-_TENSORFLOW_SHA256 = "b6edd7f4039bfc19f3e77594ecff558ba620091d0dc48181484b3d9085026126"
+# 2021-07-29
+_TENSORFLOW_GIT_COMMIT = "52a2905cbc21034766c08041933053178c5d10e3"
+_TENSORFLOW_SHA256 = "06d4691bcdb700f3275fa0971a1585221c2b9f3dffe867963be565a6643d7f56"
 http_archive(
     name = "org_tensorflow",
     urls = [
@@ -399,3 +401,18 @@ load("@org_tensorflow//tensorflow:workspace3.bzl", "tf_workspace3")
 tf_workspace3()
 load("@org_tensorflow//tensorflow:workspace2.bzl", "tf_workspace2")
 tf_workspace2()
+
+# Edge TPU
+http_archive(
+    name = "libedgetpu",
+    sha256 = "14d5527a943a25bc648c28a9961f954f70ba4d79c0a9ca5ae226e1831d72fe80",
+    strip_prefix = "libedgetpu-3164995622300286ef2bb14d7fdc2792dae045b7",
+    urls = [
+        "https://github.com/google-coral/libedgetpu/archive/3164995622300286ef2bb14d7fdc2792dae045b7.tar.gz"
+    ],
+)
+load("@libedgetpu//:workspace.bzl", "libedgetpu_dependencies")
+libedgetpu_dependencies()
+
+load("@coral_crosstool//:configure.bzl", "cc_crosstool")
+cc_crosstool(name = "crosstool")
@@ -16,12 +16,14 @@ nav_order: 1
 
 Please follow instructions below to build Android example apps in the supported
 MediaPipe [solutions](../solutions/solutions.md). To learn more about these
-example apps, start from [Hello World! on Android](./hello_world_android.md). To
-incorporate MediaPipe into an existing Android Studio project, see these
-[instructions](./android_archive_library.md) that use Android Archive (AAR) and
-Gradle.
+example apps, start from [Hello World! on Android](./hello_world_android.md).
 
-## Building Android example apps
+To incorporate MediaPipe into Android Studio projects, see these
+[instructions](./android_solutions.md) to use the MediaPipe Android Solution
+APIs (currently in alpha) that are now available in
+[Google's Maven Repository](https://maven.google.com/web/index.html?#com.google.mediapipe).
+
+## Building Android example apps with Bazel
 
 ### Prerequisite
 
@@ -51,16 +53,6 @@ $YOUR_INTENDED_API_LEVEL` in android_ndk_repository() and/or
 android_sdk_repository() in the
 [`WORKSPACE`](https://github.com/google/mediapipe/blob/master/WORKSPACE) file.
 
-Please verify all the necessary packages are installed.
-
-*   Android SDK Platform API Level 28 or 29
-*   Android SDK Build-Tools 28 or 29
-*   Android SDK Platform-Tools 28 or 29
-*   Android SDK Tools 26.1.1
-*   Android NDK 19c or above
-
-### Option 1: Build with Bazel in Command Line
-
 Tip: You can run this
 [script](https://github.com/google/mediapipe/blob/master/build_android_examples.sh)
 to build (and install) all MediaPipe Android example apps.
@@ -84,108 +76,3 @@ to build (and install) all MediaPipe Android example apps.
 ```bash
 adb install bazel-bin/mediapipe/examples/android/src/java/com/google/mediapipe/apps/handtrackinggpu/handtrackinggpu.apk
 ```
-
-### Option 2: Build with Bazel in Android Studio
-
-The MediaPipe project can be imported into Android Studio using the Bazel
-plugins. This allows the MediaPipe examples to be built and modified in Android
-Studio.
-
-To incorporate MediaPipe into an existing Android Studio project, see these
-[instructions](./android_archive_library.md) that use Android Archive (AAR) and
-Gradle.
-
-The steps below use Android Studio 3.5 to build and install a MediaPipe example
-app:
-
-1.  Install and launch Android Studio 3.5.
-
-2.  Select `Configure` -> `SDK Manager` -> `SDK Platforms`.
-
-    *   Verify that Android SDK Platform API Level 28 or 29 is installed.
-    *   Take note of the Android SDK Location, e.g.,
-        `/usr/local/home/Android/Sdk`.
-
-3.  Select `Configure` -> `SDK Manager` -> `SDK Tools`.
-
-    *   Verify that Android SDK Build-Tools 28 or 29 is installed.
-    *   Verify that Android SDK Platform-Tools 28 or 29 is installed.
-    *   Verify that Android SDK Tools 26.1.1 is installed.
-    *   Verify that Android NDK 19c or above is installed.
-    *   Take note of the Android NDK Location, e.g.,
-        `/usr/local/home/Android/Sdk/ndk-bundle` or
-        `/usr/local/home/Android/Sdk/ndk/20.0.5594570`.
-
-4.  Set environment variables `$ANDROID_HOME` and `$ANDROID_NDK_HOME` to point
-    to the installed SDK and NDK.
-
-    ```bash
-    export ANDROID_HOME=/usr/local/home/Android/Sdk
-
-    # If the NDK libraries are installed by a previous version of Android Studio, do
-    export ANDROID_NDK_HOME=/usr/local/home/Android/Sdk/ndk-bundle
-    # If the NDK libraries are installed by Android Studio 3.5, do
-    export ANDROID_NDK_HOME=/usr/local/home/Android/Sdk/ndk/<version number>
-    ```
-
-5.  Select `Configure` -> `Plugins` to install `Bazel`.
-
-6.  On Linux, select `File` -> `Settings` -> `Bazel settings`. On macos, select
-    `Android Studio` -> `Preferences` -> `Bazel settings`. Then, modify `Bazel
-    binary location` to be the same as the output of `$ which bazel`.
-
-7.  Select `Import Bazel Project`.
-
-    *   Select `Workspace`: `/path/to/mediapipe` and select `Next`.
-    *   Select `Generate from BUILD file`: `/path/to/mediapipe/BUILD` and select
-        `Next`.
-    *   Modify `Project View` to be the following and select `Finish`.
-
-    ```
-    directories:
-      # read project settings, e.g., .bazelrc
-      .
-      -mediapipe/objc
-      -mediapipe/examples/ios
-
-    targets:
-      //mediapipe/examples/android/...:all
-      //mediapipe/java/...:all
-
-    android_sdk_platform: android-29
-
-    sync_flags:
-      --host_crosstool_top=@bazel_tools//tools/cpp:toolchain
-    ```
-
-8.  Select `Bazel` -> `Sync` -> `Sync project with Build files`.
-
-    Note: Even after doing step 4, if you still see the error: `"no such package
-    '@androidsdk//': Either the path attribute of android_sdk_repository or the
-    ANDROID_HOME environment variable must be set."`, please modify the
-    [`WORKSPACE`](https://github.com/google/mediapipe/blob/master/WORKSPACE)
-    file to point to your SDK and NDK library locations, as below:
-
-    ```
-    android_sdk_repository(
-        name = "androidsdk",
-        path = "/path/to/android/sdk"
-    )
-
-    android_ndk_repository(
-        name = "androidndk",
-        path = "/path/to/android/ndk"
-    )
-    ```
-
-9.  Connect an Android device to the workstation.
-
-10. Select `Run...` -> `Edit Configurations...`.
-
-    *   Select `Templates` -> `Bazel Command`.
-    *   Enter Target Expression:
-        `//mediapipe/examples/android/src/java/com/google/mediapipe/apps/handtrackinggpu:handtrackinggpu`
-    *   Enter Bazel command: `mobile-install`.
-    *   Enter Bazel flags: `-c opt --config=android_arm64`.
-    *   Press the `[+]` button to add the new configuration.
-    *   Select `Run` to run the example app on the connected Android device.
@@ -3,7 +3,7 @@ layout: default
 title: MediaPipe Android Archive
 parent: MediaPipe on Android
 grand_parent: Getting Started
-nav_order: 2
+nav_order: 3
 ---
 
 # MediaPipe Android Archive
 
docs/getting_started/android_solutions.md (new file, 79 lines)

@@ -0,0 +1,79 @@
+---
+layout: default
+title: Android Solutions
+parent: MediaPipe on Android
+grand_parent: Getting Started
+nav_order: 2
+---
+
+# Android Solution APIs
+{: .no_toc }
+
+1. TOC
+{:toc}
+---
+
+Please follow instructions below to use the MediaPipe Solution APIs in Android
+Studio projects and build the Android example apps in the supported MediaPipe
+[solutions](../solutions/solutions.md).
+
+## Integrate MediaPipe Android Solutions in Android Studio
+
+MediaPipe Android Solution APIs (currently in alpha) are now available in
+[Google's Maven Repository](https://maven.google.com/web/index.html?#com.google.mediapipe).
+To incorporate MediaPipe Android Solutions into an Android Studio project, add
+the following into the project's Gradle dependencies:
+
+```
+dependencies {
+    // MediaPipe solution-core is the foundation of any MediaPipe solutions.
+    implementation 'com.google.mediapipe:solution-core:latest.release'
+    // Optional: MediaPipe Hands solution.
+    implementation 'com.google.mediapipe:hands:latest.release'
+    // Optional: MediaPipe FaceMesh solution.
+    implementation 'com.google.mediapipe:facemesh:latest.release'
+    // MediaPipe deps
+    implementation 'com.google.flogger:flogger:latest.release'
+    implementation 'com.google.flogger:flogger-system-backend:latest.release'
+    implementation 'com.google.guava:guava:27.0.1-android'
+    implementation 'com.google.protobuf:protobuf-java:3.11.4'
+    // CameraX core library
+    def camerax_version = "1.0.0-beta10"
+    implementation "androidx.camera:camera-core:$camerax_version"
+    implementation "androidx.camera:camera-camera2:$camerax_version"
+    implementation "androidx.camera:camera-lifecycle:$camerax_version"
+}
+```
+
+See the detailed solutions API usage examples for different use cases in the
+solution example apps'
+[source code](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/solutions).
+If the prebuilt maven packages are not sufficient, build the MediaPipe Android
+archive library locally by following these
+[instructions](./android_archive_library.md).
+
+## Build solution example apps in Android Studio
+
+1.  Open Android Studio Arctic Fox on Linux, macOS, or Windows.
+
+2.  Import mediapipe/examples/android/solutions directory into Android Studio.
+
+    ![Screenshot](../images/import_mp_android_studio_project.png)
+
+3.  For Windows users, run `create_win_symlinks.bat` as administrator to create
+    res directory symlinks.
+
+    ![Screenshot](../images/run_create_win_symlinks.png)
+
+4.  Select "File" -> "Sync Project with Gradle Files" to sync project.
+
+5.  Run solution example app in Android Studio.
+
+    ![Screenshot](../images/run_android_solution_app.png)
+
+6.  (Optional) Run solutions on CPU.
+
+    MediaPipe solution example apps run the pipeline and the model inference on
+    GPU by default. If needed, for example to run the apps on Android Emulator,
+    set the `RUN_ON_GPU` boolean variable to `false` in the app's
+    MainActivity.java to run the pipeline and the model inference on CPU.
@@ -43,104 +43,189 @@ install --user six`.
 
 3.  Install OpenCV and FFmpeg.
 
-    Option 1. Use package manager tool to install the pre-compiled OpenCV
-    libraries. FFmpeg will be installed via libopencv-video-dev.
+    **Option 1**. Use package manager tool to install the pre-compiled OpenCV
+    libraries. FFmpeg will be installed via `libopencv-video-dev`.
 
-    Note: Debian 9 and Ubuntu 16.04 provide OpenCV 2.4.9. You may want to take
-    option 2 or 3 to install OpenCV 3 or above.
+    OS                   | OpenCV
+    -------------------- | ------
+    Debian 9 (stretch)   | 2.4
+    Debian 10 (buster)   | 3.2
+    Debian 11 (bullseye) | 4.5
+    Ubuntu 16.04 LTS     | 2.4
+    Ubuntu 18.04 LTS     | 3.2
+    Ubuntu 20.04 LTS     | 4.2
+    Ubuntu 21.04         | 4.5
 
     ```bash
-    $ sudo apt-get install libopencv-core-dev libopencv-highgui-dev \
-                           libopencv-calib3d-dev libopencv-features2d-dev \
-                           libopencv-imgproc-dev libopencv-video-dev
+    $ sudo apt-get install -y \
+        libopencv-core-dev \
+        libopencv-highgui-dev \
+        libopencv-calib3d-dev \
+        libopencv-features2d-dev \
+        libopencv-imgproc-dev \
+        libopencv-video-dev
     ```
 
-    Debian 9 and Ubuntu 18.04 install the packages in
-    `/usr/lib/x86_64-linux-gnu`. MediaPipe's [`opencv_linux.BUILD`] and
-    [`ffmpeg_linux.BUILD`] are configured for this library path. Ubuntu 20.04
-    may install the OpenCV and FFmpeg packages in `/usr/local`, Please follow
-    the option 3 below to modify the [`WORKSPACE`], [`opencv_linux.BUILD`] and
-    [`ffmpeg_linux.BUILD`] files accordingly.
-
-    Moreover, for Nvidia Jetson and Raspberry Pi devices with ARM Ubuntu, the
-    library path needs to be modified like the following:
+    MediaPipe's [`opencv_linux.BUILD`] and [`WORKSPACE`] are already configured
+    for OpenCV 2/3 and should work correctly on any architecture:
 
     ```bash
-    sed -i "s/x86_64-linux-gnu/aarch64-linux-gnu/g" third_party/opencv_linux.BUILD
+    # WORKSPACE
+    new_local_repository(
+        name = "linux_opencv",
+        build_file = "@//third_party:opencv_linux.BUILD",
+        path = "/usr",
+    )
+
+    # opencv_linux.BUILD for OpenCV 2/3 installed from Debian package
+    cc_library(
+        name = "opencv",
+        linkopts = [
+            "-l:libopencv_core.so",
+            "-l:libopencv_calib3d.so",
+            "-l:libopencv_features2d.so",
+            "-l:libopencv_highgui.so",
+            "-l:libopencv_imgcodecs.so",
+            "-l:libopencv_imgproc.so",
+            "-l:libopencv_video.so",
+            "-l:libopencv_videoio.so",
+        ],
+    )
     ```
 
-    Option 2. Run [`setup_opencv.sh`] to automatically build OpenCV from source
-    and modify MediaPipe's OpenCV config.
+    For OpenCV 4 you need to modify [`opencv_linux.BUILD`] taking into account
+    current architecture:
 
-    Option 3. Follow OpenCV's
+    ```bash
+    # WORKSPACE
+    new_local_repository(
+        name = "linux_opencv",
+        build_file = "@//third_party:opencv_linux.BUILD",
+        path = "/usr",
+    )
+
+    # opencv_linux.BUILD for OpenCV 4 installed from Debian package
+    cc_library(
+        name = "opencv",
+        hdrs = glob([
+            # Uncomment according to your multiarch value (gcc -print-multiarch):
+            # "include/aarch64-linux-gnu/opencv4/opencv2/cvconfig.h",
+            # "include/arm-linux-gnueabihf/opencv4/opencv2/cvconfig.h",
+            # "include/x86_64-linux-gnu/opencv4/opencv2/cvconfig.h",
+            "include/opencv4/opencv2/**/*.h*",
+        ]),
+        includes = [
+            # Uncomment according to your multiarch value (gcc -print-multiarch):
+            # "include/aarch64-linux-gnu/opencv4/",
+            # "include/arm-linux-gnueabihf/opencv4/",
+            # "include/x86_64-linux-gnu/opencv4/",
+            "include/opencv4/",
+        ],
+        linkopts = [
+            "-l:libopencv_core.so",
+            "-l:libopencv_calib3d.so",
+            "-l:libopencv_features2d.so",
+            "-l:libopencv_highgui.so",
+            "-l:libopencv_imgcodecs.so",
+            "-l:libopencv_imgproc.so",
+            "-l:libopencv_video.so",
+            "-l:libopencv_videoio.so",
+        ],
+    )
+    ```
+
+    **Option 2**. Run [`setup_opencv.sh`] to automatically build OpenCV from
+    source and modify MediaPipe's OpenCV config. This option will do all steps
+    defined in Option 3 automatically.
+
+    **Option 3**. Follow OpenCV's
     [documentation](https://docs.opencv.org/3.4.6/d7/d9f/tutorial_linux_install.html)
     to manually build OpenCV from source code.
 
-    Note: You may need to modify [`WORKSPACE`], [`opencv_linux.BUILD`] and
-    [`ffmpeg_linux.BUILD`] to point MediaPipe to your own OpenCV and FFmpeg
-    libraries. For example if OpenCV and FFmpeg are both manually installed in
-    "/usr/local/", you will need to update: (1) the "linux_opencv" and
-    "linux_ffmpeg" new_local_repository rules in [`WORKSPACE`], (2) the "opencv"
-    cc_library rule in [`opencv_linux.BUILD`], and (3) the "libffmpeg"
-    cc_library rule in [`ffmpeg_linux.BUILD`]. These 3 changes are shown below:
-
-    ```bash
-    new_local_repository(
-        name = "linux_opencv",
-        build_file = "@//third_party:opencv_linux.BUILD",
-        path = "/usr/local",
-    )
-
-    new_local_repository(
-        name = "linux_ffmpeg",
-        build_file = "@//third_party:ffmpeg_linux.BUILD",
-        path = "/usr/local",
-    )
-
-    cc_library(
-        name = "opencv",
-        srcs = glob(
-            [
-                "lib/libopencv_core.so",
-                "lib/libopencv_highgui.so",
-                "lib/libopencv_imgcodecs.so",
-                "lib/libopencv_imgproc.so",
-                "lib/libopencv_video.so",
-                "lib/libopencv_videoio.so",
-            ],
-        ),
-        hdrs = glob([
-            # For OpenCV 3.x
-            "include/opencv2/**/*.h*",
-            # For OpenCV 4.x
-            # "include/opencv4/opencv2/**/*.h*",
-        ]),
-        includes = [
-            # For OpenCV 3.x
-            "include/",
-            # For OpenCV 4.x
-            # "include/opencv4/",
-        ],
-        linkstatic = 1,
-        visibility = ["//visibility:public"],
-    )
-
-    cc_library(
-        name = "libffmpeg",
-        srcs = glob(
-            [
-                "lib/libav*.so",
-            ],
-        ),
-        hdrs = glob(["include/libav*/*.h"]),
-        includes = ["include"],
-        linkopts = [
-            "-lavcodec",
-            "-lavformat",
-            "-lavutil",
-        ],
-        linkstatic = 1,
-        visibility = ["//visibility:public"],
-    )
-    ```
+    You may need to modify [`WORKSPACE`] and [`opencv_linux.BUILD`] to point
+    MediaPipe to your own OpenCV libraries. Assume OpenCV would be installed to
+    `/usr/local/` which is recommended by default.
+
+    OpenCV 2/3 setup:
+
+    ```bash
+    # WORKSPACE
+    new_local_repository(
+        name = "linux_opencv",
+        build_file = "@//third_party:opencv_linux.BUILD",
+        path = "/usr/local",
+    )
+
+    # opencv_linux.BUILD for OpenCV 2/3 installed to /usr/local
+    cc_library(
+        name = "opencv",
+        linkopts = [
+            "-L/usr/local/lib",
+            "-l:libopencv_core.so",
+            "-l:libopencv_calib3d.so",
+            "-l:libopencv_features2d.so",
+            "-l:libopencv_highgui.so",
+            "-l:libopencv_imgcodecs.so",
+            "-l:libopencv_imgproc.so",
+            "-l:libopencv_video.so",
+            "-l:libopencv_videoio.so",
+        ],
+    )
+    ```
+
+    OpenCV 4 setup:
+
+    ```bash
+    # WORKSPACE
+    new_local_repository(
+        name = "linux_opencv",
+        build_file = "@//third_party:opencv_linux.BUILD",
+        path = "/usr/local",
+    )
+
+    # opencv_linux.BUILD for OpenCV 4 installed to /usr/local
+    cc_library(
+        name = "opencv",
+        hdrs = glob([
+            "include/opencv4/opencv2/**/*.h*",
+        ]),
+        includes = [
+            "include/opencv4/",
+        ],
+        linkopts = [
+            "-L/usr/local/lib",
+            "-l:libopencv_core.so",
+            "-l:libopencv_calib3d.so",
+            "-l:libopencv_features2d.so",
+            "-l:libopencv_highgui.so",
+            "-l:libopencv_imgcodecs.so",
+            "-l:libopencv_imgproc.so",
+            "-l:libopencv_video.so",
+            "-l:libopencv_videoio.so",
+        ],
+    )
+    ```
+
+    Current FFmpeg setup is defined in [`ffmpeg_linux.BUILD`] and should work
+    for any architecture:
+
+    ```bash
+    # WORKSPACE
+    new_local_repository(
+        name = "linux_ffmpeg",
+        build_file = "@//third_party:ffmpeg_linux.BUILD",
+        path = "/usr"
+    )
+
+    # ffmpeg_linux.BUILD for FFmpeg installed from Debian package
+    cc_library(
+        name = "libffmpeg",
+        linkopts = [
+            "-l:libavcodec.so",
+            "-l:libavformat.so",
+            "-l:libavutil.so",
+        ],
+    )
+    ```
 
@@ -29,6 +29,16 @@ Solution | NPM Package | Example
 Click on a solution link above for more information, including API and code
 snippets.
 
+### Supported platforms:
+
+| Browser | Platform                | Notes                                  |
+| ------- | ----------------------- | -------------------------------------- |
+| Chrome  | Android / Windows / Mac | Pixel 4 and older unsupported. Fuchsia |
+|         |                         | unsupported.                           |
+| Chrome  | iOS                     | Camera unavailable in Chrome on iOS.   |
+| Safari  | iPad/iPhone/Mac         | iOS and Safari on iPad / iPhone /      |
+|         |                         | MacBook                                |
+
 The quickest way to get acclimated is to look at the examples above. Each demo
 has a link to a [CodePen][codepen] so that you can edit the code and try it
 yourself. We have included a number of utility packages to help you get started:
New binary files in this commit:

*   docs/images/import_mp_android_studio_project.png (128 KiB)
*   docs/images/mobile/pose_segmentation.mp4
*   docs/images/run_android_solution_app.png (258 KiB)
*   docs/images/run_create_win_symlinks.png (51 KiB)

@@ -278,6 +278,7 @@ Supported configuration options:
 import cv2
 import mediapipe as mp
 mp_drawing = mp.solutions.drawing_utils
+mp_drawing_styles = mp.solutions.drawing_styles
 mp_face_mesh = mp.solutions.face_mesh
 
 # For static images:
@@ -301,9 +302,17 @@ with mp_face_mesh.FaceMesh(
       mp_drawing.draw_landmarks(
           image=annotated_image,
           landmark_list=face_landmarks,
-          connections=mp_face_mesh.FACE_CONNECTIONS,
-          landmark_drawing_spec=drawing_spec,
-          connection_drawing_spec=drawing_spec)
+          connections=mp_face_mesh.FACEMESH_TESSELATION,
+          landmark_drawing_spec=None,
+          connection_drawing_spec=mp_drawing_styles
+          .get_default_face_mesh_tesselation_style())
+      mp_drawing.draw_landmarks(
+          image=annotated_image,
+          landmark_list=face_landmarks,
+          connections=mp_face_mesh.FACEMESH_CONTOURS,
+          landmark_drawing_spec=None,
+          connection_drawing_spec=mp_drawing_styles
+          .get_default_face_mesh_contours_style())
     cv2.imwrite('/tmp/annotated_image' + str(idx) + '.png', annotated_image)
 
 # For webcam input:
@@ -335,9 +344,17 @@ with mp_face_mesh.FaceMesh(
         mp_drawing.draw_landmarks(
             image=image,
             landmark_list=face_landmarks,
-            connections=mp_face_mesh.FACE_CONNECTIONS,
-            landmark_drawing_spec=drawing_spec,
-            connection_drawing_spec=drawing_spec)
+            connections=mp_face_mesh.FACEMESH_TESSELATION,
+            landmark_drawing_spec=None,
+            connection_drawing_spec=mp_drawing_styles
+            .get_default_face_mesh_tesselation_style())
+        mp_drawing.draw_landmarks(
+            image=image,
+            landmark_list=face_landmarks,
+            connections=mp_face_mesh.FACEMESH_CONTOURS,
+            landmark_drawing_spec=None,
+            connection_drawing_spec=mp_drawing_styles
+            .get_default_face_mesh_contours_style())
     cv2.imshow('MediaPipe FaceMesh', image)
     if cv2.waitKey(5) & 0xFF == 27:
       break
@@ -423,6 +440,200 @@ camera.start();
 </script>
 ```
 
+### Android Solution API
+
+Please first follow general
+[instructions](../getting_started/android_solutions.md#integrate-mediapipe-android-solutions-api)
+to add MediaPipe Gradle dependencies, then try the FaceMesh solution API in the
+companion
+[example Android Studio project](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/solutions/facemesh)
+following
+[these instructions](../getting_started/android_solutions.md#build-solution-example-apps-in-android-studio)
+and learn more in the usage example below.
+
+Supported configuration options:
+
+*   [staticImageMode](#static_image_mode)
+*   [maxNumFaces](#max_num_faces)
+*   runOnGpu: Run the pipeline and the model inference on GPU or CPU.
+
+#### Camera Input
+
+```java
+// For camera input and result rendering with OpenGL.
+FaceMeshOptions faceMeshOptions =
+    FaceMeshOptions.builder()
+        .setMode(FaceMeshOptions.STREAMING_MODE) // API soon to become
+        .setMaxNumFaces(1)                       // setStaticImageMode(false)
+        .setRunOnGpu(true).build();
+FaceMesh facemesh = new FaceMesh(this, faceMeshOptions);
+facemesh.setErrorListener(
+    (message, e) -> Log.e(TAG, "MediaPipe FaceMesh error:" + message));
+
+// Initializes a new CameraInput instance and connects it to MediaPipe FaceMesh.
+CameraInput cameraInput = new CameraInput(this);
+cameraInput.setNewFrameListener(
+    textureFrame -> facemesh.send(textureFrame));
+
+// Initializes a new GlSurfaceView with a ResultGlRenderer<FaceMeshResult> instance
+// that provides the interfaces to run user-defined OpenGL rendering code.
+// See mediapipe/examples/android/solutions/facemesh/src/main/java/com/google/mediapipe/examples/facemesh/FaceMeshResultGlRenderer.java
+// as an example.
+SolutionGlSurfaceView<FaceMeshResult> glSurfaceView =
+    new SolutionGlSurfaceView<>(
+        this, facemesh.getGlContext(), facemesh.getGlMajorVersion());
+glSurfaceView.setSolutionResultRenderer(new FaceMeshResultGlRenderer());
+glSurfaceView.setRenderInputImage(true);
+
+facemesh.setResultListener(
+    faceMeshResult -> {
+      NormalizedLandmark noseLandmark =
+          faceMeshResult.multiFaceLandmarks().get(0).getLandmarkList().get(1);
+      Log.i(
+          TAG,
+          String.format(
+              "MediaPipe FaceMesh nose normalized coordinates (value range: [0, 1]): x=%f, y=%f",
+              noseLandmark.getX(), noseLandmark.getY()));
+      // Request GL rendering.
+      glSurfaceView.setRenderData(faceMeshResult);
+      glSurfaceView.requestRender();
+    });
+
+// The runnable to start camera after the GLSurfaceView is attached.
+glSurfaceView.post(
+    () ->
+        cameraInput.start(
+            this,
+            facemesh.getGlContext(),
+            CameraInput.CameraFacing.FRONT,
+            glSurfaceView.getWidth(),
+            glSurfaceView.getHeight()));
+```
+
+#### Image Input
+
+```java
+// For reading images from gallery and drawing the output in an ImageView.
+FaceMeshOptions faceMeshOptions =
+    FaceMeshOptions.builder()
+        .setMode(FaceMeshOptions.STATIC_IMAGE_MODE) // API soon to become
+        .setMaxNumFaces(1)                          // setStaticImageMode(true)
+        .setRunOnGpu(true).build();
+FaceMesh facemesh = new FaceMesh(this, faceMeshOptions);
+
+// Connects MediaPipe FaceMesh to the user-defined ImageView instance that allows
+// users to have the custom drawing of the output landmarks on it.
+// See mediapipe/examples/android/solutions/facemesh/src/main/java/com/google/mediapipe/examples/facemesh/FaceMeshResultImageView.java
+// as an example.
+FaceMeshResultImageView imageView = new FaceMeshResultImageView(this);
+facemesh.setResultListener(
+    faceMeshResult -> {
+      int width = faceMeshResult.inputBitmap().getWidth();
+      int height = faceMeshResult.inputBitmap().getHeight();
+      NormalizedLandmark noseLandmark =
+          faceMeshResult.multiFaceLandmarks().get(0).getLandmarkList().get(1);
+      Log.i(
+          TAG,
+          String.format(
+              "MediaPipe FaceMesh nose coordinates (pixel values): x=%f, y=%f",
+              noseLandmark.getX() * width, noseLandmark.getY() * height));
+      // Request canvas drawing.
+      imageView.setFaceMeshResult(faceMeshResult);
+      runOnUiThread(() -> imageView.update());
+    });
+facemesh.setErrorListener(
+    (message, e) -> Log.e(TAG, "MediaPipe FaceMesh error:" + message));
+
+// ActivityResultLauncher to get an image from the gallery as Bitmap.
+ActivityResultLauncher<Intent> imageGetter =
+    registerForActivityResult(
+        new ActivityResultContracts.StartActivityForResult(),
+        result -> {
+          Intent resultIntent = result.getData();
+          if (resultIntent != null && result.getResultCode() == RESULT_OK) {
+            Bitmap bitmap = null;
+            try {
+              bitmap =
+                  MediaStore.Images.Media.getBitmap(
+                      this.getContentResolver(), resultIntent.getData());
+            } catch (IOException e) {
+              Log.e(TAG, "Bitmap reading error:" + e);
+            }
+            if (bitmap != null) {
+              facemesh.send(bitmap);
+            }
+          }
+        });
+Intent gallery = new Intent(
+    Intent.ACTION_PICK, MediaStore.Images.Media.INTERNAL_CONTENT_URI);
+imageGetter.launch(gallery);
+```
+
+#### Video Input
+
+```java
+// For video input and result rendering with OpenGL.
+FaceMeshOptions faceMeshOptions =
+    FaceMeshOptions.builder()
+        .setMode(FaceMeshOptions.STREAMING_MODE) // API soon to become
+        .setMaxNumFaces(1)                       // setStaticImageMode(false)
+        .setRunOnGpu(true).build();
+FaceMesh facemesh = new FaceMesh(this, faceMeshOptions);
+facemesh.setErrorListener(
+    (message, e) -> Log.e(TAG, "MediaPipe FaceMesh error:" + message));
+
+// Initializes a new VideoInput instance and connects it to MediaPipe FaceMesh.
+VideoInput videoInput = new VideoInput(this);
+videoInput.setNewFrameListener(
+    textureFrame -> facemesh.send(textureFrame));
+
+// Initializes a new GlSurfaceView with a ResultGlRenderer<FaceMeshResult> instance
+// that provides the interfaces to run user-defined OpenGL rendering code.
+// See mediapipe/examples/android/solutions/facemesh/src/main/java/com/google/mediapipe/examples/facemesh/FaceMeshResultGlRenderer.java
+// as an example.
+SolutionGlSurfaceView<FaceMeshResult> glSurfaceView =
+    new SolutionGlSurfaceView<>(
+        this, facemesh.getGlContext(), facemesh.getGlMajorVersion());
+glSurfaceView.setSolutionResultRenderer(new FaceMeshResultGlRenderer());
+glSurfaceView.setRenderInputImage(true);
+
+facemesh.setResultListener(
+    faceMeshResult -> {
+      NormalizedLandmark noseLandmark =
+          faceMeshResult.multiFaceLandmarks().get(0).getLandmarkList().get(1);
+      Log.i(
+          TAG,
+          String.format(
+              "MediaPipe FaceMesh nose normalized coordinates (value range: [0, 1]): x=%f, y=%f",
+              noseLandmark.getX(), noseLandmark.getY()));
+      // Request GL rendering.
+      glSurfaceView.setRenderData(faceMeshResult);
+      glSurfaceView.requestRender();
+    });
+
+ActivityResultLauncher<Intent> videoGetter =
+    registerForActivityResult(
+        new ActivityResultContracts.StartActivityForResult(),
+        result -> {
+          Intent resultIntent = result.getData();
+          if (resultIntent != null) {
+            if (result.getResultCode() == RESULT_OK) {
+              glSurfaceView.post(
+                  () ->
+                      videoInput.start(
+                          this,
+                          resultIntent.getData(),
+                          facemesh.getGlContext(),
+                          glSurfaceView.getWidth(),
+                          glSurfaceView.getHeight()));
+            }
+          }
+        });
+Intent gallery =
+    new Intent(Intent.ACTION_PICK, MediaStore.Video.Media.INTERNAL_CONTENT_URI);
+videoGetter.launch(gallery);
+```
+
 ## Example Apps
 
 Please first see general instructions for
@@ -219,8 +219,8 @@ Supported configuration options:
 import cv2
 import mediapipe as mp
 mp_drawing = mp.solutions.drawing_utils
+mp_drawing_styles = mp.solutions.drawing_styles
 mp_hands = mp.solutions.hands
-drawing_styles = mp.solutions.drawing_styles
 
 # For static images:
 IMAGE_FILES = []
@@ -249,9 +249,11 @@ with mp_hands.Hands(
         f'{hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].y * image_height})'
       )
       mp_drawing.draw_landmarks(
-          annotated_image, hand_landmarks, mp_hands.HAND_CONNECTIONS,
-          drawing_styles.get_default_hand_landmark_style(),
-          drawing_styles.get_default_hand_connection_style())
+          annotated_image,
+          hand_landmarks,
+          mp_hands.HAND_CONNECTIONS,
+          mp_drawing_styles.get_default_hand_landmarks_style(),
+          mp_drawing_styles.get_default_hand_connections_style())
     cv2.imwrite(
         '/tmp/annotated_image' + str(idx) + '.png', cv2.flip(annotated_image, 1))
 
@@ -281,9 +283,11 @@ with mp_hands.Hands(
     if results.multi_hand_landmarks:
       for hand_landmarks in results.multi_hand_landmarks:
         mp_drawing.draw_landmarks(
-            image, hand_landmarks, mp_hands.HAND_CONNECTIONS,
-            drawing_styles.get_default_hand_landmark_style(),
-            drawing_styles.get_default_hand_connection_style())
+            image,
+            hand_landmarks,
+            mp_hands.HAND_CONNECTIONS,
+            mp_drawing_styles.get_default_hand_landmarks_style(),
+            mp_drawing_styles.get_default_hand_connections_style())
     cv2.imshow('MediaPipe Hands', image)
     if cv2.waitKey(5) & 0xFF == 27:
       break
@@ -364,6 +368,200 @@ camera.start();
 </script>
 ```
 
+### Android Solution API
+
+Please first follow general
+[instructions](../getting_started/android_solutions.md#integrate-mediapipe-android-solutions-api)
+to add MediaPipe Gradle dependencies, then try the Hands solution API in the
+companion
+[example Android Studio project](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/solutions/hands)
+following
+[these instructions](../getting_started/android_solutions.md#build-solution-example-apps-in-android-studio)
+and learn more in the usage example below.
+
+Supported configuration options:
+
+*   [staticImageMode](#static_image_mode)
+*   [maxNumHands](#max_num_hands)
+*   runOnGpu: Run the pipeline and the model inference on GPU or CPU.
+
+#### Camera Input
+
+```java
+// For camera input and result rendering with OpenGL.
+HandsOptions handsOptions =
+    HandsOptions.builder()
+        .setMode(HandsOptions.STREAMING_MODE) // API soon to become
+        .setMaxNumHands(1)                    // setStaticImageMode(false)
+        .setRunOnGpu(true).build();
+Hands hands = new Hands(this, handsOptions);
+hands.setErrorListener(
+    (message, e) -> Log.e(TAG, "MediaPipe Hands error:" + message));
+
+// Initializes a new CameraInput instance and connects it to MediaPipe Hands.
+CameraInput cameraInput = new CameraInput(this);
+cameraInput.setNewFrameListener(
+    textureFrame -> hands.send(textureFrame));
+
+// Initializes a new GlSurfaceView with a ResultGlRenderer<HandsResult> instance
+// that provides the interfaces to run user-defined OpenGL rendering code.
+// See mediapipe/examples/android/solutions/hands/src/main/java/com/google/mediapipe/examples/hands/HandsResultGlRenderer.java
+// as an example.
+SolutionGlSurfaceView<HandsResult> glSurfaceView =
+    new SolutionGlSurfaceView<>(
+        this, hands.getGlContext(), hands.getGlMajorVersion());
+glSurfaceView.setSolutionResultRenderer(new HandsResultGlRenderer());
+glSurfaceView.setRenderInputImage(true);
+
+hands.setResultListener(
+    handsResult -> {
+      NormalizedLandmark wristLandmark = Hands.getHandLandmark(
+          handsResult, 0, HandLandmark.WRIST);
+      Log.i(
+          TAG,
+          String.format(
+              "MediaPipe Hand wrist normalized coordinates (value range: [0, 1]): x=%f, y=%f",
+              wristLandmark.getX(), wristLandmark.getY()));
+      // Request GL rendering.
+      glSurfaceView.setRenderData(handsResult);
+      glSurfaceView.requestRender();
+    });
+
+// The runnable to start camera after the GLSurfaceView is attached.
+glSurfaceView.post(
+    () ->
+        cameraInput.start(
+            this,
+            hands.getGlContext(),
+            CameraInput.CameraFacing.FRONT,
+            glSurfaceView.getWidth(),
+            glSurfaceView.getHeight()));
+```
+
+#### Image Input
+
+```java
+// For reading images from gallery and drawing the output in an ImageView.
+HandsOptions handsOptions =
+    HandsOptions.builder()
+        .setMode(HandsOptions.STATIC_IMAGE_MODE) // API soon to become
+        .setMaxNumHands(1)                       // setStaticImageMode(true)
+        .setRunOnGpu(true).build();
+Hands hands = new Hands(this, handsOptions);
+
+// Connects MediaPipe Hands to the user-defined ImageView instance that allows
+// users to have the custom drawing of the output landmarks on it.
+// See mediapipe/examples/android/solutions/hands/src/main/java/com/google/mediapipe/examples/hands/HandsResultImageView.java
+// as an example.
+HandsResultImageView imageView = new HandsResultImageView(this);
+hands.setResultListener(
+    handsResult -> {
+      int width = handsResult.inputBitmap().getWidth();
+      int height = handsResult.inputBitmap().getHeight();
+      NormalizedLandmark wristLandmark = Hands.getHandLandmark(
+          handsResult, 0, HandLandmark.WRIST);
+      Log.i(
+          TAG,
+          String.format(
+              "MediaPipe Hand wrist coordinates (pixel values): x=%f, y=%f",
+              wristLandmark.getX() * width, wristLandmark.getY() * height));
+      // Request canvas drawing.
+      imageView.setHandsResult(handsResult);
+      runOnUiThread(() -> imageView.update());
+    });
+hands.setErrorListener(
+    (message, e) -> Log.e(TAG, "MediaPipe Hands error:" + message));
+
+// ActivityResultLauncher to get an image from the gallery as Bitmap.
+ActivityResultLauncher<Intent> imageGetter =
+    registerForActivityResult(
+        new ActivityResultContracts.StartActivityForResult(),
+        result -> {
+          Intent resultIntent = result.getData();
+          if (resultIntent != null && result.getResultCode() == RESULT_OK) {
+            Bitmap bitmap = null;
+            try {
+              bitmap =
+                  MediaStore.Images.Media.getBitmap(
+                      this.getContentResolver(), resultIntent.getData());
+            } catch (IOException e) {
+              Log.e(TAG, "Bitmap reading error:" + e);
+            }
+            if (bitmap != null) {
+              hands.send(bitmap);
+            }
+          }
+        });
+Intent gallery = new Intent(
+    Intent.ACTION_PICK, MediaStore.Images.Media.INTERNAL_CONTENT_URI);
+imageGetter.launch(gallery);
+```
+
+#### Video Input
+
+```java
+// For video input and result rendering with OpenGL.
+HandsOptions handsOptions =
+    HandsOptions.builder()
+        .setMode(HandsOptions.STREAMING_MODE) // API soon to become
+        .setMaxNumHands(1)                    // setStaticImageMode(false)
+        .setRunOnGpu(true).build();
+Hands hands = new Hands(this, handsOptions);
+hands.setErrorListener(
+    (message, e) -> Log.e(TAG, "MediaPipe Hands error:" + message));
+
+// Initializes a new VideoInput instance and connects it to MediaPipe Hands.
+VideoInput videoInput = new VideoInput(this);
+videoInput.setNewFrameListener(
+    textureFrame -> hands.send(textureFrame));
+
+// Initializes a new GlSurfaceView with a ResultGlRenderer<HandsResult> instance
+// that provides the interfaces to run user-defined OpenGL rendering code.
+// See mediapipe/examples/android/solutions/hands/src/main/java/com/google/mediapipe/examples/hands/HandsResultGlRenderer.java
+// as an example.
+SolutionGlSurfaceView<HandsResult> glSurfaceView =
+    new SolutionGlSurfaceView<>(
+        this, hands.getGlContext(), hands.getGlMajorVersion());
+glSurfaceView.setSolutionResultRenderer(new HandsResultGlRenderer());
+glSurfaceView.setRenderInputImage(true);
+
+hands.setResultListener(
+    handsResult -> {
+      NormalizedLandmark wristLandmark = Hands.getHandLandmark(
+          handsResult, 0, HandLandmark.WRIST);
+      Log.i(
+          TAG,
+          String.format(
+              "MediaPipe Hand wrist normalized coordinates (value range: [0, 1]): x=%f, y=%f",
+              wristLandmark.getX(), wristLandmark.getY()));
+      // Request GL rendering.
+      glSurfaceView.setRenderData(handsResult);
+      glSurfaceView.requestRender();
+    });
+
+ActivityResultLauncher<Intent> videoGetter =
+    registerForActivityResult(
+        new ActivityResultContracts.StartActivityForResult(),
+        result -> {
+          Intent resultIntent = result.getData();
+          if (resultIntent != null) {
+            if (result.getResultCode() == RESULT_OK) {
+              glSurfaceView.post(
+                  () ->
+                      videoInput.start(
+                          this,
+                          resultIntent.getData(),
+                          hands.getGlContext(),
+                          glSurfaceView.getWidth(),
+                          glSurfaceView.getHeight()));
+            }
+          }
+        });
+Intent gallery =
+    new Intent(Intent.ACTION_PICK, MediaStore.Video.Media.INTERNAL_CONTENT_URI);
+videoGetter.launch(gallery);
+```
+
 ## Example Apps
 
 Please first see general instructions for
@@ -225,6 +225,7 @@ Supported configuration options:
 import cv2
 import mediapipe as mp
 mp_drawing = mp.solutions.drawing_utils
+mp_drawing_styles = mp.solutions.drawing_styles
 mp_holistic = mp.solutions.holistic
 
 # For static images:
@@ -247,13 +248,18 @@ with mp_holistic.Holistic(
     # Draw pose, left and right hands, and face landmarks on the image.
     annotated_image = image.copy()
     mp_drawing.draw_landmarks(
-        annotated_image, results.face_landmarks, mp_holistic.FACE_CONNECTIONS)
+        annotated_image,
+        results.face_landmarks,
+        mp_holistic.FACEMESH_TESSELATION,
+        landmark_drawing_spec=None,
+        connection_drawing_spec=mp_drawing_styles
+        .get_default_face_mesh_tesselation_style())
     mp_drawing.draw_landmarks(
-        annotated_image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS)
-    mp_drawing.draw_landmarks(
-        annotated_image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS)
-    mp_drawing.draw_landmarks(
-        annotated_image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS)
+        annotated_image,
+        results.pose_landmarks,
+        mp_holistic.POSE_CONNECTIONS,
+        landmark_drawing_spec=mp_drawing_styles.
+        get_default_pose_landmarks_style())
     cv2.imwrite('/tmp/annotated_image' + str(idx) + '.png', annotated_image)
     # Plot pose world landmarks.
     mp_drawing.plot_landmarks(
@@ -283,13 +289,18 @@ with mp_holistic.Holistic(
     image.flags.writeable = True
     image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
     mp_drawing.draw_landmarks(
-        image, results.face_landmarks, mp_holistic.FACE_CONNECTIONS)
+        image,
+        results.face_landmarks,
+        mp_holistic.FACEMESH_CONTOURS,
+        landmark_drawing_spec=None,
+        connection_drawing_spec=mp_drawing_styles
+        .get_default_face_mesh_contours_style())
     mp_drawing.draw_landmarks(
-        image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS)
-    mp_drawing.draw_landmarks(
-        image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS)
-    mp_drawing.draw_landmarks(
-        image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS)
+        image,
+        results.pose_landmarks,
+        mp_holistic.POSE_CONNECTIONS,
+        landmark_drawing_spec=mp_drawing_styles
+        .get_default_pose_landmarks_style())
     cv2.imshow('MediaPipe Holistic', image)
     if cv2.waitKey(5) & 0xFF == 27:
       break
@ -30,7 +30,8 @@ overlay of digital content and information on top of the physical world in
|
||||||
augmented reality.
|
augmented reality.
|
||||||
|
|
||||||
MediaPipe Pose is a ML solution for high-fidelity body pose tracking, inferring
|
MediaPipe Pose is a ML solution for high-fidelity body pose tracking, inferring
|
||||||
33 3D landmarks on the whole body from RGB video frames utilizing our
|
33 3D landmarks and background segmentation mask on the whole body from RGB
|
||||||
|
video frames utilizing our
|
||||||
[BlazePose](https://ai.googleblog.com/2020/08/on-device-real-time-body-pose-tracking.html)
|
[BlazePose](https://ai.googleblog.com/2020/08/on-device-real-time-body-pose-tracking.html)
|
||||||
research that also powers the
|
research that also powers the
|
||||||
[ML Kit Pose Detection API](https://developers.google.com/ml-kit/vision/pose-detection).
|
[ML Kit Pose Detection API](https://developers.google.com/ml-kit/vision/pose-detection).
|
||||||
|
@ -49,11 +50,11 @@ The solution utilizes a two-step detector-tracker ML pipeline, proven to be
|
||||||
effective in our [MediaPipe Hands](./hands.md) and
|
effective in our [MediaPipe Hands](./hands.md) and
|
||||||
[MediaPipe Face Mesh](./face_mesh.md) solutions. Using a detector, the pipeline
|
[MediaPipe Face Mesh](./face_mesh.md) solutions. Using a detector, the pipeline
|
||||||
first locates the person/pose region-of-interest (ROI) within the frame. The
|
first locates the person/pose region-of-interest (ROI) within the frame. The
|
||||||
tracker subsequently predicts the pose landmarks within the ROI using the
|
tracker subsequently predicts the pose landmarks and segmentation mask within
|
||||||
ROI-cropped frame as input. Note that for video use cases the detector is
|
the ROI using the ROI-cropped frame as input. Note that for video use cases the
|
||||||
invoked only as needed, i.e., for the very first frame and when the tracker
|
detector is invoked only as needed, i.e., for the very first frame and when the
|
||||||
could no longer identify body pose presence in the previous frame. For other
|
tracker could no longer identify body pose presence in the previous frame. For
|
||||||
frames the pipeline simply derives the ROI from the previous frame’s pose
|
other frames the pipeline simply derives the ROI from the previous frame’s pose
|
||||||
landmarks.
|
landmarks.
|
||||||
|
|
||||||
The pipeline is implemented as a MediaPipe
|
The pipeline is implemented as a MediaPipe
|
||||||
|
@ -129,16 +130,19 @@ hip midpoints.
|
||||||
The landmark model in MediaPipe Pose predicts the location of 33 pose landmarks
|
The landmark model in MediaPipe Pose predicts the location of 33 pose landmarks
|
||||||
(see figure below).
|
(see figure below).
|
||||||
|
|
||||||
Please find more detail in the
|
|
||||||
[BlazePose Google AI Blog](https://ai.googleblog.com/2020/08/on-device-real-time-body-pose-tracking.html),
|
|
||||||
this [paper](https://arxiv.org/abs/2006.10204) and
|
|
||||||
[the model card](./models.md#pose), and the attributes in each landmark
|
|
||||||
[below](#pose_landmarks).
|
|
||||||
|
|
||||||
![pose_tracking_full_body_landmarks.png](../images/mobile/pose_tracking_full_body_landmarks.png) |
|
![pose_tracking_full_body_landmarks.png](../images/mobile/pose_tracking_full_body_landmarks.png) |
|
||||||
:----------------------------------------------------------------------------------------------: |
|
:----------------------------------------------------------------------------------------------: |
|
||||||
*Fig 4. 33 pose landmarks.* |
|
*Fig 4. 33 pose landmarks.* |
|
||||||
|
|
||||||
|
Optionally, MediaPipe Pose can predicts a full-body
|
||||||
|
[segmentation mask](#segmentation_mask) represented as a two-class segmentation
|
||||||
|
(human or background).
|
||||||
|
|
||||||
|
Please find more detail in the
|
||||||
|
[BlazePose Google AI Blog](https://ai.googleblog.com/2020/08/on-device-real-time-body-pose-tracking.html),
|
||||||
|
this [paper](https://arxiv.org/abs/2006.10204),
|
||||||
|
[the model card](./models.md#pose) and the [Output](#Output) section below.
|
||||||
|
|
||||||
## Solution APIs
|
## Solution APIs
|
||||||
|
|
||||||
### Cross-platform Configuration Options
|
### Cross-platform Configuration Options
|
||||||
|
@@ -167,6 +171,18 @@ If set to `true`, the solution filters pose landmarks across different input
images to reduce jitter, but ignored if [static_image_mode](#static_image_mode)
is also set to `true`. Default to `true`.

#### enable_segmentation

If set to `true`, in addition to the pose landmarks the solution also generates
the segmentation mask. Default to `false`.

#### smooth_segmentation

If set to `true`, the solution filters segmentation masks across different input
images to reduce jitter. Ignored if [enable_segmentation](#enable_segmentation)
is `false` or [static_image_mode](#static_image_mode) is `true`. Default to
`true`.
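As an illustrative sketch (not part of the official examples, which appear in the Python Solution API section below), these two options are typically enabled together when constructing the solution object:

```python
import mediapipe as mp

# Minimal sketch: request the segmentation mask and keep it temporally smoothed.
pose = mp.solutions.pose.Pose(
    static_image_mode=False,
    enable_segmentation=True,    # adds results.segmentation_mask to the output
    smooth_segmentation=True,    # ignored when static_image_mode is True
    min_detection_confidence=0.5)
```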

#### min_detection_confidence

Minimum confidence value (`[0.0, 1.0]`) from the person-detection model for the
@@ -211,6 +227,19 @@ the following:
*   `visibility`: Identical to that defined in the corresponding
    [pose_landmarks](#pose_landmarks).

#### segmentation_mask

The output segmentation mask, predicted only when
[enable_segmentation](#enable_segmentation) is set to `true`. The mask has the
same width and height as the input image, and contains values in `[0.0, 1.0]`
where `1.0` and `0.0` indicate high certainty of a "human" and "background"
pixel respectively. Please refer to the platform-specific usage examples below
for usage details.

*Fig 6. Example of MediaPipe Pose segmentation mask.* |
:-----------------------------------------------------------: |
<video autoplay muted loop preload style="height: auto; width: 480px"><source src="../images/mobile/pose_segmentation.mp4" type="video/mp4"></video> |

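As a usage sketch (assuming `image` is the BGR input and `results = pose.process(...)`, as in the Python example below), the mask can be thresholded and used to composite the person over a plain background:

```python
import cv2
import numpy as np

# Sketch: values above 0.1 are treated as "human", everything else as background.
mask = results.segmentation_mask                       # float32, same size as image
condition = np.stack((mask,) * 3, axis=-1) > 0.1
bg_image = np.full(image.shape, 192, dtype=np.uint8)   # solid gray background
composited = np.where(condition, image, bg_image)
cv2.imwrite('/tmp/pose_segmentation.png', composited)
```
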

### Python Solution API

Please first follow general [instructions](../getting_started/python.md) to
@@ -222,6 +251,8 @@ Supported configuration options:

*   [static_image_mode](#static_image_mode)
*   [model_complexity](#model_complexity)
*   [smooth_landmarks](#smooth_landmarks)
*   [enable_segmentation](#enable_segmentation)
*   [smooth_segmentation](#smooth_segmentation)
*   [min_detection_confidence](#min_detection_confidence)
*   [min_tracking_confidence](#min_tracking_confidence)

@@ -229,13 +260,16 @@ Supported configuration options:
import cv2
import mediapipe as mp
import numpy as np
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_pose = mp.solutions.pose

# For static images:
IMAGE_FILES = []
BG_COLOR = (192, 192, 192)  # gray
with mp_pose.Pose(
    static_image_mode=True,
    model_complexity=2,
    enable_segmentation=True,
    min_detection_confidence=0.5) as pose:
  for idx, file in enumerate(IMAGE_FILES):
    image = cv2.imread(file)
@@ -250,10 +284,21 @@ with mp_pose.Pose(
        f'{results.pose_landmarks.landmark[mp_pose.PoseLandmark.NOSE].x * image_width}, '
        f'{results.pose_landmarks.landmark[mp_pose.PoseLandmark.NOSE].y * image_height})'
    )
    annotated_image = image.copy()
    # Draw segmentation on the image.
    # To improve segmentation around boundaries, consider applying a joint
    # bilateral filter to "results.segmentation_mask" with "image".
    condition = np.stack((results.segmentation_mask,) * 3, axis=-1) > 0.1
    bg_image = np.zeros(image.shape, dtype=np.uint8)
    bg_image[:] = BG_COLOR
    annotated_image = np.where(condition, annotated_image, bg_image)
    # Draw pose landmarks on the image.
    mp_drawing.draw_landmarks(
        annotated_image,
        results.pose_landmarks,
        mp_pose.POSE_CONNECTIONS,
        landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style())
    cv2.imwrite('/tmp/annotated_image' + str(idx) + '.png', annotated_image)
    # Plot pose world landmarks.
    mp_drawing.plot_landmarks(
@@ -283,7 +328,10 @@ with mp_pose.Pose(
    image.flags.writeable = True
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    mp_drawing.draw_landmarks(
        image,
        results.pose_landmarks,
        mp_pose.POSE_CONNECTIONS,
        landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style())
    cv2.imshow('MediaPipe Pose', image)
    if cv2.waitKey(5) & 0xFF == 27:
      break
@@ -300,6 +348,8 @@ Supported configuration options:

*   [modelComplexity](#model_complexity)
*   [smoothLandmarks](#smooth_landmarks)
*   [enableSegmentation](#enable_segmentation)
*   [smoothSegmentation](#smooth_segmentation)
*   [minDetectionConfidence](#min_detection_confidence)
*   [minTrackingConfidence](#min_tracking_confidence)

@@ -340,8 +390,20 @@ function onResults(results) {

  canvasCtx.save();
  canvasCtx.clearRect(0, 0, canvasElement.width, canvasElement.height);
  canvasCtx.drawImage(results.segmentationMask, 0, 0,
                      canvasElement.width, canvasElement.height);

  // Only overwrite existing pixels.
  canvasCtx.globalCompositeOperation = 'source-in';
  canvasCtx.fillStyle = '#00FF00';
  canvasCtx.fillRect(0, 0, canvasElement.width, canvasElement.height);

  // Only overwrite missing pixels.
  canvasCtx.globalCompositeOperation = 'destination-atop';
  canvasCtx.drawImage(
      results.image, 0, 0, canvasElement.width, canvasElement.height);

  canvasCtx.globalCompositeOperation = 'source-over';
  drawConnectors(canvasCtx, results.poseLandmarks, POSE_CONNECTIONS,
                 {color: '#00FF00', lineWidth: 4});
  drawLandmarks(canvasCtx, results.poseLandmarks,
@@ -357,6 +419,8 @@ const pose = new Pose({locateFile: (file) => {
pose.setOptions({
  modelComplexity: 1,
  smoothLandmarks: true,
  enableSegmentation: true,
  smoothSegmentation: true,
  minDetectionConfidence: 0.5,
  minTrackingConfidence: 0.5
});

@@ -64,8 +64,9 @@ std::string ToString(GateState state) {
// ALLOW or DISALLOW can also be specified as an input side packet. The rules
// for evaluation remain the same as above.
//
// ALLOW/DISALLOW inputs must be specified either using input stream or via
// input side packet but not both. If neither is specified, the behavior is then
// determined by the "allow" field in the calculator options.
//
// Intended to be used with the default input stream handler, which synchronizes
// all data input streams with the ALLOW/DISALLOW control input stream.
@@ -92,20 +93,22 @@ class GateCalculator : public CalculatorBase {
        cc->InputSidePackets().HasTag(kDisallowTag);
    bool input_via_stream =
        cc->Inputs().HasTag(kAllowTag) || cc->Inputs().HasTag(kDisallowTag);

    // Only one of input_side_packet or input_stream may specify
    // ALLOW/DISALLOW input.
    if (input_via_side_packet) {
      RET_CHECK(!input_via_stream);
      RET_CHECK(cc->InputSidePackets().HasTag(kAllowTag) ^
                cc->InputSidePackets().HasTag(kDisallowTag));

      if (cc->InputSidePackets().HasTag(kAllowTag)) {
        cc->InputSidePackets().Tag(kAllowTag).Set<bool>().Optional();
      } else {
        cc->InputSidePackets().Tag(kDisallowTag).Set<bool>().Optional();
      }
    }
    if (input_via_stream) {
      RET_CHECK(!input_via_side_packet);
      RET_CHECK(cc->Inputs().HasTag(kAllowTag) ^
                cc->Inputs().HasTag(kDisallowTag));

@@ -139,7 +142,6 @@ class GateCalculator : public CalculatorBase {
  }

  absl::Status Open(CalculatorContext* cc) final {
    if (cc->InputSidePackets().HasTag(kAllowTag)) {
      use_side_packet_for_allow_disallow_ = true;
      allow_by_side_packet_decision_ =
@@ -158,12 +160,20 @@ class GateCalculator : public CalculatorBase {
    const auto& options = cc->Options<::mediapipe::GateCalculatorOptions>();
    empty_packets_as_allow_ = options.empty_packets_as_allow();

    if (!use_side_packet_for_allow_disallow_ &&
        !cc->Inputs().HasTag(kAllowTag) && !cc->Inputs().HasTag(kDisallowTag)) {
      use_option_for_allow_disallow_ = true;
      allow_by_option_decision_ = options.allow();
    }

    return absl::OkStatus();
  }

  absl::Status Process(CalculatorContext* cc) final {
    bool allow = empty_packets_as_allow_;
    if (use_option_for_allow_disallow_) {
      allow = allow_by_option_decision_;
    } else if (use_side_packet_for_allow_disallow_) {
      allow = allow_by_side_packet_decision_;
    } else {
      if (cc->Inputs().HasTag(kAllowTag) &&
@@ -217,8 +227,10 @@ class GateCalculator : public CalculatorBase {
  GateState last_gate_state_ = GATE_UNINITIALIZED;
  int num_data_streams_;
  bool empty_packets_as_allow_;
  bool use_side_packet_for_allow_disallow_ = false;
  bool allow_by_side_packet_decision_;
  bool use_option_for_allow_disallow_ = false;
  bool allow_by_option_decision_;
};
REGISTER_CALCULATOR(GateCalculator);

@@ -29,4 +29,8 @@ message GateCalculatorOptions {
  // disallowing the corresponding packets in the data input streams. Setting
  // this option to true inverts that, allowing the data packets to go through.
  optional bool empty_packets_as_allow = 1;

  // Whether to allow or disallow the input streams to pass when no
  // ALLOW/DISALLOW input or side input is specified.
  optional bool allow = 2 [default = false];
}

@@ -113,6 +113,68 @@ TEST_F(GateCalculatorTest, InvalidInputs) {
  )")));
}

TEST_F(GateCalculatorTest, AllowByALLOWOptionToTrue) {
  SetRunner(R"(
        calculator: "GateCalculator"
        input_stream: "test_input"
        output_stream: "test_output"
        options: {
          [mediapipe.GateCalculatorOptions.ext] {
            allow: true
          }
        }
  )");

  constexpr int64 kTimestampValue0 = 42;
  RunTimeStep(kTimestampValue0, true);
  constexpr int64 kTimestampValue1 = 43;
  RunTimeStep(kTimestampValue1, false);

  const std::vector<Packet>& output = runner()->Outputs().Get("", 0).packets;
  ASSERT_EQ(2, output.size());
  EXPECT_EQ(kTimestampValue0, output[0].Timestamp().Value());
  EXPECT_EQ(kTimestampValue1, output[1].Timestamp().Value());
  EXPECT_EQ(true, output[0].Get<bool>());
  EXPECT_EQ(false, output[1].Get<bool>());
}

TEST_F(GateCalculatorTest, DisallowByALLOWOptionSetToFalse) {
  SetRunner(R"(
        calculator: "GateCalculator"
        input_stream: "test_input"
        output_stream: "test_output"
        options: {
          [mediapipe.GateCalculatorOptions.ext] {
            allow: false
          }
        }
  )");

  constexpr int64 kTimestampValue0 = 42;
  RunTimeStep(kTimestampValue0, true);
  constexpr int64 kTimestampValue1 = 43;
  RunTimeStep(kTimestampValue1, false);

  const std::vector<Packet>& output = runner()->Outputs().Get("", 0).packets;
  ASSERT_EQ(0, output.size());
}

TEST_F(GateCalculatorTest, DisallowByALLOWOptionNotSet) {
  SetRunner(R"(
        calculator: "GateCalculator"
        input_stream: "test_input"
        output_stream: "test_output"
  )");

  constexpr int64 kTimestampValue0 = 42;
  RunTimeStep(kTimestampValue0, true);
  constexpr int64 kTimestampValue1 = 43;
  RunTimeStep(kTimestampValue1, false);

  const std::vector<Packet>& output = runner()->Outputs().Get("", 0).packets;
  ASSERT_EQ(0, output.size());
}

TEST_F(GateCalculatorTest, AllowByALLOWSidePacketSetToTrue) {
  SetRunner(R"(
        calculator: "GateCalculator"

@@ -661,3 +661,138 @@ cc_test(
        "//mediapipe/framework/port:parse_text_proto",
    ],
)

cc_library(
    name = "affine_transformation",
    hdrs = ["affine_transformation.h"],
    deps = ["@com_google_absl//absl/status:statusor"],
)

cc_library(
    name = "affine_transformation_runner_gl",
    srcs = ["affine_transformation_runner_gl.cc"],
    hdrs = ["affine_transformation_runner_gl.h"],
    deps = [
        ":affine_transformation",
        "//mediapipe/framework:calculator_framework",
        "//mediapipe/framework/port:ret_check",
        "//mediapipe/gpu:gl_calculator_helper",
        "//mediapipe/gpu:gl_simple_shaders",
        "//mediapipe/gpu:gpu_buffer",
        "//mediapipe/gpu:gpu_origin_cc_proto",
        "//mediapipe/gpu:shader_util",
        "@com_google_absl//absl/memory",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@eigen_archive//:eigen3",
    ],
)

cc_library(
    name = "affine_transformation_runner_opencv",
    srcs = ["affine_transformation_runner_opencv.cc"],
    hdrs = ["affine_transformation_runner_opencv.h"],
    deps = [
        ":affine_transformation",
        "//mediapipe/framework:calculator_framework",
        "//mediapipe/framework/formats:image_frame",
        "//mediapipe/framework/formats:image_frame_opencv",
        "//mediapipe/framework/port:opencv_core",
        "//mediapipe/framework/port:opencv_imgproc",
        "//mediapipe/framework/port:ret_check",
        "@com_google_absl//absl/memory",
        "@com_google_absl//absl/status:statusor",
        "@eigen_archive//:eigen3",
    ],
)

mediapipe_proto_library(
    name = "warp_affine_calculator_proto",
    srcs = ["warp_affine_calculator.proto"],
    visibility = ["//visibility:public"],
    deps = [
        "//mediapipe/framework:calculator_options_proto",
        "//mediapipe/framework:calculator_proto",
        "//mediapipe/gpu:gpu_origin_proto",
    ],
)

cc_library(
    name = "warp_affine_calculator",
    srcs = ["warp_affine_calculator.cc"],
    hdrs = ["warp_affine_calculator.h"],
    visibility = ["//visibility:public"],
    deps = [
        ":affine_transformation",
        ":affine_transformation_runner_opencv",
        ":warp_affine_calculator_cc_proto",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "//mediapipe/framework:calculator_framework",
        "//mediapipe/framework/api2:node",
        "//mediapipe/framework/api2:port",
        "//mediapipe/framework/formats:image",
        "//mediapipe/framework/formats:image_frame",
        "//mediapipe/framework/port:ret_check",
        "//mediapipe/framework/port:status",
    ] + select({
        "//mediapipe/gpu:disable_gpu": [],
        "//conditions:default": [
            "//mediapipe/gpu:gl_calculator_helper",
            "//mediapipe/gpu:gpu_buffer",
            ":affine_transformation_runner_gl",
        ],
    }),
    alwayslink = 1,
)

cc_test(
    name = "warp_affine_calculator_test",
    srcs = ["warp_affine_calculator_test.cc"],
    data = [
        "//mediapipe/calculators/tensor:testdata/image_to_tensor/input.jpg",
        "//mediapipe/calculators/tensor:testdata/image_to_tensor/large_sub_rect.png",
        "//mediapipe/calculators/tensor:testdata/image_to_tensor/large_sub_rect_border_zero.png",
        "//mediapipe/calculators/tensor:testdata/image_to_tensor/large_sub_rect_keep_aspect.png",
        "//mediapipe/calculators/tensor:testdata/image_to_tensor/large_sub_rect_keep_aspect_border_zero.png",
        "//mediapipe/calculators/tensor:testdata/image_to_tensor/large_sub_rect_keep_aspect_with_rotation.png",
        "//mediapipe/calculators/tensor:testdata/image_to_tensor/large_sub_rect_keep_aspect_with_rotation_border_zero.png",
        "//mediapipe/calculators/tensor:testdata/image_to_tensor/medium_sub_rect_keep_aspect.png",
        "//mediapipe/calculators/tensor:testdata/image_to_tensor/medium_sub_rect_keep_aspect_border_zero.png",
        "//mediapipe/calculators/tensor:testdata/image_to_tensor/medium_sub_rect_keep_aspect_with_rotation.png",
        "//mediapipe/calculators/tensor:testdata/image_to_tensor/medium_sub_rect_keep_aspect_with_rotation_border_zero.png",
        "//mediapipe/calculators/tensor:testdata/image_to_tensor/medium_sub_rect_with_rotation.png",
        "//mediapipe/calculators/tensor:testdata/image_to_tensor/medium_sub_rect_with_rotation_border_zero.png",
        "//mediapipe/calculators/tensor:testdata/image_to_tensor/noop_except_range.png",
    ],
    tags = ["desktop_only_test"],
    deps = [
        ":affine_transformation",
        ":warp_affine_calculator",
        "//mediapipe/calculators/image:image_transformation_calculator",
        "//mediapipe/calculators/tensor:image_to_tensor_converter",
        "//mediapipe/calculators/tensor:image_to_tensor_utils",
        "//mediapipe/calculators/util:from_image_calculator",
        "//mediapipe/calculators/util:to_image_calculator",
        "//mediapipe/framework:calculator_framework",
        "//mediapipe/framework:calculator_runner",
        "//mediapipe/framework/deps:file_path",
        "//mediapipe/framework/formats:image",
        "//mediapipe/framework/formats:image_format_cc_proto",
        "//mediapipe/framework/formats:image_frame",
        "//mediapipe/framework/formats:image_frame_opencv",
        "//mediapipe/framework/formats:rect_cc_proto",
        "//mediapipe/framework/formats:tensor",
        "//mediapipe/framework/port:gtest_main",
        "//mediapipe/framework/port:integral_types",
        "//mediapipe/framework/port:opencv_core",
        "//mediapipe/framework/port:opencv_imgcodecs",
        "//mediapipe/framework/port:opencv_imgproc",
        "//mediapipe/framework/port:parse_text_proto",
        "//mediapipe/gpu:gpu_buffer_to_image_frame_calculator",
        "//mediapipe/gpu:image_frame_to_gpu_buffer_calculator",
        "@com_google_absl//absl/flags:flag",
        "@com_google_absl//absl/memory",
        "@com_google_absl//absl/strings",
    ],
)

mediapipe/calculators/image/affine_transformation.h (new file, 55 lines)
@@ -0,0 +1,55 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef MEDIAPIPE_CALCULATORS_IMAGE_AFFINE_TRANSFORMATION_H_
#define MEDIAPIPE_CALCULATORS_IMAGE_AFFINE_TRANSFORMATION_H_

#include <array>

#include "absl/status/statusor.h"

namespace mediapipe {

class AffineTransformation {
 public:
  // Pixel extrapolation method.
  // When converting image to tensor it may happen that tensor needs to read
  // pixels outside image boundaries. Border mode helps to specify how such
  // pixels will be calculated.
  enum class BorderMode { kZero, kReplicate };

  struct Size {
    int width;
    int height;
  };

  template <typename InputT, typename OutputT>
  class Runner {
   public:
    virtual ~Runner() = default;

    // Transforms input into output using @matrix as following:
    //   output(x, y) = input(matrix[0] * x + matrix[1] * y + matrix[3],
    //                        matrix[4] * x + matrix[5] * y + matrix[7])
    // where x and y ranges are defined by @output_size.
    virtual absl::StatusOr<OutputT> Run(const InputT& input,
                                        const std::array<float, 16>& matrix,
                                        const Size& output_size,
                                        BorderMode border_mode) = 0;
  };
};

}  // namespace mediapipe

#endif  // MEDIAPIPE_CALCULATORS_IMAGE_AFFINE_TRANSFORMATION_H_
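To make the coordinate convention of `Runner::Run` above concrete, here is a small illustrative sketch (Python/NumPy, not part of this change) of how the row-major 4x4 `matrix` maps an output pixel to the source sample location:

```python
import numpy as np

def sample_coordinate(x, y, matrix):
  """Source coordinate read for output pixel (x, y), per the Run() comment.

  `matrix` is the flat row-major 4x4 array passed to Run():
    src_x = matrix[0] * x + matrix[1] * y + matrix[3]
    src_y = matrix[4] * x + matrix[5] * y + matrix[7]
  """
  m = np.asarray(matrix, dtype=np.float32).reshape(4, 4)
  return (m[0, 0] * x + m[0, 1] * y + m[0, 3],
          m[1, 0] * x + m[1, 1] * y + m[1, 3])
```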

mediapipe/calculators/image/affine_transformation_runner_gl.cc (new file, 354 lines)
@@ -0,0 +1,354 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "mediapipe/calculators/image/affine_transformation_runner_gl.h"

#include <memory>
#include <optional>

#include "Eigen/Core"
#include "Eigen/Geometry"
#include "Eigen/LU"
#include "absl/memory/memory.h"
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "mediapipe/calculators/image/affine_transformation.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/gpu/gl_calculator_helper.h"
#include "mediapipe/gpu/gl_simple_shaders.h"
#include "mediapipe/gpu/gpu_buffer.h"
#include "mediapipe/gpu/gpu_origin.pb.h"
#include "mediapipe/gpu/shader_util.h"

namespace mediapipe {

namespace {

using mediapipe::GlCalculatorHelper;
using mediapipe::GlhCreateProgram;
using mediapipe::GlTexture;
using mediapipe::GpuBuffer;
using mediapipe::GpuOrigin;

bool IsMatrixVerticalFlipNeeded(GpuOrigin::Mode gpu_origin) {
  switch (gpu_origin) {
    case GpuOrigin::DEFAULT:
    case GpuOrigin::CONVENTIONAL:
#ifdef __APPLE__
      return false;
#else
      return true;
#endif  // __APPLE__
    case GpuOrigin::TOP_LEFT:
      return false;
  }
}

#ifdef __APPLE__
#define GL_CLAMP_TO_BORDER_MAY_BE_SUPPORTED 0
#else
#define GL_CLAMP_TO_BORDER_MAY_BE_SUPPORTED 1
#endif  // __APPLE__

bool IsGlClampToBorderSupported(const mediapipe::GlContext& gl_context) {
  return gl_context.gl_major_version() > 3 ||
         (gl_context.gl_major_version() == 3 &&
          gl_context.gl_minor_version() >= 2);
}

constexpr int kAttribVertex = 0;
constexpr int kAttribTexturePosition = 1;
constexpr int kNumAttributes = 2;

class GlTextureWarpAffineRunner
    : public AffineTransformation::Runner<GpuBuffer,
                                          std::unique_ptr<GpuBuffer>> {
 public:
  GlTextureWarpAffineRunner(std::shared_ptr<GlCalculatorHelper> gl_helper,
                            GpuOrigin::Mode gpu_origin)
      : gl_helper_(gl_helper), gpu_origin_(gpu_origin) {}
  absl::Status Init() {
    return gl_helper_->RunInGlContext([this]() -> absl::Status {
      const GLint attr_location[kNumAttributes] = {
          kAttribVertex,
          kAttribTexturePosition,
      };
      const GLchar* attr_name[kNumAttributes] = {
          "position",
          "texture_coordinate",
      };

      constexpr GLchar kVertShader[] = R"(
        in vec4 position;
        in mediump vec4 texture_coordinate;
        out mediump vec2 sample_coordinate;
        uniform mat4 transform_matrix;

        void main() {
          gl_Position = position;
          vec4 tc = transform_matrix * texture_coordinate;
          sample_coordinate = tc.xy;
        }
      )";

      constexpr GLchar kFragShader[] = R"(
        DEFAULT_PRECISION(mediump, float)
        in vec2 sample_coordinate;
        uniform sampler2D input_texture;

      #ifdef GL_ES
        #define fragColor gl_FragColor
      #else
        out vec4 fragColor;
      #endif  // defined(GL_ES);

        void main() {
          vec4 color = texture2D(input_texture, sample_coordinate);
      #ifdef CUSTOM_ZERO_BORDER_MODE
          float out_of_bounds =
              float(sample_coordinate.x < 0.0 || sample_coordinate.x > 1.0 ||
                    sample_coordinate.y < 0.0 || sample_coordinate.y > 1.0);
          color = mix(color, vec4(0.0, 0.0, 0.0, 0.0), out_of_bounds);
      #endif  // defined(CUSTOM_ZERO_BORDER_MODE)
          fragColor = color;
        }
      )";

      // Create program and set parameters.
      auto create_fn = [&](const std::string& vs,
                           const std::string& fs) -> absl::StatusOr<Program> {
        GLuint program = 0;
        GlhCreateProgram(vs.c_str(), fs.c_str(), kNumAttributes, &attr_name[0],
                         attr_location, &program);

        RET_CHECK(program) << "Problem initializing warp affine program.";
        glUseProgram(program);
        glUniform1i(glGetUniformLocation(program, "input_texture"), 1);
        GLint matrix_id = glGetUniformLocation(program, "transform_matrix");
        return Program{.id = program, .matrix_id = matrix_id};
      };

      const std::string vert_src =
          absl::StrCat(mediapipe::kMediaPipeVertexShaderPreamble, kVertShader);

      const std::string frag_src = absl::StrCat(
          mediapipe::kMediaPipeFragmentShaderPreamble, kFragShader);

      ASSIGN_OR_RETURN(program_, create_fn(vert_src, frag_src));

      auto create_custom_zero_fn = [&]() -> absl::StatusOr<Program> {
        std::string custom_zero_border_mode_def = R"(
          #define CUSTOM_ZERO_BORDER_MODE
        )";
        const std::string frag_custom_zero_src =
            absl::StrCat(mediapipe::kMediaPipeFragmentShaderPreamble,
                         custom_zero_border_mode_def, kFragShader);
        return create_fn(vert_src, frag_custom_zero_src);
      };
#if GL_CLAMP_TO_BORDER_MAY_BE_SUPPORTED
      if (!IsGlClampToBorderSupported(gl_helper_->GetGlContext())) {
        ASSIGN_OR_RETURN(program_custom_zero_, create_custom_zero_fn());
      }
#else
      ASSIGN_OR_RETURN(program_custom_zero_, create_custom_zero_fn());
#endif  // GL_CLAMP_TO_BORDER_MAY_BE_SUPPORTED

      glGenFramebuffers(1, &framebuffer_);

      // vertex storage
      glGenBuffers(2, vbo_);
      glGenVertexArrays(1, &vao_);

      // vbo 0
      glBindBuffer(GL_ARRAY_BUFFER, vbo_[0]);
      glBufferData(GL_ARRAY_BUFFER, sizeof(mediapipe::kBasicSquareVertices),
                   mediapipe::kBasicSquareVertices, GL_STATIC_DRAW);

      // vbo 1
      glBindBuffer(GL_ARRAY_BUFFER, vbo_[1]);
      glBufferData(GL_ARRAY_BUFFER, sizeof(mediapipe::kBasicTextureVertices),
                   mediapipe::kBasicTextureVertices, GL_STATIC_DRAW);

      glBindBuffer(GL_ARRAY_BUFFER, 0);

      return absl::OkStatus();
    });
  }

  absl::StatusOr<std::unique_ptr<GpuBuffer>> Run(
      const GpuBuffer& input, const std::array<float, 16>& matrix,
      const AffineTransformation::Size& size,
      AffineTransformation::BorderMode border_mode) override {
    std::unique_ptr<GpuBuffer> gpu_buffer;
    MP_RETURN_IF_ERROR(
        gl_helper_->RunInGlContext([this, &input, &matrix, &size, &border_mode,
                                    &gpu_buffer]() -> absl::Status {
          auto input_texture = gl_helper_->CreateSourceTexture(input);
          auto output_texture = gl_helper_->CreateDestinationTexture(
              size.width, size.height, input.format());

          MP_RETURN_IF_ERROR(
              RunInternal(input_texture, matrix, border_mode, &output_texture));
          gpu_buffer = output_texture.GetFrame<GpuBuffer>();
          return absl::OkStatus();
        }));

    return gpu_buffer;
  }

  absl::Status RunInternal(const GlTexture& texture,
                           const std::array<float, 16>& matrix,
                           AffineTransformation::BorderMode border_mode,
                           GlTexture* output) {
    glDisable(GL_DEPTH_TEST);
    glBindFramebuffer(GL_FRAMEBUFFER, framebuffer_);
    glViewport(0, 0, output->width(), output->height());

    glActiveTexture(GL_TEXTURE0);
    glBindTexture(GL_TEXTURE_2D, output->name());
    glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
                           output->name(), 0);

    glActiveTexture(GL_TEXTURE1);
    glBindTexture(texture.target(), texture.name());

    // a) Filtering.
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);

    // b) Clamping.
    std::optional<Program> program = program_;
    switch (border_mode) {
      case AffineTransformation::BorderMode::kReplicate: {
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
        break;
      }
      case AffineTransformation::BorderMode::kZero: {
#if GL_CLAMP_TO_BORDER_MAY_BE_SUPPORTED
        if (program_custom_zero_) {
          program = program_custom_zero_;
        } else {
          glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_BORDER);
          glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_BORDER);
          glTexParameterfv(GL_TEXTURE_2D, GL_TEXTURE_BORDER_COLOR,
                           std::array<float, 4>{0.0f, 0.0f, 0.0f, 0.0f}.data());
        }
#else
        RET_CHECK(program_custom_zero_)
            << "Program must have been initialized.";
        program = program_custom_zero_;
#endif  // GL_CLAMP_TO_BORDER_MAY_BE_SUPPORTED
        break;
      }
    }
    glUseProgram(program->id);

    Eigen::Matrix<float, 4, 4, Eigen::RowMajor> eigen_mat(matrix.data());
    if (IsMatrixVerticalFlipNeeded(gpu_origin_)) {
      // @matrix describes affine transformation in terms of TOP LEFT origin, so
      // in some cases/on some platforms an extra flipping should be done before
      // and after.
      const Eigen::Matrix<float, 4, 4, Eigen::RowMajor> flip_y(
          {{1.0f, 0.0f, 0.0f, 0.0f},
           {0.0f, -1.0f, 0.0f, 1.0f},
           {0.0f, 0.0f, 1.0f, 0.0f},
           {0.0f, 0.0f, 0.0f, 1.0f}});
      eigen_mat = flip_y * eigen_mat * flip_y;
    }

    // If GL context is ES2, then GL_FALSE must be used for 'transpose'
    // GLboolean in glUniformMatrix4fv, or else INVALID_VALUE error is reported.
    // Hence, transposing the matrix and always passing transposed.
    eigen_mat.transposeInPlace();
    glUniformMatrix4fv(program->matrix_id, 1, GL_FALSE, eigen_mat.data());

    // vao
    glBindVertexArray(vao_);

    // vbo 0
    glBindBuffer(GL_ARRAY_BUFFER, vbo_[0]);
    glEnableVertexAttribArray(kAttribVertex);
    glVertexAttribPointer(kAttribVertex, 2, GL_FLOAT, 0, 0, nullptr);

    // vbo 1
    glBindBuffer(GL_ARRAY_BUFFER, vbo_[1]);
    glEnableVertexAttribArray(kAttribTexturePosition);
    glVertexAttribPointer(kAttribTexturePosition, 2, GL_FLOAT, 0, 0, nullptr);

    // draw
    glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);

    // Resetting to MediaPipe texture param defaults.
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);

    glDisableVertexAttribArray(kAttribVertex);
    glDisableVertexAttribArray(kAttribTexturePosition);
    glBindBuffer(GL_ARRAY_BUFFER, 0);
    glBindVertexArray(0);

    glActiveTexture(GL_TEXTURE1);
    glBindTexture(GL_TEXTURE_2D, 0);
    glActiveTexture(GL_TEXTURE0);
    glBindTexture(GL_TEXTURE_2D, 0);

    return absl::OkStatus();
  }

  ~GlTextureWarpAffineRunner() override {
    gl_helper_->RunInGlContext([this]() {
      // Release OpenGL resources.
      if (framebuffer_ != 0) glDeleteFramebuffers(1, &framebuffer_);
      if (program_.id != 0) glDeleteProgram(program_.id);
      if (program_custom_zero_ && program_custom_zero_->id != 0) {
        glDeleteProgram(program_custom_zero_->id);
      }
      if (vao_ != 0) glDeleteVertexArrays(1, &vao_);
      glDeleteBuffers(2, vbo_);
    });
  }

 private:
  struct Program {
    GLuint id;
    GLint matrix_id;
  };
  std::shared_ptr<GlCalculatorHelper> gl_helper_;
  GpuOrigin::Mode gpu_origin_;
  GLuint vao_ = 0;
  GLuint vbo_[2] = {0, 0};
  Program program_;
  std::optional<Program> program_custom_zero_;
  GLuint framebuffer_ = 0;
};

#undef GL_CLAMP_TO_BORDER_MAY_BE_SUPPORTED

}  // namespace

absl::StatusOr<std::unique_ptr<
    AffineTransformation::Runner<GpuBuffer, std::unique_ptr<GpuBuffer>>>>
CreateAffineTransformationGlRunner(
    std::shared_ptr<GlCalculatorHelper> gl_helper, GpuOrigin::Mode gpu_origin) {
  auto runner =
      absl::make_unique<GlTextureWarpAffineRunner>(gl_helper, gpu_origin);
  MP_RETURN_IF_ERROR(runner->Init());
  return runner;
}

}  // namespace mediapipe
mediapipe/calculators/image/affine_transformation_runner_gl.h (new file, 36 lines)
@@ -0,0 +1,36 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef MEDIAPIPE_CALCULATORS_IMAGE_AFFINE_TRANSFORMATION_RUNNER_GL_H_
#define MEDIAPIPE_CALCULATORS_IMAGE_AFFINE_TRANSFORMATION_RUNNER_GL_H_

#include <memory>

#include "absl/status/statusor.h"
#include "mediapipe/calculators/image/affine_transformation.h"
#include "mediapipe/gpu/gl_calculator_helper.h"
#include "mediapipe/gpu/gpu_buffer.h"
#include "mediapipe/gpu/gpu_origin.pb.h"

namespace mediapipe {

absl::StatusOr<std::unique_ptr<AffineTransformation::Runner<
    mediapipe::GpuBuffer, std::unique_ptr<mediapipe::GpuBuffer>>>>
CreateAffineTransformationGlRunner(
    std::shared_ptr<mediapipe::GlCalculatorHelper> gl_helper,
    mediapipe::GpuOrigin::Mode gpu_origin);

}  // namespace mediapipe

#endif  // MEDIAPIPE_CALCULATORS_IMAGE_AFFINE_TRANSFORMATION_RUNNER_GL_H_
mediapipe/calculators/image/affine_transformation_runner_opencv.cc (new file, 160 lines)
@@ -0,0 +1,160 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "mediapipe/calculators/image/affine_transformation_runner_opencv.h"

#include <memory>

#include "absl/memory/memory.h"
#include "absl/status/statusor.h"
#include "mediapipe/calculators/image/affine_transformation.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/image_frame_opencv.h"
#include "mediapipe/framework/port/opencv_core_inc.h"
#include "mediapipe/framework/port/opencv_imgproc_inc.h"
#include "mediapipe/framework/port/ret_check.h"

namespace mediapipe {

namespace {

cv::BorderTypes GetBorderModeForOpenCv(
    AffineTransformation::BorderMode border_mode) {
  switch (border_mode) {
    case AffineTransformation::BorderMode::kZero:
      return cv::BORDER_CONSTANT;
    case AffineTransformation::BorderMode::kReplicate:
      return cv::BORDER_REPLICATE;
  }
}

class OpenCvRunner
    : public AffineTransformation::Runner<ImageFrame, ImageFrame> {
 public:
  absl::StatusOr<ImageFrame> Run(
      const ImageFrame& input, const std::array<float, 16>& matrix,
      const AffineTransformation::Size& size,
      AffineTransformation::BorderMode border_mode) override {
    // OpenCV warpAffine works in absolute coordinates, so the transform (which
    // accepts and produces relative coordinates) should be adjusted to first
    // normalize coordinates and then scale them.
    // clang-format off
    cv::Matx44f normalize_dst_coordinate({
        1.0f / size.width, 0.0f,               0.0f, 0.0f,
        0.0f,              1.0f / size.height, 0.0f, 0.0f,
        0.0f,              0.0f,               1.0f, 0.0f,
        0.0f,              0.0f,               0.0f, 1.0f});
    cv::Matx44f scale_src_coordinate({
        1.0f * input.Width(), 0.0f,                  0.0f, 0.0f,
        0.0f,                 1.0f * input.Height(), 0.0f, 0.0f,
        0.0f,                 0.0f,                  1.0f, 0.0f,
        0.0f,                 0.0f,                  0.0f, 1.0f});
    // clang-format on
    cv::Matx44f adjust_dst_coordinate;
    cv::Matx44f adjust_src_coordinate;
    // TODO: update to always use accurate implementation.
    constexpr bool kOpenCvCompatibility = true;
    if (kOpenCvCompatibility) {
      adjust_dst_coordinate = normalize_dst_coordinate;
      adjust_src_coordinate = scale_src_coordinate;
    } else {
      // To do an accurate affine image transformation and make "on-cpu" and
      // "on-gpu" calculations aligned - extra offset is required to select
      // correct pixels.
      //
      // Each destination pixel corresponds to some pixels region from source
      // image. (In case of downscaling there can be more than one pixel.) The
      // offset for x and y is calculated in the way, so pixel in the middle of
      // the region is selected.
      //
      // For simplicity sake, let's consider downscaling from 100x50 to 10x10
      // without a rotation:
      // 1. Each destination pixel corresponds to 10x5 region
      //    X range: [0, .. , 9]
      //    Y range: [0, .. , 4]
      // 2. Considering we have __discrete__ pixels, the center of the region is
      //    between (4, 2) and (5, 2) pixels, let's assume it's a "pixel"
      //    (4.5, 2).
      // 3. When using the above as an offset for every pixel select while
      //    downscaling, resulting pixels are:
      //    (4.5, 2), (14.5, 2), .. , (94.5, 2)
      //    (4.5, 7), (14.5, 7), .. , (94.5, 7)
      //    ..
      //    (4.5, 47), (14.5, 47), .., (94.5, 47)
      //    instead of:
      //    (0, 0), (10, 0), .. , (90, 0)
      //    (0, 5), (10, 7), .. , (90, 5)
      //    ..
      //    (0, 45), (10, 45), .., (90, 45)
      //    The latter looks shifted.
      //
      // Offsets are needed, so that __discrete__ pixel at (0, 0) corresponds to
      // the same pixel as would __non discrete__ pixel at (0.5, 0.5). Hence,
      // transformation matrix should shift coordinates by (0.5, 0.5) as the
      // very first step.
      //
      // Due to the above shift, transformed coordinates would be valid for
      // float coordinates where pixel (0, 0) spans [0.0, 1.0) x [0.0, 1.0).
      // To make it valid for __discrete__ pixels, transformation matrix should
      // shift coordinate by (-0.5f, -0.5f) as the very last step. (E.g. if we
      // get (0.5f, 0.5f), then it's (0, 0) __discrete__ pixel.)
      // clang-format off
      cv::Matx44f shift_dst({1.0f, 0.0f, 0.0f, 0.5f,
                             0.0f, 1.0f, 0.0f, 0.5f,
                             0.0f, 0.0f, 1.0f, 0.0f,
                             0.0f, 0.0f, 0.0f, 1.0f});
      cv::Matx44f shift_src({1.0f, 0.0f, 0.0f, -0.5f,
                             0.0f, 1.0f, 0.0f, -0.5f,
                             0.0f, 0.0f, 1.0f, 0.0f,
                             0.0f, 0.0f, 0.0f, 1.0f});
      // clang-format on
      adjust_dst_coordinate = normalize_dst_coordinate * shift_dst;
      adjust_src_coordinate = shift_src * scale_src_coordinate;
    }

    cv::Matx44f transform(matrix.data());
    cv::Matx44f transform_absolute =
        adjust_src_coordinate * transform * adjust_dst_coordinate;

    cv::Mat in_mat = formats::MatView(&input);

    cv::Mat cv_affine_transform(2, 3, CV_32F);
    cv_affine_transform.at<float>(0, 0) = transform_absolute.val[0];
    cv_affine_transform.at<float>(0, 1) = transform_absolute.val[1];
    cv_affine_transform.at<float>(0, 2) = transform_absolute.val[3];
    cv_affine_transform.at<float>(1, 0) = transform_absolute.val[4];
    cv_affine_transform.at<float>(1, 1) = transform_absolute.val[5];
    cv_affine_transform.at<float>(1, 2) = transform_absolute.val[7];

    ImageFrame out_image(input.Format(), size.width, size.height);
    cv::Mat out_mat = formats::MatView(&out_image);

    cv::warpAffine(in_mat, out_mat, cv_affine_transform,
                   cv::Size(out_mat.cols, out_mat.rows),
                   /*flags=*/cv::INTER_LINEAR | cv::WARP_INVERSE_MAP,
                   GetBorderModeForOpenCv(border_mode));

    return out_image;
  }
};

}  // namespace

absl::StatusOr<
    std::unique_ptr<AffineTransformation::Runner<ImageFrame, ImageFrame>>>
CreateAffineTransformationOpenCvRunner() {
  return absl::make_unique<OpenCvRunner>();
}

}  // namespace mediapipe
mediapipe/calculators/image/affine_transformation_runner_opencv.h (new file, 32 lines)
@@ -0,0 +1,32 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef MEDIAPIPE_CALCULATORS_IMAGE_AFFINE_TRANSFORMATION_RUNNER_OPENCV_H_
#define MEDIAPIPE_CALCULATORS_IMAGE_AFFINE_TRANSFORMATION_RUNNER_OPENCV_H_

#include <memory>

#include "absl/status/statusor.h"
#include "mediapipe/calculators/image/affine_transformation.h"
#include "mediapipe/framework/formats/image_frame.h"

namespace mediapipe {

absl::StatusOr<
    std::unique_ptr<AffineTransformation::Runner<ImageFrame, ImageFrame>>>
CreateAffineTransformationOpenCvRunner();

}  // namespace mediapipe

#endif  // MEDIAPIPE_CALCULATORS_IMAGE_AFFINE_TRANSFORMATION_RUNNER_OPENCV_H_
@@ -262,6 +262,7 @@ absl::Status ScaleImageCalculator::InitializeFrameInfo(CalculatorContext* cc) {
      scale_image::FindOutputDimensions(crop_width_, crop_height_,          //
                                        options_.target_width(),            //
                                        options_.target_height(),           //
                                        options_.target_max_area(),         //
                                        options_.preserve_aspect_ratio(),   //
                                        options_.scale_to_multiple_of(),    //
                                        &output_width_, &output_height_));

@@ -28,6 +28,11 @@ message ScaleImageCalculatorOptions {
  optional int32 target_width = 1;
  optional int32 target_height = 2;

  // If set, then automatically calculates a target_width and target_height that
  // has an area below the target max area. Aspect ratio preservation cannot be
  // disabled.
  optional int32 target_max_area = 15;

  // If true, the image is scaled up or down proportionally so that it
  // fits inside the box represented by target_width and target_height.
  // Otherwise it is scaled to fit target_width and target_height

@@ -92,12 +92,21 @@ absl::Status FindOutputDimensions(int input_width,  //
                                  int input_height,            //
                                  int target_width,            //
                                  int target_height,           //
                                  int target_max_area,         //
                                  bool preserve_aspect_ratio,  //
                                  int scale_to_multiple_of,    //
                                  int* output_width, int* output_height) {
  CHECK(output_width);
  CHECK(output_height);

  if (target_max_area > 0 && input_width * input_height > target_max_area) {
    preserve_aspect_ratio = true;
    target_height = static_cast<int>(sqrt(static_cast<double>(target_max_area) /
                                          (static_cast<double>(input_width) /
                                           static_cast<double>(input_height))));
    target_width = -1;  // Resize width to preserve aspect ratio.
  }

  if (preserve_aspect_ratio) {
    RET_CHECK(scale_to_multiple_of == 2)
        << "FindOutputDimensions always outputs width and height that are "
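As a worked illustration of the `target_max_area` formula above (a sketch, not part of the change): a 4000x3000 input capped at 1,000,000 pixels yields a target height of sqrt(1000000 / (4000/3000)) ≈ 866, with the width left as -1 so the existing aspect-ratio-preserving path derives it (≈ 1154 at 4:3, for an area just under the cap):

```python
import math

# Sketch of the capping logic above: choose a height so that, at the input
# aspect ratio, the resulting area stays at or below target_max_area; the
# width is then left at -1 so the aspect-ratio-preserving path derives it.
def capped_target_height(input_width, input_height, target_max_area):
  aspect_ratio = input_width / input_height
  return int(math.sqrt(target_max_area / aspect_ratio))

print(capped_target_height(4000, 3000, 1_000_000))  # 866 (width ~1154 at 4:3)
```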

@@ -164,5 +173,17 @@ absl::Status FindOutputDimensions(int input_width,  //
        << "Unable to set output dimensions based on target dimensions.";
}

absl::Status FindOutputDimensions(int input_width,             //
                                  int input_height,            //
                                  int target_width,            //
                                  int target_height,           //
                                  bool preserve_aspect_ratio,  //
                                  int scale_to_multiple_of,    //
                                  int* output_width, int* output_height) {
  return FindOutputDimensions(
      input_width, input_height, target_width, target_height, -1,
      preserve_aspect_ratio, scale_to_multiple_of, output_width, output_height);
}

}  // namespace scale_image
}  // namespace mediapipe

@@ -34,15 +34,25 @@ absl::Status FindCropDimensions(int input_width, int input_height,  //
                                 int* crop_width, int* crop_height,  //
                                 int* col_start, int* row_start);
 
-// Given an input width and height, a target width and height, whether to
-// preserve the aspect ratio, and whether to round-down to the multiple of a
-// given number nearest to the targets, determine the output width and height.
-// If target_width or target_height is non-positive, then they will be set to
-// the input_width and input_height respectively. If scale_to_multiple_of is
-// less than 1, it will be treated like 1. The output_width and
-// output_height will be reduced as necessary to preserve_aspect_ratio if the
-// option is specified. If preserving the aspect ratio is desired, you must set
-// scale_to_multiple_of to 2.
+// Given an input width and height, a target width and height or max area,
+// whether to preserve the aspect ratio, and whether to round-down to the
+// multiple of a given number nearest to the targets, determine the output width
+// and height. If target_width or target_height is non-positive, then they will
+// be set to the input_width and input_height respectively. If target_max_area
+// is non-positive, then it will be ignored. If scale_to_multiple_of is less
+// than 1, it will be treated like 1. The output_width and output_height will be
+// reduced as necessary to preserve_aspect_ratio if the option is specified. If
+// preserving the aspect ratio is desired, you must set scale_to_multiple_of
+// to 2.
+absl::Status FindOutputDimensions(int input_width, int input_height,  //
+                                  int target_width,
+                                  int target_height,   //
+                                  int target_max_area,  //
+                                  bool preserve_aspect_ratio,  //
+                                  int scale_to_multiple_of,    //
+                                  int* output_width, int* output_height);
+
+// Backwards compatible helper.
 absl::Status FindOutputDimensions(int input_width, int input_height,  //
                                   int target_width,
                                   int target_height,  //
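A hedged usage sketch of the updated API follows. The include path, the 1920x1080 input, and the 500,000 area cap are assumptions for illustration; the legacy overload behaves as if target_max_area were -1 (no cap).

#include <iostream>

#include "mediapipe/calculators/image/scale_image_utils.h"  // assumed path

int main() {
  int out_w = 0, out_h = 0;
  // Cap a 1080p frame at roughly half a megapixel; the aspect ratio is
  // preserved by the cap even though preserve_aspect_ratio is false.
  auto status = mediapipe::scale_image::FindOutputDimensions(
      1920, 1080, /*target_width=*/-1, /*target_height=*/-1,
      /*target_max_area=*/500000, /*preserve_aspect_ratio=*/false,
      /*scale_to_multiple_of=*/2, &out_w, &out_h);
  if (status.ok()) std::cout << out_w << "x" << out_h << "\n";

  // Legacy signature: no area cap, unchanged behavior for existing callers.
  status = mediapipe::scale_image::FindOutputDimensions(
      1920, 1080, 960, -1, /*preserve_aspect_ratio=*/true,
      /*scale_to_multiple_of=*/2, &out_w, &out_h);
  if (status.ok()) std::cout << out_w << "x" << out_h << "\n";  // 960x540
}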
@@ -79,49 +79,49 @@ TEST(ScaleImageUtilsTest, FindOutputDimensionsPreserveRatio) {
   int output_width;
   int output_height;
   // Not scale.
-  MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, -1, true, 2, &output_width,
-                                    &output_height));
+  MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, -1, -1, true, 2,
+                                    &output_width, &output_height));
   EXPECT_EQ(200, output_width);
   EXPECT_EQ(100, output_height);
   // Not scale with odd input size.
-  MP_ASSERT_OK(FindOutputDimensions(201, 101, -1, -1, false, 1, &output_width,
-                                    &output_height));
+  MP_ASSERT_OK(FindOutputDimensions(201, 101, -1, -1, -1, false, 1,
+                                    &output_width, &output_height));
   EXPECT_EQ(201, output_width);
   EXPECT_EQ(101, output_height);
   // Scale down by 1/2.
-  MP_ASSERT_OK(FindOutputDimensions(200, 100, 100, -1, true, 2, &output_width,
-                                    &output_height));
+  MP_ASSERT_OK(FindOutputDimensions(200, 100, 100, -1, -1, true, 2,
+                                    &output_width, &output_height));
   EXPECT_EQ(100, output_width);
   EXPECT_EQ(50, output_height);
   // Scale up, doubling dimensions.
-  MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, 200, true, 2, &output_width,
-                                    &output_height));
+  MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, 200, -1, true, 2,
+                                    &output_width, &output_height));
   EXPECT_EQ(400, output_width);
   EXPECT_EQ(200, output_height);
   // Fits a 2:1 image into a 150 x 150 box. Output dimensions are always
   // divisible by 2.
-  MP_ASSERT_OK(FindOutputDimensions(200, 100, 150, 150, true, 2, &output_width,
-                                    &output_height));
+  MP_ASSERT_OK(FindOutputDimensions(200, 100, 150, 150, -1, true, 2,
+                                    &output_width, &output_height));
   EXPECT_EQ(150, output_width);
   EXPECT_EQ(74, output_height);
   // Fits a 2:1 image into a 400 x 50 box.
-  MP_ASSERT_OK(FindOutputDimensions(200, 100, 400, 50, true, 2, &output_width,
-                                    &output_height));
+  MP_ASSERT_OK(FindOutputDimensions(200, 100, 400, 50, -1, true, 2,
+                                    &output_width, &output_height));
   EXPECT_EQ(100, output_width);
   EXPECT_EQ(50, output_height);
   // Scale to multiple number with odd target size.
-  MP_ASSERT_OK(FindOutputDimensions(200, 100, 101, -1, true, 2, &output_width,
-                                    &output_height));
+  MP_ASSERT_OK(FindOutputDimensions(200, 100, 101, -1, -1, true, 2,
+                                    &output_width, &output_height));
   EXPECT_EQ(100, output_width);
   EXPECT_EQ(50, output_height);
   // Scale to multiple number with odd target size.
-  MP_ASSERT_OK(FindOutputDimensions(200, 100, 101, -1, true, 2, &output_width,
-                                    &output_height));
+  MP_ASSERT_OK(FindOutputDimensions(200, 100, 101, -1, -1, true, 2,
+                                    &output_width, &output_height));
   EXPECT_EQ(100, output_width);
   EXPECT_EQ(50, output_height);
   // Scale to odd size.
-  MP_ASSERT_OK(FindOutputDimensions(200, 100, 151, 101, false, 1, &output_width,
-                                    &output_height));
+  MP_ASSERT_OK(FindOutputDimensions(200, 100, 151, 101, -1, false, 1,
+                                    &output_width, &output_height));
   EXPECT_EQ(151, output_width);
   EXPECT_EQ(101, output_height);
 }
@@ -131,18 +131,18 @@ TEST(ScaleImageUtilsTest, FindOutputDimensionsNoAspectRatio) {
   int output_width;
   int output_height;
   // Scale width only.
-  MP_ASSERT_OK(FindOutputDimensions(200, 100, 100, -1, false, 2, &output_width,
-                                    &output_height));
+  MP_ASSERT_OK(FindOutputDimensions(200, 100, 100, -1, -1, false, 2,
+                                    &output_width, &output_height));
   EXPECT_EQ(100, output_width);
   EXPECT_EQ(100, output_height);
   // Scale height only.
-  MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, 200, false, 2, &output_width,
-                                    &output_height));
+  MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, 200, -1, false, 2,
+                                    &output_width, &output_height));
   EXPECT_EQ(200, output_width);
   EXPECT_EQ(200, output_height);
   // Scale both dimensions.
-  MP_ASSERT_OK(FindOutputDimensions(200, 100, 150, 200, false, 2, &output_width,
-                                    &output_height));
+  MP_ASSERT_OK(FindOutputDimensions(200, 100, 150, 200, -1, false, 2,
+                                    &output_width, &output_height));
   EXPECT_EQ(150, output_width);
   EXPECT_EQ(200, output_height);
 }
@@ -152,41 +152,78 @@ TEST(ScaleImageUtilsTest, FindOutputDimensionsDownScaleToMultipleOf) {
   int output_width;
   int output_height;
   // Set no targets, downscale to a multiple of 8.
-  MP_ASSERT_OK(FindOutputDimensions(100, 100, -1, -1, false, 8, &output_width,
-                                    &output_height));
+  MP_ASSERT_OK(FindOutputDimensions(100, 100, -1, -1, -1, false, 8,
+                                    &output_width, &output_height));
   EXPECT_EQ(96, output_width);
   EXPECT_EQ(96, output_height);
   // Set width target, downscale to a multiple of 8.
-  MP_ASSERT_OK(FindOutputDimensions(200, 100, 100, -1, false, 8, &output_width,
-                                    &output_height));
+  MP_ASSERT_OK(FindOutputDimensions(200, 100, 100, -1, -1, false, 8,
+                                    &output_width, &output_height));
   EXPECT_EQ(96, output_width);
   EXPECT_EQ(96, output_height);
   // Set height target, downscale to a multiple of 8.
-  MP_ASSERT_OK(FindOutputDimensions(201, 101, -1, 201, false, 8, &output_width,
-                                    &output_height));
+  MP_ASSERT_OK(FindOutputDimensions(201, 101, -1, 201, -1, false, 8,
+                                    &output_width, &output_height));
   EXPECT_EQ(200, output_width);
   EXPECT_EQ(200, output_height);
   // Set both targets, downscale to a multiple of 8.
-  MP_ASSERT_OK(FindOutputDimensions(200, 100, 150, 200, false, 8, &output_width,
-                                    &output_height));
+  MP_ASSERT_OK(FindOutputDimensions(200, 100, 150, 200, -1, false, 8,
+                                    &output_width, &output_height));
   EXPECT_EQ(144, output_width);
   EXPECT_EQ(200, output_height);
   // Doesn't throw error if keep aspect is true and downscale multiple is 2.
-  MP_ASSERT_OK(FindOutputDimensions(200, 100, 400, 200, true, 2, &output_width,
-                                    &output_height));
+  MP_ASSERT_OK(FindOutputDimensions(200, 100, 400, 200, -1, true, 2,
+                                    &output_width, &output_height));
   EXPECT_EQ(400, output_width);
   EXPECT_EQ(200, output_height);
   // Throws error if keep aspect is true, but downscale multiple is not 2.
-  ASSERT_THAT(FindOutputDimensions(200, 100, 400, 200, true, 4, &output_width,
-                                   &output_height),
+  ASSERT_THAT(FindOutputDimensions(200, 100, 400, 200, -1, true, 4,
+                                   &output_width, &output_height),
               testing::Not(testing::status::IsOk()));
   // Downscaling to multiple ignored if multiple is less than 2.
-  MP_ASSERT_OK(FindOutputDimensions(200, 100, 401, 201, false, 1, &output_width,
-                                    &output_height));
+  MP_ASSERT_OK(FindOutputDimensions(200, 100, 401, 201, -1, false, 1,
+                                    &output_width, &output_height));
   EXPECT_EQ(401, output_width);
   EXPECT_EQ(201, output_height);
 }
 
+// Tests scaling with a maximum target area.
+TEST(ScaleImageUtilsTest, FindOutputDimensionsMaxArea) {
+  int output_width;
+  int output_height;
+  // Smaller area.
+  MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, -1, 9000, false, 2,
+                                    &output_width, &output_height));
+  EXPECT_NEAR(
+      200.0 / 100.0,
+      static_cast<double>(output_width) / static_cast<double>(output_height),
+      0.1f);
+  EXPECT_LE(output_width * output_height, 9000);
+  // Close to original area.
+  MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, -1, 19999, false, 2,
+                                    &output_width, &output_height));
+  EXPECT_NEAR(
+      200.0 / 100.0,
+      static_cast<double>(output_width) / static_cast<double>(output_height),
+      0.1f);
+  EXPECT_LE(output_width * output_height, 19999);
+  // Don't scale with larger area.
+  MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, -1, 20001, false, 2,
+                                    &output_width, &output_height));
+  EXPECT_EQ(200, output_width);
+  EXPECT_EQ(100, output_height);
+  // Don't scale with equal area.
+  MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, -1, 20000, false, 2,
+                                    &output_width, &output_height));
+  EXPECT_EQ(200, output_width);
+  EXPECT_EQ(100, output_height);
+  // Don't scale at all.
+  MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, -1, -1, false, 2,
+                                    &output_width, &output_height));
+  EXPECT_EQ(200, output_width);
+  EXPECT_EQ(100, output_height);
+}
+
 }  // namespace
 }  // namespace scale_image
 }  // namespace mediapipe
mediapipe/calculators/image/warp_affine_calculator.cc (new file, 211 lines)
@@ -0,0 +1,211 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "mediapipe/calculators/image/warp_affine_calculator.h"

#include <array>
#include <cstdint>
#include <memory>

#include "mediapipe/calculators/image/affine_transformation.h"
#if !MEDIAPIPE_DISABLE_GPU
#include "mediapipe/calculators/image/affine_transformation_runner_gl.h"
#endif  // !MEDIAPIPE_DISABLE_GPU
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "mediapipe/calculators/image/affine_transformation_runner_opencv.h"
#include "mediapipe/calculators/image/warp_affine_calculator.pb.h"
#include "mediapipe/framework/api2/node.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/image.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/port/ret_check.h"
#if !MEDIAPIPE_DISABLE_GPU
#include "mediapipe/gpu/gl_calculator_helper.h"
#include "mediapipe/gpu/gpu_buffer.h"
#endif  // !MEDIAPIPE_DISABLE_GPU

namespace mediapipe {

namespace {

AffineTransformation::BorderMode GetBorderMode(
    mediapipe::WarpAffineCalculatorOptions::BorderMode border_mode) {
  switch (border_mode) {
    case mediapipe::WarpAffineCalculatorOptions::BORDER_ZERO:
      return AffineTransformation::BorderMode::kZero;
    case mediapipe::WarpAffineCalculatorOptions::BORDER_UNSPECIFIED:
    case mediapipe::WarpAffineCalculatorOptions::BORDER_REPLICATE:
      return AffineTransformation::BorderMode::kReplicate;
  }
}

template <typename ImageT>
class WarpAffineRunnerHolder {};

template <>
class WarpAffineRunnerHolder<ImageFrame> {
 public:
  using RunnerType = AffineTransformation::Runner<ImageFrame, ImageFrame>;
  absl::Status Open(CalculatorContext* cc) { return absl::OkStatus(); }
  absl::StatusOr<RunnerType*> GetRunner() {
    if (!runner_) {
      ASSIGN_OR_RETURN(runner_, CreateAffineTransformationOpenCvRunner());
    }
    return runner_.get();
  }

 private:
  std::unique_ptr<RunnerType> runner_;
};

#if !MEDIAPIPE_DISABLE_GPU
template <>
class WarpAffineRunnerHolder<mediapipe::GpuBuffer> {
 public:
  using RunnerType =
      AffineTransformation::Runner<mediapipe::GpuBuffer,
                                   std::unique_ptr<mediapipe::GpuBuffer>>;
  absl::Status Open(CalculatorContext* cc) {
    gpu_origin_ =
        cc->Options<mediapipe::WarpAffineCalculatorOptions>().gpu_origin();
    gl_helper_ = std::make_shared<mediapipe::GlCalculatorHelper>();
    return gl_helper_->Open(cc);
  }
  absl::StatusOr<RunnerType*> GetRunner() {
    if (!runner_) {
      ASSIGN_OR_RETURN(
          runner_, CreateAffineTransformationGlRunner(gl_helper_, gpu_origin_));
    }
    return runner_.get();
  }

 private:
  mediapipe::GpuOrigin::Mode gpu_origin_;
  std::shared_ptr<mediapipe::GlCalculatorHelper> gl_helper_;
  std::unique_ptr<RunnerType> runner_;
};
#endif  // !MEDIAPIPE_DISABLE_GPU

template <>
class WarpAffineRunnerHolder<mediapipe::Image> {
 public:
  absl::Status Open(CalculatorContext* cc) { return runner_.Open(cc); }
  absl::StatusOr<
      AffineTransformation::Runner<mediapipe::Image, mediapipe::Image>*>
  GetRunner() {
    return &runner_;
  }

 private:
  class Runner : public AffineTransformation::Runner<mediapipe::Image,
                                                     mediapipe::Image> {
   public:
    absl::Status Open(CalculatorContext* cc) {
      MP_RETURN_IF_ERROR(cpu_holder_.Open(cc));
#if !MEDIAPIPE_DISABLE_GPU
      MP_RETURN_IF_ERROR(gpu_holder_.Open(cc));
#endif  // !MEDIAPIPE_DISABLE_GPU
      return absl::OkStatus();
    }
    absl::StatusOr<mediapipe::Image> Run(
        const mediapipe::Image& input, const std::array<float, 16>& matrix,
        const AffineTransformation::Size& size,
        AffineTransformation::BorderMode border_mode) override {
      if (input.UsesGpu()) {
#if !MEDIAPIPE_DISABLE_GPU
        ASSIGN_OR_RETURN(auto* runner, gpu_holder_.GetRunner());
        ASSIGN_OR_RETURN(auto result, runner->Run(input.GetGpuBuffer(), matrix,
                                                  size, border_mode));
        return mediapipe::Image(*result);
#else
        return absl::UnavailableError("GPU support is disabled");
#endif  // !MEDIAPIPE_DISABLE_GPU
      }
      ASSIGN_OR_RETURN(auto* runner, cpu_holder_.GetRunner());
      const auto& frame_ptr = input.GetImageFrameSharedPtr();
      // Wrap image into image frame.
      const ImageFrame image_frame(frame_ptr->Format(), frame_ptr->Width(),
                                   frame_ptr->Height(), frame_ptr->WidthStep(),
                                   const_cast<uint8_t*>(frame_ptr->PixelData()),
                                   [](uint8* data) {});
      ASSIGN_OR_RETURN(auto result,
                       runner->Run(image_frame, matrix, size, border_mode));
      return mediapipe::Image(std::make_shared<ImageFrame>(std::move(result)));
    }

   private:
    WarpAffineRunnerHolder<ImageFrame> cpu_holder_;
#if !MEDIAPIPE_DISABLE_GPU
    WarpAffineRunnerHolder<mediapipe::GpuBuffer> gpu_holder_;
#endif  // !MEDIAPIPE_DISABLE_GPU
  };

  Runner runner_;
};

template <typename InterfaceT>
class WarpAffineCalculatorImpl : public mediapipe::api2::NodeImpl<InterfaceT> {
 public:
#if !MEDIAPIPE_DISABLE_GPU
  static absl::Status UpdateContract(CalculatorContract* cc) {
    if constexpr (std::is_same_v<InterfaceT, WarpAffineCalculatorGpu> ||
                  std::is_same_v<InterfaceT, WarpAffineCalculator>) {
      MP_RETURN_IF_ERROR(mediapipe::GlCalculatorHelper::UpdateContract(cc));
    }
    return absl::OkStatus();
  }
#endif  // !MEDIAPIPE_DISABLE_GPU

  absl::Status Open(CalculatorContext* cc) override { return holder_.Open(cc); }

  absl::Status Process(CalculatorContext* cc) override {
    if (InterfaceT::kInImage(cc).IsEmpty() ||
        InterfaceT::kMatrix(cc).IsEmpty() ||
        InterfaceT::kOutputSize(cc).IsEmpty()) {
      return absl::OkStatus();
    }
    const std::array<float, 16>& transform = *InterfaceT::kMatrix(cc);
    auto [out_width, out_height] = *InterfaceT::kOutputSize(cc);
    AffineTransformation::Size output_size;
    output_size.width = out_width;
    output_size.height = out_height;
    ASSIGN_OR_RETURN(auto* runner, holder_.GetRunner());
    ASSIGN_OR_RETURN(
        auto result,
        runner->Run(
            *InterfaceT::kInImage(cc), transform, output_size,
            GetBorderMode(cc->Options<mediapipe::WarpAffineCalculatorOptions>()
                              .border_mode())));
    InterfaceT::kOutImage(cc).Send(std::move(result));

    return absl::OkStatus();
  }

 private:
  WarpAffineRunnerHolder<typename decltype(InterfaceT::kInImage)::PayloadT>
      holder_;
};

}  // namespace

MEDIAPIPE_NODE_IMPLEMENTATION(
    WarpAffineCalculatorImpl<WarpAffineCalculatorCpu>);
#if !MEDIAPIPE_DISABLE_GPU
MEDIAPIPE_NODE_IMPLEMENTATION(
    WarpAffineCalculatorImpl<WarpAffineCalculatorGpu>);
#endif  // !MEDIAPIPE_DISABLE_GPU
MEDIAPIPE_NODE_IMPLEMENTATION(WarpAffineCalculatorImpl<WarpAffineCalculator>);

}  // namespace mediapipe
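A minimal sketch of driving the CPU variant from a CalculatorGraph, condensed from warp_affine_calculator_test.cc later in this change. The stream names, the 256x256 output size, and the lack of an output-stream observer are illustrative assumptions, not part of the calculator itself.

#include <array>
#include <utility>

#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/port/parse_text_proto.h"

// Sketch only: builds a one-node graph around WarpAffineCalculatorCpu and
// feeds a single frame. A real client would also observe or poll
// "output_image" to consume the warped result.
absl::Status RunWarpOnce(mediapipe::ImageFrame frame,
                         const std::array<float, 16>& matrix) {
  auto config =
      mediapipe::ParseTextProtoOrDie<mediapipe::CalculatorGraphConfig>(R"pb(
        input_stream: "input_image"
        input_stream: "matrix"
        input_stream: "output_size"
        node {
          calculator: "WarpAffineCalculatorCpu"
          input_stream: "IMAGE:input_image"
          input_stream: "MATRIX:matrix"
          input_stream: "OUTPUT_SIZE:output_size"
          output_stream: "IMAGE:output_image"
        }
      )pb");
  mediapipe::CalculatorGraph graph;
  MP_RETURN_IF_ERROR(graph.Initialize(config));
  MP_RETURN_IF_ERROR(graph.StartRun({}));
  MP_RETURN_IF_ERROR(graph.AddPacketToInputStream(
      "input_image", mediapipe::MakePacket<mediapipe::ImageFrame>(
                         std::move(frame))
                         .At(mediapipe::Timestamp(0))));
  MP_RETURN_IF_ERROR(graph.AddPacketToInputStream(
      "matrix", mediapipe::MakePacket<std::array<float, 16>>(matrix).At(
                    mediapipe::Timestamp(0))));
  MP_RETURN_IF_ERROR(graph.AddPacketToInputStream(
      "output_size", mediapipe::MakePacket<std::pair<int, int>>(256, 256).At(
                         mediapipe::Timestamp(0))));
  MP_RETURN_IF_ERROR(graph.WaitUntilIdle());
  MP_RETURN_IF_ERROR(graph.CloseAllPacketSources());
  return graph.WaitUntilDone();
}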
mediapipe/calculators/image/warp_affine_calculator.h (new file, 94 lines)
@@ -0,0 +1,94 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef MEDIAPIPE_CALCULATORS_IMAGE_WARP_AFFINE_CALCULATOR_H_
#define MEDIAPIPE_CALCULATORS_IMAGE_WARP_AFFINE_CALCULATOR_H_

#include "mediapipe/framework/api2/node.h"
#include "mediapipe/framework/api2/port.h"
#include "mediapipe/framework/formats/image.h"
#include "mediapipe/framework/formats/image_frame.h"

#if !MEDIAPIPE_DISABLE_GPU
#include "mediapipe/gpu/gpu_buffer.h"
#endif  // !MEDIAPIPE_DISABLE_GPU

namespace mediapipe {

// Runs affine transformation.
//
// Input:
//   IMAGE - Image/ImageFrame/GpuBuffer
//
//   MATRIX - std::array<float, 16>
//     Used as following:
//       output(x, y) = input(matrix[0] * x + matrix[1] * y + matrix[3],
//                            matrix[4] * x + matrix[5] * y + matrix[7])
//     where x and y ranges are defined by @OUTPUT_SIZE.
//
//   OUTPUT_SIZE - std::pair<int, int>
//     Size of the output image.
//
// Output:
//   IMAGE - Image/ImageFrame/GpuBuffer
//
// Note:
//   - Output image type and format are the same as the input one.
//
// Usage example:
// node {
//   calculator: "WarpAffineCalculator(Cpu|Gpu)"
//   input_stream: "IMAGE:image"
//   input_stream: "MATRIX:matrix"
//   input_stream: "OUTPUT_SIZE:size"
//   output_stream: "IMAGE:transformed_image"
//   options: {
//     [mediapipe.WarpAffineCalculatorOptions.ext] {
//       border_mode: BORDER_ZERO
//     }
//   }
// }
template <typename ImageT>
class WarpAffineCalculatorIntf : public mediapipe::api2::NodeIntf {
 public:
  static constexpr mediapipe::api2::Input<ImageT> kInImage{"IMAGE"};
  static constexpr mediapipe::api2::Input<std::array<float, 16>> kMatrix{
      "MATRIX"};
  static constexpr mediapipe::api2::Input<std::pair<int, int>> kOutputSize{
      "OUTPUT_SIZE"};
  static constexpr mediapipe::api2::Output<ImageT> kOutImage{"IMAGE"};
};

class WarpAffineCalculatorCpu : public WarpAffineCalculatorIntf<ImageFrame> {
 public:
  MEDIAPIPE_NODE_INTERFACE(WarpAffineCalculatorCpu, kInImage, kMatrix,
                           kOutputSize, kOutImage);
};
#if !MEDIAPIPE_DISABLE_GPU
class WarpAffineCalculatorGpu
    : public WarpAffineCalculatorIntf<mediapipe::GpuBuffer> {
 public:
  MEDIAPIPE_NODE_INTERFACE(WarpAffineCalculatorGpu, kInImage, kMatrix,
                           kOutputSize, kOutImage);
};
#endif  // !MEDIAPIPE_DISABLE_GPU
class WarpAffineCalculator : public WarpAffineCalculatorIntf<mediapipe::Image> {
 public:
  MEDIAPIPE_NODE_INTERFACE(WarpAffineCalculator, kInImage, kMatrix, kOutputSize,
                           kOutImage);
};

}  // namespace mediapipe

#endif  // MEDIAPIPE_CALCULATORS_IMAGE_WARP_AFFINE_CALCULATOR_H_
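The MATRIX formula in the header comment above maps output coordinates back into the input image. A hedged standalone sketch of building such a row-major matrix for an axis-aligned sub-rectangle follows; the helper name and the rectangle parameters are assumptions for illustration, not an API from this change.

#include <array>

// Sketch only: fills the 4x4 MATRIX input so that output pixel (x, y) samples
// input pixel (x0 + x * sub_width / out_width, y0 + y * sub_height / out_height),
// matching the formula documented in warp_affine_calculator.h.
std::array<float, 16> SubRectMatrix(float x0, float y0, float sub_width,
                                    float sub_height, int out_width,
                                    int out_height) {
  std::array<float, 16> m = {};    // Row-major, everything else stays zero.
  m[0] = sub_width / out_width;    // d(input_x) / d(output_x)
  m[3] = x0;                       // input_x at output_x == 0
  m[5] = sub_height / out_height;  // d(input_y) / d(output_y)
  m[7] = y0;                       // input_y at output_y == 0
  m[10] = 1.0f;                    // Keep the unused z and w rows benign.
  m[15] = 1.0f;
  return m;
}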
mediapipe/calculators/image/warp_affine_calculator.proto (new file, 46 lines)
@@ -0,0 +1,46 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto2";

package mediapipe;

import "mediapipe/framework/calculator.proto";
import "mediapipe/gpu/gpu_origin.proto";

message WarpAffineCalculatorOptions {
  extend CalculatorOptions {
    optional WarpAffineCalculatorOptions ext = 373693895;
  }

  // Pixel extrapolation methods. See @border_mode.
  enum BorderMode {
    BORDER_UNSPECIFIED = 0;
    BORDER_ZERO = 1;
    BORDER_REPLICATE = 2;
  }

  // Pixel extrapolation method.
  // When converting image to tensor it may happen that tensor needs to read
  // pixels outside image boundaries. Border mode helps to specify how such
  // pixels will be calculated.
  //
  // BORDER_REPLICATE is used by default.
  optional BorderMode border_mode = 1;

  // For CONVENTIONAL mode for OpenGL, input image starts at bottom and needs
  // to be flipped vertically as tensors are expected to start at top.
  // (DEFAULT or unset interpreted as CONVENTIONAL.)
  optional GpuOrigin.Mode gpu_origin = 2;
}
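For reference, a hedged sketch of how a calculator reads these options at Open() time, mirroring what warp_affine_calculator.cc above does for gpu_origin; the free function wrapper is illustrative only.

#include "mediapipe/calculators/image/warp_affine_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"

// Sketch only: BORDER_UNSPECIFIED is treated as replicate, matching
// GetBorderMode() in the calculator implementation.
void ReadWarpAffineOptions(mediapipe::CalculatorContext* cc) {
  const auto& options = cc->Options<mediapipe::WarpAffineCalculatorOptions>();
  bool use_zero_border = options.border_mode() ==
                         mediapipe::WarpAffineCalculatorOptions::BORDER_ZERO;
  (void)use_zero_border;
  (void)options.gpu_origin();  // CONVENTIONAL unless overridden, per the proto.
}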
mediapipe/calculators/image/warp_affine_calculator_test.cc (new file, 615 lines)
@@ -0,0 +1,615 @@
// Copyright 2021 The MediaPipe Authors.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
#include <cmath>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "absl/flags/flag.h"
|
||||||
|
#include "absl/memory/memory.h"
|
||||||
|
#include "absl/strings/substitute.h"
|
||||||
|
#include "mediapipe/calculators/image/affine_transformation.h"
|
||||||
|
#include "mediapipe/calculators/tensor/image_to_tensor_converter.h"
|
||||||
|
#include "mediapipe/calculators/tensor/image_to_tensor_utils.h"
|
||||||
|
#include "mediapipe/framework/calculator_framework.h"
|
||||||
|
#include "mediapipe/framework/calculator_runner.h"
|
||||||
|
#include "mediapipe/framework/deps/file_path.h"
|
||||||
|
#include "mediapipe/framework/formats/image.h"
|
||||||
|
#include "mediapipe/framework/formats/image_format.pb.h"
|
||||||
|
#include "mediapipe/framework/formats/image_frame.h"
|
||||||
|
#include "mediapipe/framework/formats/image_frame_opencv.h"
|
||||||
|
#include "mediapipe/framework/formats/rect.pb.h"
|
||||||
|
#include "mediapipe/framework/formats/tensor.h"
|
||||||
|
#include "mediapipe/framework/port/gtest.h"
|
||||||
|
#include "mediapipe/framework/port/integral_types.h"
|
||||||
|
#include "mediapipe/framework/port/opencv_core_inc.h"
|
||||||
|
#include "mediapipe/framework/port/opencv_imgcodecs_inc.h"
|
||||||
|
#include "mediapipe/framework/port/opencv_imgproc_inc.h"
|
||||||
|
#include "mediapipe/framework/port/parse_text_proto.h"
|
||||||
|
#include "mediapipe/framework/port/status_matchers.h"
|
||||||
|
|
||||||
|
namespace mediapipe {
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
cv::Mat GetRgb(absl::string_view path) {
|
||||||
|
cv::Mat bgr = cv::imread(file::JoinPath("./", path));
|
||||||
|
cv::Mat rgb(bgr.rows, bgr.cols, CV_8UC3);
|
||||||
|
int from_to[] = {0, 2, 1, 1, 2, 0};
|
||||||
|
cv::mixChannels(&bgr, 1, &rgb, 1, from_to, 3);
|
||||||
|
return rgb;
|
||||||
|
}
|
||||||
|
|
||||||
|
cv::Mat GetRgba(absl::string_view path) {
|
||||||
|
cv::Mat bgr = cv::imread(file::JoinPath("./", path));
|
||||||
|
cv::Mat rgba(bgr.rows, bgr.cols, CV_8UC4, cv::Scalar(0, 0, 0, 0));
|
||||||
|
int from_to[] = {0, 2, 1, 1, 2, 0};
|
||||||
|
cv::mixChannels(&bgr, 1, &bgr, 1, from_to, 3);
|
||||||
|
return bgr;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test template.
|
||||||
|
// No processing/assertions should be done after the function is invoked.
|
||||||
|
void RunTest(const std::string& graph_text, const std::string& tag,
|
||||||
|
const cv::Mat& input, cv::Mat expected_result,
|
||||||
|
float similarity_threshold, std::array<float, 16> matrix,
|
||||||
|
int out_width, int out_height,
|
||||||
|
absl::optional<AffineTransformation::BorderMode> border_mode) {
|
||||||
|
std::string border_mode_str;
|
||||||
|
if (border_mode) {
|
||||||
|
switch (*border_mode) {
|
||||||
|
case AffineTransformation::BorderMode::kReplicate:
|
||||||
|
border_mode_str = "border_mode: BORDER_REPLICATE";
|
||||||
|
break;
|
||||||
|
case AffineTransformation::BorderMode::kZero:
|
||||||
|
border_mode_str = "border_mode: BORDER_ZERO";
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
auto graph_config = mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(
|
||||||
|
absl::Substitute(graph_text, /*$0=*/border_mode_str));
|
||||||
|
|
||||||
|
std::vector<Packet> output_packets;
|
||||||
|
tool::AddVectorSink("output_image", &graph_config, &output_packets);
|
||||||
|
|
||||||
|
// Run the graph.
|
||||||
|
CalculatorGraph graph;
|
||||||
|
MP_ASSERT_OK(graph.Initialize(graph_config));
|
||||||
|
MP_ASSERT_OK(graph.StartRun({}));
|
||||||
|
|
||||||
|
ImageFrame input_image(
|
||||||
|
input.channels() == 4 ? ImageFormat::SRGBA : ImageFormat::SRGB,
|
||||||
|
input.cols, input.rows, input.step, input.data, [](uint8*) {});
|
||||||
|
MP_ASSERT_OK(graph.AddPacketToInputStream(
|
||||||
|
"input_image",
|
||||||
|
MakePacket<ImageFrame>(std::move(input_image)).At(Timestamp(0))));
|
||||||
|
MP_ASSERT_OK(graph.AddPacketToInputStream(
|
||||||
|
"matrix",
|
||||||
|
MakePacket<std::array<float, 16>>(std::move(matrix)).At(Timestamp(0))));
|
||||||
|
MP_ASSERT_OK(graph.AddPacketToInputStream(
|
||||||
|
"output_size", MakePacket<std::pair<int, int>>(
|
||||||
|
std::pair<int, int>(out_width, out_height))
|
||||||
|
.At(Timestamp(0))));
|
||||||
|
|
||||||
|
MP_ASSERT_OK(graph.WaitUntilIdle());
|
||||||
|
ASSERT_THAT(output_packets, testing::SizeIs(1));
|
||||||
|
|
||||||
|
// Get and process results.
|
||||||
|
const ImageFrame& out_frame = output_packets[0].Get<ImageFrame>();
|
||||||
|
cv::Mat result = formats::MatView(&out_frame);
|
||||||
|
double similarity =
|
||||||
|
1.0 - cv::norm(result, expected_result, cv::NORM_RELATIVE | cv::NORM_L2);
|
||||||
|
EXPECT_GE(similarity, similarity_threshold);
|
||||||
|
|
||||||
|
// Fully close graph at end, otherwise calculator+tensors are destroyed
|
||||||
|
// after calling WaitUntilDone().
|
||||||
|
MP_ASSERT_OK(graph.CloseInputStream("input_image"));
|
||||||
|
MP_ASSERT_OK(graph.CloseInputStream("matrix"));
|
||||||
|
MP_ASSERT_OK(graph.CloseInputStream("output_size"));
|
||||||
|
MP_ASSERT_OK(graph.WaitUntilDone());
|
||||||
|
}
|
||||||
|
|
||||||
|
enum class InputType { kImageFrame, kImage };
|
||||||
|
|
||||||
|
// Similarity is checked against OpenCV results always, and due to differences
|
||||||
|
// on how OpenCV and GL treats pixels there are two thresholds.
|
||||||
|
// TODO: update to have just one threshold when OpenCV
|
||||||
|
// implementation is updated.
|
||||||
|
struct SimilarityConfig {
|
||||||
|
double threshold_on_cpu;
|
||||||
|
double threshold_on_gpu;
|
||||||
|
};
|
||||||
|
|
||||||
|
void RunTest(cv::Mat input, cv::Mat expected_result,
|
||||||
|
const SimilarityConfig& similarity, std::array<float, 16> matrix,
|
||||||
|
int out_width, int out_height,
|
||||||
|
absl::optional<AffineTransformation::BorderMode> border_mode) {
|
||||||
|
RunTest(R"(
|
||||||
|
input_stream: "input_image"
|
||||||
|
input_stream: "output_size"
|
||||||
|
input_stream: "matrix"
|
||||||
|
node {
|
||||||
|
calculator: "WarpAffineCalculatorCpu"
|
||||||
|
input_stream: "IMAGE:input_image"
|
||||||
|
input_stream: "MATRIX:matrix"
|
||||||
|
input_stream: "OUTPUT_SIZE:output_size"
|
||||||
|
output_stream: "IMAGE:output_image"
|
||||||
|
options {
|
||||||
|
[mediapipe.WarpAffineCalculatorOptions.ext] {
|
||||||
|
$0 # border mode
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
)",
|
||||||
|
"cpu", input, expected_result, similarity.threshold_on_cpu, matrix,
|
||||||
|
out_width, out_height, border_mode);
|
||||||
|
|
||||||
|
RunTest(R"(
|
||||||
|
input_stream: "input_image"
|
||||||
|
input_stream: "output_size"
|
||||||
|
input_stream: "matrix"
|
||||||
|
node {
|
||||||
|
calculator: "ToImageCalculator"
|
||||||
|
input_stream: "IMAGE_CPU:input_image"
|
||||||
|
output_stream: "IMAGE:input_image_unified"
|
||||||
|
}
|
||||||
|
node {
|
||||||
|
calculator: "WarpAffineCalculator"
|
||||||
|
input_stream: "IMAGE:input_image_unified"
|
||||||
|
input_stream: "MATRIX:matrix"
|
||||||
|
input_stream: "OUTPUT_SIZE:output_size"
|
||||||
|
output_stream: "IMAGE:output_image_unified"
|
||||||
|
options {
|
||||||
|
[mediapipe.WarpAffineCalculatorOptions.ext] {
|
||||||
|
$0 # border mode
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
node {
|
||||||
|
calculator: "FromImageCalculator"
|
||||||
|
input_stream: "IMAGE:output_image_unified"
|
||||||
|
output_stream: "IMAGE_CPU:output_image"
|
||||||
|
}
|
||||||
|
)",
|
||||||
|
"cpu_image", input, expected_result, similarity.threshold_on_cpu,
|
||||||
|
matrix, out_width, out_height, border_mode);
|
||||||
|
|
||||||
|
RunTest(R"(
|
||||||
|
input_stream: "input_image"
|
||||||
|
input_stream: "output_size"
|
||||||
|
input_stream: "matrix"
|
||||||
|
node {
|
||||||
|
calculator: "ImageFrameToGpuBufferCalculator"
|
||||||
|
input_stream: "input_image"
|
||||||
|
output_stream: "input_image_gpu"
|
||||||
|
}
|
||||||
|
node {
|
||||||
|
calculator: "WarpAffineCalculatorGpu"
|
||||||
|
input_stream: "IMAGE:input_image_gpu"
|
||||||
|
input_stream: "MATRIX:matrix"
|
||||||
|
input_stream: "OUTPUT_SIZE:output_size"
|
||||||
|
output_stream: "IMAGE:output_image_gpu"
|
||||||
|
options {
|
||||||
|
[mediapipe.WarpAffineCalculatorOptions.ext] {
|
||||||
|
$0 # border mode
|
||||||
|
gpu_origin: TOP_LEFT
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
node {
|
||||||
|
calculator: "GpuBufferToImageFrameCalculator"
|
||||||
|
input_stream: "output_image_gpu"
|
||||||
|
output_stream: "output_image"
|
||||||
|
}
|
||||||
|
)",
|
||||||
|
"gpu", input, expected_result, similarity.threshold_on_gpu, matrix,
|
||||||
|
out_width, out_height, border_mode);
|
||||||
|
|
||||||
|
RunTest(R"(
|
||||||
|
input_stream: "input_image"
|
||||||
|
input_stream: "output_size"
|
||||||
|
input_stream: "matrix"
|
||||||
|
node {
|
||||||
|
calculator: "ImageFrameToGpuBufferCalculator"
|
||||||
|
input_stream: "input_image"
|
||||||
|
output_stream: "input_image_gpu"
|
||||||
|
}
|
||||||
|
node {
|
||||||
|
calculator: "ToImageCalculator"
|
||||||
|
input_stream: "IMAGE_GPU:input_image_gpu"
|
||||||
|
output_stream: "IMAGE:input_image_unified"
|
||||||
|
}
|
||||||
|
node {
|
||||||
|
calculator: "WarpAffineCalculator"
|
||||||
|
input_stream: "IMAGE:input_image_unified"
|
||||||
|
input_stream: "MATRIX:matrix"
|
||||||
|
input_stream: "OUTPUT_SIZE:output_size"
|
||||||
|
output_stream: "IMAGE:output_image_unified"
|
||||||
|
options {
|
||||||
|
[mediapipe.WarpAffineCalculatorOptions.ext] {
|
||||||
|
$0 # border mode
|
||||||
|
gpu_origin: TOP_LEFT
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
node {
|
||||||
|
calculator: "FromImageCalculator"
|
||||||
|
input_stream: "IMAGE:output_image_unified"
|
||||||
|
output_stream: "IMAGE_GPU:output_image_gpu"
|
||||||
|
}
|
||||||
|
node {
|
||||||
|
calculator: "GpuBufferToImageFrameCalculator"
|
||||||
|
input_stream: "output_image_gpu"
|
||||||
|
output_stream: "output_image"
|
||||||
|
}
|
||||||
|
)",
|
||||||
|
"gpu_image", input, expected_result, similarity.threshold_on_gpu,
|
||||||
|
matrix, out_width, out_height, border_mode);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::array<float, 16> GetMatrix(cv::Mat input, mediapipe::NormalizedRect roi,
|
||||||
|
bool keep_aspect_ratio, int out_width,
|
||||||
|
int out_height) {
|
||||||
|
std::array<float, 16> transform_mat;
|
||||||
|
mediapipe::RotatedRect roi_absolute =
|
||||||
|
mediapipe::GetRoi(input.cols, input.rows, roi);
|
||||||
|
mediapipe::PadRoi(out_width, out_height, keep_aspect_ratio, &roi_absolute)
|
||||||
|
.IgnoreError();
|
||||||
|
mediapipe::GetRotatedSubRectToRectTransformMatrix(
|
||||||
|
roi_absolute, input.cols, input.rows,
|
||||||
|
/*flip_horizontaly=*/false, &transform_mat);
|
||||||
|
return transform_mat;
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(WarpAffineCalculatorTest, MediumSubRectKeepAspect) {
|
||||||
|
mediapipe::NormalizedRect roi;
|
||||||
|
roi.set_x_center(0.65f);
|
||||||
|
roi.set_y_center(0.4f);
|
||||||
|
roi.set_width(0.5f);
|
||||||
|
roi.set_height(0.5f);
|
||||||
|
roi.set_rotation(0);
|
||||||
|
auto input = GetRgb(
|
||||||
|
"/mediapipe/calculators/"
|
||||||
|
"tensor/testdata/image_to_tensor/input.jpg");
|
||||||
|
auto expected_output = GetRgb(
|
||||||
|
"/mediapipe/calculators/"
|
||||||
|
"tensor/testdata/image_to_tensor/medium_sub_rect_keep_aspect.png");
|
||||||
|
int out_width = 256;
|
||||||
|
int out_height = 256;
|
||||||
|
bool keep_aspect_ratio = true;
|
||||||
|
std::optional<AffineTransformation::BorderMode> border_mode = {};
|
||||||
|
RunTest(input, expected_output,
|
||||||
|
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.82},
|
||||||
|
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
|
||||||
|
out_width, out_height, border_mode);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(WarpAffineCalculatorTest, MediumSubRectKeepAspectBorderZero) {
|
||||||
|
mediapipe::NormalizedRect roi;
|
||||||
|
roi.set_x_center(0.65f);
|
||||||
|
roi.set_y_center(0.4f);
|
||||||
|
roi.set_width(0.5f);
|
||||||
|
roi.set_height(0.5f);
|
||||||
|
roi.set_rotation(0);
|
||||||
|
auto input = GetRgb(
|
||||||
|
"/mediapipe/calculators/"
|
||||||
|
"tensor/testdata/image_to_tensor/input.jpg");
|
||||||
|
auto expected_output = GetRgb(
|
||||||
|
"/mediapipe/calculators/"
|
||||||
|
"tensor/testdata/image_to_tensor/"
|
||||||
|
"medium_sub_rect_keep_aspect_border_zero.png");
|
||||||
|
int out_width = 256;
|
||||||
|
int out_height = 256;
|
||||||
|
bool keep_aspect_ratio = true;
|
||||||
|
std::optional<AffineTransformation::BorderMode> border_mode =
|
||||||
|
AffineTransformation::BorderMode::kZero;
|
||||||
|
RunTest(input, expected_output,
|
||||||
|
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.81},
|
||||||
|
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
|
||||||
|
out_width, out_height, border_mode);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(WarpAffineCalculatorTest, MediumSubRectKeepAspectWithRotation) {
|
||||||
|
mediapipe::NormalizedRect roi;
|
||||||
|
roi.set_x_center(0.65f);
|
||||||
|
roi.set_y_center(0.4f);
|
||||||
|
roi.set_width(0.5f);
|
||||||
|
roi.set_height(0.5f);
|
||||||
|
roi.set_rotation(M_PI * 90.0f / 180.0f);
|
||||||
|
auto input = GetRgb(
|
||||||
|
"/mediapipe/calculators/"
|
||||||
|
"tensor/testdata/image_to_tensor/input.jpg");
|
||||||
|
auto expected_output = GetRgb(
|
||||||
|
"/mediapipe/calculators/"
|
||||||
|
"tensor/testdata/image_to_tensor/"
|
||||||
|
"medium_sub_rect_keep_aspect_with_rotation.png");
|
||||||
|
int out_width = 256;
|
||||||
|
int out_height = 256;
|
||||||
|
bool keep_aspect_ratio = true;
|
||||||
|
std::optional<AffineTransformation::BorderMode> border_mode =
|
||||||
|
AffineTransformation::BorderMode::kReplicate;
|
||||||
|
RunTest(input, expected_output,
|
||||||
|
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.77},
|
||||||
|
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
|
||||||
|
out_width, out_height, border_mode);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(WarpAffineCalculatorTest, MediumSubRectKeepAspectWithRotationBorderZero) {
|
||||||
|
mediapipe::NormalizedRect roi;
|
||||||
|
roi.set_x_center(0.65f);
|
||||||
|
roi.set_y_center(0.4f);
|
||||||
|
roi.set_width(0.5f);
|
||||||
|
roi.set_height(0.5f);
|
||||||
|
roi.set_rotation(M_PI * 90.0f / 180.0f);
|
||||||
|
auto input = GetRgb(
|
||||||
|
"/mediapipe/calculators/"
|
||||||
|
"tensor/testdata/image_to_tensor/input.jpg");
|
||||||
|
auto expected_output = GetRgb(
|
||||||
|
"/mediapipe/calculators/"
|
||||||
|
"tensor/testdata/image_to_tensor/"
|
||||||
|
"medium_sub_rect_keep_aspect_with_rotation_border_zero.png");
|
||||||
|
int out_width = 256;
|
||||||
|
int out_height = 256;
|
||||||
|
bool keep_aspect_ratio = true;
|
||||||
|
std::optional<AffineTransformation::BorderMode> border_mode =
|
||||||
|
AffineTransformation::BorderMode::kZero;
|
||||||
|
RunTest(input, expected_output,
|
||||||
|
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.75},
|
||||||
|
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
|
||||||
|
out_width, out_height, border_mode);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(WarpAffineCalculatorTest, MediumSubRectWithRotation) {
|
||||||
|
mediapipe::NormalizedRect roi;
|
||||||
|
roi.set_x_center(0.65f);
|
||||||
|
roi.set_y_center(0.4f);
|
||||||
|
roi.set_width(0.5f);
|
||||||
|
roi.set_height(0.5f);
|
||||||
|
roi.set_rotation(M_PI * -45.0f / 180.0f);
|
||||||
|
auto input = GetRgb(
|
||||||
|
"/mediapipe/calculators/"
|
||||||
|
"tensor/testdata/image_to_tensor/input.jpg");
|
||||||
|
auto expected_output = GetRgb(
|
||||||
|
"/mediapipe/calculators/"
|
||||||
|
"tensor/testdata/image_to_tensor/medium_sub_rect_with_rotation.png");
|
||||||
|
int out_width = 256;
|
||||||
|
int out_height = 256;
|
||||||
|
bool keep_aspect_ratio = false;
|
||||||
|
std::optional<AffineTransformation::BorderMode> border_mode =
|
||||||
|
AffineTransformation::BorderMode::kReplicate;
|
||||||
|
RunTest(input, expected_output,
|
||||||
|
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.81},
|
||||||
|
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
|
||||||
|
out_width, out_height, border_mode);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(WarpAffineCalculatorTest, MediumSubRectWithRotationBorderZero) {
|
||||||
|
mediapipe::NormalizedRect roi;
|
||||||
|
roi.set_x_center(0.65f);
|
||||||
|
roi.set_y_center(0.4f);
|
||||||
|
roi.set_width(0.5f);
|
||||||
|
roi.set_height(0.5f);
|
||||||
|
roi.set_rotation(M_PI * -45.0f / 180.0f);
|
||||||
|
auto input = GetRgb(
|
||||||
|
"/mediapipe/calculators/"
|
||||||
|
"tensor/testdata/image_to_tensor/input.jpg");
|
||||||
|
auto expected_output = GetRgb(
|
||||||
|
"/mediapipe/calculators/"
|
||||||
|
"tensor/testdata/image_to_tensor/"
|
||||||
|
"medium_sub_rect_with_rotation_border_zero.png");
|
||||||
|
int out_width = 256;
|
||||||
|
int out_height = 256;
|
||||||
|
bool keep_aspect_ratio = false;
|
||||||
|
std::optional<AffineTransformation::BorderMode> border_mode =
|
||||||
|
AffineTransformation::BorderMode::kZero;
|
||||||
|
RunTest(input, expected_output,
|
||||||
|
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.80},
|
||||||
|
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
|
||||||
|
out_width, out_height, border_mode);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(WarpAffineCalculatorTest, LargeSubRect) {
|
||||||
|
mediapipe::NormalizedRect roi;
|
||||||
|
roi.set_x_center(0.5f);
|
||||||
|
roi.set_y_center(0.5f);
|
||||||
|
roi.set_width(1.5f);
|
||||||
|
roi.set_height(1.1f);
|
||||||
|
roi.set_rotation(0);
|
||||||
|
auto input = GetRgb(
|
||||||
|
"/mediapipe/calculators/"
|
||||||
|
"tensor/testdata/image_to_tensor/input.jpg");
|
||||||
|
auto expected_output = GetRgb(
|
||||||
|
"/mediapipe/calculators/"
|
||||||
|
"tensor/testdata/image_to_tensor/large_sub_rect.png");
|
||||||
|
int out_width = 128;
|
||||||
|
int out_height = 128;
|
||||||
|
bool keep_aspect_ratio = false;
|
||||||
|
std::optional<AffineTransformation::BorderMode> border_mode =
|
||||||
|
AffineTransformation::BorderMode::kReplicate;
|
||||||
|
RunTest(input, expected_output,
|
||||||
|
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.95},
|
||||||
|
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
|
||||||
|
out_width, out_height, border_mode);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(WarpAffineCalculatorTest, LargeSubRectBorderZero) {
|
||||||
|
mediapipe::NormalizedRect roi;
|
||||||
|
roi.set_x_center(0.5f);
|
||||||
|
roi.set_y_center(0.5f);
|
||||||
|
roi.set_width(1.5f);
|
||||||
|
roi.set_height(1.1f);
|
||||||
|
roi.set_rotation(0);
|
||||||
|
auto input = GetRgb(
|
||||||
|
"/mediapipe/calculators/"
|
||||||
|
"tensor/testdata/image_to_tensor/input.jpg");
|
||||||
|
auto expected_output = GetRgb(
|
||||||
|
"/mediapipe/calculators/"
|
||||||
|
"tensor/testdata/image_to_tensor/large_sub_rect_border_zero.png");
|
||||||
|
int out_width = 128;
|
||||||
|
int out_height = 128;
|
||||||
|
bool keep_aspect_ratio = false;
|
||||||
|
std::optional<AffineTransformation::BorderMode> border_mode =
|
||||||
|
AffineTransformation::BorderMode::kZero;
|
||||||
|
RunTest(input, expected_output,
|
||||||
|
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.92},
|
||||||
|
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
|
||||||
|
out_width, out_height, border_mode);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(WarpAffineCalculatorTest, LargeSubRectKeepAspect) {
|
||||||
|
mediapipe::NormalizedRect roi;
|
||||||
|
roi.set_x_center(0.5f);
|
||||||
|
roi.set_y_center(0.5f);
|
||||||
|
roi.set_width(1.5f);
|
||||||
|
roi.set_height(1.1f);
|
||||||
|
roi.set_rotation(0);
|
||||||
|
auto input = GetRgb(
|
||||||
|
"/mediapipe/calculators/"
|
||||||
|
"tensor/testdata/image_to_tensor/input.jpg");
|
||||||
|
auto expected_output = GetRgb(
|
||||||
|
"/mediapipe/calculators/"
|
||||||
|
"tensor/testdata/image_to_tensor/large_sub_rect_keep_aspect.png");
|
||||||
|
int out_width = 128;
|
||||||
|
int out_height = 128;
|
||||||
|
bool keep_aspect_ratio = true;
|
||||||
|
std::optional<AffineTransformation::BorderMode> border_mode =
|
||||||
|
AffineTransformation::BorderMode::kReplicate;
|
||||||
|
RunTest(input, expected_output,
|
||||||
|
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.97},
|
||||||
|
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
|
||||||
|
out_width, out_height, border_mode);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(WarpAffineCalculatorTest, LargeSubRectKeepAspectBorderZero) {
|
||||||
|
mediapipe::NormalizedRect roi;
|
||||||
|
roi.set_x_center(0.5f);
|
||||||
|
roi.set_y_center(0.5f);
|
||||||
|
roi.set_width(1.5f);
|
||||||
|
roi.set_height(1.1f);
|
||||||
|
roi.set_rotation(0);
|
||||||
|
auto input = GetRgb(
|
||||||
|
"/mediapipe/calculators/"
|
||||||
|
"tensor/testdata/image_to_tensor/input.jpg");
|
||||||
|
auto expected_output = GetRgb(
|
||||||
|
"/mediapipe/calculators/"
|
||||||
|
"tensor/testdata/image_to_tensor/"
|
||||||
|
"large_sub_rect_keep_aspect_border_zero.png");
|
||||||
|
int out_width = 128;
|
||||||
|
int out_height = 128;
|
||||||
|
bool keep_aspect_ratio = true;
|
||||||
|
std::optional<AffineTransformation::BorderMode> border_mode =
|
||||||
|
AffineTransformation::BorderMode::kZero;
|
||||||
|
RunTest(input, expected_output,
|
||||||
|
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.97},
|
||||||
|
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
|
||||||
|
out_width, out_height, border_mode);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(WarpAffineCalculatorTest, LargeSubRectKeepAspectWithRotation) {
|
||||||
|
mediapipe::NormalizedRect roi;
|
||||||
|
roi.set_x_center(0.5f);
|
||||||
|
roi.set_y_center(0.5f);
|
||||||
|
roi.set_width(1.5f);
|
||||||
|
roi.set_height(1.1f);
|
||||||
|
roi.set_rotation(M_PI * -15.0f / 180.0f);
|
||||||
|
auto input = GetRgba(
|
||||||
|
"/mediapipe/calculators/"
|
||||||
|
"tensor/testdata/image_to_tensor/input.jpg");
|
||||||
|
auto expected_output = GetRgba(
|
||||||
|
"/mediapipe/calculators/"
|
||||||
|
"tensor/testdata/image_to_tensor/"
|
||||||
|
"large_sub_rect_keep_aspect_with_rotation.png");
|
||||||
|
int out_width = 128;
|
||||||
|
int out_height = 128;
|
||||||
|
bool keep_aspect_ratio = true;
|
||||||
|
std::optional<AffineTransformation::BorderMode> border_mode = {};
|
||||||
|
RunTest(input, expected_output,
|
||||||
|
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.91},
|
||||||
|
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
|
||||||
|
out_width, out_height, border_mode);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(WarpAffineCalculatorTest, LargeSubRectKeepAspectWithRotationBorderZero) {
|
||||||
|
mediapipe::NormalizedRect roi;
|
||||||
|
roi.set_x_center(0.5f);
|
||||||
|
roi.set_y_center(0.5f);
|
||||||
|
roi.set_width(1.5f);
|
||||||
|
roi.set_height(1.1f);
|
||||||
|
roi.set_rotation(M_PI * -15.0f / 180.0f);
|
||||||
|
auto input = GetRgba(
|
||||||
|
"/mediapipe/calculators/"
|
||||||
|
"tensor/testdata/image_to_tensor/input.jpg");
|
||||||
|
auto expected_output = GetRgba(
|
||||||
|
"/mediapipe/calculators/"
|
||||||
|
"tensor/testdata/image_to_tensor/"
|
||||||
|
"large_sub_rect_keep_aspect_with_rotation_border_zero.png");
|
||||||
|
int out_width = 128;
|
||||||
|
int out_height = 128;
|
||||||
|
bool keep_aspect_ratio = true;
|
||||||
|
std::optional<AffineTransformation::BorderMode> border_mode =
|
||||||
|
AffineTransformation::BorderMode::kZero;
|
||||||
|
RunTest(input, expected_output,
|
||||||
|
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.88},
|
||||||
|
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
|
||||||
|
out_width, out_height, border_mode);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(WarpAffineCalculatorTest, NoOp) {
|
||||||
|
mediapipe::NormalizedRect roi;
|
||||||
|
roi.set_x_center(0.5f);
|
||||||
|
roi.set_y_center(0.5f);
|
||||||
|
roi.set_width(1.0f);
|
||||||
|
roi.set_height(1.0f);
|
||||||
|
roi.set_rotation(0);
|
||||||
|
auto input = GetRgba(
|
||||||
|
"/mediapipe/calculators/"
|
||||||
|
"tensor/testdata/image_to_tensor/input.jpg");
|
||||||
|
auto expected_output = GetRgba(
|
||||||
|
"/mediapipe/calculators/"
|
||||||
|
"tensor/testdata/image_to_tensor/noop_except_range.png");
|
||||||
|
int out_width = 64;
|
||||||
|
int out_height = 128;
|
||||||
|
bool keep_aspect_ratio = true;
|
||||||
|
std::optional<AffineTransformation::BorderMode> border_mode =
|
||||||
|
AffineTransformation::BorderMode::kReplicate;
|
||||||
|
RunTest(input, expected_output,
|
||||||
|
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.99},
|
||||||
|
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
|
||||||
|
out_width, out_height, border_mode);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(WarpAffineCalculatorTest, NoOpBorderZero) {
|
||||||
|
mediapipe::NormalizedRect roi;
|
||||||
|
roi.set_x_center(0.5f);
|
||||||
|
roi.set_y_center(0.5f);
|
||||||
|
roi.set_width(1.0f);
|
||||||
|
roi.set_height(1.0f);
|
||||||
|
roi.set_rotation(0);
|
||||||
|
auto input = GetRgba(
|
||||||
|
"/mediapipe/calculators/"
|
||||||
|
"tensor/testdata/image_to_tensor/input.jpg");
|
||||||
|
auto expected_output = GetRgba(
|
||||||
|
"/mediapipe/calculators/"
|
||||||
|
"tensor/testdata/image_to_tensor/noop_except_range.png");
|
||||||
|
int out_width = 64;
|
||||||
|
int out_height = 128;
|
||||||
|
bool keep_aspect_ratio = true;
|
||||||
|
std::optional<AffineTransformation::BorderMode> border_mode =
|
||||||
|
AffineTransformation::BorderMode::kZero;
|
||||||
|
RunTest(input, expected_output,
|
||||||
|
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.99},
|
||||||
|
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
|
||||||
|
out_width, out_height, border_mode);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
} // namespace mediapipe
@@ -26,6 +26,11 @@ licenses(["notice"])
 
 package(default_visibility = ["//visibility:private"])
 
+exports_files(
+    glob(["testdata/image_to_tensor/*"]),
+    visibility = ["//mediapipe/calculators/image:__subpackages__"],
+)
+
 selects.config_setting_group(
     name = "compute_shader_unavailable",
     match_any = [
@@ -87,9 +87,9 @@ using GpuBuffer = mediapipe::GpuBuffer;
 //   TENSORS - std::vector<Tensor>
 //     Vector containing a single Tensor populated with an extracted RGB image.
 //   MATRIX - std::array<float, 16> @Optional
-//     An std::array<float, 16> representing a 4x4 row-major-order matrix which
-//     can be used to map a point on the output tensor to a point on the input
-//     image.
+//     An std::array<float, 16> representing a 4x4 row-major-order matrix that
+//     maps a point on the input image to a point on the output tensor, and
+//     can be used to reverse the mapping by inverting the matrix.
 //   LETTERBOX_PADDING - std::array<float, 4> @Optional
 //     An std::array<float, 4> representing the letterbox padding from the 4
 //     sides ([left, top, right, bottom]) of the output image, normalized to
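The inversion the new comment mentions can be done with any 4x4 matrix routine. A hedged standalone sketch using OpenCV follows; the helper name and point values are illustrative assumptions.

#include <array>

#include "mediapipe/framework/port/opencv_core_inc.h"

// Sketch only: inverts the row-major 4x4 MATRIX so that a point in
// output-tensor coordinates can be mapped back to input-image coordinates.
std::array<float, 2> TensorToImage(const std::array<float, 16>& matrix,
                                   float tensor_x, float tensor_y) {
  cv::Matx44f m(matrix.data());
  cv::Matx44f inv = m.inv();
  cv::Vec4f p = inv * cv::Vec4f(tensor_x, tensor_y, 0.f, 1.f);
  return {p[0], p[1]};
}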
@@ -517,8 +517,8 @@ absl::Status TensorConverterCalculator::InitGpu(CalculatorContext* cc) {
         uniform sampler2D frame;
 
         void main() {
-          $1  // flip
-          vec4 pixel = texture2D(frame, sample_coordinate);
+          vec2 coord = $1
+          vec4 pixel = texture2D(frame, coord);
           $2  // normalize [-1,1]
           fragColor.r = pixel.r;  // r channel
           $3  // g & b channels
@@ -526,8 +526,9 @@ absl::Status TensorConverterCalculator::InitGpu(CalculatorContext* cc) {
       })",
       /*$0=*/single_channel ? "vec1" : "vec4",
       /*$1=*/
-      flip_vertically_ ? "sample_coordinate.y = 1.0 - sample_coordinate.y;"
-                       : "",
+      flip_vertically_
+          ? "vec2(sample_coordinate.x, 1.0 - sample_coordinate.y);"
+          : "sample_coordinate;",
       /*$2=*/output_range_.has_value()
           ? absl::Substitute("pixel = pixel * float($0) + float($1);",
                              (output_range_->second - output_range_->first),
@@ -587,9 +587,21 @@ cc_library(
         "//mediapipe/framework/port:ret_check",
     ] + select({
         "//conditions:default": [
-            "//mediapipe/framework/port:file_helpers",
         ],
-    }),
+        "//mediapipe:android": [],
+    }) + select(
+        {
+            "//conditions:default": [
+            ],
+        },
+    ) + select(
+        {
+            "//conditions:default": [
+            ],
+            "//mediapipe:android": [
+            ],
+        },
+    ),
     alwayslink = 1,
 )

@ -37,6 +37,7 @@ const char kSequenceExampleTag[] = "SEQUENCE_EXAMPLE";
|
||||||
const char kImageTag[] = "IMAGE";
|
const char kImageTag[] = "IMAGE";
|
||||||
const char kFloatContextFeaturePrefixTag[] = "FLOAT_CONTEXT_FEATURE_";
|
const char kFloatContextFeaturePrefixTag[] = "FLOAT_CONTEXT_FEATURE_";
|
||||||
const char kFloatFeaturePrefixTag[] = "FLOAT_FEATURE_";
|
const char kFloatFeaturePrefixTag[] = "FLOAT_FEATURE_";
|
||||||
|
const char kBytesFeaturePrefixTag[] = "BYTES_FEATURE_";
|
||||||
const char kForwardFlowEncodedTag[] = "FORWARD_FLOW_ENCODED";
|
const char kForwardFlowEncodedTag[] = "FORWARD_FLOW_ENCODED";
|
||||||
const char kBBoxTag[] = "BBOX";
|
const char kBBoxTag[] = "BBOX";
|
||||||
const char kKeypointsTag[] = "KEYPOINTS";
|
const char kKeypointsTag[] = "KEYPOINTS";
|
||||||
|
@ -153,6 +154,9 @@ class PackMediaSequenceCalculator : public CalculatorBase {
|
||||||
if (absl::StartsWith(tag, kFloatFeaturePrefixTag)) {
|
if (absl::StartsWith(tag, kFloatFeaturePrefixTag)) {
|
||||||
cc->Inputs().Tag(tag).Set<std::vector<float>>();
|
cc->Inputs().Tag(tag).Set<std::vector<float>>();
|
||||||
}
|
}
|
||||||
|
if (absl::StartsWith(tag, kBytesFeaturePrefixTag)) {
|
||||||
|
cc->Inputs().Tag(tag).Set<std::vector<std::string>>();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
CHECK(cc->Outputs().HasTag(kSequenceExampleTag) ||
|
CHECK(cc->Outputs().HasTag(kSequenceExampleTag) ||
|
||||||
|
@ -231,6 +235,13 @@ class PackMediaSequenceCalculator : public CalculatorBase {
|
||||||
mpms::ClearFeatureFloats(key, sequence_.get());
|
mpms::ClearFeatureFloats(key, sequence_.get());
|
||||||
mpms::ClearFeatureTimestamp(key, sequence_.get());
|
mpms::ClearFeatureTimestamp(key, sequence_.get());
|
||||||
}
|
}
|
||||||
|
if (absl::StartsWith(tag, kBytesFeaturePrefixTag)) {
|
||||||
|
std::string key = tag.substr(sizeof(kBytesFeaturePrefixTag) /
|
||||||
|
sizeof(*kBytesFeaturePrefixTag) -
|
||||||
|
1);
|
||||||
|
mpms::ClearFeatureBytes(key, sequence_.get());
|
||||||
|
mpms::ClearFeatureTimestamp(key, sequence_.get());
|
||||||
|
}
|
||||||
if (absl::StartsWith(tag, kKeypointsTag)) {
|
if (absl::StartsWith(tag, kKeypointsTag)) {
|
||||||
std::string key =
|
std::string key =
|
||||||
tag.substr(sizeof(kKeypointsTag) / sizeof(*kKeypointsTag) - 1);
|
tag.substr(sizeof(kKeypointsTag) / sizeof(*kKeypointsTag) - 1);
|
||||||
|
@ -405,6 +416,17 @@ class PackMediaSequenceCalculator : public CalculatorBase {
|
||||||
cc->Inputs().Tag(tag).Get<std::vector<float>>(),
|
cc->Inputs().Tag(tag).Get<std::vector<float>>(),
|
||||||
sequence_.get());
|
sequence_.get());
|
||||||
}
|
}
|
||||||
|
if (absl::StartsWith(tag, kBytesFeaturePrefixTag) &&
|
||||||
|
!cc->Inputs().Tag(tag).IsEmpty()) {
|
||||||
|
std::string key = tag.substr(sizeof(kBytesFeaturePrefixTag) /
|
||||||
|
sizeof(*kBytesFeaturePrefixTag) -
|
||||||
|
1);
|
||||||
|
mpms::AddFeatureTimestamp(key, cc->InputTimestamp().Value(),
|
||||||
|
sequence_.get());
|
||||||
|
mpms::AddFeatureBytes(
|
||||||
|
key, cc->Inputs().Tag(tag).Get<std::vector<std::string>>(),
|
||||||
|
sequence_.get());
|
||||||
|
}
|
||||||
if (absl::StartsWith(tag, kBBoxTag) && !cc->Inputs().Tag(tag).IsEmpty()) {
|
if (absl::StartsWith(tag, kBBoxTag) && !cc->Inputs().Tag(tag).IsEmpty()) {
|
||||||
std::string key = "";
|
std::string key = "";
|
||||||
if (tag != kBBoxTag) {
|
if (tag != kBBoxTag) {
|
||||||
|
|
|
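In the additions above, sizeof(kBytesFeaturePrefixTag) / sizeof(*kBytesFeaturePrefixTag) - 1 is the length of the "BYTES_FEATURE_" string literal without its trailing NUL, so the substr call strips the prefix and keeps the user-chosen feature key. A self-contained sketch of the idiom (illustrative values only, not part of the change):

    #include <iostream>
    #include <string>

    // Same idiom as in PackMediaSequenceCalculator: the array size of the string
    // literal minus 1 (for the trailing '\0') equals the prefix length.
    const char kBytesFeaturePrefixTag[] = "BYTES_FEATURE_";

    int main() {
      std::string tag = "BYTES_FEATURE_TEST";
      std::string key = tag.substr(sizeof(kBytesFeaturePrefixTag) /
                                       sizeof(*kBytesFeaturePrefixTag) -
                                   1);
      std::cout << key << std::endl;  // Prints "TEST".
      return 0;
    }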
@ -49,6 +49,8 @@ constexpr char kKeypointsTestTag[] = "KEYPOINTS_TEST";
|
||||||
constexpr char kBboxPredictedTag[] = "BBOX_PREDICTED";
|
constexpr char kBboxPredictedTag[] = "BBOX_PREDICTED";
|
||||||
constexpr char kAudioOtherTag[] = "AUDIO_OTHER";
|
constexpr char kAudioOtherTag[] = "AUDIO_OTHER";
|
||||||
constexpr char kAudioTestTag[] = "AUDIO_TEST";
|
constexpr char kAudioTestTag[] = "AUDIO_TEST";
|
||||||
|
constexpr char kBytesFeatureOtherTag[] = "BYTES_FEATURE_OTHER";
|
||||||
|
constexpr char kBytesFeatureTestTag[] = "BYTES_FEATURE_TEST";
|
||||||
constexpr char kForwardFlowEncodedTag[] = "FORWARD_FLOW_ENCODED";
|
constexpr char kForwardFlowEncodedTag[] = "FORWARD_FLOW_ENCODED";
|
||||||
constexpr char kFloatContextFeatureOtherTag[] = "FLOAT_CONTEXT_FEATURE_OTHER";
|
constexpr char kFloatContextFeatureOtherTag[] = "FLOAT_CONTEXT_FEATURE_OTHER";
|
||||||
constexpr char kFloatContextFeatureTestTag[] = "FLOAT_CONTEXT_FEATURE_TEST";
|
constexpr char kFloatContextFeatureTestTag[] = "FLOAT_CONTEXT_FEATURE_TEST";
|
||||||
|
@ -215,6 +217,54 @@ TEST_F(PackMediaSequenceCalculatorTest, PacksTwoFloatLists) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_F(PackMediaSequenceCalculatorTest, PacksTwoBytesLists) {
|
||||||
|
SetUpCalculator({"BYTES_FEATURE_TEST:test", "BYTES_FEATURE_OTHER:test2"}, {},
|
||||||
|
false, true);
|
||||||
|
auto input_sequence = ::absl::make_unique<tf::SequenceExample>();
|
||||||
|
|
||||||
|
int num_timesteps = 2;
|
||||||
|
for (int i = 0; i < num_timesteps; ++i) {
|
||||||
|
auto vs_ptr = ::absl::make_unique<std::vector<std::string>>(
|
||||||
|
2, absl::StrCat("foo", 2 << i));
|
||||||
|
runner_->MutableInputs()
|
||||||
|
->Tag(kBytesFeatureTestTag)
|
||||||
|
.packets.push_back(Adopt(vs_ptr.release()).At(Timestamp(i)));
|
||||||
|
vs_ptr = ::absl::make_unique<std::vector<std::string>>(
|
||||||
|
2, absl::StrCat("bar", 2 << i));
|
||||||
|
runner_->MutableInputs()
|
||||||
|
->Tag(kBytesFeatureOtherTag)
|
||||||
|
.packets.push_back(Adopt(vs_ptr.release()).At(Timestamp(i)));
|
||||||
|
}
|
||||||
|
|
||||||
|
runner_->MutableSidePackets()->Tag(kSequenceExampleTag) =
|
||||||
|
Adopt(input_sequence.release());
|
||||||
|
|
||||||
|
MP_ASSERT_OK(runner_->Run());
|
||||||
|
|
||||||
|
const std::vector<Packet>& output_packets =
|
||||||
|
runner_->Outputs().Tag(kSequenceExampleTag).packets;
|
||||||
|
ASSERT_EQ(1, output_packets.size());
|
||||||
|
const tf::SequenceExample& output_sequence =
|
||||||
|
output_packets[0].Get<tf::SequenceExample>();
|
||||||
|
|
||||||
|
ASSERT_EQ(num_timesteps,
|
||||||
|
mpms::GetFeatureTimestampSize("TEST", output_sequence));
|
||||||
|
ASSERT_EQ(num_timesteps, mpms::GetFeatureBytesSize("TEST", output_sequence));
|
||||||
|
ASSERT_EQ(num_timesteps,
|
||||||
|
mpms::GetFeatureTimestampSize("OTHER", output_sequence));
|
||||||
|
ASSERT_EQ(num_timesteps, mpms::GetFeatureBytesSize("OTHER", output_sequence));
|
||||||
|
for (int i = 0; i < num_timesteps; ++i) {
|
||||||
|
ASSERT_EQ(i, mpms::GetFeatureTimestampAt("TEST", output_sequence, i));
|
||||||
|
ASSERT_THAT(mpms::GetFeatureBytesAt("TEST", output_sequence, i),
|
||||||
|
::testing::ElementsAreArray(
|
||||||
|
std::vector<std::string>(2, absl::StrCat("foo", 2 << i))));
|
||||||
|
ASSERT_EQ(i, mpms::GetFeatureTimestampAt("OTHER", output_sequence, i));
|
||||||
|
ASSERT_THAT(mpms::GetFeatureBytesAt("OTHER", output_sequence, i),
|
||||||
|
::testing::ElementsAreArray(
|
||||||
|
std::vector<std::string>(2, absl::StrCat("bar", 2 << i))));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
TEST_F(PackMediaSequenceCalculatorTest, OutputAsZeroTimestamp) {
|
TEST_F(PackMediaSequenceCalculatorTest, OutputAsZeroTimestamp) {
|
||||||
SetUpCalculator({"FLOAT_FEATURE_TEST:test"}, {}, false, true, true);
|
SetUpCalculator({"FLOAT_FEATURE_TEST:test"}, {}, false, true, true);
|
||||||
auto input_sequence = ::absl::make_unique<tf::SequenceExample>();
|
auto input_sequence = ::absl::make_unique<tf::SequenceExample>();
|
||||||
|
@ -829,6 +879,45 @@ TEST_F(PackMediaSequenceCalculatorTest, TestReplacingFloatVectors) {
|
||||||
ASSERT_EQ(0, mpms::GetFeatureFloatsSize("OTHER", output_sequence));
|
ASSERT_EQ(0, mpms::GetFeatureFloatsSize("OTHER", output_sequence));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_F(PackMediaSequenceCalculatorTest, TestReplacingBytesVectors) {
|
||||||
|
SetUpCalculator({"BYTES_FEATURE_TEST:test", "BYTES_FEATURE_OTHER:test2"}, {},
|
||||||
|
false, true);
|
||||||
|
auto input_sequence = ::absl::make_unique<tf::SequenceExample>();
|
||||||
|
|
||||||
|
int num_timesteps = 2;
|
||||||
|
for (int i = 0; i < num_timesteps; ++i) {
|
||||||
|
auto vs_ptr = ::absl::make_unique<std::vector<std::string>>(
|
||||||
|
2, absl::StrCat("foo", 2 << i));
|
||||||
|
mpms::AddFeatureBytes("TEST", *vs_ptr, input_sequence.get());
|
||||||
|
mpms::AddFeatureTimestamp("TEST", i, input_sequence.get());
|
||||||
|
vs_ptr = ::absl::make_unique<std::vector<std::string>>(
|
||||||
|
2, absl::StrCat("bar", 2 << i));
|
||||||
|
mpms::AddFeatureBytes("OTHER", *vs_ptr, input_sequence.get());
|
||||||
|
mpms::AddFeatureTimestamp("OTHER", i, input_sequence.get());
|
||||||
|
}
|
||||||
|
ASSERT_EQ(num_timesteps,
|
||||||
|
mpms::GetFeatureTimestampSize("TEST", *input_sequence));
|
||||||
|
ASSERT_EQ(num_timesteps, mpms::GetFeatureBytesSize("TEST", *input_sequence));
|
||||||
|
ASSERT_EQ(num_timesteps,
|
||||||
|
mpms::GetFeatureTimestampSize("OTHER", *input_sequence));
|
||||||
|
ASSERT_EQ(num_timesteps, mpms::GetFeatureBytesSize("OTHER", *input_sequence));
|
||||||
|
runner_->MutableSidePackets()->Tag(kSequenceExampleTag) =
|
||||||
|
Adopt(input_sequence.release());
|
||||||
|
|
||||||
|
MP_ASSERT_OK(runner_->Run());
|
||||||
|
|
||||||
|
const std::vector<Packet>& output_packets =
|
||||||
|
runner_->Outputs().Tag(kSequenceExampleTag).packets;
|
||||||
|
ASSERT_EQ(1, output_packets.size());
|
||||||
|
const tf::SequenceExample& output_sequence =
|
||||||
|
output_packets[0].Get<tf::SequenceExample>();
|
||||||
|
|
||||||
|
ASSERT_EQ(0, mpms::GetFeatureTimestampSize("TEST", output_sequence));
|
||||||
|
ASSERT_EQ(0, mpms::GetFeatureFloatsSize("TEST", output_sequence));
|
||||||
|
ASSERT_EQ(0, mpms::GetFeatureTimestampSize("OTHER", output_sequence));
|
||||||
|
ASSERT_EQ(0, mpms::GetFeatureFloatsSize("OTHER", output_sequence));
|
||||||
|
}
|
||||||
|
|
||||||
TEST_F(PackMediaSequenceCalculatorTest, TestReconcilingAnnotations) {
|
TEST_F(PackMediaSequenceCalculatorTest, TestReconcilingAnnotations) {
|
||||||
SetUpCalculator({"IMAGE:images"}, {}, false, true);
|
SetUpCalculator({"IMAGE:images"}, {}, false, true);
|
||||||
auto input_sequence = ::absl::make_unique<tf::SequenceExample>();
|
auto input_sequence = ::absl::make_unique<tf::SequenceExample>();
|
||||||
|
|
|
@@ -162,6 +162,27 @@ selects.config_setting_group(
     ],
 )

+config_setting(
+    name = "edge_tpu_usb",
+    define_values = {
+        "MEDIAPIPE_EDGE_TPU": "usb",
+    },
+)
+
+config_setting(
+    name = "edge_tpu_pci",
+    define_values = {
+        "MEDIAPIPE_EDGE_TPU": "pci",
+    },
+)
+
+config_setting(
+    name = "edge_tpu_all",
+    define_values = {
+        "MEDIAPIPE_EDGE_TPU": "all",
+    },
+)
+
 cc_library(
     name = "tflite_inference_calculator",
     srcs = ["tflite_inference_calculator.cc"],

@@ -172,6 +193,12 @@ cc_library(
         ],
         "//conditions:default": [],
     }),
+    defines = select({
+        "//conditions:default": [],
+        ":edge_tpu_usb": ["MEDIAPIPE_EDGE_TPU=usb"],
+        ":edge_tpu_pci": ["MEDIAPIPE_EDGE_TPU=pci"],
+        ":edge_tpu_all": ["MEDIAPIPE_EDGE_TPU=all"],
+    }),
     linkopts = select({
         "//mediapipe:ios": [
             "-framework CoreVideo",

@@ -223,6 +250,20 @@ cc_library(
         "//conditions:default": [
            "//mediapipe/util:cpu_util",
         ],
+    }) + select({
+        "//conditions:default": [],
+        ":edge_tpu_usb": [
+            "@libedgetpu//tflite/public:edgetpu",
+            "@libedgetpu//tflite/public:oss_edgetpu_direct_usb",
+        ],
+        ":edge_tpu_pci": [
+            "@libedgetpu//tflite/public:edgetpu",
+            "@libedgetpu//tflite/public:oss_edgetpu_direct_pci",
+        ],
+        ":edge_tpu_all": [
+            "@libedgetpu//tflite/public:edgetpu",
+            "@libedgetpu//tflite/public:oss_edgetpu_direct_all",
+        ],
     }),
     alwayslink = 1,
 )
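As context for the new config_setting rules and the defines/deps selects above: Bazel config_setting rules with define_values match --define flags on the command line, so a build invoked with something like --define MEDIAPIPE_EDGE_TPU=usb would presumably select the :edge_tpu_usb branch, add the MEDIAPIPE_EDGE_TPU=usb preprocessor define, and pull in the matching @libedgetpu dependencies. The exact invocation is an assumption on this editor's part and is not stated in the change itself.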
@ -85,7 +85,22 @@ constexpr char kTensorsGpuTag[] = "TENSORS_GPU";
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
#if defined(MEDIAPIPE_EDGE_TPU)
|
#if defined(MEDIAPIPE_EDGE_TPU)
|
||||||
#include "edgetpu.h"
|
#include "tflite/public/edgetpu.h"
|
||||||
|
|
||||||
|
// Checkes whether model contains Edge TPU custom op or not.
|
||||||
|
bool ContainsEdgeTpuCustomOp(const tflite::FlatBufferModel& model) {
|
||||||
|
const auto* opcodes = model.GetModel()->operator_codes();
|
||||||
|
for (const auto* subgraph : *model.GetModel()->subgraphs()) {
|
||||||
|
for (const auto* op : *subgraph->operators()) {
|
||||||
|
const auto* opcode = opcodes->Get(op->opcode_index());
|
||||||
|
if (opcode->custom_code() &&
|
||||||
|
opcode->custom_code()->str() == edgetpu::kCustomOp) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
// Creates and returns an Edge TPU interpreter to run the given edgetpu model.
|
// Creates and returns an Edge TPU interpreter to run the given edgetpu model.
|
||||||
std::unique_ptr<tflite::Interpreter> BuildEdgeTpuInterpreter(
|
std::unique_ptr<tflite::Interpreter> BuildEdgeTpuInterpreter(
|
||||||
|
@ -94,14 +109,9 @@ std::unique_ptr<tflite::Interpreter> BuildEdgeTpuInterpreter(
|
||||||
edgetpu::EdgeTpuContext* edgetpu_context) {
|
edgetpu::EdgeTpuContext* edgetpu_context) {
|
||||||
resolver->AddCustom(edgetpu::kCustomOp, edgetpu::RegisterCustomOp());
|
resolver->AddCustom(edgetpu::kCustomOp, edgetpu::RegisterCustomOp());
|
||||||
std::unique_ptr<tflite::Interpreter> interpreter;
|
std::unique_ptr<tflite::Interpreter> interpreter;
|
||||||
if (tflite::InterpreterBuilder(model, *resolver)(&interpreter) != kTfLiteOk) {
|
CHECK_EQ(tflite::InterpreterBuilder(model, *resolver)(&interpreter),
|
||||||
std::cerr << "Failed to build edge TPU interpreter." << std::endl;
|
kTfLiteOk);
|
||||||
}
|
|
||||||
interpreter->SetExternalContext(kTfLiteEdgeTpuContext, edgetpu_context);
|
interpreter->SetExternalContext(kTfLiteEdgeTpuContext, edgetpu_context);
|
||||||
interpreter->SetNumThreads(1);
|
|
||||||
if (interpreter->AllocateTensors() != kTfLiteOk) {
|
|
||||||
std::cerr << "Failed to allocate edge TPU tensors." << std::endl;
|
|
||||||
}
|
|
||||||
return interpreter;
|
return interpreter;
|
||||||
}
|
}
|
||||||
#endif // MEDIAPIPE_EDGE_TPU
|
#endif // MEDIAPIPE_EDGE_TPU
|
||||||
|
@ -279,8 +289,7 @@ class TfLiteInferenceCalculator : public CalculatorBase {
|
||||||
#endif // MEDIAPIPE_TFLITE_GL_INFERENCE
|
#endif // MEDIAPIPE_TFLITE_GL_INFERENCE
|
||||||
|
|
||||||
#if defined(MEDIAPIPE_EDGE_TPU)
|
#if defined(MEDIAPIPE_EDGE_TPU)
|
||||||
std::shared_ptr<edgetpu::EdgeTpuContext> edgetpu_context_ =
|
std::shared_ptr<edgetpu::EdgeTpuContext> edgetpu_context_;
|
||||||
edgetpu::EdgeTpuManager::GetSingleton()->OpenDevice();
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
bool gpu_inference_ = false;
|
bool gpu_inference_ = false;
|
||||||
|
@ -303,6 +312,10 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator);
|
||||||
// Calculator Core Section
|
// Calculator Core Section
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
|
constexpr char kCustomOpResolverTag[] = "CUSTOM_OP_RESOLVER";
|
||||||
|
constexpr char kModelTag[] = "MODEL";
|
||||||
|
|
||||||
template <class CC>
|
template <class CC>
|
||||||
bool ShouldUseGpu(CC* cc) {
|
bool ShouldUseGpu(CC* cc) {
|
||||||
#if MEDIAPIPE_TFLITE_GPU_SUPPORTED
|
#if MEDIAPIPE_TFLITE_GPU_SUPPORTED
|
||||||
|
@ -327,7 +340,7 @@ absl::Status TfLiteInferenceCalculator::GetContract(CalculatorContract* cc) {
|
||||||
const auto& options =
|
const auto& options =
|
||||||
cc->Options<::mediapipe::TfLiteInferenceCalculatorOptions>();
|
cc->Options<::mediapipe::TfLiteInferenceCalculatorOptions>();
|
||||||
RET_CHECK(!options.model_path().empty() ^
|
RET_CHECK(!options.model_path().empty() ^
|
||||||
cc->InputSidePackets().HasTag("MODEL"))
|
cc->InputSidePackets().HasTag(kModelTag))
|
||||||
<< "Either model as side packet or model path in options is required.";
|
<< "Either model as side packet or model path in options is required.";
|
||||||
|
|
||||||
if (cc->Inputs().HasTag(kTensorsTag))
|
if (cc->Inputs().HasTag(kTensorsTag))
|
||||||
|
@ -340,13 +353,13 @@ absl::Status TfLiteInferenceCalculator::GetContract(CalculatorContract* cc) {
|
||||||
if (cc->Outputs().HasTag(kTensorsGpuTag))
|
if (cc->Outputs().HasTag(kTensorsGpuTag))
|
||||||
cc->Outputs().Tag(kTensorsGpuTag).Set<std::vector<GpuTensor>>();
|
cc->Outputs().Tag(kTensorsGpuTag).Set<std::vector<GpuTensor>>();
|
||||||
|
|
||||||
if (cc->InputSidePackets().HasTag("CUSTOM_OP_RESOLVER")) {
|
if (cc->InputSidePackets().HasTag(kCustomOpResolverTag)) {
|
||||||
cc->InputSidePackets()
|
cc->InputSidePackets()
|
||||||
.Tag("CUSTOM_OP_RESOLVER")
|
.Tag(kCustomOpResolverTag)
|
||||||
.Set<tflite::ops::builtin::BuiltinOpResolver>();
|
.Set<tflite::ops::builtin::BuiltinOpResolver>();
|
||||||
}
|
}
|
||||||
if (cc->InputSidePackets().HasTag("MODEL")) {
|
if (cc->InputSidePackets().HasTag(kModelTag)) {
|
||||||
cc->InputSidePackets().Tag("MODEL").Set<TfLiteModelPtr>();
|
cc->InputSidePackets().Tag(kModelTag).Set<TfLiteModelPtr>();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ShouldUseGpu(cc)) {
|
if (ShouldUseGpu(cc)) {
|
||||||
|
@ -486,8 +499,8 @@ absl::Status TfLiteInferenceCalculator::Close(CalculatorContext* cc) {
|
||||||
MP_RETURN_IF_ERROR(WriteKernelsToFile());
|
MP_RETURN_IF_ERROR(WriteKernelsToFile());
|
||||||
|
|
||||||
return RunInContextIfNeeded([this]() -> absl::Status {
|
return RunInContextIfNeeded([this]() -> absl::Status {
|
||||||
|
interpreter_ = nullptr;
|
||||||
if (delegate_) {
|
if (delegate_) {
|
||||||
interpreter_ = nullptr;
|
|
||||||
delegate_ = nullptr;
|
delegate_ = nullptr;
|
||||||
#if MEDIAPIPE_TFLITE_GPU_SUPPORTED
|
#if MEDIAPIPE_TFLITE_GPU_SUPPORTED
|
||||||
if (gpu_inference_) {
|
if (gpu_inference_) {
|
||||||
|
@ -501,7 +514,7 @@ absl::Status TfLiteInferenceCalculator::Close(CalculatorContext* cc) {
|
||||||
#endif // MEDIAPIPE_TFLITE_GPU_SUPPORTED
|
#endif // MEDIAPIPE_TFLITE_GPU_SUPPORTED
|
||||||
}
|
}
|
||||||
#if defined(MEDIAPIPE_EDGE_TPU)
|
#if defined(MEDIAPIPE_EDGE_TPU)
|
||||||
edgetpu_context_.reset();
|
edgetpu_context_ = nullptr;
|
||||||
#endif
|
#endif
|
||||||
return absl::OkStatus();
|
return absl::OkStatus();
|
||||||
});
|
});
|
||||||
|
@ -723,9 +736,9 @@ absl::Status TfLiteInferenceCalculator::InitTFLiteGPURunner(
|
||||||
auto op_resolver_ptr =
|
auto op_resolver_ptr =
|
||||||
static_cast<const tflite::ops::builtin::BuiltinOpResolver*>(
|
static_cast<const tflite::ops::builtin::BuiltinOpResolver*>(
|
||||||
&default_op_resolver);
|
&default_op_resolver);
|
||||||
if (cc->InputSidePackets().HasTag("CUSTOM_OP_RESOLVER")) {
|
if (cc->InputSidePackets().HasTag(kCustomOpResolverTag)) {
|
||||||
op_resolver_ptr = &(cc->InputSidePackets()
|
op_resolver_ptr = &(cc->InputSidePackets()
|
||||||
.Tag("CUSTOM_OP_RESOLVER")
|
.Tag(kCustomOpResolverTag)
|
||||||
.Get<tflite::ops::builtin::BuiltinOpResolver>());
|
.Get<tflite::ops::builtin::BuiltinOpResolver>());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -825,21 +838,26 @@ absl::Status TfLiteInferenceCalculator::LoadModel(CalculatorContext* cc) {
|
||||||
|
|
||||||
tflite::ops::builtin::BuiltinOpResolverWithoutDefaultDelegates
|
tflite::ops::builtin::BuiltinOpResolverWithoutDefaultDelegates
|
||||||
default_op_resolver;
|
default_op_resolver;
|
||||||
auto op_resolver_ptr =
|
|
||||||
static_cast<const tflite::ops::builtin::BuiltinOpResolver*>(
|
|
||||||
&default_op_resolver);
|
|
||||||
|
|
||||||
if (cc->InputSidePackets().HasTag("CUSTOM_OP_RESOLVER")) {
|
|
||||||
op_resolver_ptr = &(cc->InputSidePackets()
|
|
||||||
.Tag("CUSTOM_OP_RESOLVER")
|
|
||||||
.Get<tflite::ops::builtin::BuiltinOpResolver>());
|
|
||||||
}
|
|
||||||
|
|
||||||
#if defined(MEDIAPIPE_EDGE_TPU)
|
#if defined(MEDIAPIPE_EDGE_TPU)
|
||||||
interpreter_ =
|
if (ContainsEdgeTpuCustomOp(model)) {
|
||||||
BuildEdgeTpuInterpreter(model, op_resolver_ptr, edgetpu_context_.get());
|
edgetpu_context_ = edgetpu::EdgeTpuManager::GetSingleton()->OpenDevice();
|
||||||
#else
|
interpreter_ = BuildEdgeTpuInterpreter(model, &default_op_resolver,
|
||||||
tflite::InterpreterBuilder(model, *op_resolver_ptr)(&interpreter_);
|
edgetpu_context_.get());
|
||||||
|
} else {
|
||||||
|
#endif // MEDIAPIPE_EDGE_TPU
|
||||||
|
auto op_resolver_ptr =
|
||||||
|
static_cast<const tflite::ops::builtin::BuiltinOpResolver*>(
|
||||||
|
&default_op_resolver);
|
||||||
|
|
||||||
|
if (cc->InputSidePackets().HasTag(kCustomOpResolverTag)) {
|
||||||
|
op_resolver_ptr = &(cc->InputSidePackets()
|
||||||
|
.Tag(kCustomOpResolverTag)
|
||||||
|
.Get<tflite::ops::builtin::BuiltinOpResolver>());
|
||||||
|
}
|
||||||
|
|
||||||
|
tflite::InterpreterBuilder(model, *op_resolver_ptr)(&interpreter_);
|
||||||
|
#if defined(MEDIAPIPE_EDGE_TPU)
|
||||||
|
}
|
||||||
#endif // MEDIAPIPE_EDGE_TPU
|
#endif // MEDIAPIPE_EDGE_TPU
|
||||||
|
|
||||||
RET_CHECK(interpreter_);
|
RET_CHECK(interpreter_);
|
||||||
|
@ -872,8 +890,8 @@ absl::StatusOr<Packet> TfLiteInferenceCalculator::GetModelAsPacket(
|
||||||
if (!options.model_path().empty()) {
|
if (!options.model_path().empty()) {
|
||||||
return TfLiteModelLoader::LoadFromPath(options.model_path());
|
return TfLiteModelLoader::LoadFromPath(options.model_path());
|
||||||
}
|
}
|
||||||
if (cc.InputSidePackets().HasTag("MODEL")) {
|
if (cc.InputSidePackets().HasTag(kModelTag)) {
|
||||||
return cc.InputSidePackets().Tag("MODEL");
|
return cc.InputSidePackets().Tag(kModelTag);
|
||||||
}
|
}
|
||||||
return absl::Status(absl::StatusCode::kNotFound,
|
return absl::Status(absl::StatusCode::kNotFound,
|
||||||
"Must specify TFLite model as path or loaded model.");
|
"Must specify TFLite model as path or loaded model.");
|
||||||
|
@ -929,6 +947,8 @@ absl::Status TfLiteInferenceCalculator::LoadDelegate(CalculatorContext* cc) {
|
||||||
kTfLiteOk);
|
kTfLiteOk);
|
||||||
return absl::OkStatus();
|
return absl::OkStatus();
|
||||||
}
|
}
|
||||||
|
#else
|
||||||
|
(void)use_xnnpack;
|
||||||
#endif // !EDGETPU
|
#endif // !EDGETPU
|
||||||
|
|
||||||
// Return and use default tflite infernece (on CPU). No need for GPU
|
// Return and use default tflite infernece (on CPU). No need for GPU
|
||||||
|
|
|
@ -1353,3 +1353,34 @@ cc_test(
|
||||||
"//mediapipe/framework/port:gtest_main",
|
"//mediapipe/framework/port:gtest_main",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
cc_library(
|
||||||
|
name = "inverse_matrix_calculator",
|
||||||
|
srcs = ["inverse_matrix_calculator.cc"],
|
||||||
|
hdrs = ["inverse_matrix_calculator.h"],
|
||||||
|
visibility = ["//visibility:public"],
|
||||||
|
deps = [
|
||||||
|
"//mediapipe/framework:calculator_framework",
|
||||||
|
"//mediapipe/framework/api2:node",
|
||||||
|
"//mediapipe/framework/api2:port",
|
||||||
|
"@com_google_absl//absl/status",
|
||||||
|
"@eigen_archive//:eigen3",
|
||||||
|
],
|
||||||
|
alwayslink = True,
|
||||||
|
)
|
||||||
|
|
||||||
|
cc_test(
|
||||||
|
name = "inverse_matrix_calculator_test",
|
||||||
|
srcs = ["inverse_matrix_calculator_test.cc"],
|
||||||
|
tags = ["desktop_only_test"],
|
||||||
|
deps = [
|
||||||
|
":inverse_matrix_calculator",
|
||||||
|
"//mediapipe/framework:calculator_framework",
|
||||||
|
"//mediapipe/framework:calculator_runner",
|
||||||
|
"//mediapipe/framework/port:gtest_main",
|
||||||
|
"//mediapipe/framework/port:integral_types",
|
||||||
|
"//mediapipe/framework/port:parse_text_proto",
|
||||||
|
"@com_google_absl//absl/memory",
|
||||||
|
"@com_google_absl//absl/strings",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
|
@@ -33,6 +33,7 @@ namespace {
 constexpr char kImageFrameTag[] = "IMAGE_CPU";
 constexpr char kGpuBufferTag[] = "IMAGE_GPU";
 constexpr char kImageTag[] = "IMAGE";
+constexpr char kSourceOnGpuTag[] = "SOURCE_ON_GPU";
 }  // namespace

 // A calculator for converting the unified image container into

@@ -46,6 +47,8 @@ constexpr char kImageTag[] = "IMAGE";
 //   IMAGE_CPU: An ImageFrame containing output image.
 //   IMAGE_GPU: A GpuBuffer containing output image.
 //
+//   SOURCE_ON_GPU: The source Image is stored on GPU or CPU.
+//
 // Note:
 //   Data is automatically transferred to/from the CPU or GPU
 //   depending on output type.

@@ -66,6 +69,7 @@ class FromImageCalculator : public CalculatorBase {
   absl::Status RenderGpu(CalculatorContext* cc);
   absl::Status RenderCpu(CalculatorContext* cc);

+  bool check_image_source_ = false;
   bool gpu_output_ = false;
   bool gpu_initialized_ = false;
 #if !MEDIAPIPE_DISABLE_GPU

@@ -102,6 +106,9 @@ absl::Status FromImageCalculator::GetContract(CalculatorContract* cc) {
 #endif  // !MEDIAPIPE_DISABLE_GPU
   }

+  if (cc->Outputs().HasTag(kSourceOnGpuTag)) {
+    cc->Outputs().Tag(kSourceOnGpuTag).Set<bool>();
+  }
   return absl::OkStatus();
 }

@@ -111,7 +118,9 @@ absl::Status FromImageCalculator::Open(CalculatorContext* cc) {
   if (cc->Outputs().HasTag(kGpuBufferTag)) {
     gpu_output_ = true;
   }
+  if (cc->Outputs().HasTag(kSourceOnGpuTag)) {
+    check_image_source_ = true;
+  }
   if (gpu_output_) {
 #if !MEDIAPIPE_DISABLE_GPU
     MP_RETURN_IF_ERROR(gpu_helper_.Open(cc));

@@ -122,6 +131,13 @@ absl::Status FromImageCalculator::Open(CalculatorContext* cc) {
 }

 absl::Status FromImageCalculator::Process(CalculatorContext* cc) {
+  if (check_image_source_) {
+    auto& input = cc->Inputs().Tag(kImageTag).Get<mediapipe::Image>();
+    cc->Outputs()
+        .Tag(kSourceOnGpuTag)
+        .AddPacket(MakePacket<bool>(input.UsesGpu()).At(cc->InputTimestamp()));
+  }
+
   if (gpu_output_) {
 #if !MEDIAPIPE_DISABLE_GPU
     MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext([&cc]() -> absl::Status {
mediapipe/calculators/util/inverse_matrix_calculator.cc (new file, 50 lines)
@@ -0,0 +1,50 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "mediapipe/calculators/util/inverse_matrix_calculator.h"

#include "Eigen/Core"
#include "Eigen/Geometry"
#include "Eigen/LU"
#include "absl/status/status.h"
#include "mediapipe/framework/api2/node.h"
#include "mediapipe/framework/calculator_framework.h"

namespace mediapipe {
namespace api2 {

class InverseMatrixCalculatorImpl : public NodeImpl<InverseMatrixCalculator> {
  absl::Status Process(mediapipe::CalculatorContext* cc) override {
    if (kInputMatrix(cc).IsEmpty()) {
      return absl::OkStatus();
    }
    Eigen::Matrix<float, 4, 4, Eigen::RowMajor> matrix(
        kInputMatrix(cc).Get().data());

    Eigen::Matrix<float, 4, 4, Eigen::RowMajor> inverse_matrix;
    bool inverse_check;
    matrix.computeInverseWithCheck(inverse_matrix, inverse_check);
    RET_CHECK(inverse_check) << "Inverse matrix cannot be calculated.";

    std::array<float, 16> output;
    Eigen::Map<Eigen::Matrix<float, 4, 4, Eigen::RowMajor>>(
        output.data(), 4, 4) = inverse_matrix.matrix();
    kOutputMatrix(cc).Send(std::move(output));
    return absl::OkStatus();
  }
};
MEDIAPIPE_NODE_IMPLEMENTATION(InverseMatrixCalculatorImpl);

}  // namespace api2
}  // namespace mediapipe
mediapipe/calculators/util/inverse_matrix_calculator.h (new file, 51 lines)
@@ -0,0 +1,51 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef MEDIAPIPE_CALCULATORS_UTIL_INVERSE_MATRIX_CALCULATOR_H_
#define MEDIAPIPE_CALCULATORS_UTIL_INVERSE_MATRIX_CALCULATOR_H_

#include "mediapipe/framework/api2/node.h"
#include "mediapipe/framework/api2/port.h"

namespace mediapipe {

// Runs affine transformation.
//
// Input:
//   MATRIX - std::array<float, 16>
//     Row major 4x4 matrix to inverse.
//
// Output:
//   MATRIX - std::array<float, 16>
//     Row major 4x4 inversed matrix.
//
// Usage example:
//   node {
//     calculator: "dishti.aimatter.InverseMatrixCalculator"
//     input_stream: "MATRIX:input_matrix"
//     output_stream: "MATRIX:output_matrix"
//   }
class InverseMatrixCalculator : public mediapipe::api2::NodeIntf {
 public:
  static constexpr mediapipe::api2::Input<std::array<float, 16>> kInputMatrix{
      "MATRIX"};
  static constexpr mediapipe::api2::Output<std::array<float, 16>> kOutputMatrix{
      "MATRIX"};
  MEDIAPIPE_NODE_INTERFACE(InverseMatrixCalculator, kInputMatrix,
                           kOutputMatrix);
};

}  // namespace mediapipe

#endif  // MEDIAPIPE_CALCULATORS_UTIL_INVERSE_MATRIX_CALCULATOR_H_
mediapipe/calculators/util/inverse_matrix_calculator_test.cc (new file, 126 lines)
@@ -0,0 +1,126 @@
#include "mediapipe/calculators/util/inverse_matrix_calculator.h"

#include <array>

#include "absl/memory/memory.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_runner.h"
#include "mediapipe/framework/port/gtest.h"
#include "mediapipe/framework/port/integral_types.h"
#include "mediapipe/framework/port/parse_text_proto.h"
#include "mediapipe/framework/port/status_matchers.h"

namespace mediapipe {
namespace {

void RunTest(const std::array<float, 16>& matrix,
             const std::array<float, 16>& expected_inverse_matrix) {
  auto graph_config = mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(
      R"pb(
        input_stream: "matrix"
        node {
          calculator: "InverseMatrixCalculator"
          input_stream: "MATRIX:matrix"
          output_stream: "MATRIX:inverse_matrix"
        }
      )pb");

  std::vector<Packet> output_packets;
  tool::AddVectorSink("inverse_matrix", &graph_config, &output_packets);

  // Run the graph.
  CalculatorGraph graph;
  MP_ASSERT_OK(graph.Initialize(graph_config));
  MP_ASSERT_OK(graph.StartRun({}));

  MP_ASSERT_OK(graph.AddPacketToInputStream(
      "matrix",
      MakePacket<std::array<float, 16>>(std::move(matrix)).At(Timestamp(0))));

  MP_ASSERT_OK(graph.WaitUntilIdle());
  ASSERT_THAT(output_packets, testing::SizeIs(1));

  const auto& inverse_matrix = output_packets[0].Get<std::array<float, 16>>();

  EXPECT_THAT(inverse_matrix, testing::Eq(expected_inverse_matrix));

  // Fully close graph at end, otherwise calculator+tensors are destroyed
  // after calling WaitUntilDone().
  MP_ASSERT_OK(graph.CloseInputStream("matrix"));
  MP_ASSERT_OK(graph.WaitUntilDone());
}

TEST(InverseMatrixCalculatorTest, Identity) {
  // clang-format off
  std::array<float, 16> matrix = {
      1.0f, 0.0f, 0.0f, 0.0f,
      0.0f, 1.0f, 0.0f, 0.0f,
      0.0f, 0.0f, 1.0f, 0.0f,
      0.0f, 0.0f, 0.0f, 1.0f,
  };
  std::array<float, 16> expected_inverse_matrix = {
      1.0f, 0.0f, 0.0f, 0.0f,
      0.0f, 1.0f, 0.0f, 0.0f,
      0.0f, 0.0f, 1.0f, 0.0f,
      0.0f, 0.0f, 0.0f, 1.0f,
  };
  // clang-format on
  RunTest(matrix, expected_inverse_matrix);
}

TEST(InverseMatrixCalculatorTest, Translation) {
  // clang-format off
  std::array<float, 16> matrix = {
      1.0f, 0.0f, 0.0f, 2.0f,
      0.0f, 1.0f, 0.0f, -5.0f,
      0.0f, 0.0f, 1.0f, 0.0f,
      0.0f, 0.0f, 0.0f, 1.0f,
  };
  std::array<float, 16> expected_inverse_matrix = {
      1.0f, 0.0f, 0.0f, -2.0f,
      0.0f, 1.0f, 0.0f, 5.0f,
      0.0f, 0.0f, 1.0f, 0.0f,
      0.0f, 0.0f, 0.0f, 1.0f,
  };
  // clang-format on
  RunTest(matrix, expected_inverse_matrix);
}

TEST(InverseMatrixCalculatorTest, Scale) {
  // clang-format off
  std::array<float, 16> matrix = {
      5.0f, 0.0f, 0.0f, 0.0f,
      0.0f, 2.0f, 0.0f, 0.0f,
      0.0f, 0.0f, 1.0f, 0.0f,
      0.0f, 0.0f, 0.0f, 1.0f,
  };
  std::array<float, 16> expected_inverse_matrix = {
      0.2f, 0.0f, 0.0f, 0.0f,
      0.0f, 0.5f, 0.0f, 0.0f,
      0.0f, 0.0f, 1.0f, 0.0f,
      0.0f, 0.0f, 0.0f, 1.0f,
  };
  // clang-format on
  RunTest(matrix, expected_inverse_matrix);
}

TEST(InverseMatrixCalculatorTest, Rotation90) {
  // clang-format off
  std::array<float, 16> matrix = {
      0.0f, -1.0f, 0.0f, 0.0f,
      1.0f,  0.0f, 0.0f, 0.0f,
      0.0f,  0.0f, 1.0f, 0.0f,
      0.0f,  0.0f, 0.0f, 1.0f,
  };
  std::array<float, 16> expected_inverse_matrix = {
       0.0f, 1.0f, 0.0f, 0.0f,
      -1.0f, 0.0f, 0.0f, 0.0f,
       0.0f, 0.0f, 1.0f, 0.0f,
       0.0f, 0.0f, 0.0f, 1.0f,
  };
  // clang-format on
  RunTest(matrix, expected_inverse_matrix);
}

}  // namespace
}  // namespace mediapipe
mediapipe/examples/android/solutions/create_win_symlinks.bat (new file, 16 lines)
@@ -0,0 +1,16 @@
@rem Remove the current res dir symlinks that are for Linux and macOS and recreate res dir symlinks for Windows.
@rem This script needs administrator permission. Must run this script as administrator.

@rem for hands example app.
cd /d %~dp0
cd hands\src\main
rm res
mklink /d res ..\..\..\res

@rem for facemesh example app.
cd /d %~dp0
cd facemesh\src\main
rm res
mklink /d res ..\..\..\res
dir
pause
mediapipe/examples/android/solutions/facemesh/build.gradle (new file, 50 lines)
@@ -0,0 +1,50 @@
plugins {
    id 'com.android.application'
}

android {
    compileSdkVersion 30
    buildToolsVersion "30.0.3"

    defaultConfig {
        applicationId "com.google.mediapipe.apps.hands"
        minSdkVersion 21
        targetSdkVersion 30
        versionCode 1
        versionName "1.0"
    }

    buildTypes {
        release {
            minifyEnabled false
            proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro'
        }
    }
    compileOptions {
        sourceCompatibility JavaVersion.VERSION_1_8
        targetCompatibility JavaVersion.VERSION_1_8
    }
}

dependencies {
    implementation fileTree(dir: 'libs', include: ['*.jar', '*.aar'])
    implementation 'androidx.appcompat:appcompat:1.3.0'
    implementation 'com.google.android.material:material:1.3.0'
    implementation 'androidx.constraintlayout:constraintlayout:2.0.4'
    testImplementation 'junit:junit:4.+'
    androidTestImplementation 'androidx.test.ext:junit:1.1.2'
    androidTestImplementation 'androidx.test.espresso:espresso-core:3.3.0'
    // MediaPipe hands solution API and solution-core.
    implementation 'com.google.mediapipe:solution-core:latest.release'
    implementation 'com.google.mediapipe:facemesh:latest.release'
    // MediaPipe deps
    implementation 'com.google.flogger:flogger:latest.release'
    implementation 'com.google.flogger:flogger-system-backend:latest.release'
    implementation 'com.google.guava:guava:27.0.1-android'
    implementation 'com.google.protobuf:protobuf-java:3.11.4'
    // CameraX core library
    def camerax_version = "1.0.0-beta10"
    implementation "androidx.camera:camera-core:$camerax_version"
    implementation "androidx.camera:camera-camera2:$camerax_version"
    implementation "androidx.camera:camera-lifecycle:$camerax_version"
}
mediapipe/examples/android/solutions/facemesh/proguard-rules.pro (new file, 21 lines)
@@ -0,0 +1,21 @@
# Add project specific ProGuard rules here.
# You can control the set of applied configuration files using the
# proguardFiles setting in build.gradle.
#
# For more details, see
#   http://developer.android.com/guide/developing/tools/proguard.html

# If your project uses WebView with JS, uncomment the following
# and specify the fully qualified class name to the JavaScript interface
# class:
#-keepclassmembers class fqcn.of.javascript.interface.for.webview {
#   public *;
#}

# Uncomment this to preserve the line number information for
# debugging stack traces.
#-keepattributes SourceFile,LineNumberTable

# If you keep the line number information, uncomment this to
# hide the original source file name.
#-renamesourcefileattribute SourceFile
@ -0,0 +1,32 @@
|
||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
|
||||||
|
package="com.google.mediapipe.examples.facemesh">
|
||||||
|
|
||||||
|
<uses-sdk
|
||||||
|
android:minSdkVersion="21"
|
||||||
|
android:targetSdkVersion="30" />
|
||||||
|
|
||||||
|
<!-- For loading images from gallery -->
|
||||||
|
<uses-permission android:name="android.permission.READ_EXTERNAL_STORAGE" />
|
||||||
|
<!-- For using the camera -->
|
||||||
|
<uses-permission android:name="android.permission.CAMERA" />
|
||||||
|
<uses-feature android:name="android.hardware.camera" />
|
||||||
|
|
||||||
|
<application
|
||||||
|
android:allowBackup="true"
|
||||||
|
android:icon="@mipmap/ic_launcher"
|
||||||
|
android:label="MediaPipe FaceMesh"
|
||||||
|
android:roundIcon="@mipmap/ic_launcher_round"
|
||||||
|
android:supportsRtl="true"
|
||||||
|
android:theme="@style/AppTheme">
|
||||||
|
<activity android:name=".MainActivity"
|
||||||
|
android:screenOrientation="portrait">
|
||||||
|
<intent-filter>
|
||||||
|
<action android:name="android.intent.action.MAIN" />
|
||||||
|
|
||||||
|
<category android:name="android.intent.category.LAUNCHER" />
|
||||||
|
</intent-filter>
|
||||||
|
</activity>
|
||||||
|
</application>
|
||||||
|
|
||||||
|
</manifest>
|
mediapipe/examples/android/solutions/facemesh/src/main/BUILD (new file, 44 lines)
@@ -0,0 +1,44 @@
# Copyright 2021 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

licenses(["notice"])

package(default_visibility = ["//visibility:private"])

android_binary(
    name = "facemesh",
    srcs = glob(["**/*.java"]),
    custom_package = "com.google.mediapipe.examples.facemesh",
    manifest = "AndroidManifest.xml",
    manifest_values = {
        "applicationId": "com.google.mediapipe.examples.facemesh",
    },
    multidex = "native",
    resource_files = ["//mediapipe/examples/android/solutions:resource_files"],
    deps = [
        "//mediapipe/framework/formats:landmark_java_proto_lite",
        "//mediapipe/java/com/google/mediapipe/solutioncore:camera_input",
        "//mediapipe/java/com/google/mediapipe/solutioncore:mediapipe_jni_lib",
        "//mediapipe/java/com/google/mediapipe/solutioncore:solution_rendering",
        "//mediapipe/java/com/google/mediapipe/solutioncore:video_input",
        "//mediapipe/java/com/google/mediapipe/solutions/facemesh",
        "//third_party:androidx_appcompat",
        "//third_party:androidx_constraint_layout",
        "//third_party:opencv",
        "@maven//:androidx_activity_activity",
        "@maven//:androidx_concurrent_concurrent_futures",
        "@maven//:androidx_fragment_fragment",
        "@maven//:com_google_guava_guava",
    ],
)
@@ -0,0 +1,186 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.mediapipe.examples.facemesh;

import android.opengl.GLES20;
import android.opengl.Matrix;
import com.google.common.collect.ImmutableSet;
import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark;
import com.google.mediapipe.solutioncore.ResultGlBoundary;
import com.google.mediapipe.solutioncore.ResultGlRenderer;
import com.google.mediapipe.solutions.facemesh.FaceMeshConnections;
import com.google.mediapipe.solutions.facemesh.FaceMeshResult;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.FloatBuffer;
import java.util.List;

/** A custom implementation of {@link ResultGlRenderer} to render MediaPope FaceMesh results. */
public class FaceMeshResultGlRenderer implements ResultGlRenderer<FaceMeshResult> {
  private static final String TAG = "FaceMeshResultGlRenderer";

  private static final float[] TESSELATION_COLOR = new float[] {0.75f, 0.75f, 0.75f, 0.5f};
  private static final int TESSELATION_THICKNESS = 5;
  private static final float[] RIGHT_EYE_COLOR = new float[] {1f, 0.2f, 0.2f, 1f};
  private static final int RIGHT_EYE_THICKNESS = 8;
  private static final float[] RIGHT_EYEBROW_COLOR = new float[] {1f, 0.2f, 0.2f, 1f};
  private static final int RIGHT_EYEBROW_THICKNESS = 8;
  private static final float[] LEFT_EYE_COLOR = new float[] {0.2f, 1f, 0.2f, 1f};
  private static final int LEFT_EYE_THICKNESS = 8;
  private static final float[] LEFT_EYEBROW_COLOR = new float[] {0.2f, 1f, 0.2f, 1f};
  private static final int LEFT_EYEBROW_THICKNESS = 8;
  private static final float[] FACE_OVAL_COLOR = new float[] {0.9f, 0.9f, 0.9f, 1f};
  private static final int FACE_OVAL_THICKNESS = 8;
  private static final float[] LIPS_COLOR = new float[] {0.9f, 0.9f, 0.9f, 1f};
  private static final int LIPS_THICKNESS = 8;
  private static final String VERTEX_SHADER =
      "uniform mat4 uTransformMatrix;\n"
          + "attribute vec4 vPosition;\n"
          + "void main() {\n"
          + "  gl_Position = uTransformMatrix * vPosition;\n"
          + "}";
  private static final String FRAGMENT_SHADER =
      "precision mediump float;\n"
          + "uniform vec4 uColor;\n"
          + "void main() {\n"
          + "  gl_FragColor = uColor;\n"
          + "}";
  private int program;
  private int positionHandle;
  private int transformMatrixHandle;
  private int colorHandle;
  private final float[] transformMatrix = new float[16];

  private int loadShader(int type, String shaderCode) {
    int shader = GLES20.glCreateShader(type);
    GLES20.glShaderSource(shader, shaderCode);
    GLES20.glCompileShader(shader);
    return shader;
  }

  @Override
  public void setupRendering() {
    program = GLES20.glCreateProgram();
    int vertexShader = loadShader(GLES20.GL_VERTEX_SHADER, VERTEX_SHADER);
    int fragmentShader = loadShader(GLES20.GL_FRAGMENT_SHADER, FRAGMENT_SHADER);
    GLES20.glAttachShader(program, vertexShader);
    GLES20.glAttachShader(program, fragmentShader);
    GLES20.glLinkProgram(program);
    positionHandle = GLES20.glGetAttribLocation(program, "vPosition");
    transformMatrixHandle = GLES20.glGetUniformLocation(program, "uTransformMatrix");
    colorHandle = GLES20.glGetUniformLocation(program, "uColor");
  }

  @Override
  public void renderResult(FaceMeshResult result, ResultGlBoundary boundary) {
    if (result == null) {
      return;
    }
    GLES20.glUseProgram(program);
    // Sets the transform matrix to align the result rendering with the scaled output texture.
    // Also flips the rendering vertically since OpenGL assumes the coordinate origin is at the
    // bottom-left corner, whereas MediaPipe landmark data assumes the coordinate origin is at the
    // top-left corner.
    Matrix.setIdentityM(transformMatrix, 0);
    Matrix.scaleM(
        transformMatrix,
        0,
        2 / (boundary.right() - boundary.left()),
        -2 / (boundary.top() - boundary.bottom()),
        1.0f);
    GLES20.glUniformMatrix4fv(transformMatrixHandle, 1, false, transformMatrix, 0);

    int numFaces = result.multiFaceLandmarks().size();
    for (int i = 0; i < numFaces; ++i) {
      drawLandmarks(
          result.multiFaceLandmarks().get(i).getLandmarkList(),
          FaceMeshConnections.FACEMESH_TESSELATION,
          TESSELATION_COLOR,
          TESSELATION_THICKNESS);
      drawLandmarks(
          result.multiFaceLandmarks().get(i).getLandmarkList(),
          FaceMeshConnections.FACEMESH_RIGHT_EYE,
          RIGHT_EYE_COLOR,
          RIGHT_EYE_THICKNESS);
      drawLandmarks(
          result.multiFaceLandmarks().get(i).getLandmarkList(),
          FaceMeshConnections.FACEMESH_RIGHT_EYEBROW,
          RIGHT_EYEBROW_COLOR,
          RIGHT_EYEBROW_THICKNESS);
      drawLandmarks(
          result.multiFaceLandmarks().get(i).getLandmarkList(),
          FaceMeshConnections.FACEMESH_LEFT_EYE,
          LEFT_EYE_COLOR,
          LEFT_EYE_THICKNESS);
      drawLandmarks(
          result.multiFaceLandmarks().get(i).getLandmarkList(),
          FaceMeshConnections.FACEMESH_LEFT_EYEBR0W,
          LEFT_EYEBROW_COLOR,
          LEFT_EYEBROW_THICKNESS);
      drawLandmarks(
          result.multiFaceLandmarks().get(i).getLandmarkList(),
          FaceMeshConnections.FACEMESH_FACE_OVAL,
          FACE_OVAL_COLOR,
          FACE_OVAL_THICKNESS);
      drawLandmarks(
          result.multiFaceLandmarks().get(i).getLandmarkList(),
          FaceMeshConnections.FACEMESH_LIPS,
          LIPS_COLOR,
          LIPS_THICKNESS);
    }
  }

  /**
   * Calls this to delete the shader program.
   *
   * <p>This is only necessary if one wants to release the program while keeping the context around.
   */
  public void release() {
    GLES20.glDeleteProgram(program);
  }

  private void drawLandmarks(
      List<NormalizedLandmark> faceLandmarkList,
      ImmutableSet<FaceMeshConnections.Connection> connections,
      float[] colorArray,
      int thickness) {
    GLES20.glUniform4fv(colorHandle, 1, colorArray, 0);
    GLES20.glLineWidth(thickness);
    for (FaceMeshConnections.Connection c : connections) {
      float[] vertex = new float[4];
      NormalizedLandmark start = faceLandmarkList.get(c.start());
      vertex[0] = normalizedLandmarkValue(start.getX());
      vertex[1] = normalizedLandmarkValue(start.getY());
      NormalizedLandmark end = faceLandmarkList.get(c.end());
      vertex[2] = normalizedLandmarkValue(end.getX());
      vertex[3] = normalizedLandmarkValue(end.getY());
      FloatBuffer vertexBuffer =
          ByteBuffer.allocateDirect(vertex.length * 4)
              .order(ByteOrder.nativeOrder())
              .asFloatBuffer()
              .put(vertex);
      vertexBuffer.position(0);
      GLES20.glEnableVertexAttribArray(positionHandle);
      GLES20.glVertexAttribPointer(positionHandle, 2, GLES20.GL_FLOAT, false, 0, vertexBuffer);
      GLES20.glDrawArrays(GLES20.GL_LINES, 0, 2);
    }
  }

  // Normalizes the value from the landmark value range:[0, 1] to the standard OpenGL coordinate
  // value range: [-1, 1].
  private float normalizedLandmarkValue(float value) {
    return value * 2 - 1;
  }
}
@ -0,0 +1,158 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.mediapipe.examples.facemesh;

import android.content.Context;
import android.graphics.Bitmap;
import android.graphics.Canvas;
import android.graphics.Color;
import android.graphics.Matrix;
import android.graphics.Paint;
import androidx.appcompat.widget.AppCompatImageView;
import android.util.Size;
import com.google.common.collect.ImmutableSet;
import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark;
import com.google.mediapipe.solutions.facemesh.FaceMeshConnections;
import com.google.mediapipe.solutions.facemesh.FaceMeshResult;
import java.util.List;

/** An ImageView implementation for displaying MediaPipe FaceMesh results. */
public class FaceMeshResultImageView extends AppCompatImageView {
  private static final String TAG = "FaceMeshResultImageView";

  private static final int TESSELATION_COLOR = Color.parseColor("#70C0C0C0");
  private static final int TESSELATION_THICKNESS = 5;
  private static final int RIGHT_EYE_COLOR = Color.parseColor("#FF3030");
  private static final int RIGHT_EYE_THICKNESS = 8;
  private static final int RIGHT_EYEBROW_COLOR = Color.parseColor("#FF3030");
  private static final int RIGHT_EYEBROW_THICKNESS = 8;
  private static final int LEFT_EYE_COLOR = Color.parseColor("#30FF30");
  private static final int LEFT_EYE_THICKNESS = 8;
  private static final int LEFT_EYEBROW_COLOR = Color.parseColor("#30FF30");
  private static final int LEFT_EYEBROW_THICKNESS = 8;
  private static final int FACE_OVAL_COLOR = Color.parseColor("#E0E0E0");
  private static final int FACE_OVAL_THICKNESS = 8;
  private static final int LIPS_COLOR = Color.parseColor("#E0E0E0");
  private static final int LIPS_THICKNESS = 8;
  private Bitmap latest;

  public FaceMeshResultImageView(Context context) {
    super(context);
    setScaleType(AppCompatImageView.ScaleType.FIT_CENTER);
  }

  /**
   * Sets a {@link FaceMeshResult} to render.
   *
   * @param result a {@link FaceMeshResult} object that contains the solution outputs and the input
   *     {@link Bitmap}.
   */
  public void setFaceMeshResult(FaceMeshResult result) {
    if (result == null) {
      return;
    }
    Bitmap bmInput = result.inputBitmap();
    int width = bmInput.getWidth();
    int height = bmInput.getHeight();
    latest = Bitmap.createBitmap(width, height, bmInput.getConfig());
    Canvas canvas = new Canvas(latest);
    Size imageSize = new Size(width, height);
    canvas.drawBitmap(bmInput, new Matrix(), null);
    int numFaces = result.multiFaceLandmarks().size();
    for (int i = 0; i < numFaces; ++i) {
      drawLandmarksOnCanvas(
          canvas,
          result.multiFaceLandmarks().get(i).getLandmarkList(),
          FaceMeshConnections.FACEMESH_TESSELATION,
          imageSize,
          TESSELATION_COLOR,
          TESSELATION_THICKNESS);
      drawLandmarksOnCanvas(
          canvas,
          result.multiFaceLandmarks().get(i).getLandmarkList(),
          FaceMeshConnections.FACEMESH_RIGHT_EYE,
          imageSize,
          RIGHT_EYE_COLOR,
          RIGHT_EYE_THICKNESS);
      drawLandmarksOnCanvas(
          canvas,
          result.multiFaceLandmarks().get(i).getLandmarkList(),
          FaceMeshConnections.FACEMESH_RIGHT_EYEBROW,
          imageSize,
          RIGHT_EYEBROW_COLOR,
          RIGHT_EYEBROW_THICKNESS);
      drawLandmarksOnCanvas(
          canvas,
          result.multiFaceLandmarks().get(i).getLandmarkList(),
          FaceMeshConnections.FACEMESH_LEFT_EYE,
          imageSize,
          LEFT_EYE_COLOR,
          LEFT_EYE_THICKNESS);
      drawLandmarksOnCanvas(
          canvas,
          result.multiFaceLandmarks().get(i).getLandmarkList(),
          FaceMeshConnections.FACEMESH_LEFT_EYEBROW,
          imageSize,
          LEFT_EYEBROW_COLOR,
          LEFT_EYEBROW_THICKNESS);
      drawLandmarksOnCanvas(
          canvas,
          result.multiFaceLandmarks().get(i).getLandmarkList(),
          FaceMeshConnections.FACEMESH_FACE_OVAL,
          imageSize,
          FACE_OVAL_COLOR,
          FACE_OVAL_THICKNESS);
      drawLandmarksOnCanvas(
          canvas,
          result.multiFaceLandmarks().get(i).getLandmarkList(),
          FaceMeshConnections.FACEMESH_LIPS,
          imageSize,
          LIPS_COLOR,
          LIPS_THICKNESS);
    }
  }

  /** Updates the image view with the latest facemesh result. */
  public void update() {
    postInvalidate();
    if (latest != null) {
      setImageBitmap(latest);
    }
  }

  // TODO: Better face landmark and face connection drawing.
  private void drawLandmarksOnCanvas(
      Canvas canvas,
      List<NormalizedLandmark> faceLandmarkList,
      ImmutableSet<FaceMeshConnections.Connection> connections,
      Size imageSize,
      int color,
      int thickness) {
    // Draw connections.
    for (FaceMeshConnections.Connection c : connections) {
      Paint connectionPaint = new Paint();
      connectionPaint.setColor(color);
      connectionPaint.setStrokeWidth(thickness);
      NormalizedLandmark start = faceLandmarkList.get(c.start());
      NormalizedLandmark end = faceLandmarkList.get(c.end());
      canvas.drawLine(
          start.getX() * imageSize.getWidth(),
          start.getY() * imageSize.getHeight(),
          end.getX() * imageSize.getWidth(),
          end.getY() * imageSize.getHeight(),
          connectionPaint);
    }
  }
}
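FaceMeshResultImageView draws into an off-screen bitmap on whatever thread delivers the result and only swaps it into the view in update(). One way this might be wired from an Activity; the helper class and the faceMesh/imageView names are assumptions for illustration, not part of this change:

package com.google.mediapipe.examples.facemesh;

import android.app.Activity;
import com.google.mediapipe.solutions.facemesh.FaceMesh;

/** Illustrative glue code only; not part of the commit. */
final class ResultViewWiringSketch {
  static void connect(Activity activity, FaceMesh faceMesh, FaceMeshResultImageView imageView) {
    faceMesh.setResultListener(
        faceMeshResult -> {
          // Drawing into the backing bitmap can happen on the listener thread; only the final
          // setImageBitmap() inside update() must run on the UI thread.
          imageView.setFaceMeshResult(faceMeshResult);
          activity.runOnUiThread(imageView::update);
        });
  }
}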
@ -0,0 +1,308 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.mediapipe.examples.facemesh;

import android.content.Intent;
import android.graphics.Bitmap;
import android.os.Bundle;
import android.provider.MediaStore;
import androidx.appcompat.app.AppCompatActivity;
import android.util.Log;
import android.view.View;
import android.widget.Button;
import android.widget.FrameLayout;
import androidx.activity.result.ActivityResultLauncher;
import androidx.activity.result.contract.ActivityResultContracts;
import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark;
import com.google.mediapipe.solutioncore.CameraInput;
import com.google.mediapipe.solutioncore.SolutionGlSurfaceView;
import com.google.mediapipe.solutioncore.VideoInput;
import com.google.mediapipe.solutions.facemesh.FaceMesh;
import com.google.mediapipe.solutions.facemesh.FaceMeshOptions;
import com.google.mediapipe.solutions.facemesh.FaceMeshResult;
import java.io.IOException;

/** Main activity of MediaPipe FaceMesh app. */
public class MainActivity extends AppCompatActivity {
  private static final String TAG = "MainActivity";

  private FaceMesh facemesh;
  // Run the pipeline and the model inference on GPU or CPU.
  private static final boolean RUN_ON_GPU = true;

  private enum InputSource {
    UNKNOWN,
    IMAGE,
    VIDEO,
    CAMERA,
  }
  private InputSource inputSource = InputSource.UNKNOWN;
  // Image demo UI and image loader components.
  private ActivityResultLauncher<Intent> imageGetter;
  private FaceMeshResultImageView imageView;
  // Video demo UI and video loader components.
  private VideoInput videoInput;
  private ActivityResultLauncher<Intent> videoGetter;
  // Live camera demo UI and camera components.
  private CameraInput cameraInput;
  private SolutionGlSurfaceView<FaceMeshResult> glSurfaceView;

  @Override
  protected void onCreate(Bundle savedInstanceState) {
    super.onCreate(savedInstanceState);
    setContentView(R.layout.activity_main);
    setupStaticImageDemoUiComponents();
    setupVideoDemoUiComponents();
    setupLiveDemoUiComponents();
  }

  @Override
  protected void onResume() {
    super.onResume();
    if (inputSource == InputSource.CAMERA) {
      // Restarts the camera and the opengl surface rendering.
      cameraInput = new CameraInput(this);
      cameraInput.setNewFrameListener(textureFrame -> facemesh.send(textureFrame));
      glSurfaceView.post(this::startCamera);
      glSurfaceView.setVisibility(View.VISIBLE);
    } else if (inputSource == InputSource.VIDEO) {
      videoInput.resume();
    }
  }

  @Override
  protected void onPause() {
    super.onPause();
    if (inputSource == InputSource.CAMERA) {
      glSurfaceView.setVisibility(View.GONE);
      cameraInput.close();
    } else if (inputSource == InputSource.VIDEO) {
      videoInput.pause();
    }
  }

  /** Sets up the UI components for the static image demo. */
  private void setupStaticImageDemoUiComponents() {
    // The Intent to access gallery and read images as bitmap.
    imageGetter =
        registerForActivityResult(
            new ActivityResultContracts.StartActivityForResult(),
            result -> {
              Intent resultIntent = result.getData();
              if (resultIntent != null) {
                if (result.getResultCode() == RESULT_OK) {
                  Bitmap bitmap = null;
                  try {
                    bitmap =
                        MediaStore.Images.Media.getBitmap(
                            this.getContentResolver(), resultIntent.getData());
                  } catch (IOException e) {
                    Log.e(TAG, "Bitmap reading error:" + e);
                  }
                  if (bitmap != null) {
                    facemesh.send(bitmap);
                  }
                }
              }
            });
    Button loadImageButton = findViewById(R.id.button_load_picture);
    loadImageButton.setOnClickListener(
        v -> {
          if (inputSource != InputSource.IMAGE) {
            stopCurrentPipeline();
            setupStaticImageModePipeline();
          }
          // Reads images from gallery.
          Intent gallery =
              new Intent(Intent.ACTION_PICK, MediaStore.Images.Media.INTERNAL_CONTENT_URI);
          imageGetter.launch(gallery);
        });
    imageView = new FaceMeshResultImageView(this);
  }

  /** The core MediaPipe FaceMesh setup workflow for its static image mode. */
  private void setupStaticImageModePipeline() {
    this.inputSource = InputSource.IMAGE;
    // Initializes a new MediaPipe FaceMesh instance in the static image mode.
    facemesh =
        new FaceMesh(
            this,
            FaceMeshOptions.builder()
                .setMode(FaceMeshOptions.STATIC_IMAGE_MODE)
                .setRunOnGpu(RUN_ON_GPU)
                .build());

    // Connects MediaPipe FaceMesh to the user-defined FaceMeshResultImageView.
    facemesh.setResultListener(
        faceMeshResult -> {
          logNoseLandmark(faceMeshResult, /*showPixelValues=*/ true);
          imageView.setFaceMeshResult(faceMeshResult);
          runOnUiThread(() -> imageView.update());
        });
    facemesh.setErrorListener((message, e) -> Log.e(TAG, "MediaPipe FaceMesh error:" + message));

    // Updates the preview layout.
    FrameLayout frameLayout = findViewById(R.id.preview_display_layout);
    frameLayout.removeAllViewsInLayout();
    imageView.setImageDrawable(null);
    frameLayout.addView(imageView);
    imageView.setVisibility(View.VISIBLE);
  }

  /** Sets up the UI components for the video demo. */
  private void setupVideoDemoUiComponents() {
    // The Intent to access gallery and read a video file.
    videoGetter =
        registerForActivityResult(
            new ActivityResultContracts.StartActivityForResult(),
            result -> {
              Intent resultIntent = result.getData();
              if (resultIntent != null) {
                if (result.getResultCode() == RESULT_OK) {
                  glSurfaceView.post(
                      () ->
                          videoInput.start(
                              this,
                              resultIntent.getData(),
                              facemesh.getGlContext(),
                              glSurfaceView.getWidth(),
                              glSurfaceView.getHeight()));
                }
              }
            });
    Button loadVideoButton = findViewById(R.id.button_load_video);
    loadVideoButton.setOnClickListener(
        v -> {
          stopCurrentPipeline();
          setupStreamingModePipeline(InputSource.VIDEO);
          // Reads video from gallery.
          Intent gallery =
              new Intent(Intent.ACTION_PICK, MediaStore.Video.Media.INTERNAL_CONTENT_URI);
          videoGetter.launch(gallery);
        });
  }

  /** Sets up the UI components for the live demo with camera input. */
  private void setupLiveDemoUiComponents() {
    Button startCameraButton = findViewById(R.id.button_start_camera);
    startCameraButton.setOnClickListener(
        v -> {
          if (inputSource == InputSource.CAMERA) {
            return;
          }
          stopCurrentPipeline();
          setupStreamingModePipeline(InputSource.CAMERA);
        });
  }

  /** The core MediaPipe FaceMesh setup workflow for its streaming mode. */
  private void setupStreamingModePipeline(InputSource inputSource) {
    this.inputSource = inputSource;
    // Initializes a new MediaPipe FaceMesh instance in the streaming mode.
    facemesh =
        new FaceMesh(
            this,
            FaceMeshOptions.builder()
                .setMode(FaceMeshOptions.STREAMING_MODE)
                .setRunOnGpu(RUN_ON_GPU)
                .build());
    facemesh.setErrorListener((message, e) -> Log.e(TAG, "MediaPipe FaceMesh error:" + message));

    if (inputSource == InputSource.CAMERA) {
      // Initializes a new CameraInput instance and connects it to MediaPipe FaceMesh.
      cameraInput = new CameraInput(this);
      cameraInput.setNewFrameListener(textureFrame -> facemesh.send(textureFrame));
    } else if (inputSource == InputSource.VIDEO) {
      // Initializes a new VideoInput instance and connects it to MediaPipe FaceMesh.
      videoInput = new VideoInput(this);
      videoInput.setNewFrameListener(textureFrame -> facemesh.send(textureFrame));
    }

    // Initializes a new Gl surface view with a user-defined FaceMeshResultGlRenderer.
    glSurfaceView =
        new SolutionGlSurfaceView<>(this, facemesh.getGlContext(), facemesh.getGlMajorVersion());
    glSurfaceView.setSolutionResultRenderer(new FaceMeshResultGlRenderer());
    glSurfaceView.setRenderInputImage(true);
    facemesh.setResultListener(
        faceMeshResult -> {
          logNoseLandmark(faceMeshResult, /*showPixelValues=*/ false);
          glSurfaceView.setRenderData(faceMeshResult);
          glSurfaceView.requestRender();
        });

    // The runnable to start camera after the gl surface view is attached.
    // For video input source, videoInput.start() will be called when the video uri is available.
    if (inputSource == InputSource.CAMERA) {
      glSurfaceView.post(this::startCamera);
    }

    // Updates the preview layout.
    FrameLayout frameLayout = findViewById(R.id.preview_display_layout);
    imageView.setVisibility(View.GONE);
    frameLayout.removeAllViewsInLayout();
    frameLayout.addView(glSurfaceView);
    glSurfaceView.setVisibility(View.VISIBLE);
    frameLayout.requestLayout();
  }

  private void startCamera() {
    cameraInput.start(
        this,
        facemesh.getGlContext(),
        CameraInput.CameraFacing.FRONT,
        glSurfaceView.getWidth(),
        glSurfaceView.getHeight());
  }

  private void stopCurrentPipeline() {
    if (cameraInput != null) {
      cameraInput.setNewFrameListener(null);
      cameraInput.close();
    }
    if (videoInput != null) {
      videoInput.setNewFrameListener(null);
      videoInput.close();
    }
    if (glSurfaceView != null) {
      glSurfaceView.setVisibility(View.GONE);
    }
    if (facemesh != null) {
      facemesh.close();
    }
  }

  private void logNoseLandmark(FaceMeshResult result, boolean showPixelValues) {
    if (result == null || result.multiFaceLandmarks().isEmpty()) {
      return;
    }
    NormalizedLandmark noseLandmark = result.multiFaceLandmarks().get(0).getLandmarkList().get(1);
    // For Bitmaps, show the pixel values. For texture inputs, show the normalized coordinates.
    if (showPixelValues) {
      int width = result.inputBitmap().getWidth();
      int height = result.inputBitmap().getHeight();
      Log.i(
          TAG,
          String.format(
              "MediaPipe FaceMesh nose coordinates (pixel values): x=%f, y=%f",
              noseLandmark.getX() * width, noseLandmark.getY() * height));
    } else {
      Log.i(
          TAG,
          String.format(
              "MediaPipe FaceMesh nose normalized coordinates (value range: [0, 1]): x=%f, y=%f",
              noseLandmark.getX(), noseLandmark.getY()));
    }
  }
}
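logNoseLandmark() above reads landmark index 1 of the first face; FaceMeshResult exposes one landmark list per detected face. A hedged sketch of walking that structure, reusing the same index and the classes imported above (the helper class itself is illustrative, not part of the commit):

package com.google.mediapipe.examples.facemesh;

import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark;
import com.google.mediapipe.solutions.facemesh.FaceMeshResult;

/** Illustrative helper only; not part of the commit. */
final class FaceMeshResultSketch {
  static String describeFirstFace(FaceMeshResult result) {
    if (result == null || result.multiFaceLandmarks().isEmpty()) {
      return "no face detected";
    }
    // multiFaceLandmarks() holds one landmark list per detected face.
    int landmarkCount = result.multiFaceLandmarks().get(0).getLandmarkList().size();
    // Index 1 is the landmark the sample above logs as the nose.
    NormalizedLandmark nose = result.multiFaceLandmarks().get(0).getLandmarkList().get(1);
    return String.format(
        "face 0: %d landmarks, nose at (%.3f, %.3f) in normalized [0, 1] coordinates",
        landmarkCount, nose.getX(), nose.getY());
  }
}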
mediapipe/examples/android/solutions/facemesh/src/main/res (symbolic link, 1 line)
@ -0,0 +1 @@
../../../res
@ -19,7 +19,8 @@
         android:roundIcon="@mipmap/ic_launcher_round"
         android:supportsRtl="true"
         android:theme="@style/AppTheme">
-        <activity android:name=".MainActivity">
+        <activity android:name=".MainActivity"
+            android:screenOrientation="portrait">
             <intent-filter>
                 <action android:name="android.intent.action.MAIN" />
@ -31,10 +31,14 @@ android_binary(
         "//mediapipe/java/com/google/mediapipe/solutioncore:camera_input",
         "//mediapipe/java/com/google/mediapipe/solutioncore:mediapipe_jni_lib",
         "//mediapipe/java/com/google/mediapipe/solutioncore:solution_rendering",
+        "//mediapipe/java/com/google/mediapipe/solutioncore:video_input",
         "//mediapipe/java/com/google/mediapipe/solutions/hands",
         "//third_party:androidx_appcompat",
         "//third_party:androidx_constraint_layout",
+        "//third_party:opencv",
+        "@maven//:androidx_activity_activity",
         "@maven//:androidx_concurrent_concurrent_futures",
+        "@maven//:androidx_fragment_fragment",
         "@maven//:com_google_guava_guava",
     ],
 )
@ -46,7 +46,6 @@ public class HandsResultGlRenderer implements ResultGlRenderer<HandsResult> {
   private int positionHandle;
   private int transformMatrixHandle;
   private final float[] transformMatrix = new float[16];
-  private FloatBuffer vertexBuffer;

   private int loadShader(int type, String shaderCode) {
     int shader = GLES20.glCreateShader(type);
@ -74,12 +73,15 @@ public class HandsResultGlRenderer implements ResultGlRenderer<HandsResult> {
     }
     GLES20.glUseProgram(program);
     // Sets the transform matrix to align the result rendering with the scaled output texture.
+    // Also flips the rendering vertically since OpenGL assumes the coordinate origin is at the
+    // bottom-left corner, whereas MediaPipe landmark data assumes the coordinate origin is at the
+    // top-left corner.
     Matrix.setIdentityM(transformMatrix, 0);
     Matrix.scaleM(
         transformMatrix,
         0,
         2 / (boundary.right() - boundary.left()),
-        2 / (boundary.top() - boundary.bottom()),
+        -2 / (boundary.top() - boundary.bottom()),
         1.0f);
     GLES20.glUniformMatrix4fv(transformMatrixHandle, 1, false, transformMatrix, 0);
     GLES20.glLineWidth(CONNECTION_THICKNESS);
@ -109,7 +111,7 @@ public class HandsResultGlRenderer implements ResultGlRenderer<HandsResult> {
     NormalizedLandmark end = handLandmarkList.get(c.end());
     vertex[2] = normalizedLandmarkValue(end.getX());
     vertex[3] = normalizedLandmarkValue(end.getY());
-    vertexBuffer =
+    FloatBuffer vertexBuffer =
         ByteBuffer.allocateDirect(vertex.length * 4)
             .order(ByteOrder.nativeOrder())
             .asFloatBuffer()
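The change above negates the Y scale so the hand skeleton, whose landmarks use a top-left origin, renders upright in OpenGL's bottom-left-origin clip space. A standalone sketch of building that transform with android.opengl.Matrix; the class and method names are illustrative, not part of the commit:

import android.opengl.Matrix;

/** Illustrative only; mirrors the scale-and-flip set up in HandsResultGlRenderer above. */
final class FlipTransformSketch {
  static float[] buildTransform(float left, float right, float bottom, float top) {
    float[] transform = new float[16];
    Matrix.setIdentityM(transform, 0);
    // Stretch the cropped region to [-1, 1] horizontally and flip it vertically.
    Matrix.scaleM(transform, 0, 2 / (right - left), -2 / (top - bottom), 1.0f);
    return transform;
  }
}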
@ -20,7 +20,7 @@ import android.graphics.Canvas;
 import android.graphics.Color;
 import android.graphics.Matrix;
 import android.graphics.Paint;
-import android.widget.ImageView;
+import androidx.appcompat.widget.AppCompatImageView;
 import com.google.mediapipe.formats.proto.LandmarkProto;
 import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark;
 import com.google.mediapipe.solutions.hands.Hands;
@ -28,17 +28,18 @@ import com.google.mediapipe.solutions.hands.HandsResult;
 import java.util.List;

 /** An ImageView implementation for displaying MediaPipe Hands results. */
-public class HandsResultImageView extends ImageView {
+public class HandsResultImageView extends AppCompatImageView {
   private static final String TAG = "HandsResultImageView";

   private static final int LANDMARK_COLOR = Color.RED;
   private static final int LANDMARK_RADIUS = 15;
   private static final int CONNECTION_COLOR = Color.GREEN;
   private static final int CONNECTION_THICKNESS = 10;
+  private Bitmap latest;

   public HandsResultImageView(Context context) {
     super(context);
-    setScaleType(ImageView.ScaleType.FIT_CENTER);
+    setScaleType(AppCompatImageView.ScaleType.FIT_CENTER);
   }

   /**
@ -54,8 +55,8 @@ public class HandsResultImageView extends ImageView {
     Bitmap bmInput = result.inputBitmap();
     int width = bmInput.getWidth();
     int height = bmInput.getHeight();
-    Bitmap bmOutput = Bitmap.createBitmap(width, height, bmInput.getConfig());
-    Canvas canvas = new Canvas(bmOutput);
+    latest = Bitmap.createBitmap(width, height, bmInput.getConfig());
+    Canvas canvas = new Canvas(latest);

     canvas.drawBitmap(bmInput, new Matrix(), null);
     int numHands = result.multiHandLandmarks().size();
@ -63,8 +64,14 @@ public class HandsResultImageView extends ImageView {
       drawLandmarksOnCanvas(
           result.multiHandLandmarks().get(i).getLandmarkList(), canvas, width, height);
     }
+  }
+
+  /** Updates the image view with the latest hands result. */
+  public void update() {
     postInvalidate();
-    setImageBitmap(bmOutput);
+    if (latest != null) {
+      setImageBitmap(latest);
+    }
   }

   // TODO: Better hand landmark and hand connection drawing.
@ -28,6 +28,7 @@ import androidx.activity.result.contract.ActivityResultContracts;
 import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark;
 import com.google.mediapipe.solutioncore.CameraInput;
 import com.google.mediapipe.solutioncore.SolutionGlSurfaceView;
+import com.google.mediapipe.solutioncore.VideoInput;
 import com.google.mediapipe.solutions.hands.HandLandmark;
 import com.google.mediapipe.solutions.hands.Hands;
 import com.google.mediapipe.solutions.hands.HandsOptions;
@ -39,14 +40,24 @@ public class MainActivity extends AppCompatActivity {
   private static final String TAG = "MainActivity";

   private Hands hands;
-  private int mode = HandsOptions.STATIC_IMAGE_MODE;
+  // Run the pipeline and the model inference on GPU or CPU.
+  private static final boolean RUN_ON_GPU = true;
+
+  private enum InputSource {
+    UNKNOWN,
+    IMAGE,
+    VIDEO,
+    CAMERA,
+  }
+  private InputSource inputSource = InputSource.UNKNOWN;

   // Image demo UI and image loader components.
-  private Button loadImageButton;
   private ActivityResultLauncher<Intent> imageGetter;
   private HandsResultImageView imageView;
+  // Video demo UI and video loader components.
+  private VideoInput videoInput;
+  private ActivityResultLauncher<Intent> videoGetter;
   // Live camera demo UI and camera components.
-  private Button startCameraButton;
   private CameraInput cameraInput;
   private SolutionGlSurfaceView<HandsResult> glSurfaceView;

@ -55,26 +66,32 @@ public class MainActivity extends AppCompatActivity {
     super.onCreate(savedInstanceState);
     setContentView(R.layout.activity_main);
     setupStaticImageDemoUiComponents();
+    setupVideoDemoUiComponents();
     setupLiveDemoUiComponents();
   }

   @Override
   protected void onResume() {
     super.onResume();
-    if (mode == HandsOptions.STREAMING_MODE) {
+    if (inputSource == InputSource.CAMERA) {
       // Restarts the camera and the opengl surface rendering.
       cameraInput = new CameraInput(this);
-      cameraInput.setCameraNewFrameListener(textureFrame -> hands.send(textureFrame));
+      cameraInput.setNewFrameListener(textureFrame -> hands.send(textureFrame));
       glSurfaceView.post(this::startCamera);
       glSurfaceView.setVisibility(View.VISIBLE);
+    } else if (inputSource == InputSource.VIDEO) {
+      videoInput.resume();
     }
   }

   @Override
   protected void onPause() {
     super.onPause();
-    if (mode == HandsOptions.STREAMING_MODE) {
-      stopLiveDemo();
+    if (inputSource == InputSource.CAMERA) {
+      glSurfaceView.setVisibility(View.GONE);
+      cameraInput.close();
+    } else if (inputSource == InputSource.VIDEO) {
+      videoInput.pause();
     }
   }

@ -102,80 +119,122 @@ public class MainActivity extends AppCompatActivity {
           }
         }
       });
-    loadImageButton = (Button) findViewById(R.id.button_load_picture);
+    Button loadImageButton = findViewById(R.id.button_load_picture);
     loadImageButton.setOnClickListener(
-        new View.OnClickListener() {
-          @Override
-          public void onClick(View v) {
-            if (mode == HandsOptions.STREAMING_MODE) {
-              stopLiveDemo();
-            }
-            if (hands == null || mode != HandsOptions.STATIC_IMAGE_MODE) {
-              setupStaticImageModePipeline();
-            }
-            // Reads images from gallery.
-            Intent gallery =
-                new Intent(Intent.ACTION_PICK, MediaStore.Images.Media.INTERNAL_CONTENT_URI);
-            imageGetter.launch(gallery);
+        v -> {
+          if (inputSource != InputSource.IMAGE) {
+            stopCurrentPipeline();
+            setupStaticImageModePipeline();
           }
+          // Reads images from gallery.
+          Intent gallery =
+              new Intent(Intent.ACTION_PICK, MediaStore.Images.Media.INTERNAL_CONTENT_URI);
+          imageGetter.launch(gallery);
         });
     imageView = new HandsResultImageView(this);
   }

   /** The core MediaPipe Hands setup workflow for its static image mode. */
   private void setupStaticImageModePipeline() {
+    this.inputSource = InputSource.IMAGE;
     // Initializes a new MediaPipe Hands instance in the static image mode.
-    mode = HandsOptions.STATIC_IMAGE_MODE;
-    if (hands != null) {
-      hands.close();
-    }
-    hands = new Hands(this, HandsOptions.builder().setMode(mode).build());
+    hands =
+        new Hands(
+            this,
+            HandsOptions.builder()
+                .setMode(HandsOptions.STATIC_IMAGE_MODE)
+                .setMaxNumHands(1)
+                .setRunOnGpu(RUN_ON_GPU)
+                .build());

     // Connects MediaPipe Hands to the user-defined HandsResultImageView.
     hands.setResultListener(
         handsResult -> {
           logWristLandmark(handsResult, /*showPixelValues=*/ true);
-          runOnUiThread(() -> imageView.setHandsResult(handsResult));
+          imageView.setHandsResult(handsResult);
+          runOnUiThread(() -> imageView.update());
         });
-    hands.setErrorListener((message, e) -> Log.e(TAG, "MediaPipe hands error:" + message));
+    hands.setErrorListener((message, e) -> Log.e(TAG, "MediaPipe Hands error:" + message));

     // Updates the preview layout.
-    FrameLayout frameLayout = (FrameLayout) findViewById(R.id.preview_display_layout);
+    FrameLayout frameLayout = findViewById(R.id.preview_display_layout);
     frameLayout.removeAllViewsInLayout();
     imageView.setImageDrawable(null);
     frameLayout.addView(imageView);
     imageView.setVisibility(View.VISIBLE);
   }

+  /** Sets up the UI components for the video demo. */
+  private void setupVideoDemoUiComponents() {
+    // The Intent to access gallery and read a video file.
+    videoGetter =
+        registerForActivityResult(
+            new ActivityResultContracts.StartActivityForResult(),
+            result -> {
+              Intent resultIntent = result.getData();
+              if (resultIntent != null) {
+                if (result.getResultCode() == RESULT_OK) {
+                  glSurfaceView.post(
+                      () ->
+                          videoInput.start(
+                              this,
+                              resultIntent.getData(),
+                              hands.getGlContext(),
+                              glSurfaceView.getWidth(),
+                              glSurfaceView.getHeight()));
+                }
+              }
+            });
+    Button loadVideoButton = findViewById(R.id.button_load_video);
+    loadVideoButton.setOnClickListener(
+        v -> {
+          stopCurrentPipeline();
+          setupStreamingModePipeline(InputSource.VIDEO);
+          // Reads video from gallery.
+          Intent gallery =
+              new Intent(Intent.ACTION_PICK, MediaStore.Video.Media.INTERNAL_CONTENT_URI);
+          videoGetter.launch(gallery);
+        });
+  }
+
   /** Sets up the UI components for the live demo with camera input. */
   private void setupLiveDemoUiComponents() {
-    startCameraButton = (Button) findViewById(R.id.button_start_camera);
+    Button startCameraButton = findViewById(R.id.button_start_camera);
     startCameraButton.setOnClickListener(
-        new View.OnClickListener() {
-          @Override
-          public void onClick(View v) {
-            if (hands == null || mode != HandsOptions.STREAMING_MODE) {
-              setupStreamingModePipeline();
-            }
+        v -> {
+          if (inputSource == InputSource.CAMERA) {
+            return;
           }
+          stopCurrentPipeline();
+          setupStreamingModePipeline(InputSource.CAMERA);
         });
   }

   /** The core MediaPipe Hands setup workflow for its streaming mode. */
-  private void setupStreamingModePipeline() {
+  private void setupStreamingModePipeline(InputSource inputSource) {
+    this.inputSource = inputSource;
     // Initializes a new MediaPipe Hands instance in the streaming mode.
-    mode = HandsOptions.STREAMING_MODE;
-    if (hands != null) {
-      hands.close();
-    }
-    hands = new Hands(this, HandsOptions.builder().setMode(mode).build());
-    hands.setErrorListener((message, e) -> Log.e(TAG, "MediaPipe hands error:" + message));
-
-    // Initializes a new CameraInput instance and connects it to MediaPipe Hands.
-    cameraInput = new CameraInput(this);
-    cameraInput.setCameraNewFrameListener(textureFrame -> hands.send(textureFrame));
-
-    // Initalizes a new Gl surface view with a user-defined HandsResultGlRenderer.
+    hands =
+        new Hands(
+            this,
+            HandsOptions.builder()
+                .setMode(HandsOptions.STREAMING_MODE)
+                .setMaxNumHands(1)
+                .setRunOnGpu(RUN_ON_GPU)
+                .build());
+    hands.setErrorListener((message, e) -> Log.e(TAG, "MediaPipe Hands error:" + message));
+
+    if (inputSource == InputSource.CAMERA) {
+      // Initializes a new CameraInput instance and connects it to MediaPipe Hands.
+      cameraInput = new CameraInput(this);
+      cameraInput.setNewFrameListener(textureFrame -> hands.send(textureFrame));
+    } else if (inputSource == InputSource.VIDEO) {
+      // Initializes a new VideoInput instance and connects it to MediaPipe Hands.
+      videoInput = new VideoInput(this);
+      videoInput.setNewFrameListener(textureFrame -> hands.send(textureFrame));
+    }
+
+    // Initializes a new Gl surface view with a user-defined HandsResultGlRenderer.
     glSurfaceView =
         new SolutionGlSurfaceView<>(this, hands.getGlContext(), hands.getGlMajorVersion());
     glSurfaceView.setSolutionResultRenderer(new HandsResultGlRenderer());
@ -188,10 +247,13 @@ public class MainActivity extends AppCompatActivity {
         });

     // The runnable to start camera after the gl surface view is attached.
-    glSurfaceView.post(this::startCamera);
+    // For video input source, videoInput.start() will be called when the video uri is available.
+    if (inputSource == InputSource.CAMERA) {
+      glSurfaceView.post(this::startCamera);
+    }

     // Updates the preview layout.
-    FrameLayout frameLayout = (FrameLayout) findViewById(R.id.preview_display_layout);
+    FrameLayout frameLayout = findViewById(R.id.preview_display_layout);
     imageView.setVisibility(View.GONE);
     frameLayout.removeAllViewsInLayout();
     frameLayout.addView(glSurfaceView);
@ -208,34 +270,40 @@ public class MainActivity extends AppCompatActivity {
         glSurfaceView.getHeight());
   }

-  private void stopLiveDemo() {
+  private void stopCurrentPipeline() {
     if (cameraInput != null) {
-      cameraInput.stop();
+      cameraInput.setNewFrameListener(null);
+      cameraInput.close();
+    }
+    if (videoInput != null) {
+      videoInput.setNewFrameListener(null);
+      videoInput.close();
     }
     if (glSurfaceView != null) {
       glSurfaceView.setVisibility(View.GONE);
     }
+    if (hands != null) {
+      hands.close();
+    }
   }

   private void logWristLandmark(HandsResult result, boolean showPixelValues) {
     NormalizedLandmark wristLandmark = Hands.getHandLandmark(result, 0, HandLandmark.WRIST);
-    // For Bitmaps, show the pixel values. For texture inputs, show the normoralized cooridanates.
+    // For Bitmaps, show the pixel values. For texture inputs, show the normalized coordinates.
     if (showPixelValues) {
       int width = result.inputBitmap().getWidth();
       int height = result.inputBitmap().getHeight();
       Log.i(
           TAG,
-          "MediaPipe Hand wrist coordinates (pixel values): x= "
-              + wristLandmark.getX() * width
-              + " y="
-              + wristLandmark.getY() * height);
+          String.format(
+              "MediaPipe Hand wrist coordinates (pixel values): x=%f, y=%f",
+              wristLandmark.getX() * width, wristLandmark.getY() * height));
     } else {
       Log.i(
           TAG,
-          "MediaPipe Hand wrist normalized coordinates (value range: [0, 1]): x= "
-              + wristLandmark.getX()
-              + " y="
-              + wristLandmark.getY());
+          String.format(
+              "MediaPipe Hand wrist normalized coordinates (value range: [0, 1]): x=%f, y=%f",
+              wristLandmark.getX(), wristLandmark.getY()));
     }
   }
 }
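The logging rewrite above multiplies normalized landmark values by the input bitmap size to report pixel positions. A self-contained sketch of that conversion, with an illustrative class name that is not part of the commit:

import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark;

/** Illustrative only; converts a normalized landmark to pixel coordinates as the logging above does. */
final class PixelCoordinateSketch {
  static float[] toPixels(NormalizedLandmark landmark, int imageWidth, int imageHeight) {
    // Normalized [0, 1] coordinates scale by the input image size to give pixel positions.
    return new float[] {landmark.getX() * imageWidth, landmark.getY() * imageHeight};
  }
}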
@ -8,18 +8,23 @@
     android:id="@+id/buttons"
     android:layout_width="match_parent"
     android:layout_height="wrap_content"
-    android:gravity="center"
+    style="?android:attr/buttonBarStyle" android:gravity="center"
     android:orientation="horizontal">
     <Button
         android:id="@+id/button_load_picture"
         android:layout_width="wrap_content"
-        android:layout_height="wrap_content"
-        android:text="Load Picture" />
+        style="?android:attr/buttonBarButtonStyle" android:layout_height="wrap_content"
+        android:text="@string/load_picture" />
+    <Button
+        android:id="@+id/button_load_video"
+        android:layout_width="wrap_content"
+        style="?android:attr/buttonBarButtonStyle" android:layout_height="wrap_content"
+        android:text="@string/load_video" />
     <Button
         android:id="@+id/button_start_camera"
         android:layout_width="wrap_content"
-        android:layout_height="wrap_content"
-        android:text="Start Camera" />
+        style="?android:attr/buttonBarButtonStyle" android:layout_height="wrap_content"
+        android:text="@string/start_camera" />
   </LinearLayout>
   <FrameLayout
     android:id="@+id/preview_display_layout"
@ -27,9 +32,9 @@
     android:layout_height="match_parent">
     <TextView
         android:id="@+id/no_view"
-        android:layout_width="wrap_content"
+        android:layout_width="match_parent"
         android:layout_height="wrap_content"
         android:gravity="center"
-        android:text="Please press any button above to start" />
+        android:text="@string/instruction" />
   </FrameLayout>
 </LinearLayout>
@ -1,3 +1,6 @@
 <resources>
-    <string name="no_camera_access" translatable="false">Please grant camera permissions.</string>
+    <string name="load_picture" translatable="false">Load Picture</string>
+    <string name="load_video" translatable="false">Load Video</string>
+    <string name="start_camera" translatable="false">Start Camera</string>
+    <string name="instruction" translatable="false">Please press any button above to start</string>
 </resources>
@ -1,2 +1,3 @@
 rootProject.name = "mediapipe-solutions-examples"
 include ':hands'
+include ':facemesh'
@ -169,6 +169,7 @@ public class MainActivity extends AppCompatActivity {

   public void startCamera() {
     cameraHelper = new CameraXPreviewHelper();
+    previewFrameTexture = converter.getSurfaceTexture();
     cameraHelper.setOnCameraStartedListener(
         surfaceTexture -> {
           onCameraStarted(surfaceTexture);
@ -178,7 +179,7 @@ public class MainActivity extends AppCompatActivity {
                 ? CameraHelper.CameraFacing.FRONT
                 : CameraHelper.CameraFacing.BACK;
     cameraHelper.startCamera(
-        this, cameraFacing, /*unusedSurfaceTexture=*/ null, cameraTargetResolution());
+        this, cameraFacing, previewFrameTexture, cameraTargetResolution());
   }

   protected Size computeViewSize(int width, int height) {
@ -194,11 +195,8 @@ public class MainActivity extends AppCompatActivity {
     Size displaySize = cameraHelper.computeDisplaySizeFromViewSize(viewSize);
     boolean isCameraRotated = cameraHelper.isCameraRotated();

-    // Connect the converter to the camera-preview frames as its input (via
-    // previewFrameTexture), and configure the output width and height as the computed
-    // display size.
-    converter.setSurfaceTextureAndAttachToGLContext(
-        previewFrameTexture,
+    // Configure the output width and height as the computed display size.
+    converter.setDestinationSize(
         isCameraRotated ? displaySize.getHeight() : displaySize.getWidth(),
         isCameraRotated ? displaySize.getWidth() : displaySize.getHeight());
   }
@ -43,6 +43,7 @@ cc_library(
 cc_binary(
     name = "object_detection_tpu",
     deps = [
+        "//mediapipe/calculators/image:image_transformation_calculator",
         "//mediapipe/examples/coral:demo_run_graph_main",
         "//mediapipe/graphs/object_detection:desktop_tflite_calculators",
     ],
@ -51,6 +52,12 @@ cc_binary(
 cc_binary(
     name = "face_detection_tpu",
     deps = [
+        "//mediapipe/calculators/image:image_transformation_calculator",
+        "//mediapipe/calculators/tflite:tflite_converter_calculator",
+        "//mediapipe/calculators/tflite:tflite_inference_calculator",
+        "//mediapipe/calculators/tflite:tflite_tensors_to_detections_calculator",
+        "//mediapipe/calculators/util:detection_label_id_to_text_calculator",
+        "//mediapipe/calculators/util:detection_letterbox_removal_calculator",
        "//mediapipe/examples/coral:demo_run_graph_main",
        "//mediapipe/graphs/face_detection:desktop_live_calculators",
     ],
@ -1,86 +0,0 @@
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

#==== ! Prerequisite ! ====
# $ sh mediapipe/examples/coral/setup.sh
#====

# for opencv 3.2 default
FROM ubuntu:18.04

MAINTAINER <mediapipe@google.com>

WORKDIR /mediapipe

ENV DEBIAN_FRONTEND=noninteractive

# Install MediaPipe & Coral deps

COPY update_sources.sh /
RUN /update_sources.sh

RUN dpkg --add-architecture armhf
RUN dpkg --add-architecture arm64
RUN apt-get update && apt-get install -y \
    build-essential \
    crossbuild-essential-arm64 \
    libusb-1.0-0-dev:arm64 \
    zlibc:arm64 \
    pkg-config \
    zip \
    unzip \
    curl \
    wget \
    git \
    python \
    python-pip \
    python3-pip \
    python-numpy \
    vim-common \
    ca-certificates \
    emacs \
    software-properties-common && \
    add-apt-repository -y ppa:openjdk-r/ppa && \
    apt-get update && apt-get install -y openjdk-8-jdk

RUN pip install --upgrade setuptools
RUN pip install future
RUN pip3 install six

COPY . /mediapipe/

# Install bazel
# Please match the current MediaPipe Bazel requirements according to docs.
ARG BAZEL_VERSION=3.7.2
RUN mkdir /bazel && \
    wget --no-check-certificate -O /bazel/installer.sh "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh" && \
    wget --no-check-certificate -O /bazel/LICENSE.txt "https://raw.githubusercontent.com/bazelbuild/bazel/master/LICENSE" && \
    chmod +x /bazel/installer.sh && \
    /bazel/installer.sh && \
    rm -f /bazel/installer.sh

# OpenCV (3.2 default in 18.04)

RUN apt-get update && apt-get install -y libopencv-dev

# Opencv libs copied from coral device into opencv32_arm64_libs

RUN cp opencv32_arm64_libs/* /usr/lib/aarch64-linux-gnu/.

# Edge tpu header and lib

RUN git clone https://github.com/google-coral/edgetpu.git /edgetpu
RUN cp /edgetpu/libedgetpu/direct/aarch64/libedgetpu.so.1.0 /usr/lib/aarch64-linux-gnu/libedgetpu.so

# See mediapipe/examples/coral/README.md to finish setup
mediapipe/examples/coral/Dockerfile.amd64 (new file, 45 lines)
@ -0,0 +1,45 @@
# Copyright 2021 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
FROM debian:buster
MAINTAINER <mediapipe@google.com>
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
    build-essential \
    crossbuild-essential-arm64 \
    pkg-config \
    zip \
    unzip \
    curl \
    wget \
    git \
    tree \
    vim \
    sudo \
    python3-all \
    python3-pip \
    python3-numpy \
    ca-certificates \
    software-properties-common \
    libusb-1.0-0-dev \
    libopencv-core-dev \
    libopencv-imgproc-dev \
    libopencv-video-dev \
    libopencv-highgui-dev \
    libopencv-videoio-dev \
    libopencv-contrib-dev
RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 3
RUN wget -O /usr/bin/bazel \
    https://github.com/bazelbuild/bazelisk/releases/download/v1.10.0/bazelisk-linux-amd64 && \
    echo "038c0990a48ccd69932e4e8ecf8baa459e05a6b4c9e4cc492ac836b777caaf9d /usr/bin/bazel" | sha256sum --check - && \
    chmod +x /usr/bin/bazel
ENV BAZEL_CPU=k8
47  mediapipe/examples/coral/Dockerfile.arm64  Normal file
@@ -0,0 +1,47 @@
# Copyright 2021 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

FROM debian:buster

MAINTAINER <mediapipe@google.com>

RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
    build-essential \
    crossbuild-essential-arm64 \
    pkg-config \
    zip \
    unzip \
    curl \
    wget \
    git \
    tree \
    vim \
    sudo \
    python3-all \
    python3-pip \
    python3-numpy \
    ca-certificates \
    software-properties-common

RUN dpkg --add-architecture arm64

RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
    libusb-1.0-0-dev:arm64 \
    libopencv-core-dev:arm64 \
    libopencv-imgproc-dev:arm64 \
    libopencv-video-dev:arm64 \
    libopencv-highgui-dev:arm64 \
    libopencv-videoio-dev:arm64 \
    libopencv-contrib-dev:arm64

RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 3

RUN wget -O /usr/bin/bazel \
        https://github.com/bazelbuild/bazelisk/releases/download/v1.10.0/bazelisk-linux-amd64 && \
    echo "038c0990a48ccd69932e4e8ecf8baa459e05a6b4c9e4cc492ac836b777caaf9d /usr/bin/bazel" | sha256sum --check - && \
    chmod +x /usr/bin/bazel

ENV BAZEL_CPU=aarch64
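Unlike the amd64 image, this one registers arm64 as a foreign dpkg architecture so the `:arm64` OpenCV and libusb development packages can sit next to the x86 toolchain. A quick sanity check inside the container (standard Debian tooling, not part of the image itself):

```bash
dpkg --print-foreign-architectures   # should print: arm64
aarch64-linux-gnu-gcc --version      # cross compiler from crossbuild-essential-arm64
dpkg -l 'libopencv-core-dev*'        # lists the :arm64 package installed above
```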
47  mediapipe/examples/coral/Dockerfile.armhf  Normal file
@@ -0,0 +1,47 @@
# Copyright 2021 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

FROM debian:buster

MAINTAINER <mediapipe@google.com>

RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
    build-essential \
    crossbuild-essential-armhf \
    pkg-config \
    zip \
    unzip \
    curl \
    wget \
    git \
    tree \
    vim \
    sudo \
    python3-all \
    python3-pip \
    python3-numpy \
    ca-certificates \
    software-properties-common

RUN dpkg --add-architecture armhf

RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
    libusb-1.0-0-dev:armhf \
    libopencv-core-dev:armhf \
    libopencv-imgproc-dev:armhf \
    libopencv-video-dev:armhf \
    libopencv-highgui-dev:armhf \
    libopencv-videoio-dev:armhf \
    libopencv-contrib-dev:armhf

RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 3

RUN wget -O /usr/bin/bazel \
        https://github.com/bazelbuild/bazelisk/releases/download/v1.10.0/bazelisk-linux-amd64 && \
    echo "038c0990a48ccd69932e4e8ecf8baa459e05a6b4c9e4cc492ac836b777caaf9d /usr/bin/bazel" | sha256sum --check - && \
    chmod +x /usr/bin/bazel

ENV BAZEL_CPU=armv7a
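The armhf image mirrors the arm64 one with `:armhf` packages and `BAZEL_CPU=armv7a`, which matches the Raspberry Pi row of the README table later in this commit. After a cross-build, the architecture of the produced binary can be checked with ordinary Linux tooling; the `out/armv7a` path comes from the Makefile below:

```bash
file out/armv7a/face_detection_tpu
# expected output mentions: ELF 32-bit LSB executable, ARM, EABI5
```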
55  mediapipe/examples/coral/Makefile  Normal file
@@ -0,0 +1,55 @@
SHELL := /bin/bash

MAKEFILE_DIR := $(realpath $(dir $(lastword $(MAKEFILE_LIST))))
MEDIAPIPE_DIR := $(MAKEFILE_DIR)/../../..

BAZEL_COMPILATION_MODE ?= opt
BAZEL_TARGET ?= mediapipe/examples/coral:face_detection_tpu
BAZEL_CPU ?= k8

OUT_DIR := $(MEDIAPIPE_DIR)/out/$(BAZEL_CPU)

PLATFORM ?= amd64
DOCKER_FILE ?= $(MAKEFILE_DIR)/Dockerfile.$(PLATFORM)
DOCKER_COMMAND ?=

bazel_output = $(MEDIAPIPE_DIR)/bazel-bin/$(subst :,/,$(1))

define run_command
chmod a+w /; \
groupadd --gid $(shell id -g) $(shell id -g -n); \
useradd -m -e '' -s /bin/bash --gid $(shell id -g) --uid $(shell id -u) $(shell id -u -n); \
echo '$(shell id -u -n) ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers; \
su $(shell id -u -n) $(if $(1),-c '$(1)',)
endef

.PHONY: help
help:
	@echo "make help   - Print help"
	@echo "make docker - Run Docker environment"
	@echo "make build  - Run Bazel build, use BAZEL_TARGET to choose which target to build"

ifeq (,$(wildcard /.dockerenv))
.PHONY: docker
docker:
	docker run --rm -i --tty \
	    -v $(MEDIAPIPE_DIR):/mediapipe \
	    --workdir /mediapipe/ \
	    $(shell docker build -q - < $(DOCKER_FILE)) \
	    /bin/bash -c "$(call run_command,$(DOCKER_COMMAND))"
endif

.PHONY: build
build:
	(cd $(MEDIAPIPE_DIR) && \
	bazel build \
	    --crosstool_top=@crosstool//:toolchains \
	    --compiler=gcc \
	    --cpu=${BAZEL_CPU} \
	    --compilation_mode=${BAZEL_COMPILATION_MODE} \
	    --define darwinn_portable=1 \
	    --define MEDIAPIPE_DISABLE_GPU=1 \
	    --define MEDIAPIPE_EDGE_TPU=all \
	    $(BAZEL_TARGET) && \
	mkdir -p $(OUT_DIR) && \
	cp -f $(call bazel_output,$(BAZEL_TARGET)) $(OUT_DIR))
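All of the variables referenced above (`PLATFORM`, `BAZEL_TARGET`, `BAZEL_COMPILATION_MODE`, `DOCKER_COMMAND`) are overridable on the command line, so typical invocations look like the following sketch built only from names defined in this Makefile:

```bash
# Open the arm64 cross-compilation container.
make -C mediapipe/examples/coral PLATFORM=arm64 docker

# Inside the container: build the object detection example in debug mode.
make -C mediapipe/examples/coral \
    BAZEL_COMPILATION_MODE=dbg \
    BAZEL_TARGET=mediapipe/examples/coral:object_detection_tpu \
    build
```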
@@ -1,156 +1,173 @@
-# Coral Dev Board Setup (experimental)
-
-**Disclaimer**: Running MediaPipe on Coral is experimental, and this process may
-not be exact and is subject to change. These instructions have only been tested
-on the [Coral Dev Board](https://coral.ai/products/dev-board/)
-running [Mendel Enterprise Day 13](https://coral.ai/software/) OS and
-using [Diploria2](https://github.com/google-coral/edgetpu/tree/diploria2)
-edgetpu libs, and may vary for different devices and workstations.
-
-This file describes how to prepare a Coral Dev Board and setup a Linux
-Docker container for building MediaPipe applications that run on Edge TPU.
-
-## Before creating the Docker
-
-* (on host machine) run _setup.sh_ from MediaPipe root directory
-
-      sh mediapipe/examples/coral/setup.sh
-
-* Setup the coral device via [here](https://coral.withgoogle.com/docs/dev-board/get-started/), and ensure the _mdt_ command works
-
-      Note: alias mdt="python3 -m mdt.main" may be needed on some systems
-
-* (on coral device) prepare MediaPipe
-
-      cd ~
-      sudo apt-get update && sudo apt-get install -y git
-      git clone https://github.com/google/mediapipe.git
-      mkdir mediapipe/bazel-bin
-
-* (on coral device) install opencv 3.2
-
-      sudo apt-get update && sudo apt-get install -y libopencv-dev
-
-* (on coral device) find all opencv libs
-
-      find /usr/lib/aarch64-linux-gnu/ -name 'libopencv*so'
-
-* (on host machine) copy core opencv libs from coral device to a local folder inside MediaPipe checkout:
-
-      # in root level mediapipe folder #
-      mdt pull /usr/lib/aarch64-linux-gnu/libopencv_core.so opencv32_arm64_libs
-      mdt pull /usr/lib/aarch64-linux-gnu/libopencv_calib3d.so opencv32_arm64_libs
-      mdt pull /usr/lib/aarch64-linux-gnu/libopencv_features2d.so opencv32_arm64_libs
-      mdt pull /usr/lib/aarch64-linux-gnu/libopencv_highgui.so opencv32_arm64_libs
-      mdt pull /usr/lib/aarch64-linux-gnu/libopencv_imgcodecs.so opencv32_arm64_libs
-      mdt pull /usr/lib/aarch64-linux-gnu/libopencv_imgproc.so opencv32_arm64_libs
-      mdt pull /usr/lib/aarch64-linux-gnu/libopencv_video.so opencv32_arm64_libs
-      mdt pull /usr/lib/aarch64-linux-gnu/libopencv_videoio.so opencv32_arm64_libs
-
-* (on host machine) Create and start the docker environment
-
-      # from mediapipe root level directory #
-      docker build -t coral .
-      docker run -it --name coral coral:latest
-
-## Inside the Docker environment
-
-* Update library paths in /mediapipe/third_party/opencv_linux.BUILD
-
-  (replace 'x86_64-linux-gnu' with 'aarch64-linux-gnu')
-
-      "lib/aarch64-linux-gnu/libopencv_core.so",
-      "lib/aarch64-linux-gnu/libopencv_calib3d.so",
-      "lib/aarch64-linux-gnu/libopencv_features2d.so",
-      "lib/aarch64-linux-gnu/libopencv_highgui.so",
-      "lib/aarch64-linux-gnu/libopencv_imgcodecs.so",
-      "lib/aarch64-linux-gnu/libopencv_imgproc.so",
-      "lib/aarch64-linux-gnu/libopencv_video.so",
-      "lib/aarch64-linux-gnu/libopencv_videoio.so",
-
-* Attempt to build hello world (to download external deps)
-
-      bazel build -c opt --define MEDIAPIPE_DISABLE_GPU=1 mediapipe/examples/desktop/hello_world:hello_world
-
-* Edit /edgetpu/libedgetpu/BUILD
-
-  to add this build target
-
-      cc_library(
-          name = "lib",
-          srcs = [
-              "libedgetpu.so",
-          ],
-          visibility = ["//visibility:public"],
-      )
-
-* Edit /edgetpu/WORKSPACE
-
-  update /mediapipe/WORKSPACE TENSORFLOW_* variables to match what /edgetpu/WORKSPACE has:
-
-      grep TENSORFLOW_ /mediapipe/WORKSPACE
-      grep TENSORFLOW_ /edgetpu/WORKSPACE
-
-      # Make sure the /mediapipe/WORKSPACE _TENSORFLOW_GIT_COMMIT and _TENSORFLOW_SHA256
-      # match the /edgetpu/WORKSPACE TENSORFLOW_COMMIT and TENSORFLOW_SHA256 respectively.
-
-      # If they do not match, modify /mediapipe/WORKSPACE to match what /edgetpu/WORKSPACE has.
-      # Also comment out the MediaPipe org_tensorflow patch section.
-
-* Edit /mediapipe/mediapipe/calculators/tflite/BUILD to change rules for *tflite_inference_calculator.cc*
-
-      sed -i 's/\":tflite_inference_calculator_cc_proto\",/\":tflite_inference_calculator_cc_proto\",\n\t\"@edgetpu\/\/:header\",\n\t\"@libedgetpu\/\/:lib\",/g' /mediapipe/mediapipe/calculators/tflite/BUILD
-
-  The above command should add
-
-      "@edgetpu//:header",
-      "@libedgetpu//:lib",
-
-  to the _deps_ of tflite_inference_calculator.cc
-
-  Now also remove XNNPACK deps:
-
-      sed -i 's/\"@org_tensorflow\/\/tensorflow\/lite\/delegates\/xnnpack/#\"@org_tensorflow\/\/tensorflow\/lite\/delegates\/xnnpack/g' /mediapipe/mediapipe/calculators/tflite/BUILD
-
-#### Now try cross-compiling for device
-
-* Object detection demo
-
-  ![Object Detection running on Coral](./images/object_detection_demo_coral.jpg)
-
-      bazel build -c opt --crosstool_top=@crosstool//:toolchains --compiler=gcc --cpu=aarch64 --define MEDIAPIPE_DISABLE_GPU=1 --copt -DMEDIAPIPE_EDGE_TPU --copt=-flax-vector-conversions mediapipe/examples/coral:object_detection_tpu
-
-  Copy object_detection_tpu binary to the MediaPipe checkout on the coral device
-
-      # outside docker env, open new terminal on host machine #
-      docker ps
-      docker cp <container-id>:/mediapipe/bazel-bin/mediapipe/examples/coral/object_detection_tpu /tmp/.
-      mdt push /tmp/object_detection_tpu /home/mendel/mediapipe/bazel-bin/.
-
-* Face detection demo
-
-  ![Face Detection running on Coral](./images/face_detection_demo_coral.gif)
-
-      bazel build -c opt --crosstool_top=@crosstool//:toolchains --compiler=gcc --cpu=aarch64 --define MEDIAPIPE_DISABLE_GPU=1 --copt -DMEDIAPIPE_EDGE_TPU --copt=-flax-vector-conversions mediapipe/examples/coral:face_detection_tpu
-
-  Copy face_detection_tpu binary to the MediaPipe checkout on the coral device
-
-      # outside docker env, open new terminal on host machine #
-      docker ps
-      docker cp <container-id>:/mediapipe/bazel-bin/mediapipe/examples/coral/face_detection_tpu /tmp/.
-      mdt push /tmp/face_detection_tpu /home/mendel/mediapipe/bazel-bin/.
-
-## On the coral device (with display)
-
-      # Object detection
-      cd ~/mediapipe
-      chmod +x bazel-bin/object_detection_tpu
-      export GLOG_logtostderr=1
-      bazel-bin/object_detection_tpu --calculator_graph_config_file=mediapipe/examples/coral/graphs/object_detection_desktop_live.pbtxt
-
-      # Face detection
-      cd ~/mediapipe
-      chmod +x bazel-bin/face_detection_tpu
-      export GLOG_logtostderr=1
-      bazel-bin/face_detection_tpu --calculator_graph_config_file=mediapipe/examples/coral/graphs/face_detection_desktop_live.pbtxt
+# Coral Support
+
+## Bazel Setup
+
+You can compile MediaPipe with enabled Edge TPU support to run
+[Coral models](http://coral.ai/models). Just add
+`--define MEDIAPIPE_EDGE_TPU=<type>` to the `bazel` command:
+
+* `--define MEDIAPIPE_EDGE_TPU=usb` for Coral USB devices on Linux and macOS
+* `--define MEDIAPIPE_EDGE_TPU=pci` for Coral PCIe devices on Linux
+* `--define MEDIAPIPE_EDGE_TPU=all` for both Coral USB and PCIe devices on Linux
+
+You have to install `libusb` library in order to compile with USB support:
+
+* `libusb-1.0-0-dev` on Linux
+* `libusb` on macOS via MacPorts or Homebrew
+
+Command to compile face detection Coral example:
+
+```bash
+bazel build \
+    --compilation_mode=opt \
+    --define darwinn_portable=1 \
+    --define MEDIAPIPE_DISABLE_GPU=1 \
+    --define MEDIAPIPE_EDGE_TPU=usb \
+    --linkopt=-l:libusb-1.0.so \
+    mediapipe/examples/coral:face_detection_tpu
+```
+
+## Cross-compilation
+
+Sometimes you need to cross-compile MediaPipe source code, e.g. get `ARM32`
+or `ARM64` binaries on `x86` system. Install cross-compilation toolchain on
+your system or use our preconfigured Docker environment for that:
+
+```bash
+# For ARM32 (e.g. Raspberry Pi)
+make -C mediapipe/examples/coral PLATFORM=armhf docker
+
+# For ARM64 (e.g. Coral Dev Board)
+make -C mediapipe/examples/coral PLATFORM=arm64 docker
+```
+
+After running this command you'll get a shell to the Docker environment which
+has everything ready to start compilation:
+
+```bash
+# For ARM32 (e.g. Raspberry Pi)
+bazel build \
+    --crosstool_top=@crosstool//:toolchains \
+    --compiler=gcc \
+    --cpu=armv7a \
+    --define darwinn_portable=1 \
+    --define MEDIAPIPE_DISABLE_GPU=1 \
+    --define MEDIAPIPE_EDGE_TPU=usb \
+    --linkopt=-l:libusb-1.0.so \
+    mediapipe/examples/coral:face_detection_tpu
+
+# For ARM64 (e.g. Coral Dev Board)
+bazel build \
+    --crosstool_top=@crosstool//:toolchains \
+    --compiler=gcc \
+    --cpu=aarch64 \
+    --define darwinn_portable=1 \
+    --define MEDIAPIPE_DISABLE_GPU=1 \
+    --define MEDIAPIPE_EDGE_TPU=usb \
+    --linkopt=-l:libusb-1.0.so \
+    mediapipe/examples/coral:face_detection_tpu
+```
+
+Our Docker environment defines `${BAZEL_CPU}` value, so you can use it directly:
+
+```bash
+bazel build \
+    --crosstool_top=@crosstool//:toolchains \
+    --compiler=gcc \
+    --cpu=${BAZEL_CPU} \
+    --define darwinn_portable=1 \
+    --define MEDIAPIPE_DISABLE_GPU=1 \
+    --define MEDIAPIPE_EDGE_TPU=usb \
+    --linkopt=-l:libusb-1.0.so \
+    mediapipe/examples/coral:face_detection_tpu
+```
+
+The command above is already defined in our `Makefile`, so you can simply run:
+
+```bash
+make -C mediapipe/examples/coral \
+    BAZEL_TARGET=mediapipe/examples/coral:face_detection_tpu \
+    build
+```
+
+The output binary will be automatically copied to `out/<platform>` directory.
+
+You can also run compilation inside Docker environment as a single
+command:
+
+```bash
+make -C mediapipe/examples/coral \
+    PLATFORM=armhf \
+    DOCKER_COMMAND="make -C mediapipe/examples/coral BAZEL_TARGET=mediapipe/examples/coral:face_detection_tpu build" \
+    docker
+```
+
+and get the output binary from `out/<platform>` directory. Any Mediapipe target
+can be cross-compiled this way, e.g. try
+`mediapipe/examples/desktop/hand_tracking:hand_tracking_cpu`.
+
+To summarize everything:
+
+| Arch  | PLATFORM       | Output      | Board                                                    |
+| ----- | -------------- | ----------- | -------------------------------------------------------- |
+| ARM32 | PLATFORM=armhf | out/armv7a  | [Raspberry Pi](https://www.raspberrypi.org/products/)    |
+| ARM64 | PLATFORM=arm64 | out/aarch64 | [Coral Dev Board](https://coral.ai/products/dev-board/)  |
+
+## Coral Examples
+
+There are two Coral examples in `mediapipe/examples/coral` directory. Compile
+them for your platform:
+
+```bash
+# Face detection
+make -C mediapipe/examples/coral \
+    PLATFORM=armhf \
+    DOCKER_COMMAND="make -C mediapipe/examples/coral BAZEL_TARGET=mediapipe/examples/coral:face_detection_tpu build" \
+    docker
+
+# Object detection
+make -C mediapipe/examples/coral \
+    PLATFORM=armhf \
+    DOCKER_COMMAND="make -C mediapipe/examples/coral BAZEL_TARGET=mediapipe/examples/coral:object_detection_tpu build" \
+    docker
+```
+
+Copy output binaries along with corresponding auxiliary files to your target
+system. You can copy the whole `mediapipe` folder for simplicity:
+
+```bash
+scp -r mediapipe <user>@<host>:.
+```
+
+OpenCV runtime libraries need to be installed on your target system:
+
+```bash
+sudo apt-get install -y \
+    libopencv-core-dev \
+    libopencv-highgui-dev \
+    libopencv-calib3d-dev \
+    libopencv-features2d-dev \
+    libopencv-imgproc-dev \
+    libopencv-video-dev
+```
+
+If you are going to connect Coral USB accelerator to your target system then
+you'll also need `libusb` library:
+
+```shell
+sudo apt-get install -y \
+    libusb-1.0-0
+```
+
+Connect USB camera and Coral device to your target system and run the copied
+binaries:
+
+```bash
+# Face Detection
+GLOG_logtostderr=1 ./face_detection_tpu --calculator_graph_config_file \
+    mediapipe/examples/coral/graphs/face_detection_desktop_live.pbtxt
+
+# Object Detection
+GLOG_logtostderr=1 ./object_detection_tpu --calculator_graph_config_file \
+    mediapipe/examples/coral/graphs/object_detection_desktop_live.pbtxt
+```
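Putting the new README together, an end-to-end run for the Coral Dev Board (ARM64) looks roughly like the sketch below. Every command is taken from the sections above; only the login `mendel@coral-board` is an illustrative placeholder for your board's user and hostname:

```bash
# 1. Cross-compile the face detection example inside the arm64 Docker environment.
make -C mediapipe/examples/coral \
    PLATFORM=arm64 \
    DOCKER_COMMAND="make -C mediapipe/examples/coral BAZEL_TARGET=mediapipe/examples/coral:face_detection_tpu build" \
    docker

# 2. Copy the checkout (including out/aarch64 and the graph files) to the board.
scp -r mediapipe mendel@coral-board:.

# 3. On the board, with USB camera, Coral TPU and display attached:
cd mediapipe
GLOG_logtostderr=1 ./out/aarch64/face_detection_tpu --calculator_graph_config_file \
    mediapipe/examples/coral/graphs/face_detection_desktop_live.pbtxt
```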
@@ -1,30 +0,0 @@
### Coral additions to MediaPipe WORKSPACE ###

#COMMIT=$(git ls-remote https://github.com/google-coral/crosstool master | awk '{print $1}')
#SHA256=$(curl -L "https://github.com/google-coral/crosstool/archive/${COMMIT}.tar.gz" | sha256sum | awk '{print $1}')
# Oct 2019
#COMMIT=9e00d5be43bf001f883b5700f5d04882fea00229
#SHA256=cb31b1417ccdcf7dd9fca5ec63e1571672372c30427730255997a547569d2feb
http_archive(
    name = "coral_crosstool",
    sha256 = "cb31b1417ccdcf7dd9fca5ec63e1571672372c30427730255997a547569d2feb",
    strip_prefix = "crosstool-9e00d5be43bf001f883b5700f5d04882fea00229",
    urls = [
        "https://github.com/google-coral/crosstool/archive/9e00d5be43bf001f883b5700f5d04882fea00229.tar.gz",
    ],
)
load("@coral_crosstool//:configure.bzl", "cc_crosstool")
cc_crosstool(name = "crosstool")

# EdgeTPU
new_local_repository(
    name = "edgetpu",
    path = "/edgetpu/libedgetpu",
    build_file = "/edgetpu/libedgetpu/BUILD"
)
new_local_repository(
    name = "libedgetpu",
    path = "/usr/lib/aarch64-linux-gnu",
    build_file = "/edgetpu/libedgetpu/BUILD"
)
@@ -74,43 +74,12 @@ node {
     }
   }
 }
 
-# Generates a single side packet containing a vector of SSD anchors based on
-# the specification in the options.
-node {
-  calculator: "SsdAnchorsCalculator"
-  output_side_packet: "anchors"
-  options: {
-    [mediapipe.SsdAnchorsCalculatorOptions.ext] {
-      num_layers: 6
-      min_scale: 0.2
-      max_scale: 0.95
-      input_size_height: 300
-      input_size_width: 300
-      anchor_offset_x: 0.5
-      anchor_offset_y: 0.5
-      strides: 16
-      strides: 32
-      strides: 64
-      strides: 128
-      strides: 256
-      strides: 512
-      aspect_ratios: 1.0
-      aspect_ratios: 2.0
-      aspect_ratios: 0.5
-      aspect_ratios: 3.0
-      aspect_ratios: 0.3333
-      reduce_boxes_in_lowest_layer: true
-    }
-  }
-}
-
 # Decodes the detection tensors generated by the TensorFlow Lite model, based on
 # the SSD anchors and the specification in the options, into a vector of
 # detections. Each detection describes a detected object.
 node {
   calculator: "TfLiteTensorsToDetectionsCalculator"
   input_stream: "TENSORS:detection_tensors"
-  input_side_packet: "ANCHORS:anchors"
   output_stream: "DETECTIONS:detections"
   options: {
     [mediapipe.TfLiteTensorsToDetectionsCalculatorOptions.ext] {
@@ -1,34 +0,0 @@
#!/bin/sh

set -e
set -v

echo 'Please run this from root level mediapipe directory! \n Ex:'
echo ' sh mediapipe/examples/coral/setup.sh '

sleep 3

mkdir -p opencv32_arm64_libs

# prepare docker aux script
cp mediapipe/examples/coral/update_sources.sh update_sources.sh
chmod +x update_sources.sh

# backup non-coral Dockerfile
mv Dockerfile Dockerfile.orig
cp mediapipe/examples/coral/Dockerfile Dockerfile

# backup non-coral workspace
cp WORKSPACE WORKSPACE.orig

# create temps
cp WORKSPACE WORKSPACE.1
cp mediapipe/examples/coral/WORKSPACE.coral WORKSPACE.2

# merge (shell decides concat order, unless numbered appropriately)
cat WORKSPACE.1 WORKSPACE.2 > WORKSPACE

# cleanup
rm WORKSPACE.1 WORKSPACE.2

echo 'done'
@@ -1,11 +0,0 @@
#!/bin/bash

# To run in the Coral Docker environment.

. /etc/os-release

sed -i "s/deb\ /deb \[arch=amd64\]\ /g" /etc/apt/sources.list

echo "deb [arch=arm64,armhf] http://ports.ubuntu.com/ubuntu-ports ${UBUNTU_CODENAME} main universe" >> /etc/apt/sources.list
echo "deb [arch=arm64,armhf] http://ports.ubuntu.com/ubuntu-ports ${UBUNTU_CODENAME}-updates main universe" >> /etc/apt/sources.list
echo "deb [arch=arm64,armhf] http://ports.ubuntu.com/ubuntu-ports ${UBUNTU_CODENAME}-security main universe" >> /etc/apt/sources.list
@@ -17,8 +17,10 @@ load("//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library")
 licenses(["notice"])
 
 package(default_visibility = [
+    "//buzz/diffractor/mediapipe:__subpackages__",
     "//mediapipe/examples:__subpackages__",
     "//mediapipe/viz:__subpackages__",
+    "//mediapipe/web/solutions:__subpackages__",
 ])
 
 cc_library(
@@ -43,6 +43,9 @@ namespace mediapipe {
 namespace autoflip {
 namespace {
 
+constexpr char kDetectedBordersTag[] = "DETECTED_BORDERS";
+constexpr char kVideoTag[] = "VIDEO";
+
 const char kConfig[] = R"(
     calculator: "BorderDetectionCalculator"
     input_stream: "VIDEO:camera_frames"
@@ -81,14 +84,14 @@ TEST(BorderDetectionCalculatorTest, NoBorderTest) {
       ImageFormat::SRGB, kTestFrameWidth, kTestFrameHeight);
   cv::Mat input_mat = mediapipe::formats::MatView(input_frame.get());
   input_mat.setTo(cv::Scalar(0, 0, 0));
-  runner->MutableInputs()->Tag("VIDEO").packets.push_back(
+  runner->MutableInputs()->Tag(kVideoTag).packets.push_back(
       Adopt(input_frame.release()).At(Timestamp::PostStream()));
 
   // Run the calculator.
   MP_ASSERT_OK(runner->Run());
 
   const std::vector<Packet>& output_packets =
-      runner->Outputs().Tag("DETECTED_BORDERS").packets;
+      runner->Outputs().Tag(kDetectedBordersTag).packets;
   ASSERT_EQ(1, output_packets.size());
   const auto& static_features = output_packets[0].Get<StaticFeatures>();
   ASSERT_EQ(0, static_features.border().size());
@@ -115,14 +118,14 @@ TEST(BorderDetectionCalculatorTest, TopBorderTest) {
   cv::Mat sub_image =
       input_mat(cv::Rect(0, 0, kTestFrameWidth, kTopBorderHeight));
   sub_image.setTo(cv::Scalar(255, 0, 0));
-  runner->MutableInputs()->Tag("VIDEO").packets.push_back(
+  runner->MutableInputs()->Tag(kVideoTag).packets.push_back(
       Adopt(input_frame.release()).At(Timestamp::PostStream()));
 
   // Run the calculator.
   MP_ASSERT_OK(runner->Run());
 
   const std::vector<Packet>& output_packets =
-      runner->Outputs().Tag("DETECTED_BORDERS").packets;
+      runner->Outputs().Tag(kDetectedBordersTag).packets;
   ASSERT_EQ(1, output_packets.size());
   const auto& static_features = output_packets[0].Get<StaticFeatures>();
   ASSERT_EQ(1, static_features.border().size());
@@ -155,14 +158,14 @@ TEST(BorderDetectionCalculatorTest, TopBorderPadTest) {
   cv::Mat sub_image =
       input_mat(cv::Rect(0, 0, kTestFrameWidth, kTopBorderHeight));
   sub_image.setTo(cv::Scalar(255, 0, 0));
-  runner->MutableInputs()->Tag("VIDEO").packets.push_back(
+  runner->MutableInputs()->Tag(kVideoTag).packets.push_back(
       Adopt(input_frame.release()).At(Timestamp::PostStream()));
 
   // Run the calculator.
   MP_ASSERT_OK(runner->Run());
 
   const std::vector<Packet>& output_packets =
-      runner->Outputs().Tag("DETECTED_BORDERS").packets;
+      runner->Outputs().Tag(kDetectedBordersTag).packets;
   ASSERT_EQ(1, output_packets.size());
   const auto& static_features = output_packets[0].Get<StaticFeatures>();
   ASSERT_EQ(1, static_features.border().size());
@@ -197,14 +200,14 @@ TEST(BorderDetectionCalculatorTest, BottomBorderTest) {
       input_mat(cv::Rect(0, kTestFrameHeight - kBottomBorderHeight,
                          kTestFrameWidth, kBottomBorderHeight));
   bottom_image.setTo(cv::Scalar(255, 0, 0));
-  runner->MutableInputs()->Tag("VIDEO").packets.push_back(
+  runner->MutableInputs()->Tag(kVideoTag).packets.push_back(
       Adopt(input_frame.release()).At(Timestamp::PostStream()));
 
   // Run the calculator.
   MP_ASSERT_OK(runner->Run());
 
   const std::vector<Packet>& output_packets =
-      runner->Outputs().Tag("DETECTED_BORDERS").packets;
+      runner->Outputs().Tag(kDetectedBordersTag).packets;
   ASSERT_EQ(1, output_packets.size());
   const auto& static_features = output_packets[0].Get<StaticFeatures>();
   ASSERT_EQ(1, static_features.border().size());
@@ -238,14 +241,14 @@ TEST(BorderDetectionCalculatorTest, TopBottomBorderTest) {
       input_mat(cv::Rect(0, kTestFrameHeight - kBottomBorderHeight,
                          kTestFrameWidth, kBottomBorderHeight));
   bottom_image.setTo(cv::Scalar(255, 0, 0));
-  runner->MutableInputs()->Tag("VIDEO").packets.push_back(
+  runner->MutableInputs()->Tag(kVideoTag).packets.push_back(
       Adopt(input_frame.release()).At(Timestamp::PostStream()));
 
   // Run the calculator.
   MP_ASSERT_OK(runner->Run());
 
   const std::vector<Packet>& output_packets =
-      runner->Outputs().Tag("DETECTED_BORDERS").packets;
+      runner->Outputs().Tag(kDetectedBordersTag).packets;
   ASSERT_EQ(1, output_packets.size());
   const auto& static_features = output_packets[0].Get<StaticFeatures>();
   ASSERT_EQ(2, static_features.border().size());
@@ -291,14 +294,14 @@ TEST(BorderDetectionCalculatorTest, TopBottomBorderTestAspect2) {
       input_mat(cv::Rect(0, kTestFrameHeightTall - kBottomBorderHeight,
                          kTestFrameWidthTall, kBottomBorderHeight));
   bottom_image.setTo(cv::Scalar(255, 0, 0));
-  runner->MutableInputs()->Tag("VIDEO").packets.push_back(
+  runner->MutableInputs()->Tag(kVideoTag).packets.push_back(
       Adopt(input_frame.release()).At(Timestamp::PostStream()));
 
   // Run the calculator.
   MP_ASSERT_OK(runner->Run());
 
   const std::vector<Packet>& output_packets =
-      runner->Outputs().Tag("DETECTED_BORDERS").packets;
+      runner->Outputs().Tag(kDetectedBordersTag).packets;
   ASSERT_EQ(1, output_packets.size());
   const auto& static_features = output_packets[0].Get<StaticFeatures>();
   ASSERT_EQ(2, static_features.border().size());
@@ -352,14 +355,14 @@ TEST(BorderDetectionCalculatorTest, DominantColor) {
       input_mat(cv::Rect(0, 0, kTestFrameWidth / 2 + 50, kTestFrameHeight / 2));
   sub_image.setTo(cv::Scalar(255, 0, 0));
 
-  runner->MutableInputs()->Tag("VIDEO").packets.push_back(
+  runner->MutableInputs()->Tag(kVideoTag).packets.push_back(
      Adopt(input_frame.release()).At(Timestamp::PostStream()));
 
   // Run the calculator.
   MP_ASSERT_OK(runner->Run());
 
   const std::vector<Packet>& output_packets =
-      runner->Outputs().Tag("DETECTED_BORDERS").packets;
+      runner->Outputs().Tag(kDetectedBordersTag).packets;
   ASSERT_EQ(1, output_packets.size());
   const auto& static_features = output_packets[0].Get<StaticFeatures>();
   ASSERT_EQ(0, static_features.border().size());
@@ -383,7 +386,7 @@ void BM_Large(benchmark::State& state) {
   cv::Mat sub_image =
       input_mat(cv::Rect(0, 0, kTestFrameLargeWidth, kTopBorderHeight));
   sub_image.setTo(cv::Scalar(255, 0, 0));
-  runner->MutableInputs()->Tag("VIDEO").packets.push_back(
+  runner->MutableInputs()->Tag(kVideoTag).packets.push_back(
       Adopt(input_frame.release()).At(Timestamp::PostStream()));
 
   // Run the calculator.
@@ -31,7 +31,11 @@ constexpr char kVideoSize[] = "VIDEO_SIZE";
 constexpr char kSalientRegions[] = "SALIENT_REGIONS";
 constexpr char kDetections[] = "DETECTIONS";
 constexpr char kDetectedBorders[] = "BORDERS";
+// Crop location as abs rect discretized.
 constexpr char kCropRect[] = "CROP_RECT";
+// Crop location as normalized rect.
+constexpr char kNormalizedCropRect[] = "NORMALIZED_CROP_RECT";
+// Crop location without position smoothing.
 constexpr char kFirstCropRect[] = "FIRST_CROP_RECT";
 // Can be used to control whether an animated zoom should actually performed
 // (configured through option us_to_first_rect). If provided, a non-zero integer
@@ -51,6 +55,8 @@ constexpr float kFieldOfView = 60;
 // Used to save state on Close and load state on Open in a new graph.
 // Can be used to preserve state between graphs.
 constexpr char kStateCache[] = "STATE_CACHE";
+// Tolerance for zooming out recentering.
+constexpr float kPixelTolerance = 3;
 
 namespace mediapipe {
 namespace autoflip {
@@ -166,6 +172,9 @@ absl::Status ContentZoomingCalculator::GetContract(
   if (cc->Outputs().HasTag(kCropRect)) {
     cc->Outputs().Tag(kCropRect).Set<mediapipe::Rect>();
   }
+  if (cc->Outputs().HasTag(kNormalizedCropRect)) {
+    cc->Outputs().Tag(kNormalizedCropRect).Set<mediapipe::NormalizedRect>();
+  }
   if (cc->Outputs().HasTag(kFirstCropRect)) {
     cc->Outputs().Tag(kFirstCropRect).Set<mediapipe::NormalizedRect>();
   }
@@ -553,6 +562,16 @@ absl::Status ContentZoomingCalculator::Process(
       cc->Outputs().Tag(kCropRect).Add(default_rect.release(),
                                        Timestamp(cc->InputTimestamp()));
     }
+    if (cc->Outputs().HasTag(kNormalizedCropRect)) {
+      auto default_rect = absl::make_unique<mediapipe::NormalizedRect>();
+      default_rect->set_x_center(0.5);
+      default_rect->set_y_center(0.5);
+      default_rect->set_width(1.0);
+      default_rect->set_height(1.0);
+      cc->Outputs()
+          .Tag(kNormalizedCropRect)
+          .Add(default_rect.release(), Timestamp(cc->InputTimestamp()));
+    }
     // Also provide a first crop rect: in this case a zero-sized one.
     if (cc->Outputs().HasTag(kFirstCropRect)) {
       cc->Outputs()
@@ -634,9 +653,9 @@ absl::Status ContentZoomingCalculator::Process(
   // Compute smoothed zoom camera path.
   MP_RETURN_IF_ERROR(path_solver_zoom_->AddObservation(
       height, cc->InputTimestamp().Microseconds()));
-  int path_height;
+  float path_height;
   MP_RETURN_IF_ERROR(path_solver_zoom_->GetState(&path_height));
-  int path_width = path_height * target_aspect_;
+  float path_width = path_height * target_aspect_;
 
   // Update pixel-per-degree value for pan/tilt.
   int target_height;
@@ -652,11 +671,48 @@ absl::Status ContentZoomingCalculator::Process(
       offset_x, cc->InputTimestamp().Microseconds()));
   MP_RETURN_IF_ERROR(path_solver_tilt_->AddObservation(
       offset_y, cc->InputTimestamp().Microseconds()));
-  int path_offset_x;
+  float path_offset_x;
   MP_RETURN_IF_ERROR(path_solver_pan_->GetState(&path_offset_x));
-  int path_offset_y;
+  float path_offset_y;
   MP_RETURN_IF_ERROR(path_solver_tilt_->GetState(&path_offset_y));
 
+  float delta_height;
+  MP_RETURN_IF_ERROR(path_solver_zoom_->GetDeltaState(&delta_height));
+  int delta_width = delta_height * target_aspect_;
+
+  // Smooth centering when zooming out.
+  float remaining_width = target_width - path_width;
+  int width_space = frame_width_ - target_width;
+  if (abs(path_offset_x - frame_width_ / 2) >
+          width_space / 2 + kPixelTolerance &&
+      remaining_width > kPixelTolerance) {
+    float required_width =
+        abs(path_offset_x - frame_width_ / 2) - width_space / 2;
+    if (path_offset_x < frame_width_ / 2) {
+      path_offset_x += delta_width * (required_width / remaining_width);
+      MP_RETURN_IF_ERROR(path_solver_pan_->SetState(path_offset_x));
+    } else {
+      path_offset_x -= delta_width * (required_width / remaining_width);
+      MP_RETURN_IF_ERROR(path_solver_pan_->SetState(path_offset_x));
+    }
+  }
+
+  float remaining_height = target_height - path_height;
+  int height_space = frame_height_ - target_height;
+  if (abs(path_offset_y - frame_height_ / 2) >
+          height_space / 2 + kPixelTolerance &&
+      remaining_height > kPixelTolerance) {
+    float required_height =
+        abs(path_offset_y - frame_height_ / 2) - height_space / 2;
+    if (path_offset_y < frame_height_ / 2) {
+      path_offset_y += delta_height * (required_height / remaining_height);
+      MP_RETURN_IF_ERROR(path_solver_tilt_->SetState(path_offset_y));
+    } else {
+      path_offset_y -= delta_height * (required_height / remaining_height);
+      MP_RETURN_IF_ERROR(path_solver_tilt_->SetState(path_offset_y));
+    }
+  }
+
   // Prevent box from extending beyond the image after camera smoothing.
   if (path_offset_y - ceil(path_height / 2.0) < 0) {
     path_offset_y = ceil(path_height / 2.0);
@@ -705,7 +761,7 @@ absl::Status ContentZoomingCalculator::Process(
     is_animating = IsAnimatingToFirstRect(cc->InputTimestamp());
   }
 
-  // Transmit downstream to glcroppingcalculator.
+  // Transmit downstream to glcroppingcalculator in discrete int values.
   if (cc->Outputs().HasTag(kCropRect)) {
     std::unique_ptr<mediapipe::Rect> gpu_rect;
     if (is_animating) {
@@ -716,13 +772,36 @@ absl::Status ContentZoomingCalculator::Process(
     } else {
       gpu_rect = absl::make_unique<mediapipe::Rect>();
       gpu_rect->set_x_center(path_offset_x);
-      gpu_rect->set_width(path_height * target_aspect_);
+      gpu_rect->set_width(path_width);
       gpu_rect->set_y_center(path_offset_y);
       gpu_rect->set_height(path_height);
     }
     cc->Outputs().Tag(kCropRect).Add(gpu_rect.release(),
                                      Timestamp(cc->InputTimestamp()));
   }
+  if (cc->Outputs().HasTag(kNormalizedCropRect)) {
+    std::unique_ptr<mediapipe::NormalizedRect> gpu_rect =
+        absl::make_unique<mediapipe::NormalizedRect>();
+    float float_frame_width = static_cast<float>(frame_width_);
+    float float_frame_height = static_cast<float>(frame_height_);
+    if (is_animating) {
+      auto rect =
+          GetAnimationRect(frame_width, frame_height, cc->InputTimestamp());
+      MP_RETURN_IF_ERROR(rect.status());
+      gpu_rect->set_x_center(rect->x_center() / float_frame_width);
+      gpu_rect->set_width(rect->width() / float_frame_width);
+      gpu_rect->set_y_center(rect->y_center() / float_frame_height);
+      gpu_rect->set_height(rect->height() / float_frame_height);
+    } else {
+      gpu_rect->set_x_center(path_offset_x / float_frame_width);
+      gpu_rect->set_width(path_width / float_frame_width);
+      gpu_rect->set_y_center(path_offset_y / float_frame_height);
+      gpu_rect->set_height(path_height / float_frame_height);
+    }
+    cc->Outputs()
+        .Tag(kNormalizedCropRect)
+        .Add(gpu_rect.release(), Timestamp(cc->InputTimestamp()));
+  }
 
   if (cc->Outputs().HasTag(kFirstCropRect)) {
     cc->Outputs()
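The new recentering logic above can be read as the following update rule for the horizontal axis (the vertical axis is symmetric); this is only a paraphrase of the code in this hunk, not an independently documented formula. With frame width $W$, smoothed crop width $w$, target crop width $w_t$, pan state $x$, and per-step zoom delta $\Delta w$:

$$
\text{if } \left|x - \tfrac{W}{2}\right| > \tfrac{W - w_t}{2} + \epsilon \ \text{ and } \ w_t - w > \epsilon:\qquad
x \leftarrow x \pm \Delta w \cdot \frac{\left|x - \tfrac{W}{2}\right| - \tfrac{W - w_t}{2}}{w_t - w}
$$

with the sign chosen to move $x$ toward the frame center and $\epsilon$ = `kPixelTolerance` = 3 pixels. In other words, while the crop is still zooming out, any pan offset that would push the target crop outside the frame is bled off in proportion to how much of the zoom-out remains.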
@@ -38,6 +38,17 @@ namespace mediapipe {
 namespace autoflip {
 namespace {
 
+constexpr char kFirstCropRectTag[] = "FIRST_CROP_RECT";
+constexpr char kStateCacheTag[] = "STATE_CACHE";
+constexpr char kCropRectTag[] = "CROP_RECT";
+constexpr char kBordersTag[] = "BORDERS";
+constexpr char kSalientRegionsTag[] = "SALIENT_REGIONS";
+constexpr char kVideoTag[] = "VIDEO";
+constexpr char kMaxZoomFactorPctTag[] = "MAX_ZOOM_FACTOR_PCT";
+constexpr char kAnimateZoomTag[] = "ANIMATE_ZOOM";
+constexpr char kVideoSizeTag[] = "VIDEO_SIZE";
+constexpr char kDetectionsTag[] = "DETECTIONS";
+
 const char kConfigA[] = R"(
     calculator: "ContentZoomingCalculator"
     input_stream: "VIDEO:camera_frames"
@@ -48,12 +59,15 @@ const char kConfigA[] = R"(
       max_zoom_value_deg: 0
       kinematic_options_zoom {
        min_motion_to_reframe: 1.2
+        max_velocity: 18
       }
       kinematic_options_tilt {
        min_motion_to_reframe: 1.2
+        max_velocity: 18
       }
       kinematic_options_pan {
        min_motion_to_reframe: 1.2
+        max_velocity: 18
       }
     }
   }
@@ -73,12 +87,15 @@ const char kConfigB[] = R"(
       max_zoom_value_deg: 0
       kinematic_options_zoom {
        min_motion_to_reframe: 1.2
+        max_velocity: 18
       }
       kinematic_options_tilt {
        min_motion_to_reframe: 1.2
+        max_velocity: 18
       }
       kinematic_options_pan {
        min_motion_to_reframe: 1.2
+        max_velocity: 18
       }
     }
   }
@@ -94,12 +111,15 @@ const char kConfigC[] = R"(
       max_zoom_value_deg: 0
       kinematic_options_zoom {
        min_motion_to_reframe: 1.2
+        max_velocity: 18
       }
       kinematic_options_tilt {
        min_motion_to_reframe: 1.2
+        max_velocity: 18
       }
       kinematic_options_pan {
        min_motion_to_reframe: 1.2
+        max_velocity: 18
       }
     }
   }
@@ -111,17 +131,21 @@ const char kConfigD[] = R"(
     input_stream: "DETECTIONS:detections"
     output_stream: "CROP_RECT:rect"
     output_stream: "FIRST_CROP_RECT:first_rect"
+    output_stream: "NORMALIZED_CROP_RECT:float_rect"
     options: {
       [mediapipe.autoflip.ContentZoomingCalculatorOptions.ext]: {
        max_zoom_value_deg: 0
        kinematic_options_zoom {
          min_motion_to_reframe: 1.2
+          max_velocity: 18
        }
        kinematic_options_tilt {
          min_motion_to_reframe: 1.2
+          max_velocity: 18
        }
        kinematic_options_pan {
          min_motion_to_reframe: 1.2
+          max_velocity: 18
        }
      }
    }
@@ -139,12 +163,15 @@ const char kConfigE[] = R"(
       max_zoom_value_deg: 0
       kinematic_options_zoom {
        min_motion_to_reframe: 1.2
+        max_velocity: 18
       }
       kinematic_options_tilt {
        min_motion_to_reframe: 1.2
+        max_velocity: 18
       }
       kinematic_options_pan {
        min_motion_to_reframe: 1.2
+        max_velocity: 18
       }
     }
   }
@@ -162,12 +189,15 @@ const char kConfigF[] = R"(
       max_zoom_value_deg: 0
       kinematic_options_zoom {
        min_motion_to_reframe: 1.2
+        max_velocity: 18
       }
       kinematic_options_tilt {
        min_motion_to_reframe: 1.2
+        max_velocity: 18
       }
       kinematic_options_pan {
        min_motion_to_reframe: 1.2
+        max_velocity: 18
       }
     }
   }
@@ -220,17 +250,17 @@ void AddDetectionFrameSize(const cv::Rect_<float>& position, const int64 time,
     detections->push_back(detection);
   }
   runner->MutableInputs()
-      ->Tag("DETECTIONS")
+      ->Tag(kDetectionsTag)
       .packets.push_back(Adopt(detections.release()).At(Timestamp(time)));
 
   auto input_size = ::absl::make_unique<std::pair<int, int>>(width, height);
   runner->MutableInputs()
-      ->Tag("VIDEO_SIZE")
+      ->Tag(kVideoSizeTag)
       .packets.push_back(Adopt(input_size.release()).At(Timestamp(time)));
 
   if (flags.animated_zoom.has_value()) {
     runner->MutableInputs()
-        ->Tag("ANIMATE_ZOOM")
+        ->Tag(kAnimateZoomTag)
         .packets.push_back(
             mediapipe::MakePacket<bool>(flags.animated_zoom.value())
                 .At(Timestamp(time)));
@@ -238,7 +268,7 @@ void AddDetectionFrameSize(const cv::Rect_<float>& position, const int64 time,
 
   if (flags.max_zoom_factor_percent.has_value()) {
     runner->MutableInputs()
-        ->Tag("MAX_ZOOM_FACTOR_PCT")
+        ->Tag(kMaxZoomFactorPctTag)
         .packets.push_back(
             mediapipe::MakePacket<int>(flags.max_zoom_factor_percent.value())
                 .At(Timestamp(time)));
@@ -250,6 +280,21 @@ void AddDetection(const cv::Rect_<float>& position, const int64 time,
   AddDetectionFrameSize(position, time, 1000, 1000, runner);
 }
 
+void CheckCropRectFloats(const float x_center, const float y_center,
+                         const float width, const float height,
+                         const int frame_number,
+                         const CalculatorRunner::StreamContentsSet& output) {
+  ASSERT_GT(output.Tag("NORMALIZED_CROP_RECT").packets.size(), frame_number);
+  auto float_rect = output.Tag("NORMALIZED_CROP_RECT")
+                        .packets[frame_number]
+                        .Get<mediapipe::NormalizedRect>();
+
+  EXPECT_FLOAT_EQ(float_rect.x_center(), x_center);
+  EXPECT_FLOAT_EQ(float_rect.y_center(), y_center);
+  EXPECT_FLOAT_EQ(float_rect.width(), width);
+  EXPECT_FLOAT_EQ(float_rect.height(), height);
+}
+
 void CheckCropRect(const int x_center, const int y_center, const int width,
                    const int height, const int frame_number,
                    const std::vector<Packet>& output_packets) {
@@ -274,21 +319,21 @@ TEST(ContentZoomingCalculatorTest, ZoomTest) {
 
   auto input_frame =
       ::absl::make_unique<ImageFrame>(ImageFormat::SRGB, 1000, 1000);
-  runner->MutableInputs()->Tag("VIDEO").packets.push_back(
+  runner->MutableInputs()->Tag(kVideoTag).packets.push_back(
       Adopt(input_frame.release()).At(Timestamp(0)));
 
   runner->MutableInputs()
-      ->Tag("SALIENT_REGIONS")
+      ->Tag(kSalientRegionsTag)
       .packets.push_back(Adopt(detection_set.release()).At(Timestamp(0)));
 
   // Run the calculator.
   MP_ASSERT_OK(runner->Run());
 
   const std::vector<Packet>& output_packets =
-      runner->Outputs().Tag("BORDERS").packets;
+      runner->Outputs().Tag(kBordersTag).packets;
   ASSERT_EQ(1, output_packets.size());
   const auto& static_features = output_packets[0].Get<StaticFeatures>();
-  CheckBorder(static_features, 1000, 1000, 495, 395);
+  CheckBorder(static_features, 1000, 1000, 494, 394);
 }
 
 TEST(ContentZoomingCalculatorTest, ZoomTestFullPTZ) {
@@ -297,7 +342,7 @@ TEST(ContentZoomingCalculatorTest, ZoomTestFullPTZ) {
   AddDetection(cv::Rect_<float>(.4, .5, .1, .1), 0, runner.get());
   MP_ASSERT_OK(runner->Run());
   CheckCropRect(450, 550, 111, 111, 0,
-                runner->Outputs().Tag("CROP_RECT").packets);
+                runner->Outputs().Tag(kCropRectTag).packets);
 }
 
 TEST(ContentZoomingCalculatorTest, PanConfig) {
@@ -313,9 +358,9 @@ TEST(ContentZoomingCalculatorTest, PanConfig) {
   AddDetection(cv::Rect_<float>(.45, .55, .15, .15), 1000000, runner.get());
   MP_ASSERT_OK(runner->Run());
   CheckCropRect(450, 550, 111, 111, 0,
-                runner->Outputs().Tag("CROP_RECT").packets);
+                runner->Outputs().Tag(kCropRectTag).packets);
   CheckCropRect(483, 550, 111, 111, 1,
-                runner->Outputs().Tag("CROP_RECT").packets);
+                runner->Outputs().Tag(kCropRectTag).packets);
 }
 
 TEST(ContentZoomingCalculatorTest, PanConfigWithCache) {
@@ -330,31 +375,31 @@ TEST(ContentZoomingCalculatorTest, PanConfigWithCache) {
   options->mutable_kinematic_options_zoom()->set_min_motion_to_reframe(50.0);
   {
     auto runner = ::absl::make_unique<CalculatorRunner>(config);
-    runner->MutableSidePackets()->Tag("STATE_CACHE") = MakePacket<
+    runner->MutableSidePackets()->Tag(kStateCacheTag) = MakePacket<
         mediapipe::autoflip::ContentZoomingCalculatorStateCacheType*>(&cache);
     AddDetection(cv::Rect_<float>(.4, .5, .1, .1), 0, runner.get());
     MP_ASSERT_OK(runner->Run());
     CheckCropRect(450, 550, 111, 111, 0,
-                  runner->Outputs().Tag("CROP_RECT").packets);
+                  runner->Outputs().Tag(kCropRectTag).packets);
   }
   {
     auto runner = ::absl::make_unique<CalculatorRunner>(config);
-    runner->MutableSidePackets()->Tag("STATE_CACHE") = MakePacket<
+    runner->MutableSidePackets()->Tag(kStateCacheTag) = MakePacket<
        mediapipe::autoflip::ContentZoomingCalculatorStateCacheType*>(&cache);
     AddDetection(cv::Rect_<float>(.45, .55, .15, .15), 1000000, runner.get());
     MP_ASSERT_OK(runner->Run());
     CheckCropRect(483, 550, 111, 111, 0,
-                  runner->Outputs().Tag("CROP_RECT").packets);
+                  runner->Outputs().Tag(kCropRectTag).packets);
   }
   // Now repeat the last frame for a new runner without the cache to see a reset
   {
     auto runner = ::absl::make_unique<CalculatorRunner>(config);
-    runner->MutableSidePackets()->Tag("STATE_CACHE") = MakePacket<
+    runner->MutableSidePackets()->Tag(kStateCacheTag) = MakePacket<
        mediapipe::autoflip::ContentZoomingCalculatorStateCacheType*>(nullptr);
     AddDetection(cv::Rect_<float>(.45, .55, .15, .15), 2000000, runner.get());
     MP_ASSERT_OK(runner->Run());
     CheckCropRect(525, 625, 166, 166, 0,  // Without a cache, state was lost.
-                  runner->Outputs().Tag("CROP_RECT").packets);
+                  runner->Outputs().Tag(kCropRectTag).packets);
   }
 }
||||||
|
@ -371,9 +416,9 @@ TEST(ContentZoomingCalculatorTest, TiltConfig) {
|
||||||
AddDetection(cv::Rect_<float>(.45, .55, .15, .15), 1000000, runner.get());
|
AddDetection(cv::Rect_<float>(.45, .55, .15, .15), 1000000, runner.get());
|
||||||
MP_ASSERT_OK(runner->Run());
|
MP_ASSERT_OK(runner->Run());
|
||||||
CheckCropRect(450, 550, 111, 111, 0,
|
CheckCropRect(450, 550, 111, 111, 0,
|
||||||
runner->Outputs().Tag("CROP_RECT").packets);
|
runner->Outputs().Tag(kCropRectTag).packets);
|
||||||
CheckCropRect(450, 583, 111, 111, 1,
|
CheckCropRect(450, 583, 111, 111, 1,
|
||||||
runner->Outputs().Tag("CROP_RECT").packets);
|
runner->Outputs().Tag(kCropRectTag).packets);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(ContentZoomingCalculatorTest, ZoomConfig) {
|
TEST(ContentZoomingCalculatorTest, ZoomConfig) {
|
||||||
|
@ -389,9 +434,9 @@ TEST(ContentZoomingCalculatorTest, ZoomConfig) {
|
||||||
AddDetection(cv::Rect_<float>(.45, .55, .15, .15), 1000000, runner.get());
|
AddDetection(cv::Rect_<float>(.45, .55, .15, .15), 1000000, runner.get());
|
||||||
MP_ASSERT_OK(runner->Run());
|
MP_ASSERT_OK(runner->Run());
|
||||||
CheckCropRect(450, 550, 111, 111, 0,
|
CheckCropRect(450, 550, 111, 111, 0,
|
||||||
runner->Outputs().Tag("CROP_RECT").packets);
|
runner->Outputs().Tag(kCropRectTag).packets);
|
||||||
CheckCropRect(450, 550, 139, 139, 1,
|
CheckCropRect(450, 550, 138, 138, 1,
|
||||||
runner->Outputs().Tag("CROP_RECT").packets);
|
runner->Outputs().Tag(kCropRectTag).packets);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(ContentZoomingCalculatorTest, ZoomConfigWithCache) {
|
TEST(ContentZoomingCalculatorTest, ZoomConfigWithCache) {
|
||||||
|
@ -406,31 +451,31 @@ TEST(ContentZoomingCalculatorTest, ZoomConfigWithCache) {
|
||||||
options->mutable_kinematic_options_zoom()->set_update_rate_seconds(2);
|
options->mutable_kinematic_options_zoom()->set_update_rate_seconds(2);
|
||||||
{
|
{
|
||||||
auto runner = ::absl::make_unique<CalculatorRunner>(config);
|
auto runner = ::absl::make_unique<CalculatorRunner>(config);
|
||||||
runner->MutableSidePackets()->Tag("STATE_CACHE") = MakePacket<
|
runner->MutableSidePackets()->Tag(kStateCacheTag) = MakePacket<
|
||||||
mediapipe::autoflip::ContentZoomingCalculatorStateCacheType*>(&cache);
|
mediapipe::autoflip::ContentZoomingCalculatorStateCacheType*>(&cache);
|
||||||
AddDetection(cv::Rect_<float>(.4, .5, .1, .1), 0, runner.get());
|
AddDetection(cv::Rect_<float>(.4, .5, .1, .1), 0, runner.get());
|
||||||
MP_ASSERT_OK(runner->Run());
|
MP_ASSERT_OK(runner->Run());
|
||||||
CheckCropRect(450, 550, 111, 111, 0,
|
CheckCropRect(450, 550, 111, 111, 0,
|
||||||
runner->Outputs().Tag("CROP_RECT").packets);
|
runner->Outputs().Tag(kCropRectTag).packets);
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
auto runner = ::absl::make_unique<CalculatorRunner>(config);
|
auto runner = ::absl::make_unique<CalculatorRunner>(config);
|
||||||
runner->MutableSidePackets()->Tag("STATE_CACHE") = MakePacket<
|
runner->MutableSidePackets()->Tag(kStateCacheTag) = MakePacket<
|
||||||
mediapipe::autoflip::ContentZoomingCalculatorStateCacheType*>(&cache);
|
mediapipe::autoflip::ContentZoomingCalculatorStateCacheType*>(&cache);
|
||||||
AddDetection(cv::Rect_<float>(.45, .55, .15, .15), 1000000, runner.get());
|
AddDetection(cv::Rect_<float>(.45, .55, .15, .15), 1000000, runner.get());
|
||||||
MP_ASSERT_OK(runner->Run());
|
MP_ASSERT_OK(runner->Run());
|
||||||
CheckCropRect(450, 550, 139, 139, 0,
|
CheckCropRect(450, 550, 138, 138, 0,
|
||||||
runner->Outputs().Tag("CROP_RECT").packets);
|
runner->Outputs().Tag(kCropRectTag).packets);
|
||||||
}
|
}
|
||||||
// Now repeat the last frame for a new runner without the cache to see a reset
|
// Now repeat the last frame for a new runner without the cache to see a reset
|
||||||
{
|
{
|
||||||
auto runner = ::absl::make_unique<CalculatorRunner>(config);
|
auto runner = ::absl::make_unique<CalculatorRunner>(config);
|
||||||
runner->MutableSidePackets()->Tag("STATE_CACHE") = MakePacket<
|
runner->MutableSidePackets()->Tag(kStateCacheTag) = MakePacket<
|
||||||
mediapipe::autoflip::ContentZoomingCalculatorStateCacheType*>(nullptr);
|
mediapipe::autoflip::ContentZoomingCalculatorStateCacheType*>(nullptr);
|
||||||
AddDetection(cv::Rect_<float>(.45, .55, .15, .15), 2000000, runner.get());
|
AddDetection(cv::Rect_<float>(.45, .55, .15, .15), 2000000, runner.get());
|
||||||
MP_ASSERT_OK(runner->Run());
|
MP_ASSERT_OK(runner->Run());
|
||||||
CheckCropRect(525, 625, 166, 166, 0, // Without a cache, state was lost.
|
CheckCropRect(525, 625, 166, 166, 0, // Without a cache, state was lost.
|
||||||
runner->Outputs().Tag("CROP_RECT").packets);
|
runner->Outputs().Tag(kCropRectTag).packets);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -448,18 +493,18 @@ TEST(ContentZoomingCalculatorTest, MinAspectBorderValues) {
|
||||||
|
|
||||||
auto input_frame =
|
auto input_frame =
|
||||||
::absl::make_unique<ImageFrame>(ImageFormat::SRGB, 1000, 1000);
|
::absl::make_unique<ImageFrame>(ImageFormat::SRGB, 1000, 1000);
|
||||||
runner->MutableInputs()->Tag("VIDEO").packets.push_back(
|
runner->MutableInputs()->Tag(kVideoTag).packets.push_back(
|
||||||
Adopt(input_frame.release()).At(Timestamp(0)));
|
Adopt(input_frame.release()).At(Timestamp(0)));
|
||||||
|
|
||||||
runner->MutableInputs()
|
runner->MutableInputs()
|
||||||
->Tag("SALIENT_REGIONS")
|
->Tag(kSalientRegionsTag)
|
||||||
.packets.push_back(Adopt(detection_set.release()).At(Timestamp(0)));
|
.packets.push_back(Adopt(detection_set.release()).At(Timestamp(0)));
|
||||||
|
|
||||||
// Run the calculator.
|
// Run the calculator.
|
||||||
MP_ASSERT_OK(runner->Run());
|
MP_ASSERT_OK(runner->Run());
|
||||||
|
|
||||||
const std::vector<Packet>& output_packets =
|
const std::vector<Packet>& output_packets =
|
||||||
runner->Outputs().Tag("BORDERS").packets;
|
runner->Outputs().Tag(kBordersTag).packets;
|
||||||
ASSERT_EQ(1, output_packets.size());
|
ASSERT_EQ(1, output_packets.size());
|
||||||
const auto& static_features = output_packets[0].Get<StaticFeatures>();
|
const auto& static_features = output_packets[0].Get<StaticFeatures>();
|
||||||
CheckBorder(static_features, 1000, 1000, 250, 250);
|
CheckBorder(static_features, 1000, 1000, 250, 250);
|
||||||
|
@ -485,18 +530,18 @@ TEST(ContentZoomingCalculatorTest, TwoFacesWide) {
|
||||||
|
|
||||||
auto input_frame =
|
auto input_frame =
|
||||||
::absl::make_unique<ImageFrame>(ImageFormat::SRGB, 1000, 1000);
|
::absl::make_unique<ImageFrame>(ImageFormat::SRGB, 1000, 1000);
|
||||||
runner->MutableInputs()->Tag("VIDEO").packets.push_back(
|
runner->MutableInputs()->Tag(kVideoTag).packets.push_back(
|
||||||
Adopt(input_frame.release()).At(Timestamp(0)));
|
Adopt(input_frame.release()).At(Timestamp(0)));
|
||||||
|
|
||||||
runner->MutableInputs()
|
runner->MutableInputs()
|
||||||
->Tag("SALIENT_REGIONS")
|
->Tag(kSalientRegionsTag)
|
||||||
.packets.push_back(Adopt(detection_set.release()).At(Timestamp(0)));
|
.packets.push_back(Adopt(detection_set.release()).At(Timestamp(0)));
|
||||||
|
|
||||||
// Run the calculator.
|
// Run the calculator.
|
||||||
MP_ASSERT_OK(runner->Run());
|
MP_ASSERT_OK(runner->Run());
|
||||||
|
|
||||||
const std::vector<Packet>& output_packets =
|
const std::vector<Packet>& output_packets =
|
||||||
runner->Outputs().Tag("BORDERS").packets;
|
runner->Outputs().Tag(kBordersTag).packets;
|
||||||
ASSERT_EQ(1, output_packets.size());
|
ASSERT_EQ(1, output_packets.size());
|
||||||
const auto& static_features = output_packets[0].Get<StaticFeatures>();
|
const auto& static_features = output_packets[0].Get<StaticFeatures>();
|
||||||
|
|
||||||
|
@ -510,18 +555,18 @@ TEST(ContentZoomingCalculatorTest, NoDetectionOnInit) {
|
||||||
|
|
||||||
auto input_frame =
|
auto input_frame =
|
||||||
::absl::make_unique<ImageFrame>(ImageFormat::SRGB, 1000, 1000);
|
::absl::make_unique<ImageFrame>(ImageFormat::SRGB, 1000, 1000);
|
||||||
runner->MutableInputs()->Tag("VIDEO").packets.push_back(
|
runner->MutableInputs()->Tag(kVideoTag).packets.push_back(
|
||||||
Adopt(input_frame.release()).At(Timestamp(0)));
|
Adopt(input_frame.release()).At(Timestamp(0)));
|
||||||
|
|
||||||
runner->MutableInputs()
|
runner->MutableInputs()
|
||||||
->Tag("SALIENT_REGIONS")
|
->Tag(kSalientRegionsTag)
|
||||||
.packets.push_back(Adopt(detection_set.release()).At(Timestamp(0)));
|
.packets.push_back(Adopt(detection_set.release()).At(Timestamp(0)));
|
||||||
|
|
||||||
// Run the calculator.
|
// Run the calculator.
|
||||||
MP_ASSERT_OK(runner->Run());
|
MP_ASSERT_OK(runner->Run());
|
||||||
|
|
||||||
const std::vector<Packet>& output_packets =
|
const std::vector<Packet>& output_packets =
|
||||||
runner->Outputs().Tag("BORDERS").packets;
|
runner->Outputs().Tag(kBordersTag).packets;
|
||||||
ASSERT_EQ(1, output_packets.size());
|
ASSERT_EQ(1, output_packets.size());
|
||||||
const auto& static_features = output_packets[0].Get<StaticFeatures>();
|
const auto& static_features = output_packets[0].Get<StaticFeatures>();
|
||||||
|
|
||||||
|
@ -542,21 +587,21 @@ TEST(ContentZoomingCalculatorTest, ZoomTestPairSize) {
|
||||||
|
|
||||||
auto input_size = ::absl::make_unique<std::pair<int, int>>(1000, 1000);
|
auto input_size = ::absl::make_unique<std::pair<int, int>>(1000, 1000);
|
||||||
runner->MutableInputs()
|
runner->MutableInputs()
|
||||||
->Tag("VIDEO_SIZE")
|
->Tag(kVideoSizeTag)
|
||||||
.packets.push_back(Adopt(input_size.release()).At(Timestamp(0)));
|
.packets.push_back(Adopt(input_size.release()).At(Timestamp(0)));
|
||||||
|
|
||||||
runner->MutableInputs()
|
runner->MutableInputs()
|
||||||
->Tag("SALIENT_REGIONS")
|
->Tag(kSalientRegionsTag)
|
||||||
.packets.push_back(Adopt(detection_set.release()).At(Timestamp(0)));
|
.packets.push_back(Adopt(detection_set.release()).At(Timestamp(0)));
|
||||||
|
|
||||||
// Run the calculator.
|
// Run the calculator.
|
||||||
MP_ASSERT_OK(runner->Run());
|
MP_ASSERT_OK(runner->Run());
|
||||||
|
|
||||||
const std::vector<Packet>& output_packets =
|
const std::vector<Packet>& output_packets =
|
||||||
runner->Outputs().Tag("BORDERS").packets;
|
runner->Outputs().Tag(kBordersTag).packets;
|
||||||
ASSERT_EQ(1, output_packets.size());
|
ASSERT_EQ(1, output_packets.size());
|
||||||
const auto& static_features = output_packets[0].Get<StaticFeatures>();
|
const auto& static_features = output_packets[0].Get<StaticFeatures>();
|
||||||
CheckBorder(static_features, 1000, 1000, 495, 395);
|
CheckBorder(static_features, 1000, 1000, 494, 394);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(ContentZoomingCalculatorTest, ZoomTestNearOutsideBorder) {
|
TEST(ContentZoomingCalculatorTest, ZoomTestNearOutsideBorder) {
|
||||||
|
@ -571,9 +616,9 @@ TEST(ContentZoomingCalculatorTest, ZoomTestNearOutsideBorder) {
|
||||||
AddDetection(cv::Rect_<float>(.9, .9, .1, .1), 1000000, runner.get());
|
AddDetection(cv::Rect_<float>(.9, .9, .1, .1), 1000000, runner.get());
|
||||||
MP_ASSERT_OK(runner->Run());
|
MP_ASSERT_OK(runner->Run());
|
||||||
CheckCropRect(972, 972, 55, 55, 0,
|
CheckCropRect(972, 972, 55, 55, 0,
|
||||||
runner->Outputs().Tag("CROP_RECT").packets);
|
runner->Outputs().Tag(kCropRectTag).packets);
|
||||||
CheckCropRect(958, 958, 83, 83, 1,
|
CheckCropRect(944, 944, 83, 83, 1,
|
||||||
runner->Outputs().Tag("CROP_RECT").packets);
|
runner->Outputs().Tag(kCropRectTag).packets);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(ContentZoomingCalculatorTest, ZoomTestNearInsideBorder) {
|
TEST(ContentZoomingCalculatorTest, ZoomTestNearInsideBorder) {
|
||||||
|
@ -587,8 +632,8 @@ TEST(ContentZoomingCalculatorTest, ZoomTestNearInsideBorder) {
|
||||||
AddDetection(cv::Rect_<float>(0, 0, .05, .05), 0, runner.get());
|
AddDetection(cv::Rect_<float>(0, 0, .05, .05), 0, runner.get());
|
||||||
AddDetection(cv::Rect_<float>(0, 0, .1, .1), 1000000, runner.get());
|
AddDetection(cv::Rect_<float>(0, 0, .1, .1), 1000000, runner.get());
|
||||||
MP_ASSERT_OK(runner->Run());
|
MP_ASSERT_OK(runner->Run());
|
||||||
CheckCropRect(28, 28, 55, 55, 0, runner->Outputs().Tag("CROP_RECT").packets);
|
CheckCropRect(28, 28, 55, 55, 0, runner->Outputs().Tag(kCropRectTag).packets);
|
||||||
CheckCropRect(42, 42, 83, 83, 1, runner->Outputs().Tag("CROP_RECT").packets);
|
CheckCropRect(56, 56, 83, 83, 1, runner->Outputs().Tag(kCropRectTag).packets);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(ContentZoomingCalculatorTest, VerticalShift) {
|
TEST(ContentZoomingCalculatorTest, VerticalShift) {
|
||||||
|
@ -601,7 +646,9 @@ TEST(ContentZoomingCalculatorTest, VerticalShift) {
|
||||||
MP_ASSERT_OK(runner->Run());
|
MP_ASSERT_OK(runner->Run());
|
||||||
// 1000px * .1 offset + 1000*.1*.1 shift = 170
|
// 1000px * .1 offset + 1000*.1*.1 shift = 170
|
||||||
CheckCropRect(150, 170, 111, 111, 0,
|
CheckCropRect(150, 170, 111, 111, 0,
|
||||||
runner->Outputs().Tag("CROP_RECT").packets);
|
runner->Outputs().Tag(kCropRectTag).packets);
|
||||||
|
CheckCropRectFloats(150 / 1000.0, 170 / 1000.0, 111 / 1000.0, 111 / 1000.0, 0,
|
||||||
|
runner->Outputs());
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(ContentZoomingCalculatorTest, HorizontalShift) {
|
TEST(ContentZoomingCalculatorTest, HorizontalShift) {
|
||||||
|
@ -614,7 +661,9 @@ TEST(ContentZoomingCalculatorTest, HorizontalShift) {
|
||||||
MP_ASSERT_OK(runner->Run());
|
MP_ASSERT_OK(runner->Run());
|
||||||
// 1000px * .1 offset + 1000*.1*.1 shift = 170
|
// 1000px * .1 offset + 1000*.1*.1 shift = 170
|
||||||
CheckCropRect(170, 150, 111, 111, 0,
|
CheckCropRect(170, 150, 111, 111, 0,
|
||||||
runner->Outputs().Tag("CROP_RECT").packets);
|
runner->Outputs().Tag(kCropRectTag).packets);
|
||||||
|
CheckCropRectFloats(170 / 1000.0, 150 / 1000.0, 111 / 1000.0, 111 / 1000.0, 0,
|
||||||
|
runner->Outputs());
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(ContentZoomingCalculatorTest, ShiftOutsideBounds) {
|
TEST(ContentZoomingCalculatorTest, ShiftOutsideBounds) {
|
||||||
|
@ -627,14 +676,14 @@ TEST(ContentZoomingCalculatorTest, ShiftOutsideBounds) {
|
||||||
AddDetection(cv::Rect_<float>(.9, 0, .1, .1), 0, runner.get());
|
AddDetection(cv::Rect_<float>(.9, 0, .1, .1), 0, runner.get());
|
||||||
MP_ASSERT_OK(runner->Run());
|
MP_ASSERT_OK(runner->Run());
|
||||||
CheckCropRect(944, 56, 111, 111, 0,
|
CheckCropRect(944, 56, 111, 111, 0,
|
||||||
runner->Outputs().Tag("CROP_RECT").packets);
|
runner->Outputs().Tag(kCropRectTag).packets);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(ContentZoomingCalculatorTest, EmptySize) {
|
TEST(ContentZoomingCalculatorTest, EmptySize) {
|
||||||
auto config = ParseTextProtoOrDie<CalculatorGraphConfig::Node>(kConfigD);
|
auto config = ParseTextProtoOrDie<CalculatorGraphConfig::Node>(kConfigD);
|
||||||
auto runner = ::absl::make_unique<CalculatorRunner>(config);
|
auto runner = ::absl::make_unique<CalculatorRunner>(config);
|
||||||
MP_ASSERT_OK(runner->Run());
|
MP_ASSERT_OK(runner->Run());
|
||||||
ASSERT_EQ(runner->Outputs().Tag("CROP_RECT").packets.size(), 0);
|
ASSERT_EQ(runner->Outputs().Tag(kCropRectTag).packets.size(), 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(ContentZoomingCalculatorTest, EmptyDetections) {
|
TEST(ContentZoomingCalculatorTest, EmptyDetections) {
|
||||||
|
@ -642,11 +691,11 @@ TEST(ContentZoomingCalculatorTest, EmptyDetections) {
|
||||||
auto runner = ::absl::make_unique<CalculatorRunner>(config);
|
auto runner = ::absl::make_unique<CalculatorRunner>(config);
|
||||||
auto input_size = ::absl::make_unique<std::pair<int, int>>(1000, 1000);
|
auto input_size = ::absl::make_unique<std::pair<int, int>>(1000, 1000);
|
||||||
runner->MutableInputs()
|
runner->MutableInputs()
|
||||||
->Tag("VIDEO_SIZE")
|
->Tag(kVideoSizeTag)
|
||||||
.packets.push_back(Adopt(input_size.release()).At(Timestamp(0)));
|
.packets.push_back(Adopt(input_size.release()).At(Timestamp(0)));
|
||||||
MP_ASSERT_OK(runner->Run());
|
MP_ASSERT_OK(runner->Run());
|
||||||
CheckCropRect(500, 500, 1000, 1000, 0,
|
CheckCropRect(500, 500, 1000, 1000, 0,
|
||||||
runner->Outputs().Tag("CROP_RECT").packets);
|
runner->Outputs().Tag(kCropRectTag).packets);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(ContentZoomingCalculatorTest, ResolutionChangeStationary) {
|
TEST(ContentZoomingCalculatorTest, ResolutionChangeStationary) {
|
||||||
|
@ -658,9 +707,9 @@ TEST(ContentZoomingCalculatorTest, ResolutionChangeStationary) {
|
||||||
runner.get());
|
runner.get());
|
||||||
MP_ASSERT_OK(runner->Run());
|
MP_ASSERT_OK(runner->Run());
|
||||||
CheckCropRect(500, 500, 222, 222, 0,
|
CheckCropRect(500, 500, 222, 222, 0,
|
||||||
runner->Outputs().Tag("CROP_RECT").packets);
|
runner->Outputs().Tag(kCropRectTag).packets);
|
||||||
CheckCropRect(500 * 0.5, 500 * 0.5, 222 * 0.5, 222 * 0.5, 1,
|
CheckCropRect(500 * 0.5, 500 * 0.5, 222 * 0.5, 222 * 0.5, 1,
|
||||||
runner->Outputs().Tag("CROP_RECT").packets);
|
runner->Outputs().Tag(kCropRectTag).packets);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(ContentZoomingCalculatorTest, ResolutionChangeStationaryWithCache) {
|
TEST(ContentZoomingCalculatorTest, ResolutionChangeStationaryWithCache) {
|
||||||
|
@ -669,23 +718,23 @@ TEST(ContentZoomingCalculatorTest, ResolutionChangeStationaryWithCache) {
|
||||||
config.add_input_side_packet("STATE_CACHE:state_cache");
|
config.add_input_side_packet("STATE_CACHE:state_cache");
|
||||||
{
|
{
|
||||||
auto runner = ::absl::make_unique<CalculatorRunner>(config);
|
auto runner = ::absl::make_unique<CalculatorRunner>(config);
|
||||||
runner->MutableSidePackets()->Tag("STATE_CACHE") = MakePacket<
|
runner->MutableSidePackets()->Tag(kStateCacheTag) = MakePacket<
|
||||||
mediapipe::autoflip::ContentZoomingCalculatorStateCacheType*>(&cache);
|
mediapipe::autoflip::ContentZoomingCalculatorStateCacheType*>(&cache);
|
||||||
AddDetectionFrameSize(cv::Rect_<float>(.4, .4, .2, .2), 0, 1000, 1000,
|
AddDetectionFrameSize(cv::Rect_<float>(.4, .4, .2, .2), 0, 1000, 1000,
|
||||||
runner.get());
|
runner.get());
|
||||||
MP_ASSERT_OK(runner->Run());
|
MP_ASSERT_OK(runner->Run());
|
||||||
CheckCropRect(500, 500, 222, 222, 0,
|
CheckCropRect(500, 500, 222, 222, 0,
|
||||||
runner->Outputs().Tag("CROP_RECT").packets);
|
runner->Outputs().Tag(kCropRectTag).packets);
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
auto runner = ::absl::make_unique<CalculatorRunner>(config);
|
auto runner = ::absl::make_unique<CalculatorRunner>(config);
|
||||||
runner->MutableSidePackets()->Tag("STATE_CACHE") = MakePacket<
|
runner->MutableSidePackets()->Tag(kStateCacheTag) = MakePacket<
|
||||||
mediapipe::autoflip::ContentZoomingCalculatorStateCacheType*>(&cache);
|
mediapipe::autoflip::ContentZoomingCalculatorStateCacheType*>(&cache);
|
||||||
AddDetectionFrameSize(cv::Rect_<float>(.4, .4, .2, .2), 1, 500, 500,
|
AddDetectionFrameSize(cv::Rect_<float>(.4, .4, .2, .2), 1, 500, 500,
|
||||||
runner.get());
|
runner.get());
|
||||||
MP_ASSERT_OK(runner->Run());
|
MP_ASSERT_OK(runner->Run());
|
||||||
CheckCropRect(500 * 0.5, 500 * 0.5, 222 * 0.5, 222 * 0.5, 0,
|
CheckCropRect(500 * 0.5, 500 * 0.5, 222 * 0.5, 222 * 0.5, 0,
|
||||||
runner->Outputs().Tag("CROP_RECT").packets);
|
runner->Outputs().Tag(kCropRectTag).packets);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -700,11 +749,11 @@ TEST(ContentZoomingCalculatorTest, ResolutionChangeZooming) {
|
||||||
runner.get());
|
runner.get());
|
||||||
MP_ASSERT_OK(runner->Run());
|
MP_ASSERT_OK(runner->Run());
|
||||||
CheckCropRect(500, 500, 888, 888, 0,
|
CheckCropRect(500, 500, 888, 888, 0,
|
||||||
runner->Outputs().Tag("CROP_RECT").packets);
|
runner->Outputs().Tag(kCropRectTag).packets);
|
||||||
CheckCropRect(500, 500, 588, 588, 1,
|
CheckCropRect(500, 500, 588, 588, 1,
|
||||||
runner->Outputs().Tag("CROP_RECT").packets);
|
runner->Outputs().Tag(kCropRectTag).packets);
|
||||||
CheckCropRect(500 * 0.5, 500 * 0.5, 288 * 0.5, 288 * 0.5, 2,
|
CheckCropRect(500 * 0.5, 500 * 0.5, 288 * 0.5, 288 * 0.5, 2,
|
||||||
runner->Outputs().Tag("CROP_RECT").packets);
|
runner->Outputs().Tag(kCropRectTag).packets);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(ContentZoomingCalculatorTest, ResolutionChangeZoomingWithCache) {
|
TEST(ContentZoomingCalculatorTest, ResolutionChangeZoomingWithCache) {
|
||||||
|
@ -713,18 +762,18 @@ TEST(ContentZoomingCalculatorTest, ResolutionChangeZoomingWithCache) {
|
||||||
config.add_input_side_packet("STATE_CACHE:state_cache");
|
config.add_input_side_packet("STATE_CACHE:state_cache");
|
||||||
{
|
{
|
||||||
auto runner = ::absl::make_unique<CalculatorRunner>(config);
|
auto runner = ::absl::make_unique<CalculatorRunner>(config);
|
||||||
runner->MutableSidePackets()->Tag("STATE_CACHE") = MakePacket<
|
runner->MutableSidePackets()->Tag(kStateCacheTag) = MakePacket<
|
||||||
mediapipe::autoflip::ContentZoomingCalculatorStateCacheType*>(&cache);
|
mediapipe::autoflip::ContentZoomingCalculatorStateCacheType*>(&cache);
|
||||||
AddDetectionFrameSize(cv::Rect_<float>(.1, .1, .8, .8), 0, 1000, 1000,
|
AddDetectionFrameSize(cv::Rect_<float>(.1, .1, .8, .8), 0, 1000, 1000,
|
||||||
runner.get());
|
runner.get());
|
||||||
MP_ASSERT_OK(runner->Run());
|
MP_ASSERT_OK(runner->Run());
|
||||||
CheckCropRect(500, 500, 888, 888, 0,
|
CheckCropRect(500, 500, 888, 888, 0,
|
||||||
runner->Outputs().Tag("CROP_RECT").packets);
|
runner->Outputs().Tag(kCropRectTag).packets);
|
||||||
}
|
}
|
||||||
// The second runner should just resume based on state from the first runner.
|
// The second runner should just resume based on state from the first runner.
|
||||||
{
|
{
|
||||||
auto runner = ::absl::make_unique<CalculatorRunner>(config);
|
auto runner = ::absl::make_unique<CalculatorRunner>(config);
|
||||||
runner->MutableSidePackets()->Tag("STATE_CACHE") = MakePacket<
|
runner->MutableSidePackets()->Tag(kStateCacheTag) = MakePacket<
|
||||||
mediapipe::autoflip::ContentZoomingCalculatorStateCacheType*>(&cache);
|
mediapipe::autoflip::ContentZoomingCalculatorStateCacheType*>(&cache);
|
||||||
AddDetectionFrameSize(cv::Rect_<float>(.4, .4, .2, .2), 1000000, 1000, 1000,
|
AddDetectionFrameSize(cv::Rect_<float>(.4, .4, .2, .2), 1000000, 1000, 1000,
|
||||||
runner.get());
|
runner.get());
|
||||||
|
@ -732,9 +781,9 @@ TEST(ContentZoomingCalculatorTest, ResolutionChangeZoomingWithCache) {
|
||||||
runner.get());
|
runner.get());
|
||||||
MP_ASSERT_OK(runner->Run());
|
MP_ASSERT_OK(runner->Run());
|
||||||
CheckCropRect(500, 500, 588, 588, 0,
|
CheckCropRect(500, 500, 588, 588, 0,
|
||||||
runner->Outputs().Tag("CROP_RECT").packets);
|
runner->Outputs().Tag(kCropRectTag).packets);
|
||||||
CheckCropRect(500 * 0.5, 500 * 0.5, 288 * 0.5, 288 * 0.5, 1,
|
CheckCropRect(500 * 0.5, 500 * 0.5, 288 * 0.5, 288 * 0.5, 1,
|
||||||
runner->Outputs().Tag("CROP_RECT").packets);
|
runner->Outputs().Tag(kCropRectTag).packets);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -749,7 +798,7 @@ TEST(ContentZoomingCalculatorTest, MaxZoomValue) {
|
||||||
MP_ASSERT_OK(runner->Run());
|
MP_ASSERT_OK(runner->Run());
|
||||||
// 55/60 * 1000 = 916
|
// 55/60 * 1000 = 916
|
||||||
CheckCropRect(500, 500, 916, 916, 0,
|
CheckCropRect(500, 500, 916, 916, 0,
|
||||||
runner->Outputs().Tag("CROP_RECT").packets);
|
runner->Outputs().Tag(kCropRectTag).packets);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(ContentZoomingCalculatorTest, MaxZoomValueOverride) {
|
TEST(ContentZoomingCalculatorTest, MaxZoomValueOverride) {
|
||||||
|
@ -772,11 +821,11 @@ TEST(ContentZoomingCalculatorTest, MaxZoomValueOverride) {
|
||||||
// Max. 133% zoomed in means min. (100/133) ~ 75% of height left: ~360
|
// Max. 133% zoomed in means min. (100/133) ~ 75% of height left: ~360
|
||||||
// Max. 166% zoomed in means min. (100/166) ~ 60% of height left: ~430
|
// Max. 166% zoomed in means min. (100/166) ~ 60% of height left: ~430
|
||||||
CheckCropRect(320, 240, 480, 360, 0,
|
CheckCropRect(320, 240, 480, 360, 0,
|
||||||
runner->Outputs().Tag("CROP_RECT").packets);
|
runner->Outputs().Tag(kCropRectTag).packets);
|
||||||
CheckCropRect(640, 360, 769, 433, 2,
|
CheckCropRect(640, 360, 769, 433, 2,
|
||||||
runner->Outputs().Tag("CROP_RECT").packets);
|
runner->Outputs().Tag(kCropRectTag).packets);
|
||||||
CheckCropRect(320, 240, 480, 360, 3,
|
CheckCropRect(320, 240, 480, 360, 3,
|
||||||
runner->Outputs().Tag("CROP_RECT").packets);
|
runner->Outputs().Tag(kCropRectTag).packets);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(ContentZoomingCalculatorTest, MaxZoomOutValue) {
|
TEST(ContentZoomingCalculatorTest, MaxZoomOutValue) {
|
||||||
|
@ -795,9 +844,9 @@ TEST(ContentZoomingCalculatorTest, MaxZoomOutValue) {
|
||||||
MP_ASSERT_OK(runner->Run());
|
MP_ASSERT_OK(runner->Run());
|
||||||
// 55/60 * 1000 = 916
|
// 55/60 * 1000 = 916
|
||||||
CheckCropRect(500, 500, 950, 950, 0,
|
CheckCropRect(500, 500, 950, 950, 0,
|
||||||
runner->Outputs().Tag("CROP_RECT").packets);
|
runner->Outputs().Tag(kCropRectTag).packets);
|
||||||
CheckCropRect(500, 500, 1000, 1000, 2,
|
CheckCropRect(500, 500, 1000, 1000, 2,
|
||||||
runner->Outputs().Tag("CROP_RECT").packets);
|
runner->Outputs().Tag(kCropRectTag).packets);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(ContentZoomingCalculatorTest, StartZoomedOut) {
|
TEST(ContentZoomingCalculatorTest, StartZoomedOut) {
|
||||||
|
@ -816,13 +865,13 @@ TEST(ContentZoomingCalculatorTest, StartZoomedOut) {
|
||||||
runner.get());
|
runner.get());
|
||||||
MP_ASSERT_OK(runner->Run());
|
MP_ASSERT_OK(runner->Run());
|
||||||
CheckCropRect(500, 500, 1000, 1000, 0,
|
CheckCropRect(500, 500, 1000, 1000, 0,
|
||||||
runner->Outputs().Tag("CROP_RECT").packets);
|
runner->Outputs().Tag(kCropRectTag).packets);
|
||||||
CheckCropRect(500, 500, 880, 880, 1,
|
CheckCropRect(500, 500, 880, 880, 1,
|
||||||
runner->Outputs().Tag("CROP_RECT").packets);
|
runner->Outputs().Tag(kCropRectTag).packets);
|
||||||
CheckCropRect(500, 500, 760, 760, 2,
|
CheckCropRect(500, 500, 760, 760, 2,
|
||||||
runner->Outputs().Tag("CROP_RECT").packets);
|
runner->Outputs().Tag(kCropRectTag).packets);
|
||||||
CheckCropRect(500, 500, 655, 655, 3,
|
CheckCropRect(500, 500, 655, 655, 3,
|
||||||
runner->Outputs().Tag("CROP_RECT").packets);
|
runner->Outputs().Tag(kCropRectTag).packets);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(ContentZoomingCalculatorTest, AnimateToFirstRect) {
|
TEST(ContentZoomingCalculatorTest, AnimateToFirstRect) {
|
||||||
|
@ -844,15 +893,15 @@ TEST(ContentZoomingCalculatorTest, AnimateToFirstRect) {
|
||||||
runner.get());
|
runner.get());
|
||||||
MP_ASSERT_OK(runner->Run());
|
MP_ASSERT_OK(runner->Run());
|
||||||
CheckCropRect(500, 500, 1000, 1000, 0,
|
CheckCropRect(500, 500, 1000, 1000, 0,
|
||||||
runner->Outputs().Tag("CROP_RECT").packets);
|
runner->Outputs().Tag(kCropRectTag).packets);
|
||||||
CheckCropRect(500, 500, 1000, 1000, 1,
|
CheckCropRect(500, 500, 1000, 1000, 1,
|
||||||
runner->Outputs().Tag("CROP_RECT").packets);
|
runner->Outputs().Tag(kCropRectTag).packets);
|
||||||
CheckCropRect(500, 500, 470, 470, 2,
|
CheckCropRect(500, 500, 470, 470, 2,
|
||||||
runner->Outputs().Tag("CROP_RECT").packets);
|
runner->Outputs().Tag(kCropRectTag).packets);
|
||||||
CheckCropRect(500, 500, 222, 222, 3,
|
CheckCropRect(500, 500, 222, 222, 3,
|
||||||
runner->Outputs().Tag("CROP_RECT").packets);
|
runner->Outputs().Tag(kCropRectTag).packets);
|
||||||
CheckCropRect(500, 500, 222, 222, 4,
|
CheckCropRect(500, 500, 222, 222, 4,
|
||||||
runner->Outputs().Tag("CROP_RECT").packets);
|
runner->Outputs().Tag(kCropRectTag).packets);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(ContentZoomingCalculatorTest, CanControlAnimation) {
|
TEST(ContentZoomingCalculatorTest, CanControlAnimation) {
|
||||||
|
@ -879,15 +928,15 @@ TEST(ContentZoomingCalculatorTest, CanControlAnimation) {
|
||||||
runner.get(), {.animated_zoom = false});
|
runner.get(), {.animated_zoom = false});
|
||||||
MP_ASSERT_OK(runner->Run());
|
MP_ASSERT_OK(runner->Run());
|
||||||
CheckCropRect(500, 500, 1000, 1000, 0,
|
CheckCropRect(500, 500, 1000, 1000, 0,
|
||||||
runner->Outputs().Tag("CROP_RECT").packets);
|
runner->Outputs().Tag(kCropRectTag).packets);
|
||||||
CheckCropRect(500, 500, 1000, 1000, 1,
|
CheckCropRect(500, 500, 1000, 1000, 1,
|
||||||
runner->Outputs().Tag("CROP_RECT").packets);
|
runner->Outputs().Tag(kCropRectTag).packets);
|
||||||
CheckCropRect(500, 500, 470, 470, 2,
|
CheckCropRect(500, 500, 470, 470, 2,
|
||||||
runner->Outputs().Tag("CROP_RECT").packets);
|
runner->Outputs().Tag(kCropRectTag).packets);
|
||||||
CheckCropRect(500, 500, 222, 222, 3,
|
CheckCropRect(500, 500, 222, 222, 3,
|
||||||
runner->Outputs().Tag("CROP_RECT").packets);
|
runner->Outputs().Tag(kCropRectTag).packets);
|
||||||
CheckCropRect(500, 500, 222, 222, 4,
|
CheckCropRect(500, 500, 222, 222, 4,
|
||||||
runner->Outputs().Tag("CROP_RECT").packets);
|
runner->Outputs().Tag(kCropRectTag).packets);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(ContentZoomingCalculatorTest, DoesNotAnimateIfDisabledViaInput) {
|
TEST(ContentZoomingCalculatorTest, DoesNotAnimateIfDisabledViaInput) {
|
||||||
|
@ -907,11 +956,11 @@ TEST(ContentZoomingCalculatorTest, DoesNotAnimateIfDisabledViaInput) {
|
||||||
runner.get(), {.animated_zoom = false});
|
runner.get(), {.animated_zoom = false});
|
||||||
MP_ASSERT_OK(runner->Run());
|
MP_ASSERT_OK(runner->Run());
|
||||||
CheckCropRect(500, 500, 1000, 1000, 0,
|
CheckCropRect(500, 500, 1000, 1000, 0,
|
||||||
runner->Outputs().Tag("CROP_RECT").packets);
|
runner->Outputs().Tag(kCropRectTag).packets);
|
||||||
CheckCropRect(500, 500, 880, 880, 1,
|
CheckCropRect(500, 500, 880, 880, 1,
|
||||||
runner->Outputs().Tag("CROP_RECT").packets);
|
runner->Outputs().Tag(kCropRectTag).packets);
|
||||||
CheckCropRect(500, 500, 760, 760, 2,
|
CheckCropRect(500, 500, 760, 760, 2,
|
||||||
runner->Outputs().Tag("CROP_RECT").packets);
|
runner->Outputs().Tag(kCropRectTag).packets);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(ContentZoomingCalculatorTest, ProvidesZeroSizeFirstRectWithoutDetections) {
|
TEST(ContentZoomingCalculatorTest, ProvidesZeroSizeFirstRectWithoutDetections) {
|
||||||
|
@ -920,13 +969,13 @@ TEST(ContentZoomingCalculatorTest, ProvidesZeroSizeFirstRectWithoutDetections) {
|
||||||
|
|
||||||
auto input_size = ::absl::make_unique<std::pair<int, int>>(1000, 1000);
|
auto input_size = ::absl::make_unique<std::pair<int, int>>(1000, 1000);
|
||||||
runner->MutableInputs()
|
runner->MutableInputs()
|
||||||
->Tag("VIDEO_SIZE")
|
->Tag(kVideoSizeTag)
|
||||||
.packets.push_back(Adopt(input_size.release()).At(Timestamp(0)));
|
.packets.push_back(Adopt(input_size.release()).At(Timestamp(0)));
|
||||||
|
|
||||||
MP_ASSERT_OK(runner->Run());
|
MP_ASSERT_OK(runner->Run());
|
||||||
|
|
||||||
const std::vector<Packet>& output_packets =
|
const std::vector<Packet>& output_packets =
|
||||||
runner->Outputs().Tag("FIRST_CROP_RECT").packets;
|
runner->Outputs().Tag(kFirstCropRectTag).packets;
|
||||||
ASSERT_EQ(output_packets.size(), 1);
|
ASSERT_EQ(output_packets.size(), 1);
|
||||||
const auto& rect = output_packets[0].Get<mediapipe::NormalizedRect>();
|
const auto& rect = output_packets[0].Get<mediapipe::NormalizedRect>();
|
||||||
EXPECT_EQ(rect.x_center(), 0);
|
EXPECT_EQ(rect.x_center(), 0);
|
||||||
|
@ -951,7 +1000,7 @@ TEST(ContentZoomingCalculatorTest, ProvidesConstantFirstRect) {
|
||||||
runner.get());
|
runner.get());
|
||||||
MP_ASSERT_OK(runner->Run());
|
MP_ASSERT_OK(runner->Run());
|
||||||
const std::vector<Packet>& output_packets =
|
const std::vector<Packet>& output_packets =
|
||||||
runner->Outputs().Tag("FIRST_CROP_RECT").packets;
|
runner->Outputs().Tag(kFirstCropRectTag).packets;
|
||||||
ASSERT_EQ(output_packets.size(), 4);
|
ASSERT_EQ(output_packets.size(), 4);
|
||||||
const auto& first_rect = output_packets[0].Get<mediapipe::NormalizedRect>();
|
const auto& first_rect = output_packets[0].Get<mediapipe::NormalizedRect>();
|
||||||
EXPECT_NEAR(first_rect.x_center(), 0.5, 0.05);
|
EXPECT_NEAR(first_rect.x_center(), 0.5, 0.05);
|
||||||
|
|
|
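Note on the pattern above: every hunk in this test file replaces a repeated string-literal stream tag (e.g. "CROP_RECT", "BORDERS") with a file-local constexpr char constant (kCropRectTag, kBordersTag, ...) declared near the top of the file. Behavior is unchanged; the gain is that a misspelled tag now fails to compile instead of silently addressing a non-existent stream. A minimal, self-contained sketch of that failure mode, using a plain map as a stand-in for the tag-indexed packet collections and made-up stream names that are not part of this commit:

#include <cassert>
#include <map>
#include <string>
#include <vector>

// Stand-in for a tag-indexed packet collection; the real MediaPipe type is
// CalculatorRunner::StreamContentsSet.
using PacketList = std::vector<int>;

// One definition of the tag, reused at every call site.
constexpr char kCropRectTag[] = "CROP_RECT";

int main() {
  std::map<std::string, PacketList> outputs;
  outputs[kCropRectTag].push_back(42);     // producer side
  assert(!outputs[kCropRectTag].empty());  // consumer side uses the same symbol
  // outputs[kCropRecTag] would be a compile-time error (unknown identifier),
  // while outputs["CROP_REC"] would just create and read an empty stream.
  return 0;
}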
@@ -64,7 +64,7 @@ message FaceBoxAdjusterCalculatorOptions {

   // Max value of head motion (max of current or history) to be considered still
   // stable.
-  optional float head_motion_threshold = 14 [default = 10.0];
+  optional float head_motion_threshold = 14 [default = 360.0];

   // The max amount of time to use an old eye distance when the face look angle
   // is unstable.
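The only functional change in the hunk above is the default for head_motion_threshold, raised from 10.0 to 360.0, so a graph that leaves the option unset now treats essentially any head motion as stable. As with any optional proto field, the new default is simply what the generated accessor returns when the field is not set; a sketch of that behavior (the header path and package here are assumptions, not taken from the commit):

// Sketch only: include path and namespace are assumed.
#include <iostream>

#include "face_box_adjuster_calculator.pb.h"

int main() {
  mediapipe::autoflip::FaceBoxAdjusterCalculatorOptions options;
  // Field unset: the accessor reports the proto default, now 360.0.
  std::cout << options.head_motion_threshold() << std::endl;
  options.set_head_motion_threshold(10.0);  // a graph can still opt back in
  std::cout << options.head_motion_threshold() << std::endl;
  return 0;
}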
@@ -32,6 +32,10 @@
 namespace mediapipe {
 namespace autoflip {

+constexpr char kRegionsTag[] = "REGIONS";
+constexpr char kFacesTag[] = "FACES";
+constexpr char kVideoTag[] = "VIDEO";
+
 // This calculator converts detected faces to SalientRegion protos that can be
 // used for downstream processing. Each SalientRegion is scored using image
 // cues. Scoring can be controlled through
@@ -80,17 +84,17 @@ FaceToRegionCalculator::FaceToRegionCalculator() {}

 absl::Status FaceToRegionCalculator::GetContract(
     mediapipe::CalculatorContract* cc) {
-  if (cc->Inputs().HasTag("VIDEO")) {
-    cc->Inputs().Tag("VIDEO").Set<ImageFrame>();
+  if (cc->Inputs().HasTag(kVideoTag)) {
+    cc->Inputs().Tag(kVideoTag).Set<ImageFrame>();
   }
-  cc->Inputs().Tag("FACES").Set<std::vector<mediapipe::Detection>>();
-  cc->Outputs().Tag("REGIONS").Set<DetectionSet>();
+  cc->Inputs().Tag(kFacesTag).Set<std::vector<mediapipe::Detection>>();
+  cc->Outputs().Tag(kRegionsTag).Set<DetectionSet>();
   return absl::OkStatus();
 }

 absl::Status FaceToRegionCalculator::Open(mediapipe::CalculatorContext* cc) {
   options_ = cc->Options<FaceToRegionCalculatorOptions>();
-  if (!cc->Inputs().HasTag("VIDEO")) {
+  if (!cc->Inputs().HasTag(kVideoTag)) {
     RET_CHECK(!options_.use_visual_scorer())
         << "VIDEO input must be provided when using visual_scorer.";
     RET_CHECK(!options_.export_individual_face_landmarks())
@@ -146,24 +150,24 @@ void FaceToRegionCalculator::ExtendSalientRegionWithPoint(
 }

 absl::Status FaceToRegionCalculator::Process(mediapipe::CalculatorContext* cc) {
-  if (cc->Inputs().HasTag("VIDEO") &&
-      cc->Inputs().Tag("VIDEO").Value().IsEmpty()) {
+  if (cc->Inputs().HasTag(kVideoTag) &&
+      cc->Inputs().Tag(kVideoTag).Value().IsEmpty()) {
     return mediapipe::UnknownErrorBuilder(MEDIAPIPE_LOC)
            << "No VIDEO input at time " << cc->InputTimestamp().Seconds();
   }

   cv::Mat frame;
-  if (cc->Inputs().HasTag("VIDEO")) {
+  if (cc->Inputs().HasTag(kVideoTag)) {
     frame = mediapipe::formats::MatView(
-        &cc->Inputs().Tag("VIDEO").Get<ImageFrame>());
+        &cc->Inputs().Tag(kVideoTag).Get<ImageFrame>());
     frame_width_ = frame.cols;
     frame_height_ = frame.rows;
   }

   auto region_set = ::absl::make_unique<DetectionSet>();
-  if (!cc->Inputs().Tag("FACES").Value().IsEmpty()) {
+  if (!cc->Inputs().Tag(kFacesTag).Value().IsEmpty()) {
     const auto& input_faces =
-        cc->Inputs().Tag("FACES").Get<std::vector<mediapipe::Detection>>();
+        cc->Inputs().Tag(kFacesTag).Get<std::vector<mediapipe::Detection>>();

     for (const auto& input_face : input_faces) {
       RET_CHECK(input_face.location_data().format() ==
@@ -276,7 +280,9 @@ absl::Status FaceToRegionCalculator::Process(mediapipe::CalculatorContext* cc) {
       }
     }
   }
-  cc->Outputs().Tag("REGIONS").Add(region_set.release(), cc->InputTimestamp());
+  cc->Outputs()
+      .Tag(kRegionsTag)
+      .Add(region_set.release(), cc->InputTimestamp());

   return absl::OkStatus();
 }
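The calculator diff above also shows how FaceToRegionCalculator treats VIDEO as an optional input: GetContract only declares the stream when the graph actually connects it, and Open/Process re-check HasTag before reading it. A condensed sketch of that optional-input contract pattern, reusing the tag constants introduced above (the calculator name below is hypothetical and not part of the commit; the framework calls match the ones visible in the diff):

#include <vector>

#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/detection.pb.h"
#include "mediapipe/framework/formats/image_frame.h"

namespace mediapipe {
namespace autoflip {
namespace {
constexpr char kVideoTag[] = "VIDEO";
constexpr char kFacesTag[] = "FACES";
}  // namespace

// Hypothetical example calculator illustrating the optional-input pattern.
class OptionalVideoExampleCalculator : public CalculatorBase {
 public:
  static absl::Status GetContract(CalculatorContract* cc) {
    if (cc->Inputs().HasTag(kVideoTag)) {
      // Declared only when a VIDEO stream is wired to this node.
      cc->Inputs().Tag(kVideoTag).Set<ImageFrame>();
    }
    cc->Inputs().Tag(kFacesTag).Set<std::vector<mediapipe::Detection>>();
    return absl::OkStatus();
  }

  absl::Status Process(CalculatorContext* cc) override {
    if (cc->Inputs().HasTag(kVideoTag) &&
        !cc->Inputs().Tag(kVideoTag).Value().IsEmpty()) {
      const auto& frame = cc->Inputs().Tag(kVideoTag).Get<ImageFrame>();
      (void)frame;  // e.g. cache frame.Width() / frame.Height() here
    }
    return absl::OkStatus();
  }
};
REGISTER_CALCULATOR(OptionalVideoExampleCalculator);

}  // namespace autoflip
}  // namespace mediapipe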
@@ -33,6 +33,10 @@ namespace mediapipe {
 namespace autoflip {
 namespace {

+constexpr char kRegionsTag[] = "REGIONS";
+constexpr char kFacesTag[] = "FACES";
+constexpr char kVideoTag[] = "VIDEO";
+
 const char kConfig[] = R"(
     calculator: "FaceToRegionCalculator"
     input_stream: "VIDEO:frames"
@@ -100,7 +104,7 @@ void SetInputs(const std::vector<std::string>& faces, const bool include_video,
   if (include_video) {
     auto input_frame =
         ::absl::make_unique<ImageFrame>(ImageFormat::SRGB, 800, 600);
-    runner->MutableInputs()->Tag("VIDEO").packets.push_back(
+    runner->MutableInputs()->Tag(kVideoTag).packets.push_back(
         Adopt(input_frame.release()).At(Timestamp::PostStream()));
   }
   // Setup two faces as input.
@@ -109,7 +113,7 @@ void SetInputs(const std::vector<std::string>& faces, const bool include_video,
   for (const auto& face : faces) {
     input_faces->push_back(ParseTextProtoOrDie<Detection>(face));
   }
-  runner->MutableInputs()->Tag("FACES").packets.push_back(
+  runner->MutableInputs()->Tag(kFacesTag).packets.push_back(
       Adopt(input_faces.release()).At(Timestamp::PostStream()));
 }

@@ -144,7 +148,7 @@ TEST(FaceToRegionCalculatorTest, FaceFullTypeSize) {

   // Check the output regions.
   const std::vector<Packet>& output_packets =
-      runner->Outputs().Tag("REGIONS").packets;
+      runner->Outputs().Tag(kRegionsTag).packets;
   ASSERT_EQ(1, output_packets.size());

   const auto& regions = output_packets[0].Get<DetectionSet>();
@@ -177,7 +181,7 @@ TEST(FaceToRegionCalculatorTest, FaceLandmarksTypeSize) {

   // Check the output regions.
   const std::vector<Packet>& output_packets =
-      runner->Outputs().Tag("REGIONS").packets;
+      runner->Outputs().Tag(kRegionsTag).packets;
   ASSERT_EQ(1, output_packets.size());

   const auto& regions = output_packets[0].Get<DetectionSet>();
@@ -208,7 +212,7 @@ TEST(FaceToRegionCalculatorTest, FaceLandmarksBox) {

   // Check the output regions.
   const std::vector<Packet>& output_packets =
-      runner->Outputs().Tag("REGIONS").packets;
+      runner->Outputs().Tag(kRegionsTag).packets;
   ASSERT_EQ(1, output_packets.size());

   const auto& regions = output_packets[0].Get<DetectionSet>();
@@ -243,7 +247,7 @@ TEST(FaceToRegionCalculatorTest, FaceScore) {

   // Check the output regions.
   const std::vector<Packet>& output_packets =
-      runner->Outputs().Tag("REGIONS").packets;
+      runner->Outputs().Tag(kRegionsTag).packets;
   ASSERT_EQ(1, output_packets.size());
   const auto& regions = output_packets[0].Get<DetectionSet>();
   ASSERT_EQ(1, regions.detections().size());
@@ -292,7 +296,7 @@ TEST(FaceToRegionCalculatorTest, FaceNoVideoPass) {

   // Check the output regions.
   const std::vector<Packet>& output_packets =
-      runner->Outputs().Tag("REGIONS").packets;
+      runner->Outputs().Tag(kRegionsTag).packets;
   ASSERT_EQ(1, output_packets.size());

   const auto& regions = output_packets[0].Get<DetectionSet>();
@@ -52,6 +52,9 @@ LocalizationToRegionCalculator::LocalizationToRegionCalculator() {}
 namespace {

+constexpr char kRegionsTag[] = "REGIONS";
+constexpr char kDetectionsTag[] = "DETECTIONS";
+
 // Converts an object detection to a autoflip SignalType. Returns true if the
 // std::string label has a autoflip label.
 bool MatchType(const std::string& label, SignalType* type) {
@@ -86,8 +89,8 @@ void FillSalientRegion(const mediapipe::Detection& detection,

 absl::Status LocalizationToRegionCalculator::GetContract(
     mediapipe::CalculatorContract* cc) {
-  cc->Inputs().Tag("DETECTIONS").Set<std::vector<mediapipe::Detection>>();
-  cc->Outputs().Tag("REGIONS").Set<DetectionSet>();
+  cc->Inputs().Tag(kDetectionsTag).Set<std::vector<mediapipe::Detection>>();
+  cc->Outputs().Tag(kRegionsTag).Set<DetectionSet>();
   return absl::OkStatus();
 }

@@ -101,7 +104,7 @@ absl::Status LocalizationToRegionCalculator::Open(
 absl::Status LocalizationToRegionCalculator::Process(
     mediapipe::CalculatorContext* cc) {
   const auto& annotations =
-      cc->Inputs().Tag("DETECTIONS").Get<std::vector<mediapipe::Detection>>();
+      cc->Inputs().Tag(kDetectionsTag).Get<std::vector<mediapipe::Detection>>();
   auto regions = ::absl::make_unique<DetectionSet>();
   for (const auto& detection : annotations) {
     RET_CHECK_EQ(detection.label().size(), 1)
@@ -118,7 +121,7 @@ absl::Status LocalizationToRegionCalculator::Process(
     }
   }

-  cc->Outputs().Tag("REGIONS").Add(regions.release(), cc->InputTimestamp());
+  cc->Outputs().Tag(kRegionsTag).Add(regions.release(), cc->InputTimestamp());
   return absl::OkStatus();
 }
@@ -31,6 +31,9 @@ namespace mediapipe {
 namespace autoflip {
 namespace {

+constexpr char kRegionsTag[] = "REGIONS";
+constexpr char kDetectionsTag[] = "DETECTIONS";
+
 const char kConfig[] = R"(
     calculator: "LocalizationToRegionCalculator"
     input_stream: "DETECTIONS:detections"
@@ -81,7 +84,7 @@ void SetInputs(CalculatorRunner* runner,
     inputs->push_back(ParseTextProtoOrDie<Detection>(detection));
   }
   runner->MutableInputs()
-      ->Tag("DETECTIONS")
+      ->Tag(kDetectionsTag)
       .packets.push_back(Adopt(inputs.release()).At(Timestamp::PostStream()));
 }

@@ -109,7 +112,7 @@ TEST(LocalizationToRegionCalculatorTest, StandardTypes) {

   // Check the output regions.
   const std::vector<Packet>& output_packets =
-      runner->Outputs().Tag("REGIONS").packets;
+      runner->Outputs().Tag(kRegionsTag).packets;
   ASSERT_EQ(1, output_packets.size());
   const auto& regions = output_packets[0].Get<DetectionSet>();
   ASSERT_EQ(2, regions.detections().size());
@@ -137,7 +140,7 @@ TEST(LocalizationToRegionCalculatorTest, AllTypes) {

   // Check the output regions.
   const std::vector<Packet>& output_packets =
-      runner->Outputs().Tag("REGIONS").packets;
+      runner->Outputs().Tag(kRegionsTag).packets;
   ASSERT_EQ(1, output_packets.size());
   const auto& regions = output_packets[0].Get<DetectionSet>();
   ASSERT_EQ(3, regions.detections().size());
@@ -153,7 +156,7 @@ TEST(LocalizationToRegionCalculatorTest, BothTypes) {

   // Check the output regions.
   const std::vector<Packet>& output_packets =
-      runner->Outputs().Tag("REGIONS").packets;
+      runner->Outputs().Tag(kRegionsTag).packets;
   ASSERT_EQ(1, output_packets.size());
   const auto& regions = output_packets[0].Get<DetectionSet>();
   ASSERT_EQ(5, regions.detections().size());
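All of the test diffs above follow the same CalculatorRunner flow: parse a single-node CalculatorGraphConfig::Node from textproto, push Adopt()-ed payloads at explicit timestamps onto the node's input streams, call Run(), then assert on the output packets. The autoflip tests address streams by tag (now via the kFooTag constants); the sketch below instead uses the stock PassThroughCalculator with indexed streams so it stays self-contained, and the include paths follow the usual MediaPipe test headers (assumed, not taken from this commit):

#include <memory>

#include "absl/memory/memory.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_runner.h"
#include "mediapipe/framework/port/gtest.h"
#include "mediapipe/framework/port/parse_text_proto.h"
#include "mediapipe/framework/port/status_matchers.h"

namespace mediapipe {

TEST(CalculatorRunnerPatternTest, DrivesOneNode) {
  // One node, one input stream, one output stream.
  auto config = ParseTextProtoOrDie<CalculatorGraphConfig::Node>(R"pb(
    calculator: "PassThroughCalculator"
    input_stream: "input"
    output_stream: "output"
  )pb");
  auto runner = ::absl::make_unique<CalculatorRunner>(config);

  // Adopt() transfers ownership of the payload into a Packet at Timestamp(0).
  auto value = ::absl::make_unique<int>(7);
  runner->MutableInputs()->Index(0).packets.push_back(
      Adopt(value.release()).At(Timestamp(0)));

  MP_ASSERT_OK(runner->Run());
  const auto& packets = runner->Outputs().Index(0).packets;
  ASSERT_EQ(packets.size(), 1);
  EXPECT_EQ(packets[0].Get<int>(), 7);
}

}  // namespace mediapipe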
@@ -34,6 +34,23 @@ namespace mediapipe {
 namespace autoflip {
 namespace {

+constexpr char kFramingDetectionsVizFramesTag[] =
+    "FRAMING_DETECTIONS_VIZ_FRAMES";
+constexpr char kExternalRenderingFullVidTag[] = "EXTERNAL_RENDERING_FULL_VID";
+constexpr char kExternalRenderingPerFrameTag[] = "EXTERNAL_RENDERING_PER_FRAME";
+constexpr char kCroppingSummaryTag[] = "CROPPING_SUMMARY";
+constexpr char kSalientPointFrameVizFramesTag[] =
+    "SALIENT_POINT_FRAME_VIZ_FRAMES";
+constexpr char kKeyFrameCropRegionVizFramesTag[] =
+    "KEY_FRAME_CROP_REGION_VIZ_FRAMES";
+constexpr char kCroppedFramesTag[] = "CROPPED_FRAMES";
+constexpr char kShotBoundariesTag[] = "SHOT_BOUNDARIES";
+constexpr char kStaticFeaturesTag[] = "STATIC_FEATURES";
+constexpr char kVideoSizeTag[] = "VIDEO_SIZE";
+constexpr char kVideoFramesTag[] = "VIDEO_FRAMES";
+constexpr char kDetectionFeaturesTag[] = "DETECTION_FEATURES";
+constexpr char kKeyFramesTag[] = "KEY_FRAMES";
+
 using ::testing::HasSubstr;

 constexpr char kConfig[] = R"(
@@ -241,10 +258,10 @@ void AddKeyFrameFeatures(const int64 time_ms, const int key_frame_width,
                          const int key_frame_height, bool randomize,
                          CalculatorRunner::StreamContentsSet* inputs) {
   Timestamp timestamp(time_ms);
-  if (inputs->HasTag("KEY_FRAMES")) {
+  if (inputs->HasTag(kKeyFramesTag)) {
     auto key_frame = MakeImageFrameFromColor(GetRandomColor(), key_frame_width,
                                              key_frame_height);
-    inputs->Tag("KEY_FRAMES")
+    inputs->Tag(kKeyFramesTag)
         .packets.push_back(Adopt(key_frame.release()).At(timestamp));
   }
   if (randomize) {
@@ -252,11 +269,11 @@ void AddKeyFrameFeatures(const int64 time_ms, const int key_frame_width,
         kMinNumDetections, kMaxNumDetections)(GetGen());
     auto detections =
         MakeDetections(num_detections, key_frame_width, key_frame_height);
-    inputs->Tag("DETECTION_FEATURES")
+    inputs->Tag(kDetectionFeaturesTag)
        .packets.push_back(Adopt(detections.release()).At(timestamp));
   } else {
     auto detections = MakeCenterDetection(key_frame_width, key_frame_height);
-    inputs->Tag("DETECTION_FEATURES")
+    inputs->Tag(kDetectionFeaturesTag)
        .packets.push_back(Adopt(detections.release()).At(timestamp));
   }
 }
@@ -272,19 +289,19 @@ void AddScene(const int start_frame_index, const int num_scene_frames,
   int64 time_ms = start_frame_index * kTimestampDiff;
   for (int i = 0; i < num_scene_frames; ++i) {
     Timestamp timestamp(time_ms);
-    if (inputs->HasTag("VIDEO_FRAMES")) {
+    if (inputs->HasTag(kVideoFramesTag)) {
       auto frame =
           MakeImageFrameFromColor(GetRandomColor(), frame_width, frame_height);
-      inputs->Tag("VIDEO_FRAMES")
+      inputs->Tag(kVideoFramesTag)
           .packets.push_back(Adopt(frame.release()).At(timestamp));
     } else {
       auto input_size =
           ::absl::make_unique<std::pair<int, int>>(frame_width, frame_height);
-      inputs->Tag("VIDEO_SIZE")
+      inputs->Tag(kVideoSizeTag)
           .packets.push_back(Adopt(input_size.release()).At(timestamp));
     }
     auto static_features = absl::make_unique<StaticFeatures>();
-    inputs->Tag("STATIC_FEATURES")
+    inputs->Tag(kStaticFeaturesTag)
         .packets.push_back(Adopt(static_features.release()).At(timestamp));
     if (DownSampleRate == 1) {
       AddKeyFrameFeatures(time_ms, key_frame_width, key_frame_height, false,
@@ -294,7 +311,7 @@ void AddScene(const int start_frame_index, const int num_scene_frames,
                           inputs);
     }
     if (i == num_scene_frames - 1) {  // adds shot boundary
-      inputs->Tag("SHOT_BOUNDARIES")
+      inputs->Tag(kShotBoundariesTag)
          .packets.push_back(Adopt(new bool(true)).At(Timestamp(time_ms)));
     }
     time_ms += kTimestampDiff;
@@ -306,8 +323,8 @@ void AddScene(const int start_frame_index, const int num_scene_frames,
 void CheckCroppedFrames(const CalculatorRunner& runner, const int num_frames,
                         const int target_width, const int target_height) {
   const auto& outputs = runner.Outputs();
-  EXPECT_TRUE(outputs.HasTag("CROPPED_FRAMES"));
-  const auto& cropped_frames_outputs = outputs.Tag("CROPPED_FRAMES").packets;
+  EXPECT_TRUE(outputs.HasTag(kCroppedFramesTag));
+  const auto& cropped_frames_outputs = outputs.Tag(kCroppedFramesTag).packets;
   EXPECT_EQ(cropped_frames_outputs.size(), num_frames);
   for (int i = 0; i < num_frames; ++i) {
     const auto& cropped_frame = cropped_frames_outputs[i].Get<ImageFrame>();
@@ -392,23 +409,23 @@ TEST(SceneCroppingCalculatorTest, OutputsDebugStreams) {

   MP_EXPECT_OK(runner->Run());
   const auto& outputs = runner->Outputs();
-  EXPECT_TRUE(outputs.HasTag("KEY_FRAME_CROP_REGION_VIZ_FRAMES"));
-  EXPECT_TRUE(outputs.HasTag("SALIENT_POINT_FRAME_VIZ_FRAMES"));
-  EXPECT_TRUE(outputs.HasTag("CROPPING_SUMMARY"));
-  EXPECT_TRUE(outputs.HasTag("EXTERNAL_RENDERING_PER_FRAME"));
-  EXPECT_TRUE(outputs.HasTag("EXTERNAL_RENDERING_FULL_VID"));
-  EXPECT_TRUE(outputs.HasTag("FRAMING_DETECTIONS_VIZ_FRAMES"));
+  EXPECT_TRUE(outputs.HasTag(kKeyFrameCropRegionVizFramesTag));
+  EXPECT_TRUE(outputs.HasTag(kSalientPointFrameVizFramesTag));
+  EXPECT_TRUE(outputs.HasTag(kCroppingSummaryTag));
+  EXPECT_TRUE(outputs.HasTag(kExternalRenderingPerFrameTag));
+  EXPECT_TRUE(outputs.HasTag(kExternalRenderingFullVidTag));
+  EXPECT_TRUE(outputs.HasTag(kFramingDetectionsVizFramesTag));
   const auto& crop_region_viz_frames_outputs =
-      outputs.Tag("KEY_FRAME_CROP_REGION_VIZ_FRAMES").packets;
+      outputs.Tag(kKeyFrameCropRegionVizFramesTag).packets;
   const auto& salient_point_viz_frames_outputs =
-      outputs.Tag("SALIENT_POINT_FRAME_VIZ_FRAMES").packets;
-  const auto& summary_output = outputs.Tag("CROPPING_SUMMARY").packets;
+      outputs.Tag(kSalientPointFrameVizFramesTag).packets;
+  const auto& summary_output = outputs.Tag(kCroppingSummaryTag).packets;
   const auto& ext_render_per_frame =
-      outputs.Tag("EXTERNAL_RENDERING_PER_FRAME").packets;
+      outputs.Tag(kExternalRenderingPerFrameTag).packets;
   const auto& ext_render_full_vid =
-      outputs.Tag("EXTERNAL_RENDERING_FULL_VID").packets;
+      outputs.Tag(kExternalRenderingFullVidTag).packets;
   const auto& framing_viz_frames_output =
-      outputs.Tag("FRAMING_DETECTIONS_VIZ_FRAMES").packets;
+      outputs.Tag(kFramingDetectionsVizFramesTag).packets;
   EXPECT_EQ(crop_region_viz_frames_outputs.size(), num_frames);
   EXPECT_EQ(salient_point_viz_frames_outputs.size(), num_frames);
|
EXPECT_EQ(salient_point_viz_frames_outputs.size(), num_frames);
|
||||||
EXPECT_EQ(framing_viz_frames_output.size(), num_frames);
|
EXPECT_EQ(framing_viz_frames_output.size(), num_frames);
|
||||||
|
@ -597,7 +614,7 @@ TEST(SceneCroppingCalculatorTest, ProducesEvenFrameSize) {
|
||||||
kKeyFrameHeight, kDownSampleRate, runner->MutableInputs());
|
kKeyFrameHeight, kDownSampleRate, runner->MutableInputs());
|
||||||
MP_EXPECT_OK(runner->Run());
|
MP_EXPECT_OK(runner->Run());
|
||||||
const auto& output_frame = runner->Outputs()
|
const auto& output_frame = runner->Outputs()
|
||||||
.Tag("CROPPED_FRAMES")
|
.Tag(kCroppedFramesTag)
|
||||||
.packets[0]
|
.packets[0]
|
||||||
.Get<ImageFrame>();
|
.Get<ImageFrame>();
|
||||||
EXPECT_EQ(output_frame.Width() % 2, 0);
|
EXPECT_EQ(output_frame.Width() % 2, 0);
|
||||||
|
@ -646,7 +663,7 @@ TEST(SceneCroppingCalculatorTest, PadsWithSolidColorFromStaticFeatures) {
|
||||||
Timestamp timestamp(time_ms);
|
Timestamp timestamp(time_ms);
|
||||||
auto frame =
|
auto frame =
|
||||||
MakeImageFrameFromColor(GetRandomColor(), input_width, input_height);
|
MakeImageFrameFromColor(GetRandomColor(), input_width, input_height);
|
||||||
inputs->Tag("VIDEO_FRAMES")
|
inputs->Tag(kVideoFramesTag)
|
||||||
.packets.push_back(Adopt(frame.release()).At(timestamp));
|
.packets.push_back(Adopt(frame.release()).At(timestamp));
|
||||||
if (i % static_features_downsample_rate == 0) {
|
if (i % static_features_downsample_rate == 0) {
|
||||||
auto static_features = absl::make_unique<StaticFeatures>();
|
auto static_features = absl::make_unique<StaticFeatures>();
|
||||||
|
@ -657,7 +674,7 @@ TEST(SceneCroppingCalculatorTest, PadsWithSolidColorFromStaticFeatures) {
|
||||||
color->set_g(green);
|
color->set_g(green);
|
||||||
color->set_b(red);
|
color->set_b(red);
|
||||||
}
|
}
|
||||||
inputs->Tag("STATIC_FEATURES")
|
inputs->Tag(kStaticFeaturesTag)
|
||||||
.packets.push_back(Adopt(static_features.release()).At(timestamp));
|
.packets.push_back(Adopt(static_features.release()).At(timestamp));
|
||||||
num_static_features++;
|
num_static_features++;
|
||||||
}
|
}
|
||||||
|
@ -672,7 +689,7 @@ TEST(SceneCroppingCalculatorTest, PadsWithSolidColorFromStaticFeatures) {
|
||||||
location->set_y(0);
|
location->set_y(0);
|
||||||
location->set_width(80);
|
location->set_width(80);
|
||||||
location->set_height(input_height);
|
location->set_height(input_height);
|
||||||
inputs->Tag("DETECTION_FEATURES")
|
inputs->Tag(kDetectionFeaturesTag)
|
||||||
.packets.push_back(Adopt(detections.release()).At(timestamp));
|
.packets.push_back(Adopt(detections.release()).At(timestamp));
|
||||||
}
|
}
|
||||||
time_ms += kTimestampDiff;
|
time_ms += kTimestampDiff;
|
||||||
|
@ -683,7 +700,7 @@ TEST(SceneCroppingCalculatorTest, PadsWithSolidColorFromStaticFeatures) {
|
||||||
// Checks that the top and bottom borders indeed have the background color.
|
// Checks that the top and bottom borders indeed have the background color.
|
||||||
const int border_size = 37;
|
const int border_size = 37;
|
||||||
const auto& cropped_frames_outputs =
|
const auto& cropped_frames_outputs =
|
||||||
runner->Outputs().Tag("CROPPED_FRAMES").packets;
|
runner->Outputs().Tag(kCroppedFramesTag).packets;
|
||||||
EXPECT_EQ(cropped_frames_outputs.size(), kSceneSize);
|
EXPECT_EQ(cropped_frames_outputs.size(), kSceneSize);
|
||||||
for (int i = 0; i < kSceneSize; ++i) {
|
for (int i = 0; i < kSceneSize; ++i) {
|
||||||
const auto& cropped_frame = cropped_frames_outputs[i].Get<ImageFrame>();
|
const auto& cropped_frame = cropped_frames_outputs[i].Get<ImageFrame>();
|
||||||
|
@ -727,7 +744,7 @@ TEST(SceneCroppingCalculatorTest, RemovesStaticBorders) {
|
||||||
auto mat = formats::MatView(frame.get());
|
auto mat = formats::MatView(frame.get());
|
||||||
mat(top_border_rect) = border_color;
|
mat(top_border_rect) = border_color;
|
||||||
mat(bottom_border_rect) = border_color;
|
mat(bottom_border_rect) = border_color;
|
||||||
inputs->Tag("VIDEO_FRAMES")
|
inputs->Tag(kVideoFramesTag)
|
||||||
.packets.push_back(Adopt(frame.release()).At(timestamp));
|
.packets.push_back(Adopt(frame.release()).At(timestamp));
|
||||||
// Set borders in static features.
|
// Set borders in static features.
|
||||||
auto static_features = absl::make_unique<StaticFeatures>();
|
auto static_features = absl::make_unique<StaticFeatures>();
|
||||||
|
@ -737,11 +754,11 @@ TEST(SceneCroppingCalculatorTest, RemovesStaticBorders) {
|
||||||
auto* bottom_part = static_features->add_border();
|
auto* bottom_part = static_features->add_border();
|
||||||
bottom_part->set_relative_position(Border::BOTTOM);
|
bottom_part->set_relative_position(Border::BOTTOM);
|
||||||
bottom_part->mutable_border_position()->set_height(bottom_border_size);
|
bottom_part->mutable_border_position()->set_height(bottom_border_size);
|
||||||
inputs->Tag("STATIC_FEATURES")
|
inputs->Tag(kStaticFeaturesTag)
|
||||||
.packets.push_back(Adopt(static_features.release()).At(timestamp));
|
.packets.push_back(Adopt(static_features.release()).At(timestamp));
|
||||||
// Add empty detections to ensure no padding is used.
|
// Add empty detections to ensure no padding is used.
|
||||||
auto detections = absl::make_unique<DetectionSet>();
|
auto detections = absl::make_unique<DetectionSet>();
|
||||||
inputs->Tag("DETECTION_FEATURES")
|
inputs->Tag(kDetectionFeaturesTag)
|
||||||
.packets.push_back(Adopt(detections.release()).At(timestamp));
|
.packets.push_back(Adopt(detections.release()).At(timestamp));
|
||||||
|
|
||||||
MP_EXPECT_OK(runner->Run());
|
MP_EXPECT_OK(runner->Run());
|
||||||
|
@ -749,7 +766,7 @@ TEST(SceneCroppingCalculatorTest, RemovesStaticBorders) {
|
||||||
// Checks that the top and bottom borders are removed. Each frame should have
|
// Checks that the top and bottom borders are removed. Each frame should have
|
||||||
// solid color equal to frame color.
|
// solid color equal to frame color.
|
||||||
const auto& cropped_frames_outputs =
|
const auto& cropped_frames_outputs =
|
||||||
runner->Outputs().Tag("CROPPED_FRAMES").packets;
|
runner->Outputs().Tag(kCroppedFramesTag).packets;
|
||||||
EXPECT_EQ(cropped_frames_outputs.size(), 1);
|
EXPECT_EQ(cropped_frames_outputs.size(), 1);
|
||||||
const auto& cropped_frame = cropped_frames_outputs[0].Get<ImageFrame>();
|
const auto& cropped_frame = cropped_frames_outputs[0].Get<ImageFrame>();
|
||||||
const auto cropped_mat = formats::MatView(&cropped_frame);
|
const auto cropped_mat = formats::MatView(&cropped_frame);
|
||||||
|
@ -775,7 +792,7 @@ TEST(SceneCroppingCalculatorTest, OutputsCropMessagePolyPath) {
|
||||||
MP_EXPECT_OK(runner->Run());
|
MP_EXPECT_OK(runner->Run());
|
||||||
const auto& outputs = runner->Outputs();
|
const auto& outputs = runner->Outputs();
|
||||||
const auto& ext_render_per_frame =
|
const auto& ext_render_per_frame =
|
||||||
outputs.Tag("EXTERNAL_RENDERING_PER_FRAME").packets;
|
outputs.Tag(kExternalRenderingPerFrameTag).packets;
|
||||||
EXPECT_EQ(ext_render_per_frame.size(), num_frames);
|
EXPECT_EQ(ext_render_per_frame.size(), num_frames);
|
||||||
|
|
||||||
for (int i = 0; i < num_frames - 1; ++i) {
|
for (int i = 0; i < num_frames - 1; ++i) {
|
||||||
|
@ -813,7 +830,7 @@ TEST(SceneCroppingCalculatorTest, OutputsCropMessageKinematicPath) {
|
||||||
MP_EXPECT_OK(runner->Run());
|
MP_EXPECT_OK(runner->Run());
|
||||||
const auto& outputs = runner->Outputs();
|
const auto& outputs = runner->Outputs();
|
||||||
const auto& ext_render_per_frame =
|
const auto& ext_render_per_frame =
|
||||||
outputs.Tag("EXTERNAL_RENDERING_PER_FRAME").packets;
|
outputs.Tag(kExternalRenderingPerFrameTag).packets;
|
||||||
EXPECT_EQ(ext_render_per_frame.size(), num_frames);
|
EXPECT_EQ(ext_render_per_frame.size(), num_frames);
|
||||||
|
|
||||||
for (int i = 0; i < num_frames - 1; ++i) {
|
for (int i = 0; i < num_frames - 1; ++i) {
|
||||||
|
@ -846,7 +863,7 @@ TEST(SceneCroppingCalculatorTest, OutputsCropMessagePolyPathNoVideo) {
|
||||||
MP_EXPECT_OK(runner->Run());
|
MP_EXPECT_OK(runner->Run());
|
||||||
const auto& outputs = runner->Outputs();
|
const auto& outputs = runner->Outputs();
|
||||||
const auto& ext_render_per_frame =
|
const auto& ext_render_per_frame =
|
||||||
outputs.Tag("EXTERNAL_RENDERING_PER_FRAME").packets;
|
outputs.Tag(kExternalRenderingPerFrameTag).packets;
|
||||||
EXPECT_EQ(ext_render_per_frame.size(), num_frames);
|
EXPECT_EQ(ext_render_per_frame.size(), num_frames);
|
||||||
|
|
||||||
for (int i = 0; i < num_frames - 1; ++i) {
|
for (int i = 0; i < num_frames - 1; ++i) {
|
||||||
|
@ -886,7 +903,7 @@ TEST(SceneCroppingCalculatorTest, OutputsCropMessageKinematicPathNoVideo) {
|
||||||
MP_EXPECT_OK(runner->Run());
|
MP_EXPECT_OK(runner->Run());
|
||||||
const auto& outputs = runner->Outputs();
|
const auto& outputs = runner->Outputs();
|
||||||
const auto& ext_render_per_frame =
|
const auto& ext_render_per_frame =
|
||||||
outputs.Tag("EXTERNAL_RENDERING_PER_FRAME").packets;
|
outputs.Tag(kExternalRenderingPerFrameTag).packets;
|
||||||
EXPECT_EQ(ext_render_per_frame.size(), num_frames);
|
EXPECT_EQ(ext_render_per_frame.size(), num_frames);
|
||||||
|
|
||||||
for (int i = 0; i < num_frames - 1; ++i) {
|
for (int i = 0; i < num_frames - 1; ++i) {
|
||||||
|
|
|
@ -43,6 +43,9 @@ namespace mediapipe {
|
||||||
namespace autoflip {
|
namespace autoflip {
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
|
constexpr char kIsShotChangeTag[] = "IS_SHOT_CHANGE";
|
||||||
|
constexpr char kVideoTag[] = "VIDEO";
|
||||||
|
|
||||||
const char kConfig[] = R"(
|
const char kConfig[] = R"(
|
||||||
calculator: "ShotBoundaryCalculator"
|
calculator: "ShotBoundaryCalculator"
|
||||||
input_stream: "VIDEO:camera_frames"
|
input_stream: "VIDEO:camera_frames"
|
||||||
|
@ -70,7 +73,7 @@ void AddFrames(const int number_of_frames, const std::set<int>& skip_frames,
|
||||||
if (skip_frames.count(i) < 1) {
|
if (skip_frames.count(i) < 1) {
|
||||||
sub_image.copyTo(frame_area);
|
sub_image.copyTo(frame_area);
|
||||||
}
|
}
|
||||||
runner->MutableInputs()->Tag("VIDEO").packets.push_back(
|
runner->MutableInputs()->Tag(kVideoTag).packets.push_back(
|
||||||
Adopt(input_frame.release()).At(Timestamp(i * 1000000)));
|
Adopt(input_frame.release()).At(Timestamp(i * 1000000)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -97,7 +100,7 @@ TEST(ShotBoundaryCalculatorTest, NoShotChange) {
|
||||||
|
|
||||||
AddFrames(10, {}, runner.get());
|
AddFrames(10, {}, runner.get());
|
||||||
MP_ASSERT_OK(runner->Run());
|
MP_ASSERT_OK(runner->Run());
|
||||||
CheckOutput(10, {}, runner->Outputs().Tag("IS_SHOT_CHANGE").packets);
|
CheckOutput(10, {}, runner->Outputs().Tag(kIsShotChangeTag).packets);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(ShotBoundaryCalculatorTest, ShotChangeSingle) {
|
TEST(ShotBoundaryCalculatorTest, ShotChangeSingle) {
|
||||||
|
@ -110,7 +113,7 @@ TEST(ShotBoundaryCalculatorTest, ShotChangeSingle) {
|
||||||
|
|
||||||
AddFrames(20, {10}, runner.get());
|
AddFrames(20, {10}, runner.get());
|
||||||
MP_ASSERT_OK(runner->Run());
|
MP_ASSERT_OK(runner->Run());
|
||||||
CheckOutput(20, {10}, runner->Outputs().Tag("IS_SHOT_CHANGE").packets);
|
CheckOutput(20, {10}, runner->Outputs().Tag(kIsShotChangeTag).packets);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(ShotBoundaryCalculatorTest, ShotChangeDouble) {
|
TEST(ShotBoundaryCalculatorTest, ShotChangeDouble) {
|
||||||
|
@ -123,7 +126,7 @@ TEST(ShotBoundaryCalculatorTest, ShotChangeDouble) {
|
||||||
|
|
||||||
AddFrames(20, {14, 17}, runner.get());
|
AddFrames(20, {14, 17}, runner.get());
|
||||||
MP_ASSERT_OK(runner->Run());
|
MP_ASSERT_OK(runner->Run());
|
||||||
CheckOutput(20, {14, 17}, runner->Outputs().Tag("IS_SHOT_CHANGE").packets);
|
CheckOutput(20, {14, 17}, runner->Outputs().Tag(kIsShotChangeTag).packets);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(ShotBoundaryCalculatorTest, ShotChangeFiltered) {
|
TEST(ShotBoundaryCalculatorTest, ShotChangeFiltered) {
|
||||||
|
@ -140,7 +143,7 @@ TEST(ShotBoundaryCalculatorTest, ShotChangeFiltered) {
|
||||||
|
|
||||||
AddFrames(24, {16, 19}, runner.get());
|
AddFrames(24, {16, 19}, runner.get());
|
||||||
MP_ASSERT_OK(runner->Run());
|
MP_ASSERT_OK(runner->Run());
|
||||||
CheckOutput(24, {16}, runner->Outputs().Tag("IS_SHOT_CHANGE").packets);
|
CheckOutput(24, {16}, runner->Outputs().Tag(kIsShotChangeTag).packets);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(ShotBoundaryCalculatorTest, ShotChangeSingleOnOnChange) {
|
TEST(ShotBoundaryCalculatorTest, ShotChangeSingleOnOnChange) {
|
||||||
|
@ -153,7 +156,7 @@ TEST(ShotBoundaryCalculatorTest, ShotChangeSingleOnOnChange) {
|
||||||
|
|
||||||
AddFrames(20, {15}, runner.get());
|
AddFrames(20, {15}, runner.get());
|
||||||
MP_ASSERT_OK(runner->Run());
|
MP_ASSERT_OK(runner->Run());
|
||||||
auto output_packets = runner->Outputs().Tag("IS_SHOT_CHANGE").packets;
|
auto output_packets = runner->Outputs().Tag(kIsShotChangeTag).packets;
|
||||||
ASSERT_EQ(output_packets.size(), 1);
|
ASSERT_EQ(output_packets.size(), 1);
|
||||||
ASSERT_EQ(output_packets[0].Get<bool>(), true);
|
ASSERT_EQ(output_packets[0].Get<bool>(), true);
|
||||||
ASSERT_EQ(output_packets[0].Timestamp().Value(), 15000000);
|
ASSERT_EQ(output_packets[0].Timestamp().Value(), 15000000);
|
||||||
|
|
|
@ -32,6 +32,9 @@ namespace mediapipe {
|
||||||
namespace autoflip {
|
namespace autoflip {
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
|
constexpr char kOutputTag[] = "OUTPUT";
|
||||||
|
constexpr char kIsShotBoundaryTag[] = "IS_SHOT_BOUNDARY";
|
||||||
|
|
||||||
const char kConfigA[] = R"(
|
const char kConfigA[] = R"(
|
||||||
calculator: "SignalFusingCalculator"
|
calculator: "SignalFusingCalculator"
|
||||||
input_stream: "scene_change"
|
input_stream: "scene_change"
|
||||||
|
@ -160,7 +163,7 @@ TEST(SignalFusingCalculatorTest, TwoInputShotLabeledTags) {
|
||||||
|
|
||||||
auto input_shot = absl::make_unique<bool>(false);
|
auto input_shot = absl::make_unique<bool>(false);
|
||||||
runner->MutableInputs()
|
runner->MutableInputs()
|
||||||
->Tag("IS_SHOT_BOUNDARY")
|
->Tag(kIsShotBoundaryTag)
|
||||||
.packets.push_back(Adopt(input_shot.release()).At(Timestamp(0)));
|
.packets.push_back(Adopt(input_shot.release()).At(Timestamp(0)));
|
||||||
|
|
||||||
auto input_face =
|
auto input_face =
|
||||||
|
@ -200,7 +203,7 @@ TEST(SignalFusingCalculatorTest, TwoInputShotLabeledTags) {
|
||||||
MP_ASSERT_OK(runner->Run());
|
MP_ASSERT_OK(runner->Run());
|
||||||
|
|
||||||
const std::vector<Packet>& output_packets =
|
const std::vector<Packet>& output_packets =
|
||||||
runner->Outputs().Tag("OUTPUT").packets;
|
runner->Outputs().Tag(kOutputTag).packets;
|
||||||
const auto& detection_set = output_packets[0].Get<DetectionSet>();
|
const auto& detection_set = output_packets[0].Get<DetectionSet>();
|
||||||
|
|
||||||
ASSERT_EQ(detection_set.detections().size(), 4);
|
ASSERT_EQ(detection_set.detections().size(), 4);
|
||||||
|
@ -251,7 +254,7 @@ TEST(SignalFusingCalculatorTest, TwoInputNoShotLabeledTags) {
|
||||||
MP_ASSERT_OK(runner->Run());
|
MP_ASSERT_OK(runner->Run());
|
||||||
|
|
||||||
const std::vector<Packet>& output_packets =
|
const std::vector<Packet>& output_packets =
|
||||||
runner->Outputs().Tag("OUTPUT").packets;
|
runner->Outputs().Tag(kOutputTag).packets;
|
||||||
const auto& detection_set = output_packets[0].Get<DetectionSet>();
|
const auto& detection_set = output_packets[0].Get<DetectionSet>();
|
||||||
|
|
||||||
ASSERT_EQ(detection_set.detections().size(), 4);
|
ASSERT_EQ(detection_set.detections().size(), 4);
|
||||||
|
|
|
@ -31,6 +31,9 @@ namespace mediapipe {
|
||||||
namespace autoflip {
|
namespace autoflip {
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
|
constexpr char kOutputFramesTag[] = "OUTPUT_FRAMES";
|
||||||
|
constexpr char kInputFramesTag[] = "INPUT_FRAMES";
|
||||||
|
|
||||||
// Default configuration of the calculator.
|
// Default configuration of the calculator.
|
||||||
CalculatorGraphConfig::Node GetCalculatorNode(
|
CalculatorGraphConfig::Node GetCalculatorNode(
|
||||||
const std::string& fail_if_any, const std::string& extra_options = "") {
|
const std::string& fail_if_any, const std::string& extra_options = "") {
|
||||||
|
@ -65,10 +68,10 @@ TEST(VideoFilterCalculatorTest, UpperBoundNoPass) {
|
||||||
ImageFormat::SRGB, kFixedWidth,
|
ImageFormat::SRGB, kFixedWidth,
|
||||||
static_cast<int>(kFixedWidth / kAspectRatio), 16);
|
static_cast<int>(kFixedWidth / kAspectRatio), 16);
|
||||||
runner->MutableInputs()
|
runner->MutableInputs()
|
||||||
->Tag("INPUT_FRAMES")
|
->Tag(kInputFramesTag)
|
||||||
.packets.push_back(Adopt(input_frame.release()).At(Timestamp(1000)));
|
.packets.push_back(Adopt(input_frame.release()).At(Timestamp(1000)));
|
||||||
MP_ASSERT_OK(runner->Run());
|
MP_ASSERT_OK(runner->Run());
|
||||||
const auto& output_packet = runner->Outputs().Tag("OUTPUT_FRAMES").packets;
|
const auto& output_packet = runner->Outputs().Tag(kOutputFramesTag).packets;
|
||||||
EXPECT_TRUE(output_packet.empty());
|
EXPECT_TRUE(output_packet.empty());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -88,10 +91,10 @@ TEST(VerticalFrameRemovalCalculatorTest, UpperBoundPass) {
|
||||||
auto input_frame =
|
auto input_frame =
|
||||||
::absl::make_unique<ImageFrame>(ImageFormat::SRGB, kWidth, kHeight, 16);
|
::absl::make_unique<ImageFrame>(ImageFormat::SRGB, kWidth, kHeight, 16);
|
||||||
runner->MutableInputs()
|
runner->MutableInputs()
|
||||||
->Tag("INPUT_FRAMES")
|
->Tag(kInputFramesTag)
|
||||||
.packets.push_back(Adopt(input_frame.release()).At(Timestamp(1000)));
|
.packets.push_back(Adopt(input_frame.release()).At(Timestamp(1000)));
|
||||||
MP_ASSERT_OK(runner->Run());
|
MP_ASSERT_OK(runner->Run());
|
||||||
const auto& output_packet = runner->Outputs().Tag("OUTPUT_FRAMES").packets;
|
const auto& output_packet = runner->Outputs().Tag(kOutputFramesTag).packets;
|
||||||
EXPECT_EQ(1, output_packet.size());
|
EXPECT_EQ(1, output_packet.size());
|
||||||
auto& output_frame = output_packet[0].Get<ImageFrame>();
|
auto& output_frame = output_packet[0].Get<ImageFrame>();
|
||||||
EXPECT_EQ(kWidth, output_frame.Width());
|
EXPECT_EQ(kWidth, output_frame.Width());
|
||||||
|
@ -114,10 +117,10 @@ TEST(VideoFilterCalculatorTest, LowerBoundNoPass) {
|
||||||
ImageFormat::SRGB, kFixedWidth,
|
ImageFormat::SRGB, kFixedWidth,
|
||||||
static_cast<int>(kFixedWidth / kAspectRatio), 16);
|
static_cast<int>(kFixedWidth / kAspectRatio), 16);
|
||||||
runner->MutableInputs()
|
runner->MutableInputs()
|
||||||
->Tag("INPUT_FRAMES")
|
->Tag(kInputFramesTag)
|
||||||
.packets.push_back(Adopt(input_frame.release()).At(Timestamp(1000)));
|
.packets.push_back(Adopt(input_frame.release()).At(Timestamp(1000)));
|
||||||
MP_ASSERT_OK(runner->Run());
|
MP_ASSERT_OK(runner->Run());
|
||||||
const auto& output_packet = runner->Outputs().Tag("OUTPUT_FRAMES").packets;
|
const auto& output_packet = runner->Outputs().Tag(kOutputFramesTag).packets;
|
||||||
EXPECT_TRUE(output_packet.empty());
|
EXPECT_TRUE(output_packet.empty());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -137,10 +140,10 @@ TEST(VerticalFrameRemovalCalculatorTest, LowerBoundPass) {
|
||||||
auto input_frame =
|
auto input_frame =
|
||||||
::absl::make_unique<ImageFrame>(ImageFormat::SRGB, kWidth, kHeight, 16);
|
::absl::make_unique<ImageFrame>(ImageFormat::SRGB, kWidth, kHeight, 16);
|
||||||
runner->MutableInputs()
|
runner->MutableInputs()
|
||||||
->Tag("INPUT_FRAMES")
|
->Tag(kInputFramesTag)
|
||||||
.packets.push_back(Adopt(input_frame.release()).At(Timestamp(1000)));
|
.packets.push_back(Adopt(input_frame.release()).At(Timestamp(1000)));
|
||||||
MP_ASSERT_OK(runner->Run());
|
MP_ASSERT_OK(runner->Run());
|
||||||
const auto& output_packet = runner->Outputs().Tag("OUTPUT_FRAMES").packets;
|
const auto& output_packet = runner->Outputs().Tag(kOutputFramesTag).packets;
|
||||||
EXPECT_EQ(1, output_packet.size());
|
EXPECT_EQ(1, output_packet.size());
|
||||||
auto& output_frame = output_packet[0].Get<ImageFrame>();
|
auto& output_frame = output_packet[0].Get<ImageFrame>();
|
||||||
EXPECT_EQ(kWidth, output_frame.Width());
|
EXPECT_EQ(kWidth, output_frame.Width());
|
||||||
|
@ -164,7 +167,7 @@ TEST(VerticalFrameRemovalCalculatorTest, OutputError) {
|
||||||
ImageFormat::SRGB, kFixedWidth,
|
ImageFormat::SRGB, kFixedWidth,
|
||||||
static_cast<int>(kFixedWidth / kAspectRatio), 16);
|
static_cast<int>(kFixedWidth / kAspectRatio), 16);
|
||||||
runner->MutableInputs()
|
runner->MutableInputs()
|
||||||
->Tag("INPUT_FRAMES")
|
->Tag(kInputFramesTag)
|
||||||
.packets.push_back(Adopt(input_frame.release()).At(Timestamp(1000)));
|
.packets.push_back(Adopt(input_frame.release()).At(Timestamp(1000)));
|
||||||
absl::Status status = runner->Run();
|
absl::Status status = runner->Run();
|
||||||
EXPECT_EQ(status.code(), absl::StatusCode::kUnknown);
|
EXPECT_EQ(status.code(), absl::StatusCode::kUnknown);
|
||||||
|
|
|
@ -1,5 +1,7 @@
|
||||||
#include "mediapipe/examples/desktop/autoflip/quality/kinematic_path_solver.h"
|
#include "mediapipe/examples/desktop/autoflip/quality/kinematic_path_solver.h"
|
||||||
|
|
||||||
|
constexpr float kMinVelocity = 0.5;
|
||||||
|
|
||||||
namespace mediapipe {
|
namespace mediapipe {
|
||||||
namespace autoflip {
|
namespace autoflip {
|
||||||
namespace {
|
namespace {
|
||||||
|
@ -75,6 +77,7 @@ absl::Status KinematicPathSolver::AddObservation(int position,
|
||||||
current_position_px_ = position;
|
current_position_px_ = position;
|
||||||
}
|
}
|
||||||
target_position_px_ = position;
|
target_position_px_ = position;
|
||||||
|
prior_position_px_ = current_position_px_;
|
||||||
motion_state_ = false;
|
motion_state_ = false;
|
||||||
mean_delta_t_ = -1;
|
mean_delta_t_ = -1;
|
||||||
raw_positions_at_time_.push_front(
|
raw_positions_at_time_.push_front(
|
||||||
|
@ -106,6 +109,11 @@ absl::Status KinematicPathSolver::AddObservation(int position,
|
||||||
options_.reframe_window())
|
options_.reframe_window())
|
||||||
<< "Reframe window cannot exceed min_motion_to_reframe.";
|
<< "Reframe window cannot exceed min_motion_to_reframe.";
|
||||||
}
|
}
|
||||||
|
RET_CHECK(options_.has_max_velocity() ^
|
||||||
|
(options_.has_max_velocity_scale() &&
|
||||||
|
options_.has_max_velocity_shift()))
|
||||||
|
<< "Must either set max_velocity or set both max_velocity_scale and "
|
||||||
|
"max_velocity_shift.";
|
||||||
return absl::OkStatus();
|
return absl::OkStatus();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
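The new RET_CHECK makes the fixed velocity cap and the adaptive cap mutually exclusive. A sketch of which configurations AddObservation now accepts, using the option setters exercised in the tests below (values illustrative):

    KinematicOptions fixed_cap;
    fixed_cap.set_max_velocity(18);             // Accepted: fixed cap only.

    KinematicOptions adaptive_cap;
    adaptive_cap.set_max_velocity_scale(0.5);   // Accepted: scale and shift
    adaptive_cap.set_max_velocity_shift(-1.0);  // must be set together.

    KinematicOptions rejected;
    rejected.set_max_velocity(18);              // Rejected by the check above:
    rejected.set_max_velocity_scale(0.5);       // mixes the fixed cap with the
    rejected.set_max_velocity_shift(-1.0);      // adaptive one.
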
@ -123,9 +131,29 @@ absl::Status KinematicPathSolver::AddObservation(int position,
|
||||||
}
|
}
|
||||||
|
|
||||||
int filtered_position = Median(raw_positions_at_time_);
|
int filtered_position = Median(raw_positions_at_time_);
|
||||||
|
|
||||||
|
float min_reframe = (options_.has_min_motion_to_reframe()
|
||||||
|
? options_.min_motion_to_reframe()
|
||||||
|
: options_.min_motion_to_reframe_lower()) *
|
||||||
|
pixels_per_degree_;
|
||||||
|
float max_reframe = (options_.has_min_motion_to_reframe()
|
||||||
|
? options_.min_motion_to_reframe()
|
||||||
|
: options_.min_motion_to_reframe_upper()) *
|
||||||
|
pixels_per_degree_;
|
||||||
|
|
||||||
|
filtered_position = fmax(min_location_ - min_reframe, filtered_position);
|
||||||
|
filtered_position = fmin(max_location_ + max_reframe, filtered_position);
|
||||||
|
|
||||||
double delta_degs =
|
double delta_degs =
|
||||||
(filtered_position - current_position_px_) / pixels_per_degree_;
|
(filtered_position - current_position_px_) / pixels_per_degree_;
|
||||||
|
|
||||||
|
double max_velocity =
|
||||||
|
options_.has_max_velocity()
|
||||||
|
? options_.max_velocity()
|
||||||
|
: fmax(abs(delta_degs * options_.max_velocity_scale()) +
|
||||||
|
options_.max_velocity_shift(),
|
||||||
|
kMinVelocity);
|
||||||
|
|
||||||
// If the motion is smaller than the min_motion_to_reframe and camera is
|
// If the motion is smaller than the min_motion_to_reframe and camera is
|
||||||
// stationary, don't use the update.
|
// stationary, don't use the update.
|
||||||
if (IsMotionTooSmall(delta_degs) && !motion_state_) {
|
if (IsMotionTooSmall(delta_degs) && !motion_state_) {
|
||||||
|
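Taken together, the added lines do two things: the filtered observation is clamped to the span [min_location_ - min_reframe, max_location_ + max_reframe], and the velocity cap becomes adaptive. As a worked relation (kMinVelocity = 0.5 is the constant added at the top of this file):

    max_velocity (deg/s) = options.max_velocity()                              if max_velocity is set
                         = max(|delta_degs| * max_velocity_scale
                               + max_velocity_shift, 0.5)                      otherwise
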
@ -169,10 +197,9 @@ absl::Status KinematicPathSolver::AddObservation(int position,
|
||||||
options_.max_update_rate());
|
options_.max_update_rate());
|
||||||
double updated_velocity = current_velocity_deg_per_s_ * (1 - update_rate) +
|
double updated_velocity = current_velocity_deg_per_s_ * (1 - update_rate) +
|
||||||
observed_velocity * update_rate;
|
observed_velocity * update_rate;
|
||||||
// Limited current velocity.
|
current_velocity_deg_per_s_ = updated_velocity > 0
|
||||||
current_velocity_deg_per_s_ =
|
? fmin(updated_velocity, max_velocity)
|
||||||
updated_velocity > 0 ? fmin(updated_velocity, options_.max_velocity())
|
: fmax(updated_velocity, -max_velocity);
|
||||||
: fmax(updated_velocity, -options_.max_velocity());
|
|
||||||
|
|
||||||
// Update prediction based on time input.
|
// Update prediction based on time input.
|
||||||
return UpdatePrediction(time_us);
|
return UpdatePrediction(time_us);
|
||||||
|
@ -182,6 +209,9 @@ absl::Status KinematicPathSolver::UpdatePrediction(const int64 time_us) {
|
||||||
RET_CHECK(current_time_ < time_us)
|
RET_CHECK(current_time_ < time_us)
|
||||||
<< "Prediction time added before a prior observation or prediction.";
|
<< "Prediction time added before a prior observation or prediction.";
|
||||||
|
|
||||||
|
// Store prior pixel location.
|
||||||
|
prior_position_px_ = current_position_px_;
|
||||||
|
|
||||||
// Position update limited by min/max.
|
// Position update limited by min/max.
|
||||||
double update_position_px =
|
double update_position_px =
|
||||||
current_position_px_ +
|
current_position_px_ +
|
||||||
|
@ -209,7 +239,19 @@ absl::Status KinematicPathSolver::GetState(int* position) {
|
||||||
return absl::OkStatus();
|
return absl::OkStatus();
|
||||||
}
|
}
|
||||||
|
|
||||||
absl::Status KinematicPathSolver::SetState(const int position) {
|
absl::Status KinematicPathSolver::GetState(float* position) {
|
||||||
|
RET_CHECK(initialized_) << "GetState called before first observation added.";
|
||||||
|
*position = current_position_px_;
|
||||||
|
return absl::OkStatus();
|
||||||
|
}
|
||||||
|
|
||||||
|
absl::Status KinematicPathSolver::GetDeltaState(float* delta_position) {
|
||||||
|
RET_CHECK(initialized_) << "GetState called before first observation added.";
|
||||||
|
*delta_position = current_position_px_ - prior_position_px_;
|
||||||
|
return absl::OkStatus();
|
||||||
|
}
|
||||||
|
|
||||||
|
absl::Status KinematicPathSolver::SetState(const float position) {
|
||||||
RET_CHECK(initialized_) << "SetState called before first observation added.";
|
RET_CHECK(initialized_) << "SetState called before first observation added.";
|
||||||
current_position_px_ = position;
|
current_position_px_ = position;
|
||||||
return absl::OkStatus();
|
return absl::OkStatus();
|
||||||
|
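GetState now comes in an int and a float flavor, and GetDeltaState exposes the per-update motion. A usage sketch that mirrors the calls exercised in kinematic_path_solver_test.cc below (the 60-degree field of view and these option values are assumptions, not part of this change):

    KinematicOptions options;
    options.set_min_motion_to_reframe(1.0);
    options.set_max_update_rate(0.25);
    options.set_max_velocity(18);
    // Arguments: options, min_location, max_location, pixels_per_degree.
    KinematicPathSolver solver(options, 0, 1000, 1000.0 / 60);

    MP_ASSERT_OK(solver.AddObservation(500, /*time_us=*/0));
    MP_ASSERT_OK(solver.AddObservation(520, /*time_us=*/1000000));

    float position;
    MP_ASSERT_OK(solver.GetState(&position));    // sub-pixel camera position
    float delta;
    MP_ASSERT_OK(solver.GetDeltaState(&delta));  // change since the prior update
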
@ -218,7 +260,15 @@ absl::Status KinematicPathSolver::SetState(const int position) {
|
||||||
absl::Status KinematicPathSolver::GetTargetPosition(int* target_position) {
|
absl::Status KinematicPathSolver::GetTargetPosition(int* target_position) {
|
||||||
RET_CHECK(initialized_)
|
RET_CHECK(initialized_)
|
||||||
<< "GetTargetPosition called before first observation added.";
|
<< "GetTargetPosition called before first observation added.";
|
||||||
*target_position = round(target_position_px_);
|
|
||||||
|
// Provide target position clamped by min/max locations.
|
||||||
|
if (target_position_px_ < min_location_) {
|
||||||
|
*target_position = min_location_;
|
||||||
|
} else if (target_position_px_ > max_location_) {
|
||||||
|
*target_position = max_location_;
|
||||||
|
} else {
|
||||||
|
*target_position = round(target_position_px_);
|
||||||
|
}
|
||||||
return absl::OkStatus();
|
return absl::OkStatus();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
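The clamp added above means the reported target can no longer leave the frame. Restating the added lines as one relation:

    *target_position = min_location_                   if target_position_px_ < min_location_
                     = max_location_                   if target_position_px_ > max_location_
                     = round(target_position_px_)      otherwise
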
@ -238,6 +288,7 @@ absl::Status KinematicPathSolver::UpdateMinMaxLocation(const int min_location,
|
||||||
double updated_distance = max_location - min_location;
|
double updated_distance = max_location - min_location;
|
||||||
double scale_change = updated_distance / prior_distance;
|
double scale_change = updated_distance / prior_distance;
|
||||||
current_position_px_ = current_position_px_ * scale_change;
|
current_position_px_ = current_position_px_ * scale_change;
|
||||||
|
prior_position_px_ = prior_position_px_ * scale_change;
|
||||||
target_position_px_ = target_position_px_ * scale_change;
|
target_position_px_ = target_position_px_ * scale_change;
|
||||||
max_location_ = max_location;
|
max_location_ = max_location;
|
||||||
min_location_ = min_location;
|
min_location_ = min_location;
|
||||||
|
|
|
@ -46,10 +46,12 @@ class KinematicPathSolver {
|
||||||
absl::Status AddObservation(int position, const uint64 time_us);
|
absl::Status AddObservation(int position, const uint64 time_us);
|
||||||
// Get the predicted position at a time.
|
// Get the predicted position at a time.
|
||||||
absl::Status UpdatePrediction(const int64 time_us);
|
absl::Status UpdatePrediction(const int64 time_us);
|
||||||
// Get the state at a time.
|
// Get the state at a time, as an int.
|
||||||
absl::Status GetState(int* position);
|
absl::Status GetState(int* position);
|
||||||
|
// Get the state at a time, as a float.
|
||||||
|
absl::Status GetState(float* position);
|
||||||
// Overwrite the current state value.
|
// Overwrite the current state value.
|
||||||
absl::Status SetState(const int position);
|
absl::Status SetState(const float position);
|
||||||
// Update PixelPerDegree value.
|
// Update PixelPerDegree value.
|
||||||
absl::Status UpdatePixelsPerDegree(const float pixels_per_degree);
|
absl::Status UpdatePixelsPerDegree(const float pixels_per_degree);
|
||||||
// Provide the current target position of the reframe action.
|
// Provide the current target position of the reframe action.
|
||||||
|
@ -66,6 +68,8 @@ class KinematicPathSolver {
|
||||||
// Clear any history buffer of positions that are used when
|
// Clear any history buffer of positions that are used when
|
||||||
// filtering_time_window_us is set to a non-zero value.
|
// filtering_time_window_us is set to a non-zero value.
|
||||||
void ClearHistory();
|
void ClearHistory();
|
||||||
|
// Provides the change in position from last state.
|
||||||
|
absl::Status GetDeltaState(float* delta_position);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// Tuning options.
|
// Tuning options.
|
||||||
|
@ -77,6 +81,7 @@ class KinematicPathSolver {
|
||||||
float pixels_per_degree_;
|
float pixels_per_degree_;
|
||||||
// Current state values.
|
// Current state values.
|
||||||
double current_position_px_;
|
double current_position_px_;
|
||||||
|
double prior_position_px_;
|
||||||
double current_velocity_deg_per_s_;
|
double current_velocity_deg_per_s_;
|
||||||
uint64 current_time_;
|
uint64 current_time_;
|
||||||
// History of observations (second) and their time (first).
|
// History of observations (second) and their time (first).
|
||||||
|
|
|
@ -6,8 +6,9 @@ message KinematicOptions {
|
||||||
// Weighted update of new camera velocity (measurement) vs current state
|
// Weighted update of new camera velocity (measurement) vs current state
|
||||||
// (prediction).
|
// (prediction).
|
||||||
optional double update_rate = 1 [default = 0.5, deprecated = true];
|
optional double update_rate = 1 [default = 0.5, deprecated = true];
|
||||||
// Max velocity (degrees per second) that the camera can move.
|
// Max velocity (degrees per second) that the camera can move. Cannot be used
|
||||||
optional double max_velocity = 2 [default = 18];
|
// with max_velocity_scale or max_velocity_shift.
|
||||||
|
optional double max_velocity = 2;
|
||||||
// Min motion (in degrees) to react for both upper and lower directions. Must
|
// Min motion (in degrees) to react for both upper and lower directions. Must
|
||||||
// not be set if using min_motion_to_reframe_lower and
|
// not be set if using min_motion_to_reframe_lower and
|
||||||
// min_motion_to_reframe_upper.
|
// min_motion_to_reframe_upper.
|
||||||
|
@ -30,4 +31,12 @@ message KinematicOptions {
|
||||||
optional int64 filtering_time_window_us = 7 [default = 0];
|
optional int64 filtering_time_window_us = 7 [default = 0];
|
||||||
// Weighted update of average period, used for motion updates.
|
// Weighted update of average period, used for motion updates.
|
||||||
optional float mean_period_update_rate = 8 [default = 0.25];
|
optional float mean_period_update_rate = 8 [default = 0.25];
|
||||||
|
// Scale factor for max velocity, to be multiplied by the distance from center
|
||||||
|
// in degrees. Cannot be used with max_velocity and must be used with
|
||||||
|
// max_velocity_shift.
|
||||||
|
optional float max_velocity_scale = 11;
|
||||||
|
// Shift factor for max velocity, to be added to the scaled distance from
|
||||||
|
// center in degrees. Cannot be used with max_velocity and must be used with
|
||||||
|
// max_velocity_scale.
|
||||||
|
optional float max_velocity_shift = 12;
|
||||||
}
|
}
|
||||||
|
|
|
@ -36,7 +36,7 @@ TEST(KinematicPathSolverTest, FailZeroPixelsPerDegree) {
|
||||||
TEST(KinematicPathSolverTest, FailNotInitializedState) {
|
TEST(KinematicPathSolverTest, FailNotInitializedState) {
|
||||||
KinematicOptions options;
|
KinematicOptions options;
|
||||||
KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
|
KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
|
||||||
int state;
|
float state;
|
||||||
EXPECT_FALSE(solver.GetState(&state).ok());
|
EXPECT_FALSE(solver.GetState(&state).ok());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -55,13 +55,13 @@ TEST(KinematicPathSolverTest, PassNotEnoughMotionLargeImg) {
|
||||||
options.set_max_velocity(1000);
|
options.set_max_velocity(1000);
|
||||||
// Set degrees / pixel to 16.6
|
// Set degrees / pixel to 16.6
|
||||||
KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
|
KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
|
||||||
int state;
|
float state;
|
||||||
MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 0));
|
MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 0));
|
||||||
// Move target by 20px / 16.6 = 1.2deg
|
// Move target by 20px / 16.6 = 1.2deg
|
||||||
MP_ASSERT_OK(solver.AddObservation(520, kMicroSecInSec * 1));
|
MP_ASSERT_OK(solver.AddObservation(520, kMicroSecInSec * 1));
|
||||||
MP_ASSERT_OK(solver.GetState(&state));
|
MP_ASSERT_OK(solver.GetState(&state));
|
||||||
// Expect cam to not move.
|
// Expect cam to not move.
|
||||||
EXPECT_EQ(state, 500);
|
EXPECT_FLOAT_EQ(state, 500);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(KinematicPathSolverTest, PassNotEnoughMotionSmallImg) {
|
TEST(KinematicPathSolverTest, PassNotEnoughMotionSmallImg) {
|
||||||
|
@ -72,13 +72,13 @@ TEST(KinematicPathSolverTest, PassNotEnoughMotionSmallImg) {
|
||||||
options.set_max_velocity(500);
|
options.set_max_velocity(500);
|
||||||
// Set degrees / pixel to 8.3
|
// Set degrees / pixel to 8.3
|
||||||
KinematicPathSolver solver(options, 0, 500, 500.0 / kWidthFieldOfView);
|
KinematicPathSolver solver(options, 0, 500, 500.0 / kWidthFieldOfView);
|
||||||
int state;
|
float state;
|
||||||
MP_ASSERT_OK(solver.AddObservation(400, kMicroSecInSec * 0));
|
MP_ASSERT_OK(solver.AddObservation(400, kMicroSecInSec * 0));
|
||||||
// Move target by 10px / 8.3 = 1.2deg
|
// Move target by 10px / 8.3 = 1.2deg
|
||||||
MP_ASSERT_OK(solver.AddObservation(410, kMicroSecInSec * 1));
|
MP_ASSERT_OK(solver.AddObservation(410, kMicroSecInSec * 1));
|
||||||
MP_ASSERT_OK(solver.GetState(&state));
|
MP_ASSERT_OK(solver.GetState(&state));
|
||||||
// Expect cam to not move.
|
// Expect cam to not move.
|
||||||
EXPECT_EQ(state, 400);
|
EXPECT_FLOAT_EQ(state, 400);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(KinematicPathSolverTest, PassEnoughMotionFiltered) {
|
TEST(KinematicPathSolverTest, PassEnoughMotionFiltered) {
|
||||||
|
@ -90,7 +90,7 @@ TEST(KinematicPathSolverTest, PassEnoughMotionFiltered) {
|
||||||
options.set_filtering_time_window_us(3000000);
|
options.set_filtering_time_window_us(3000000);
|
||||||
// Set degrees / pixel to 16.6
|
// Set degrees / pixel to 16.6
|
||||||
KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
|
KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
|
||||||
int state;
|
float state;
|
||||||
MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 0));
|
MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 0));
|
||||||
// Move target by 20px / 16.6 = 1.2deg
|
// Move target by 20px / 16.6 = 1.2deg
|
||||||
MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 1));
|
MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 1));
|
||||||
|
@ -98,7 +98,7 @@ TEST(KinematicPathSolverTest, PassEnoughMotionFiltered) {
|
||||||
MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 3));
|
MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 3));
|
||||||
MP_ASSERT_OK(solver.GetState(&state));
|
MP_ASSERT_OK(solver.GetState(&state));
|
||||||
// Expect cam to not move.
|
// Expect cam to not move.
|
||||||
EXPECT_EQ(state, 500);
|
EXPECT_FLOAT_EQ(state, 500);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(KinematicPathSolverTest, PassEnoughMotionNotFiltered) {
|
TEST(KinematicPathSolverTest, PassEnoughMotionNotFiltered) {
|
||||||
|
@ -110,7 +110,7 @@ TEST(KinematicPathSolverTest, PassEnoughMotionNotFiltered) {
|
||||||
options.set_filtering_time_window_us(0);
|
options.set_filtering_time_window_us(0);
|
||||||
// Set degrees / pixel to 16.6
|
// Set degrees / pixel to 16.6
|
||||||
KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
|
KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
|
||||||
int state;
|
float state;
|
||||||
MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 0));
|
MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 0));
|
||||||
// Move target by 20px / 16.6 = 1.2deg
|
// Move target by 20px / 16.6 = 1.2deg
|
||||||
MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 1));
|
MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 1));
|
||||||
|
@ -118,7 +118,7 @@ TEST(KinematicPathSolverTest, PassEnoughMotionNotFiltered) {
|
||||||
MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 3));
|
MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 3));
|
||||||
MP_ASSERT_OK(solver.GetState(&state));
|
MP_ASSERT_OK(solver.GetState(&state));
|
||||||
// Expect cam to not move.
|
// Expect cam to not move.
|
||||||
EXPECT_EQ(state, 506);
|
EXPECT_FLOAT_EQ(state, 506.4);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(KinematicPathSolverTest, PassEnoughMotionLargeImg) {
|
TEST(KinematicPathSolverTest, PassEnoughMotionLargeImg) {
|
||||||
|
@ -130,13 +130,13 @@ TEST(KinematicPathSolverTest, PassEnoughMotionLargeImg) {
|
||||||
options.set_max_velocity(1000);
|
options.set_max_velocity(1000);
|
||||||
// Set degrees / pixel to 16.6
|
// Set degrees / pixel to 16.6
|
||||||
KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
|
KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
|
||||||
int state;
|
float state;
|
||||||
MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 0));
|
MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 0));
|
||||||
// Move target by 20px / 16.6 = 1.2deg
|
// Move target by 20px / 16.6 = 1.2deg
|
||||||
MP_ASSERT_OK(solver.AddObservation(520, kMicroSecInSec * 1));
|
MP_ASSERT_OK(solver.AddObservation(520, kMicroSecInSec * 1));
|
||||||
MP_ASSERT_OK(solver.GetState(&state));
|
MP_ASSERT_OK(solver.GetState(&state));
|
||||||
// Expect cam to move.
|
// Expect cam to move.
|
||||||
EXPECT_EQ(state, 520);
|
EXPECT_FLOAT_EQ(state, 520);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(KinematicPathSolverTest, PassEnoughMotionSmallImg) {
|
TEST(KinematicPathSolverTest, PassEnoughMotionSmallImg) {
|
||||||
|
@ -148,13 +148,13 @@ TEST(KinematicPathSolverTest, PassEnoughMotionSmallImg) {
|
||||||
options.set_max_velocity(18);
|
options.set_max_velocity(18);
|
||||||
// Set degrees / pixel to 8.3
|
// Set degrees / pixel to 8.3
|
||||||
KinematicPathSolver solver(options, 0, 500, 500.0 / kWidthFieldOfView);
|
KinematicPathSolver solver(options, 0, 500, 500.0 / kWidthFieldOfView);
|
||||||
int state;
|
float state;
|
||||||
MP_ASSERT_OK(solver.AddObservation(400, kMicroSecInSec * 0));
|
MP_ASSERT_OK(solver.AddObservation(400, kMicroSecInSec * 0));
|
||||||
// Move target by 10px / 8.3 = 1.2deg
|
// Move target by 10px / 8.3 = 1.2deg
|
||||||
MP_ASSERT_OK(solver.AddObservation(410, kMicroSecInSec * 1));
|
MP_ASSERT_OK(solver.AddObservation(410, kMicroSecInSec * 1));
|
||||||
MP_ASSERT_OK(solver.GetState(&state));
|
MP_ASSERT_OK(solver.GetState(&state));
|
||||||
// Expect cam to move.
|
// Expect cam to move.
|
||||||
EXPECT_EQ(state, 410);
|
EXPECT_FLOAT_EQ(state, 410);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(KinematicPathSolverTest, FailReframeWindowSetting) {
|
TEST(KinematicPathSolverTest, FailReframeWindowSetting) {
|
||||||
|
@ -181,13 +181,13 @@ TEST(KinematicPathSolverTest, PassReframeWindow) {
|
||||||
options.set_reframe_window(0.75);
|
options.set_reframe_window(0.75);
|
||||||
// Set degrees / pixel to 16.6
|
// Set degrees / pixel to 16.6
|
||||||
KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
|
KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
|
||||||
int state;
|
float state;
|
||||||
MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 0));
|
MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 0));
|
||||||
// Move target by 20px / 16.6 = 1.2deg
|
// Move target by 20px / 16.6 = 1.2deg
|
||||||
MP_ASSERT_OK(solver.AddObservation(520, kMicroSecInSec * 1));
|
MP_ASSERT_OK(solver.AddObservation(520, kMicroSecInSec * 1));
|
||||||
MP_ASSERT_OK(solver.GetState(&state));
|
MP_ASSERT_OK(solver.GetState(&state));
|
||||||
// Expect cam to move 1.2-.75 deg, * 16.6 = 7.47px + 500 =
|
// Expect cam to move 1.2-.75 deg, * 16.6 = 7.47px + 500 =
|
||||||
EXPECT_EQ(state, 508);
|
EXPECT_FLOAT_EQ(state, 507.5);
|
||||||
}
|
}
|
||||||
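The new float expectation simply keeps the fractional pixel that the old integer test rounded away. Completing the arithmetic in the test's own comment, assuming pixels_per_degree = 1000 / 60 (the 16.6 px/deg the comments refer to):

    20 px / 16.67 px/deg                                 = 1.2 deg of observed motion
    (1.2 deg - 0.75 deg reframe window) * 16.67 px/deg   = 7.5 px
    500 px + 7.5 px                                      = 507.5   // EXPECT_FLOAT_EQ target
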
|
|
||||||
TEST(KinematicPathSolverTest, PassReframeWindowLowerUpper) {
|
TEST(KinematicPathSolverTest, PassReframeWindowLowerUpper) {
|
||||||
|
@ -202,17 +202,17 @@ TEST(KinematicPathSolverTest, PassReframeWindowLowerUpper) {
|
||||||
options.set_reframe_window(0.75);
|
options.set_reframe_window(0.75);
|
||||||
// Set degrees / pixel to 16.6
|
// Set degrees / pixel to 16.6
|
||||||
KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
|
KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
|
||||||
int state;
|
float state;
|
||||||
MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 0));
|
MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 0));
|
||||||
// Move target by 20px / 16.6 = 1.2deg
|
// Move target by 20px / 16.6 = 1.2deg
|
||||||
MP_ASSERT_OK(solver.AddObservation(520, kMicroSecInSec * 1));
|
MP_ASSERT_OK(solver.AddObservation(520, kMicroSecInSec * 1));
|
||||||
MP_ASSERT_OK(solver.GetState(&state));
|
MP_ASSERT_OK(solver.GetState(&state));
|
||||||
// Expect cam to not move
|
// Expect cam to not move
|
||||||
EXPECT_EQ(state, 500);
|
EXPECT_FLOAT_EQ(state, 500);
|
||||||
MP_ASSERT_OK(solver.AddObservation(480, kMicroSecInSec * 2));
|
MP_ASSERT_OK(solver.AddObservation(480, kMicroSecInSec * 2));
|
||||||
MP_ASSERT_OK(solver.GetState(&state));
|
MP_ASSERT_OK(solver.GetState(&state));
|
||||||
// Expect cam to move
|
// Expect cam to move
|
||||||
EXPECT_EQ(state, 493);
|
EXPECT_FLOAT_EQ(state, 492.5);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(KinematicPathSolverTest, PassCheckState) {
|
TEST(KinematicPathSolverTest, PassCheckState) {
|
||||||
|
@ -241,12 +241,12 @@ TEST(KinematicPathSolverTest, PassUpdateRate30FPS) {
|
||||||
options.set_max_update_rate(0.8);
|
options.set_max_update_rate(0.8);
|
||||||
options.set_max_velocity(18);
|
options.set_max_velocity(18);
|
||||||
KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
|
KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
|
||||||
int state;
|
float state;
|
||||||
MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 0));
|
MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 0));
|
||||||
MP_ASSERT_OK(solver.AddObservation(520, kMicroSecInSec * 1 / 30));
|
MP_ASSERT_OK(solver.AddObservation(520, kMicroSecInSec * 1 / 30));
|
||||||
MP_ASSERT_OK(solver.GetState(&state));
|
MP_ASSERT_OK(solver.GetState(&state));
|
||||||
// (0.033 / .25) * 20 =
|
// (0.033 / .25) * 20 =
|
||||||
EXPECT_EQ(state, 503);
|
EXPECT_FLOAT_EQ(state, 502.6667);
|
||||||
}
|
}
|
||||||
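Same idea here: the comment "(0.033 / .25) * 20" works out to a fractional pixel that the float expectation can now assert exactly (the 0.25 s divisor is assumed to be the update_rate_seconds option used by this test):

    (1/30 s) / 0.25 s * 20 px  ≈ 2.67 px
    500 px + 2.67 px           ≈ 502.67   // previously rounded to 503
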
|
|
||||||
TEST(KinematicPathSolverTest, PassUpdateRate10FPS) {
|
TEST(KinematicPathSolverTest, PassUpdateRate10FPS) {
|
||||||
|
@ -256,12 +256,12 @@ TEST(KinematicPathSolverTest, PassUpdateRate10FPS) {
|
||||||
options.set_max_update_rate(0.8);
|
options.set_max_update_rate(0.8);
|
||||||
options.set_max_velocity(18);
|
options.set_max_velocity(18);
|
||||||
KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
|
KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
|
||||||
int state;
|
float state;
|
||||||
MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 0));
|
MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 0));
|
||||||
MP_ASSERT_OK(solver.AddObservation(520, kMicroSecInSec * 1 / 10));
|
MP_ASSERT_OK(solver.AddObservation(520, kMicroSecInSec * 1 / 10));
|
||||||
MP_ASSERT_OK(solver.GetState(&state));
|
MP_ASSERT_OK(solver.GetState(&state));
|
||||||
// (0.1 / .25) * 20 =
|
// (0.1 / .25) * 20 =
|
||||||
EXPECT_EQ(state, 508);
|
EXPECT_FLOAT_EQ(state, 508);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(KinematicPathSolverTest, PassUpdateRate) {
|
TEST(KinematicPathSolverTest, PassUpdateRate) {
|
||||||
|
@ -271,7 +271,8 @@ TEST(KinematicPathSolverTest, PassUpdateRate) {
|
||||||
options.set_max_update_rate(1.0);
|
options.set_max_update_rate(1.0);
|
||||||
options.set_max_velocity(18);
|
options.set_max_velocity(18);
|
||||||
KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
|
KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
|
||||||
int state, target_position;
|
int target_position;
|
||||||
|
float state;
|
||||||
MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 0));
|
MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 0));
|
||||||
MP_ASSERT_OK(solver.GetTargetPosition(&target_position));
|
MP_ASSERT_OK(solver.GetTargetPosition(&target_position));
|
||||||
EXPECT_EQ(target_position, 500);
|
EXPECT_EQ(target_position, 500);
|
||||||
|
@ -279,7 +280,7 @@ TEST(KinematicPathSolverTest, PassUpdateRate) {
|
||||||
MP_ASSERT_OK(solver.GetTargetPosition(&target_position));
|
MP_ASSERT_OK(solver.GetTargetPosition(&target_position));
|
||||||
EXPECT_EQ(target_position, 520);
|
EXPECT_EQ(target_position, 520);
|
||||||
MP_ASSERT_OK(solver.GetState(&state));
|
MP_ASSERT_OK(solver.GetState(&state));
|
||||||
EXPECT_EQ(state, 505);
|
EXPECT_FLOAT_EQ(state, 505);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(KinematicPathSolverTest, PassUpdateRateResolutionChange) {
|
TEST(KinematicPathSolverTest, PassUpdateRateResolutionChange) {
|
||||||
|
@ -289,7 +290,8 @@ TEST(KinematicPathSolverTest, PassUpdateRateResolutionChange) {
|
||||||
options.set_max_update_rate(1.0);
|
options.set_max_update_rate(1.0);
|
||||||
options.set_max_velocity(18);
|
options.set_max_velocity(18);
|
||||||
KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
|
KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
|
||||||
int state, target_position;
|
int target_position;
|
||||||
|
float state;
|
||||||
MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 0));
|
MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 0));
|
||||||
MP_ASSERT_OK(solver.GetTargetPosition(&target_position));
|
MP_ASSERT_OK(solver.GetTargetPosition(&target_position));
|
||||||
EXPECT_EQ(target_position, 500);
|
EXPECT_EQ(target_position, 500);
|
||||||
|
@ -299,10 +301,10 @@ TEST(KinematicPathSolverTest, PassUpdateRateResolutionChange) {
|
||||||
MP_ASSERT_OK(solver.GetTargetPosition(&target_position));
|
MP_ASSERT_OK(solver.GetTargetPosition(&target_position));
|
||||||
EXPECT_EQ(target_position, 520 * 0.5);
|
EXPECT_EQ(target_position, 520 * 0.5);
|
||||||
MP_ASSERT_OK(solver.GetState(&state));
|
MP_ASSERT_OK(solver.GetState(&state));
|
||||||
EXPECT_EQ(state, 253);
|
EXPECT_FLOAT_EQ(state, 252.5);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(KinematicPathSolverTest, PassMaxVelocity) {
|
TEST(KinematicPathSolverTest, PassMaxVelocityInt) {
|
||||||
KinematicOptions options;
|
KinematicOptions options;
|
||||||
options.set_min_motion_to_reframe(1.0);
|
options.set_min_motion_to_reframe(1.0);
|
||||||
options.set_update_rate(1.0);
|
options.set_update_rate(1.0);
|
||||||
|
@ -315,6 +317,33 @@ TEST(KinematicPathSolverTest, PassMaxVelocity) {
|
||||||
EXPECT_EQ(state, 600);
|
EXPECT_EQ(state, 600);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST(KinematicPathSolverTest, PassMaxVelocity) {
|
||||||
|
KinematicOptions options;
|
||||||
|
options.set_min_motion_to_reframe(1.0);
|
||||||
|
options.set_update_rate(1.0);
|
||||||
|
options.set_max_velocity(6);
|
||||||
|
KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
|
||||||
|
float state;
|
||||||
|
MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 0));
|
||||||
|
MP_ASSERT_OK(solver.AddObservation(1000, kMicroSecInSec * 1));
|
||||||
|
MP_ASSERT_OK(solver.GetState(&state));
|
||||||
|
EXPECT_FLOAT_EQ(state, 600);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(KinematicPathSolverTest, PassMaxVelocityScale) {
|
||||||
|
KinematicOptions options;
|
||||||
|
options.set_min_motion_to_reframe(1.0);
|
||||||
|
options.set_update_rate(1.0);
|
||||||
|
options.set_max_velocity_scale(0.4);
|
||||||
|
options.set_max_velocity_shift(-2.0);
|
||||||
|
KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
|
||||||
|
float state;
|
||||||
|
MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 0));
|
||||||
|
MP_ASSERT_OK(solver.AddObservation(1000, kMicroSecInSec * 1));
|
||||||
|
MP_ASSERT_OK(solver.GetState(&state));
|
||||||
|
EXPECT_FLOAT_EQ(state, 666.6667);
|
||||||
|
}
|
||||||
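PassMaxVelocityScale pins the adaptive cap down numerically. Worked through with pixels_per_degree = 1000 / 60 ≈ 16.67 (an assumption consistent with the other tests in this file):

    delta                 = (1000 px - 500 px) / 16.67 px/deg = 30 deg
    cap                   = max(30 deg * 0.4 + (-2.0), 0.5)   = 10 deg/s
    one second at the cap = 10 deg * 16.67 px/deg             ≈ 166.67 px
    500 px + 166.67 px    ≈ 666.67                            // EXPECT_FLOAT_EQ target
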
|
|
||||||
TEST(KinematicPathSolverTest, PassDegPerPxChange) {
|
TEST(KinematicPathSolverTest, PassDegPerPxChange) {
|
||||||
KinematicOptions options;
|
KinematicOptions options;
|
||||||
// Set min motion to 2deg
|
// Set min motion to 2deg
|
||||||
|
@ -323,18 +352,18 @@ TEST(KinematicPathSolverTest, PassDegPerPxChange) {
|
||||||
options.set_max_velocity(1000);
|
options.set_max_velocity(1000);
|
||||||
// Set degrees / pixel to 16.6
|
// Set degrees / pixel to 16.6
|
||||||
KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
|
KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
|
||||||
int state;
|
float state;
|
||||||
MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 0));
|
MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 0));
|
||||||
// Move target by 20px / 16.6 = 1.2deg
|
// Move target by 20px / 16.6 = 1.2deg
|
||||||
MP_ASSERT_OK(solver.AddObservation(520, kMicroSecInSec * 1));
|
MP_ASSERT_OK(solver.AddObservation(520, kMicroSecInSec * 1));
|
||||||
MP_ASSERT_OK(solver.GetState(&state));
|
MP_ASSERT_OK(solver.GetState(&state));
|
||||||
// Expect cam to not move.
|
// Expect cam to not move.
|
||||||
EXPECT_EQ(state, 500);
|
EXPECT_FLOAT_EQ(state, 500);
|
||||||
MP_ASSERT_OK(solver.UpdatePixelsPerDegree(500.0 / kWidthFieldOfView));
|
MP_ASSERT_OK(solver.UpdatePixelsPerDegree(500.0 / kWidthFieldOfView));
|
||||||
MP_ASSERT_OK(solver.AddObservation(520, kMicroSecInSec * 2));
|
MP_ASSERT_OK(solver.AddObservation(520, kMicroSecInSec * 2));
|
||||||
MP_ASSERT_OK(solver.GetState(&state));
|
MP_ASSERT_OK(solver.GetState(&state));
|
||||||
// Expect cam to move.
|
// Expect cam to move.
|
||||||
EXPECT_EQ(state, 516);
|
EXPECT_FLOAT_EQ(state, 516);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(KinematicPathSolverTest, NoTimestampSmoothing) {
|
TEST(KinematicPathSolverTest, NoTimestampSmoothing) {
|
||||||
|
@ -344,14 +373,14 @@ TEST(KinematicPathSolverTest, NoTimestampSmoothing) {
|
||||||
options.set_max_velocity(6);
|
options.set_max_velocity(6);
|
||||||
options.set_mean_period_update_rate(1.0);
|
options.set_mean_period_update_rate(1.0);
|
||||||
KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
|
KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
|
||||||
int state;
|
float state;
|
||||||
MP_ASSERT_OK(solver.AddObservation(500, 0));
|
MP_ASSERT_OK(solver.AddObservation(500, 0));
|
||||||
MP_ASSERT_OK(solver.AddObservation(1000, 1000000));
|
MP_ASSERT_OK(solver.AddObservation(1000, 1000000));
|
||||||
MP_ASSERT_OK(solver.GetState(&state));
|
MP_ASSERT_OK(solver.GetState(&state));
|
||||||
EXPECT_EQ(state, 600);
|
EXPECT_FLOAT_EQ(state, 600);
|
||||||
MP_ASSERT_OK(solver.AddObservation(1000, 2200000));
|
MP_ASSERT_OK(solver.AddObservation(1000, 2200000));
|
||||||
MP_ASSERT_OK(solver.GetState(&state));
|
MP_ASSERT_OK(solver.GetState(&state));
|
||||||
EXPECT_EQ(state, 720);
|
EXPECT_FLOAT_EQ(state, 720);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(KinematicPathSolverTest, TimestampSmoothing) {
|
TEST(KinematicPathSolverTest, TimestampSmoothing) {
|
||||||
|
@ -361,14 +390,14 @@ TEST(KinematicPathSolverTest, TimestampSmoothing) {
|
||||||
options.set_max_velocity(6);
|
options.set_max_velocity(6);
|
||||||
options.set_mean_period_update_rate(0.05);
|
options.set_mean_period_update_rate(0.05);
|
||||||
KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
|
KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
|
||||||
int state;
|
float state;
|
||||||
MP_ASSERT_OK(solver.AddObservation(500, 0));
|
MP_ASSERT_OK(solver.AddObservation(500, 0));
|
||||||
MP_ASSERT_OK(solver.AddObservation(1000, 1000000));
|
MP_ASSERT_OK(solver.AddObservation(1000, 1000000));
|
||||||
MP_ASSERT_OK(solver.GetState(&state));
|
MP_ASSERT_OK(solver.GetState(&state));
|
||||||
EXPECT_EQ(state, 600);
|
EXPECT_FLOAT_EQ(state, 600);
|
||||||
MP_ASSERT_OK(solver.AddObservation(1000, 2200000));
|
MP_ASSERT_OK(solver.AddObservation(1000, 2200000));
|
||||||
MP_ASSERT_OK(solver.GetState(&state));
|
MP_ASSERT_OK(solver.GetState(&state));
|
||||||
EXPECT_EQ(state, 701);
|
EXPECT_FLOAT_EQ(state, 701);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(KinematicPathSolverTest, PassSetPosition) {
|
TEST(KinematicPathSolverTest, PassSetPosition) {
|
||||||
|
@ -380,16 +409,30 @@ TEST(KinematicPathSolverTest, PassSetPosition) {
|
||||||
options.set_max_velocity(18);
|
options.set_max_velocity(18);
|
||||||
// Set degrees / pixel to 8.3
|
// Set degrees / pixel to 8.3
|
||||||
KinematicPathSolver solver(options, 0, 500, 500.0 / kWidthFieldOfView);
|
KinematicPathSolver solver(options, 0, 500, 500.0 / kWidthFieldOfView);
|
||||||
int state;
|
float state;
|
||||||
MP_ASSERT_OK(solver.AddObservation(400, kMicroSecInSec * 0));
|
MP_ASSERT_OK(solver.AddObservation(400, kMicroSecInSec * 0));
|
||||||
// Move target by 10px / 8.3 = 1.2deg
|
// Move target by 10px / 8.3 = 1.2deg
|
||||||
MP_ASSERT_OK(solver.AddObservation(410, kMicroSecInSec * 1));
|
MP_ASSERT_OK(solver.AddObservation(410, kMicroSecInSec * 1));
|
||||||
MP_ASSERT_OK(solver.GetState(&state));
|
MP_ASSERT_OK(solver.GetState(&state));
|
||||||
// Expect cam to move.
|
// Expect cam to move.
|
||||||
EXPECT_EQ(state, 410);
|
EXPECT_FLOAT_EQ(state, 410);
|
||||||
MP_ASSERT_OK(solver.SetState(400));
|
MP_ASSERT_OK(solver.SetState(400));
|
||||||
MP_ASSERT_OK(solver.GetState(&state));
|
MP_ASSERT_OK(solver.GetState(&state));
|
||||||
EXPECT_EQ(state, 400);
|
EXPECT_FLOAT_EQ(state, 400);
|
||||||
|
}
|
||||||
|
TEST(KinematicPathSolverTest, PassBorderTest) {
|
||||||
|
KinematicOptions options;
|
||||||
|
options.set_min_motion_to_reframe(1.0);
|
||||||
|
options.set_max_update_rate(0.25);
|
||||||
|
options.set_max_velocity_scale(0.5);
|
||||||
|
options.set_max_velocity_shift(-1.0);
|
||||||
|
|
||||||
|
KinematicPathSolver solver(options, 0, 500, 500.0 / kWidthFieldOfView);
|
||||||
|
float state;
|
||||||
|
MP_ASSERT_OK(solver.AddObservation(400, kMicroSecInSec * 0));
|
||||||
|
MP_ASSERT_OK(solver.AddObservation(800, kMicroSecInSec * 0.1));
|
||||||
|
MP_ASSERT_OK(solver.GetState(&state));
|
||||||
|
EXPECT_FLOAT_EQ(state, 404.56668);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
|
@@ -148,18 +148,18 @@ class SourceImpl {
 
   explicit SourceImpl(std::vector<std::unique_ptr<Base>>* vec)
       : SourceImpl(&GetWithAutoGrow(vec, 0)) {}
-  explicit SourceImpl(SourceBase* base) : base_(*base) {}
+  explicit SourceImpl(SourceBase* base) : base_(base) {}
 
   template <typename U,
             typename std::enable_if<AllowConnection<U>{}, int>::type = 0>
   Src& AddTarget(const Dst<U>& dest) {
     CHECK(dest.base_.source == nullptr);
-    dest.base_.source = &base_;
-    base_.dests_.emplace_back(&dest.base_);
+    dest.base_.source = base_;
+    base_->dests_.emplace_back(&dest.base_);
     return *this;
   }
   Src& SetName(std::string name) {
-    base_.name_ = std::move(name);
+    base_->name_ = std::move(name);
     return *this;
   }
   template <typename U>
@@ -168,7 +168,8 @@ class SourceImpl {
   }
 
  private:
-  SourceBase& base_;
+  // Never null.
+  SourceBase* base_;
 };
 
 template <bool IsSide, typename T>
@@ -1,5 +1,7 @@
 #include "mediapipe/framework/api2/builder.h"
 
+#include <functional>
+
 #include "absl/strings/substitute.h"
 #include "mediapipe/framework/api2/node.h"
 #include "mediapipe/framework/api2/packet.h"
@@ -46,6 +48,88 @@ TEST(BuilderTest, BuildGraph) {
   EXPECT_THAT(graph.GetConfig(), EqualsProto(expected));
 }
 
+TEST(BuilderTest, CopyableSource) {
+  builder::Graph graph;
+  builder::Source<false, int> a = graph[Input<int>("A")];
+  a.SetName("a");
+  builder::Source<false, int> b = graph[Input<int>("B")];
+  b.SetName("b");
+  builder::SideSource<false, float> side_a = graph[SideInput<float>("SIDE_A")];
+  side_a.SetName("side_a");
+  builder::SideSource<false, float> side_b = graph[SideInput<float>("SIDE_B")];
+  side_b.SetName("side_b");
+  builder::Destination<false, int> out = graph[Output<int>("OUT")];
+  builder::SideDestination<false, float> side_out =
+      graph[SideOutput<float>("SIDE_OUT")];
+
+  builder::Source<false, int> input = a;
+  input = b;
+  builder::SideSource<false, float> side_input = side_b;
+  side_input = side_a;
+
+  input >> out;
+  side_input >> side_out;
+
+  CalculatorGraphConfig expected =
+      mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(R"pb(
+        input_stream: "A:a"
+        input_stream: "B:b"
+        output_stream: "OUT:b"
+        input_side_packet: "SIDE_A:side_a"
+        input_side_packet: "SIDE_B:side_b"
+        output_side_packet: "SIDE_OUT:side_a"
+      )pb");
+  EXPECT_THAT(graph.GetConfig(), EqualsProto(expected));
+}
+
+TEST(BuilderTest, BuildGraphWithFunctions) {
+  builder::Graph graph;
+
+  builder::Source<false, int> base = graph[Input<int>("IN")];
+  base.SetName("base");
+  builder::SideSource<false, float> side = graph[SideInput<float>("SIDE")];
+  side.SetName("side");
+
+  auto foo_fn = [](builder::Source<false, int> base,
+                   builder::SideSource<false, float> side,
+                   builder::Graph& graph) {
+    auto& foo = graph.AddNode("Foo");
+    base >> foo[Input<int>("BASE")];
+    side >> foo[SideInput<float>("SIDE")];
+    return foo[Output<double>("OUT")];
+  };
+  builder::Source<false, double> foo_out = foo_fn(base, side, graph);
+
+  auto bar_fn = [](builder::Source<false, double> in, builder::Graph& graph) {
+    auto& bar = graph.AddNode("Bar");
+    in >> bar[Input<double>("IN")];
+    return bar[Output<double>("OUT")];
+  };
+  builder::Source<false, double> bar_out = bar_fn(foo_out, graph);
+  bar_out.SetName("out");
+
+  bar_out >> graph[Output<double>("OUT")];
+
+  CalculatorGraphConfig expected =
+      mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(R"pb(
+        input_stream: "IN:base"
+        input_side_packet: "SIDE:side"
+        output_stream: "OUT:out"
+        node {
+          calculator: "Foo"
+          input_stream: "BASE:base"
+          input_side_packet: "SIDE:side"
+          output_stream: "OUT:__stream_0"
+        }
+        node {
+          calculator: "Bar"
+          input_stream: "IN:__stream_0"
+          output_stream: "OUT:out"
+        }
+      )pb");
+  EXPECT_THAT(graph.GetConfig(), EqualsProto(expected));
+}
+
 template <class FooT>
 void BuildGraphTypedTest() {
   builder::Graph graph;
@@ -1666,6 +1666,7 @@ TemplateParser::Parser::Parser()
       allow_partial_(false),
       allow_case_insensitive_field_(false),
       allow_unknown_field_(false),
+      allow_unknown_extension_(true),
       allow_unknown_enum_(false),
      allow_field_number_(false),
       allow_relaxed_whitespace_(false),
@@ -1683,12 +1684,11 @@ bool TemplateParser::Parser::Parse(io::ZeroCopyInputStream* input,
           allow_singular_overwrites_ ? ParserImpl::ALLOW_SINGULAR_OVERWRITES
                                      : ParserImpl::FORBID_SINGULAR_OVERWRITES;
 
-  bool allow_unknown_extension = true;
   int recursion_limit = std::numeric_limits<int>::max();
   MediaPipeParserImpl parser(
       output->GetDescriptor(), input, error_collector_, finder_,
       parse_info_tree_, overwrites_policy, allow_case_insensitive_field_,
-      allow_unknown_field_, allow_unknown_extension, allow_unknown_enum_,
+      allow_unknown_field_, allow_unknown_extension_, allow_unknown_enum_,
       allow_field_number_, allow_relaxed_whitespace_, allow_partial_,
       recursion_limit);
   return MergeUsingImpl(input, output, &parser);
@@ -1702,13 +1702,12 @@ bool TemplateParser::Parser::ParseFromString(const std::string& input,
 
 bool TemplateParser::Parser::Merge(io::ZeroCopyInputStream* input,
                                    Message* output) {
-  bool allow_unknown_extension = true;
   int recursion_limit = std::numeric_limits<int>::max();
   MediaPipeParserImpl parser(
       output->GetDescriptor(), input, error_collector_, finder_,
       parse_info_tree_, ParserImpl::ALLOW_SINGULAR_OVERWRITES,
       allow_case_insensitive_field_, allow_unknown_field_,
-      allow_unknown_extension, allow_unknown_enum_, allow_field_number_,
+      allow_unknown_extension_, allow_unknown_enum_, allow_field_number_,
       allow_relaxed_whitespace_, allow_partial_, recursion_limit);
   return MergeUsingImpl(input, output, &parser);
 }
@@ -1737,13 +1736,12 @@ bool TemplateParser::Parser::MergeUsingImpl(
 bool TemplateParser::Parser::ParseFieldValueFromString(
     const std::string& input, const FieldDescriptor* field, Message* output) {
   io::ArrayInputStream input_stream(input.data(), input.size());
-  bool allow_unknown_extension = true;
   int recursion_limit = std::numeric_limits<int>::max();
   ParserImpl parser(
       output->GetDescriptor(), &input_stream, error_collector_, finder_,
       parse_info_tree_, ParserImpl::ALLOW_SINGULAR_OVERWRITES,
       allow_case_insensitive_field_, allow_unknown_field_,
-      allow_unknown_extension, allow_unknown_enum_, allow_field_number_,
+      allow_unknown_extension_, allow_unknown_enum_, allow_field_number_,
       allow_relaxed_whitespace_, allow_partial_, recursion_limit);
   return parser.ParseField(field, output);
 }
@@ -37,6 +37,10 @@ class TemplateParser {
   Parser();
   ~Parser();
 
+  void set_allow_unknown_extension(bool allow_unknown_extension) {
+    allow_unknown_extension_ = allow_unknown_extension;
+  }
+
   // Like TextFormat::Parse().
   bool Parse(proto_ns::io::ZeroCopyInputStream* input,
              proto_ns::Message* output);
@@ -99,6 +103,7 @@ class TemplateParser {
   bool allow_partial_;
   bool allow_case_insensitive_field_;
   bool allow_unknown_field_;
+  bool allow_unknown_extension_;
   bool allow_unknown_enum_;
   bool allow_field_number_;
   bool allow_relaxed_whitespace_;
@@ -34,6 +34,13 @@ typedef int DimensionsPacketType[2];
 
 namespace mediapipe {
 
+constexpr char kLeftRightPaddingTag[] = "LEFT_RIGHT_PADDING";
+constexpr char kTopBottomPaddingTag[] = "TOP_BOTTOM_PADDING";
+constexpr char kOptionsTag[] = "OPTIONS";
+constexpr char kOutputDimensionsTag[] = "OUTPUT_DIMENSIONS";
+constexpr char kRotationTag[] = "ROTATION";
+constexpr char kImageTag[] = "IMAGE";
+
 using Image = mediapipe::Image;
 
 // Scales, rotates, horizontal or vertical flips the image.
@@ -102,41 +109,41 @@ REGISTER_CALCULATOR(GlScalerCalculator);
 
 // static
 absl::Status GlScalerCalculator::GetContract(CalculatorContract* cc) {
-  if (cc->Inputs().HasTag("IMAGE")) {
-    cc->Inputs().Tag("IMAGE").Set<Image>();
+  if (cc->Inputs().HasTag(kImageTag)) {
+    cc->Inputs().Tag(kImageTag).Set<Image>();
   } else {
     TagOrIndex(&cc->Inputs(), "VIDEO", 0).Set<GpuBuffer>();
   }
-  if (cc->Outputs().HasTag("IMAGE")) {
-    cc->Outputs().Tag("IMAGE").Set<Image>();
+  if (cc->Outputs().HasTag(kImageTag)) {
+    cc->Outputs().Tag(kImageTag).Set<Image>();
   } else {
     TagOrIndex(&cc->Outputs(), "VIDEO", 0).Set<GpuBuffer>();
   }
 
-  if (cc->Inputs().HasTag("ROTATION")) {
-    cc->Inputs().Tag("ROTATION").Set<int>();
+  if (cc->Inputs().HasTag(kRotationTag)) {
+    cc->Inputs().Tag(kRotationTag).Set<int>();
   }
-  if (cc->Inputs().HasTag("OUTPUT_DIMENSIONS")) {
-    cc->Inputs().Tag("OUTPUT_DIMENSIONS").Set<DimensionsPacketType>();
+  if (cc->Inputs().HasTag(kOutputDimensionsTag)) {
+    cc->Inputs().Tag(kOutputDimensionsTag).Set<DimensionsPacketType>();
   }
   MP_RETURN_IF_ERROR(GlCalculatorHelper::UpdateContract(cc));
 
-  if (cc->InputSidePackets().HasTag("OPTIONS")) {
-    cc->InputSidePackets().Tag("OPTIONS").Set<GlScalerCalculatorOptions>();
+  if (cc->InputSidePackets().HasTag(kOptionsTag)) {
+    cc->InputSidePackets().Tag(kOptionsTag).Set<GlScalerCalculatorOptions>();
   }
   if (HasTagOrIndex(&cc->InputSidePackets(), "OUTPUT_DIMENSIONS", 1)) {
     TagOrIndex(&cc->InputSidePackets(), "OUTPUT_DIMENSIONS", 1)
         .Set<DimensionsPacketType>();
   }
-  if (cc->InputSidePackets().HasTag("ROTATION")) {
+  if (cc->InputSidePackets().HasTag(kRotationTag)) {
     // Counterclockwise rotation.
-    cc->InputSidePackets().Tag("ROTATION").Set<int>();
+    cc->InputSidePackets().Tag(kRotationTag).Set<int>();
   }
 
-  if (cc->Outputs().HasTag("TOP_BOTTOM_PADDING") &&
-      cc->Outputs().HasTag("LEFT_RIGHT_PADDING")) {
-    cc->Outputs().Tag("TOP_BOTTOM_PADDING").Set<float>();
-    cc->Outputs().Tag("LEFT_RIGHT_PADDING").Set<float>();
+  if (cc->Outputs().HasTag(kTopBottomPaddingTag) &&
+      cc->Outputs().HasTag(kLeftRightPaddingTag)) {
+    cc->Outputs().Tag(kTopBottomPaddingTag).Set<float>();
+    cc->Outputs().Tag(kLeftRightPaddingTag).Set<float>();
   }
   return absl::OkStatus();
 }
@@ -187,8 +194,8 @@ absl::Status GlScalerCalculator::Open(CalculatorContext* cc) {
     dst_width_ = dimensions[0];
     dst_height_ = dimensions[1];
   }
-  if (cc->InputSidePackets().HasTag("ROTATION")) {
-    rotation_ccw = cc->InputSidePackets().Tag("ROTATION").Get<int>();
+  if (cc->InputSidePackets().HasTag(kRotationTag)) {
+    rotation_ccw = cc->InputSidePackets().Tag(kRotationTag).Get<int>();
   }
 
   MP_RETURN_IF_ERROR(FrameRotationFromInt(&rotation_, rotation_ccw));
@@ -197,22 +204,22 @@ absl::Status GlScalerCalculator::Open(CalculatorContext* cc) {
 }
 
 absl::Status GlScalerCalculator::Process(CalculatorContext* cc) {
-  if (cc->Inputs().HasTag("OUTPUT_DIMENSIONS")) {
-    if (cc->Inputs().Tag("OUTPUT_DIMENSIONS").IsEmpty()) {
+  if (cc->Inputs().HasTag(kOutputDimensionsTag)) {
+    if (cc->Inputs().Tag(kOutputDimensionsTag).IsEmpty()) {
       // OUTPUT_DIMENSIONS input stream is specified, but value is missing.
       return absl::OkStatus();
     }
 
     const auto& dimensions =
-        cc->Inputs().Tag("OUTPUT_DIMENSIONS").Get<DimensionsPacketType>();
+        cc->Inputs().Tag(kOutputDimensionsTag).Get<DimensionsPacketType>();
     dst_width_ = dimensions[0];
     dst_height_ = dimensions[1];
   }
 
   return helper_.RunInGlContext([this, cc]() -> absl::Status {
     const auto& input =
-        cc->Inputs().HasTag("IMAGE")
-            ? cc->Inputs().Tag("IMAGE").Get<Image>().GetGpuBuffer()
+        cc->Inputs().HasTag(kImageTag)
+            ? cc->Inputs().Tag(kImageTag).Get<Image>().GetGpuBuffer()
             : TagOrIndex(cc->Inputs(), "VIDEO", 0).Get<GpuBuffer>();
     QuadRenderer* renderer = nullptr;
     GlTexture src1;
@@ -254,8 +261,8 @@ absl::Status GlScalerCalculator::Process(CalculatorContext* cc) {
     RET_CHECK(renderer) << "Unsupported input texture type";
 
     // Override input side packet if ROTATION input packet is provided.
-    if (cc->Inputs().HasTag("ROTATION")) {
-      int rotation_ccw = cc->Inputs().Tag("ROTATION").Get<int>();
+    if (cc->Inputs().HasTag(kRotationTag)) {
+      int rotation_ccw = cc->Inputs().Tag(kRotationTag).Get<int>();
       MP_RETURN_IF_ERROR(FrameRotationFromInt(&rotation_, rotation_ccw));
     }
 
@@ -263,18 +270,18 @@ absl::Status GlScalerCalculator::Process(CalculatorContext* cc) {
     int dst_height;
     GetOutputDimensions(src1.width(), src1.height(), &dst_width, &dst_height);
 
-    if (cc->Outputs().HasTag("TOP_BOTTOM_PADDING") &&
-        cc->Outputs().HasTag("LEFT_RIGHT_PADDING")) {
+    if (cc->Outputs().HasTag(kTopBottomPaddingTag) &&
+        cc->Outputs().HasTag(kLeftRightPaddingTag)) {
       float top_bottom_padding;
      float left_right_padding;
       GetOutputPadding(src1.width(), src1.height(), dst_width, dst_height,
                        &top_bottom_padding, &left_right_padding);
       cc->Outputs()
-          .Tag("TOP_BOTTOM_PADDING")
+          .Tag(kTopBottomPaddingTag)
          .AddPacket(
              MakePacket<float>(top_bottom_padding).At(cc->InputTimestamp()));
       cc->Outputs()
-          .Tag("LEFT_RIGHT_PADDING")
+          .Tag(kLeftRightPaddingTag)
          .AddPacket(
              MakePacket<float>(left_right_padding).At(cc->InputTimestamp()));
     }
@@ -304,9 +311,9 @@ absl::Status GlScalerCalculator::Process(CalculatorContext* cc) {
 
     glFlush();
 
-    if (cc->Outputs().HasTag("IMAGE")) {
+    if (cc->Outputs().HasTag(kImageTag)) {
       auto output = dst.GetFrame<Image>();
-      cc->Outputs().Tag("IMAGE").Add(output.release(), cc->InputTimestamp());
+      cc->Outputs().Tag(kImageTag).Add(output.release(), cc->InputTimestamp());
     } else {
       auto output = dst.GetFrame<GpuBuffer>();
       TagOrIndex(&cc->Outputs(), "VIDEO", 0)
@@ -24,6 +24,7 @@ package(default_visibility = ["//visibility:public"])
 cc_library(
     name = "pose_tracking_gpu_deps",
     deps = [
+        "//mediapipe/calculators/core:constant_side_packet_calculator",
         "//mediapipe/calculators/core:flow_limiter_calculator",
         "//mediapipe/graphs/pose_tracking/subgraphs:pose_renderer_gpu",
         "//mediapipe/modules/pose_landmark:pose_landmark_gpu",
@@ -40,6 +41,7 @@ mediapipe_binary_graph(
 cc_library(
     name = "pose_tracking_cpu_deps",
     deps = [
+        "//mediapipe/calculators/core:constant_side_packet_calculator",
         "//mediapipe/calculators/core:flow_limiter_calculator",
         "//mediapipe/graphs/pose_tracking/subgraphs:pose_renderer_cpu",
         "//mediapipe/modules/pose_landmark:pose_landmark_cpu",
@@ -8,6 +8,17 @@ output_stream: "output_video"
 # Pose landmarks. (NormalizedLandmarkList)
 output_stream: "pose_landmarks"
 
+# Generates side packet to enable segmentation.
+node {
+  calculator: "ConstantSidePacketCalculator"
+  output_side_packet: "PACKET:enable_segmentation"
+  node_options: {
+    [type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
+      packet { bool_value: true }
+    }
+  }
+}
+
 # Throttles the images flowing downstream for flow control. It passes through
 # the very first incoming image unaltered, and waits for downstream nodes
 # (calculators and subgraphs) in the graph to finish their tasks before it
@@ -32,8 +43,10 @@ node {
 # Subgraph that detects poses and corresponding landmarks.
 node {
   calculator: "PoseLandmarkCpu"
+  input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
   input_stream: "IMAGE:throttled_input_video"
   output_stream: "LANDMARKS:pose_landmarks"
+  output_stream: "SEGMENTATION_MASK:segmentation_mask"
   output_stream: "DETECTION:pose_detection"
   output_stream: "ROI_FROM_LANDMARKS:roi_from_landmarks"
 }
@@ -43,7 +56,8 @@ node {
   calculator: "PoseRendererCpu"
   input_stream: "IMAGE:throttled_input_video"
   input_stream: "LANDMARKS:pose_landmarks"
-  input_stream: "ROI:roi_from_landmarks"
+  input_stream: "SEGMENTATION_MASK:segmentation_mask"
   input_stream: "DETECTION:pose_detection"
+  input_stream: "ROI:roi_from_landmarks"
   output_stream: "IMAGE:output_video"
 }
@@ -8,6 +8,17 @@ output_stream: "output_video"
 # Pose landmarks. (NormalizedLandmarkList)
 output_stream: "pose_landmarks"
 
+# Generates side packet to enable segmentation.
+node {
+  calculator: "ConstantSidePacketCalculator"
+  output_side_packet: "PACKET:enable_segmentation"
+  node_options: {
+    [type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
+      packet { bool_value: true }
+    }
+  }
+}
+
 # Throttles the images flowing downstream for flow control. It passes through
 # the very first incoming image unaltered, and waits for downstream nodes
 # (calculators and subgraphs) in the graph to finish their tasks before it
@@ -32,8 +43,10 @@ node {
 # Subgraph that detects poses and corresponding landmarks.
 node {
   calculator: "PoseLandmarkGpu"
+  input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
   input_stream: "IMAGE:throttled_input_video"
   output_stream: "LANDMARKS:pose_landmarks"
+  output_stream: "SEGMENTATION_MASK:segmentation_mask"
   output_stream: "DETECTION:pose_detection"
   output_stream: "ROI_FROM_LANDMARKS:roi_from_landmarks"
 }
@@ -43,7 +56,8 @@ node {
   calculator: "PoseRendererGpu"
   input_stream: "IMAGE:throttled_input_video"
   input_stream: "LANDMARKS:pose_landmarks"
-  input_stream: "ROI:roi_from_landmarks"
+  input_stream: "SEGMENTATION_MASK:segmentation_mask"
   input_stream: "DETECTION:pose_detection"
+  input_stream: "ROI:roi_from_landmarks"
   output_stream: "IMAGE:output_video"
 }
@@ -27,6 +27,7 @@ mediapipe_simple_subgraph(
     register_as = "PoseRendererGpu",
     deps = [
         "//mediapipe/calculators/core:split_landmarks_calculator",
+        "//mediapipe/calculators/image:recolor_calculator",
         "//mediapipe/calculators/util:annotation_overlay_calculator",
         "//mediapipe/calculators/util:detections_to_render_data_calculator",
         "//mediapipe/calculators/util:landmarks_to_render_data_calculator",
@@ -41,6 +42,7 @@ mediapipe_simple_subgraph(
     register_as = "PoseRendererCpu",
     deps = [
         "//mediapipe/calculators/core:split_landmarks_calculator",
+        "//mediapipe/calculators/image:recolor_calculator",
         "//mediapipe/calculators/util:annotation_overlay_calculator",
         "//mediapipe/calculators/util:detections_to_render_data_calculator",
        "//mediapipe/calculators/util:landmarks_to_render_data_calculator",